Compare commits
656 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 86ea0b9212 | |||
| f8f7c996ca | |||
| c7639b9eec | |||
| 5b7d8de9d1 | |||
| 1e74b09f78 | |||
| dac9abeb79 | |||
| a03b36fb5a | |||
| a862309682 | |||
| cb01535565 | |||
| c4af0a4df0 | |||
| f113c3d21a | |||
| f43ebbb6fa | |||
| fb70bc6ffb | |||
| 4c7552ef88 | |||
| c0cefac546 | |||
| b2283a5b04 | |||
| 1ed0c61b56 | |||
| f212bc9bc8 | |||
| b8c62f95ae | |||
| 2653c617f8 | |||
| 4dd82bf4c9 | |||
| 33588ff090 | |||
| f8ab484cd2 | |||
| 2c39cd0646 | |||
| 64ae391a4a | |||
| 127d9032c3 | |||
| 81a817a39f | |||
| 51ab988e36 | |||
| 5517636850 | |||
| 2be6e02800 | |||
| 4eada16b94 | |||
| c66d776f8a | |||
| 4b1317789d | |||
| 8b7d8073d9 | |||
| 2aa1ea39a0 | |||
| cd789ed138 | |||
| 5641456ba0 | |||
| 29c1f56fcb | |||
| f4edab8946 | |||
| f04de78682 | |||
| 260b80c2f1 | |||
| cb9f01c106 | |||
| e701c881a1 | |||
| d78aa02833 | |||
| 4e54a9b496 | |||
| 1cb25946dd | |||
| e982be4109 | |||
| 1a8bbe7ff8 | |||
| 0ec9fc9027 | |||
| 07a7a474f8 | |||
| ce84f8d046 | |||
| 82f494495c | |||
| 779ec87659 | |||
| d5d4242015 | |||
| 2f93963a0a | |||
| 5073ceff13 | |||
| d15e12750b | |||
| 0c7e3898e8 | |||
| 03089613dc | |||
| 21a8459b18 | |||
| 7f1f349300 | |||
| 258f30ec5c | |||
| e96d09dee7 | |||
| ff06029253 | |||
| 15702bd9f4 | |||
| 909a7e6a91 | |||
| 2e2a47a12b | |||
| 6170f07154 | |||
| 3ad9bb59ce | |||
| c00b864017 | |||
| 404c0376d3 | |||
| 8a98aa9eaa | |||
| 80cf70d151 | |||
| ee25c61fc2 | |||
| 324c118530 | |||
| b370bc4c44 | |||
| f529d16c62 | |||
| 886fdc82d6 | |||
| 10950332a1 | |||
| 4d87adc0c8 | |||
| 13c20afe5b | |||
| d8a05807ba | |||
| 089d33b88b | |||
| b3b84c633a | |||
| 86d51bced0 | |||
| 54b864f167 | |||
| 54fecccbfb | |||
| 3745711b12 | |||
| 25bc88a438 | |||
| 9b0212d7c7 | |||
| ceaf8cd9aa | |||
| 0c3ae98fd1 | |||
| f0f64075ad | |||
| 2fae5bb340 | |||
| 9287734a24 | |||
| ff46d880cb | |||
| f78c93eaca | |||
| 1ff75403cd | |||
| 0dc8e52662 | |||
| 7c1f8a30ad | |||
| 24e545b62c | |||
| 4331b5f532 | |||
| 05db32f28f | |||
| 1cb589eadb | |||
| 6fb0dc1067 | |||
| e02a5571b6 | |||
| b097a49ed5 | |||
| 45f9966b34 | |||
| 46d8d8fc3d | |||
| 034153791b | |||
| 9399737ee6 | |||
| f7f77e587c | |||
| 7a8c077c57 | |||
| 03900b0c26 | |||
| 6be2f409e5 | |||
| 46b13b4f23 | |||
| be58bf0ab4 | |||
| 2ccef4a9f9 | |||
| f1ea12d731 | |||
| 01121d7695 | |||
| 3ed043993f | |||
| a5bdf08c1c | |||
| 88fcf01d8f | |||
| 451a858d6b | |||
| df0a0696a9 | |||
| f7341200bc | |||
| 9f252f6d41 | |||
| 397beebd21 | |||
| 7c5995f165 | |||
| aee1773e0c | |||
| ffca24435b | |||
| 2b588a2003 | |||
| 1978f24fc4 | |||
| 83d5740096 | |||
| 726380ee09 | |||
| 90599ea3c2 | |||
| 72a1f948ba | |||
| 71f22f65c4 | |||
| c9039cfd07 | |||
| f5fe575b6b | |||
| c5c78b2a66 | |||
| 390f3a49ee | |||
| dc9b44bd14 | |||
| b72e9b6a0c | |||
| b8c035e564 | |||
| eb115a1a70 | |||
| f5ca005766 | |||
| 656b9c19ea | |||
| 5855cbfcc9 | |||
| 6caa08902f | |||
| 932e07a8ee | |||
| 71d5368fea | |||
| 9f2f4d5107 | |||
| d6003c93b8 | |||
| 4055fa088d | |||
| 745a70a534 | |||
| 366e9333dd | |||
| fc1f5bfc82 | |||
| bfe33d74d3 | |||
| 9c2746fc28 | |||
| 0ad2cdef2c | |||
| 0064fad85c | |||
| 16f4978b31 | |||
| b0ec08d753 | |||
| fb8952b783 | |||
| 4216f5c028 | |||
| 539a3c5000 | |||
| 064d5707f9 | |||
| fd64f5710f | |||
| 2136a71db1 | |||
| 8308299367 | |||
| 70bed56a8a | |||
| 4c2a21832b | |||
| 356d7d4e49 | |||
| 6020e766ce | |||
| b4e963b2b1 | |||
| aebd8539ed | |||
| fea1c6b552 | |||
| fd8f5f90fd | |||
| b06fd470cc | |||
| ec1aaacb41 | |||
| bc1035c1ec | |||
| 026fd98304 | |||
| f03a4c81a5 | |||
| 79afab11c2 | |||
| 10ef102791 | |||
| 523317e760 | |||
| 82074d77b1 | |||
| 002c8359fe | |||
| 08bba20003 | |||
| 0a628d2b8f | |||
| f1969a12a1 | |||
| 3cb03fe09c | |||
| 5769144ac3 | |||
| 99c9b0a8ca | |||
| 8e9722a285 | |||
| 95276b841c | |||
| 9484d6f05e | |||
| 06f94cd476 | |||
| d4d525647c | |||
| f988c532ec | |||
| e71c8907f0 | |||
| 45ed832ec8 | |||
| a57e5f1d90 | |||
| d9fd6e0b29 | |||
| 827c11f2e1 | |||
| 647a3fabb9 | |||
| efb2a9501e | |||
| 44c009e570 | |||
| eb304b6804 | |||
| e1b9b62c4d | |||
| ad6938f068 | |||
| 1c9ccfe77b | |||
| 1fd1e2c809 | |||
| c5e973bc5b | |||
| b288c37d91 | |||
| 2f76f22202 | |||
| f7c7809d8d | |||
| 80bd7f21eb | |||
| 994d79244e | |||
| 4b2d2c07bd | |||
| 938d05f812 | |||
| 487c4e0dbf | |||
| 09dce2046a | |||
| 65963e5647 | |||
| 69f220a7e4 | |||
| 722d3039dc | |||
| 420c29610d | |||
| 6b53fac424 | |||
| 37c54735f1 | |||
| 2f0a0b49f3 | |||
| 1a8b06385a | |||
| 22d7c204f8 | |||
| a6ae0723f9 | |||
| aa4f94ac01 | |||
| 1153a9bf01 | |||
| 3d878131b9 | |||
| 20746a0fc3 | |||
| ce062d915e | |||
| c057c5c478 | |||
| eab0ec48da | |||
| 5b40eac230 | |||
| 2d782379ab | |||
| 042981d8bb | |||
| 2c2017c7db | |||
| 4aeba4d648 | |||
| de34e29188 | |||
| 0c127a97d5 | |||
| 11f047b1ae | |||
| 43f8adef66 | |||
| 2ffb77823d | |||
| 7ba8af0247 | |||
| 814544e1a0 | |||
| 477e62a5c5 | |||
| 0a629614c2 | |||
| e2d623f0d7 | |||
| 5145bfe820 | |||
| 58f66f5c3c | |||
| 746b74238b | |||
| ae56a927cf | |||
| 40ed0a7535 | |||
| beb4d740c7 | |||
| a47b6a705e | |||
| 3bfb2db6df | |||
| d30ef15a79 | |||
| 1ebf0ca5cf | |||
| eaa545a2c4 | |||
| cbe1f09536 | |||
| 246c770d5c | |||
| e88d71d792 | |||
| 929366cc81 | |||
| bb6ed59e44 | |||
| 6400d83a46 | |||
| 507d0dac3a | |||
| f058ee0daf | |||
| a66c25452a | |||
| bfc682f758 | |||
| aedbe927cb | |||
| 340d8b45fe | |||
| c95f0fdfbb | |||
| a5b73d1108 | |||
| 6157c5ff3d | |||
| e0f0dd5d4d | |||
| 059c8198a1 | |||
| 34073d12f4 | |||
| d24d80ab43 | |||
| 123ec35569 | |||
| 73aa8b649b | |||
| 28aa74d83a | |||
| d4780d2840 | |||
| 4c7b6d82cf | |||
| 37d6b9a949 | |||
| 2664094f65 | |||
| d884fea00b | |||
| 4a4fa69e93 | |||
| 801bc388e4 | |||
| 48fcfcb89b | |||
| 07db3ce463 | |||
| f9f4449079 | |||
| 0d4236e2d4 | |||
| b2db783620 | |||
| b27c53b5b6 | |||
| 6691b26674 | |||
| 131b96ddb3 | |||
| 0803d8ebaa | |||
| 19956f74ca | |||
| dd57019c80 | |||
| 9fb265ea85 | |||
| 0f9fdfc639 | |||
| 0de087d751 | |||
| 600e58f8ef | |||
| 16131c58f9 | |||
| 5106d32342 | |||
| 1456ff6bc1 | |||
| b94fb65809 | |||
| e283d8b561 | |||
| 7cd727bbff | |||
| 5532c00b04 | |||
| 8846b8b225 | |||
| 7307c98029 | |||
| 4d129c2c6b | |||
| 1e772b7dd4 | |||
| 81bb0a01b2 | |||
| 7ae8b58e1a | |||
| dde8bf8af0 | |||
| dc4addd985 | |||
| 803f62f7b7 | |||
| 91596b31ec | |||
| a27fea4ba4 | |||
| ba9a94debc | |||
| ac80d26cab | |||
| e4aea719fa | |||
| 4b18ecbd4b | |||
| c2a4c64640 | |||
| 47045dd653 | |||
| b65a85368b | |||
| daf483b097 | |||
| 838a0c5e0c | |||
| 0ccaccfcde | |||
| d1e7f5c113 | |||
| bfb5b85c41 | |||
| effd753512 | |||
| cfc777d45d | |||
| 422f65afbe | |||
| 135b554030 | |||
| 47edb4427a | |||
| bda6c7c390 | |||
| f0f7334f31 | |||
| 669f92c34b | |||
| b657c1323d | |||
| 692f401043 | |||
| 27f91ddbe3 | |||
| 72fccb2868 | |||
| a959243282 | |||
| 42895e81a8 | |||
| fb9663599e | |||
| 005685e69a | |||
| eb70f91db9 | |||
| a3eaf6130e | |||
| 2ce65ca45a | |||
| 46a14631ea | |||
| 2699cd221f | |||
| 2a7851c814 | |||
| 1356cc8e3a | |||
| 523966eaf2 | |||
| 21f5db5661 | |||
| 6b52c41b97 | |||
| 8c898bd356 | |||
| e725a73c8f | |||
| 645abfe72c | |||
| 17886bb9fa | |||
| 5b6cf4f15a | |||
| ca1d5e3a76 | |||
| 52789abda7 | |||
| 54f1f1feaa | |||
| ea33f4150f | |||
| 7ff52e60a2 | |||
| e5420e4639 | |||
| 393469ddfd | |||
| 0b03a7ab00 | |||
| dd13010bb5 | |||
| e3bd89c9e4 | |||
| 00865db0f6 | |||
| 8635abe79f | |||
| 8fbe6b42de | |||
| db12e7b563 | |||
| 77c9bda3e5 | |||
| 54547c797a | |||
| 7e0b20e8fb | |||
| 85288dccb5 | |||
| d973831dc1 | |||
| 12502c020c | |||
| ce48c317b2 | |||
| 41a277237c | |||
| 721ff2874f | |||
| 3cdca22b9d | |||
| 346611c5da | |||
| a8e538ad29 | |||
| 95ff061cf6 | |||
| 5bb5e29ffb | |||
| ac3e0b16e4 | |||
| 970b75b88d | |||
| 8f6b40c8d0 | |||
| ccebd677e3 | |||
| 75625f72f8 | |||
| f6dbe1a6bd | |||
| a914283a15 | |||
| 2a4f4d47e2 | |||
| 50350972a5 | |||
| cdb69f99a1 | |||
| 4786822e6d | |||
| 9c56f29267 | |||
| 1ee4f4c93b | |||
| 9e302542ed | |||
| 3409f8a726 | |||
| 94bfa4233d | |||
| 9c08c34007 | |||
| 880ffb4bf1 | |||
| d987c681b7 | |||
| 2ef141a5c5 | |||
| 809b97d4f9 | |||
| 4a1342b654 | |||
| fb200875d3 | |||
| 53bc79938c | |||
| 3866c1be9e | |||
| ca65ffe864 | |||
| c9638f704f | |||
| 39c57e7925 | |||
| 1b5c39dc1b | |||
| 379fca8602 | |||
| 9716f40140 | |||
| 61d346dd0a | |||
| 5edfc00b2d | |||
| 5905dcf384 | |||
| 67046273c7 | |||
| b4fd2fe40f | |||
| 7113824c59 | |||
| a2e782d07c | |||
| 4b2d030d7a | |||
| e98c97dbb1 | |||
| fd4d570b59 | |||
| 9892532aae | |||
| 66422332c4 | |||
| 8b1eb15939 | |||
| 06df4661bc | |||
| eaa126906f | |||
| 1c7cbbc27d | |||
| 0eed5ced7d | |||
| 30f3ac4889 | |||
| 0212796696 | |||
| 6c723f8329 | |||
| b1bfbbc371 | |||
| ee8eabc5ed | |||
| cf6bb0bd7a | |||
| 93b542dad2 | |||
| ec6324473a | |||
| 263afb8990 | |||
| 7016161206 | |||
| 470ef5721f | |||
| fd2c8afd33 | |||
| 8c007219f5 | |||
| a425e5ceff | |||
| d0fd3533b5 | |||
| 7d225750ac | |||
| 286319b6ec | |||
| fef323ab7d | |||
| 05c29c8c77 | |||
| d18d5c96d9 | |||
| 1da4345a50 | |||
| c5b9f4e0fa | |||
| 5bf361a1ac | |||
| e07d3b60ba | |||
| 1e2d5cf742 | |||
| 694e024ba1 | |||
| 6862425215 | |||
| 54c8074e51 | |||
| 71e1fb6dcf | |||
| 364187861d | |||
| 8a53a38543 | |||
| bc787cdf51 | |||
| dcf5181e28 | |||
| 61452d56d3 | |||
| be204ff119 | |||
| 8a865a1ce6 | |||
| a29c3c6abe | |||
| ea6fd30a30 | |||
| 8dbe9a415c | |||
| 222398154e | |||
| 3030025ea3 | |||
| 40233e66cb | |||
| 2ea75f7f76 | |||
| dbd393da58 | |||
| b9f72151ea | |||
| dc2989a47d | |||
| c86e558a57 | |||
| 3c8c1d1f5a | |||
| 1683e5b744 | |||
| 31fc656721 | |||
| 79f872c77c | |||
| 22f158e749 | |||
| ff1eac0b20 | |||
| f2d3fed9c7 | |||
| cbbdc5a820 | |||
| 8a614001fd | |||
| 7a50f2922a | |||
| da0f4ae7cf | |||
| d12310bb53 | |||
| 211b8ccfd0 | |||
| f352f9f58b | |||
| 0d70ee1abc | |||
| 032ca8141a | |||
| 3acf6e5180 | |||
| 14f2b0c756 | |||
| e0a4775205 | |||
| d056eb545f | |||
| 10f8e1f597 | |||
| 6cc789d800 | |||
| c214f38841 | |||
| 392b83c230 | |||
| 96bebd49d3 | |||
| 92950f1b88 | |||
| 07b5874802 | |||
| 6a62586a59 | |||
| 883abe7877 | |||
| fc58046a34 | |||
| b6a1eb26e7 | |||
| 42169397fe | |||
| 870d68ec1c | |||
| 12ef7f62c2 | |||
| 8b7ea67edc | |||
| 182a493b6a | |||
| 4f7781b7a2 | |||
| 3579f2fd09 | |||
| 34b8d938f7 | |||
| ea963af29b | |||
| 5ea5f6337d | |||
| 292d0a2665 | |||
| 057bdce751 | |||
| f051cc768e | |||
| 985f4075f4 | |||
| d88abc6271 | |||
| 63b99338d7 | |||
| bd3503f3c8 | |||
| d7f94076bf | |||
| 10879c8bf3 | |||
| b48d126118 | |||
| c2c2707fb6 | |||
| 5e16edc003 | |||
| e84b5e3d5d | |||
| 4d65d03074 | |||
| 222e8d3d09 | |||
| 92c7e41439 | |||
| 55f941cf18 | |||
| fa6bb1ee17 | |||
| 58ae979904 | |||
| e8d63ef273 | |||
| 41f2ae6faa | |||
| 6cf9b296e5 | |||
| 1301e66e90 | |||
| 549a8b43fe | |||
| 2c33d797ce | |||
| 5c05cfa5bc | |||
| 3e884d4b76 | |||
| 66c80aa878 | |||
| e51aba743a | |||
| 55dea38b6b | |||
| d516c93bfc | |||
| e520418f6a | |||
| ecabf88c3a | |||
| 8801f7e6de | |||
| d52ff10186 | |||
| 4ee65e0445 | |||
| 1dfc45722b | |||
| bc8e29e92a | |||
| c5df7ca990 | |||
| bda85b290e | |||
| b781602474 | |||
| 56ad1d1c60 | |||
| 744ad1deda | |||
| aee9125c96 | |||
| 262f97ce33 | |||
| 4880b71246 | |||
| 5f220b652d | |||
| f533c30564 | |||
| 2b905ae996 | |||
| c154cf9f23 | |||
| 90ec62d657 | |||
| 09ae96e4da | |||
| b664efc3f1 | |||
| 39a523c188 | |||
| d1c708e8c3 | |||
| 954465f2d6 | |||
| fb75e9e5a2 | |||
| ef8f9ce15b | |||
| 0aec913eee | |||
| fa064b6c1b | |||
| 7f151a0d6a | |||
| 8b20799a34 | |||
| 6477a36ae1 | |||
| a7a56839a9 | |||
| b33656c02c | |||
| c5ac36affe | |||
| e4f87e1a9b | |||
| b7b902f108 | |||
| 447cd8511c | |||
| 220c749af3 | |||
| 9e6d38dfea | |||
| 1283ac01bf | |||
| dbcd52da81 | |||
| 3e370ce967 | |||
| 0bc11da598 | |||
| 0a6d2bed2e | |||
| 2059e69e99 | |||
| 10523e98c5 | |||
| cae9bf99ff | |||
| 7decbce08d | |||
| c0f2a550f5 | |||
| 6688479c1c | |||
| 3dc8ae1f41 | |||
| 1290a9863f | |||
| 282a3bef73 | |||
| 1b9ce3bac7 | |||
| 1931877756 | |||
| 646265791a | |||
| e38e302b6d | |||
| 4ff19970dd | |||
| 267d9e505b | |||
| 979e0c4dd4 | |||
| 24a446bd3a | |||
| 7a362406d5 | |||
| cc0ecb49d4 | |||
| 216e02111e | |||
| 59f573e754 | |||
| d993c4883e | |||
| f81a500d72 | |||
| 89711ff036 | |||
| dc8fdc25f5 | |||
| 4f5222df1c | |||
| 1a0db9032d | |||
| b4a13562a2 | |||
| fa3225a7cf | |||
| 6aef69cc81 | |||
| 74665283ed | |||
| 4ce241893b | |||
| 784eec7748 | |||
| eeab6ba82c | |||
| 516861e0ae | |||
| 87a7a2cc59 | |||
| 8f86d76db6 | |||
| 290c162094 | |||
| 63a7e8feac | |||
| e3b4512c47 | |||
| c57204ff2f | |||
| c147f19c3a | |||
| 998ff2e4e6 | |||
| 0dd3f2e137 |
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
echo 'Activating update_sitesmd hook script...'
|
||||
poetry run update_sitesmd
|
||||
@@ -0,0 +1,5 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
patreon: soxoj
|
||||
github: soxoj
|
||||
buy_me_a_coffee: soxoj
|
||||
@@ -0,0 +1,28 @@
|
||||
---
|
||||
name: Maigret bug report
|
||||
about: I want to report a bug in Maigret functionality
|
||||
title: ''
|
||||
labels: bug
|
||||
assignees: soxoj
|
||||
|
||||
---
|
||||
|
||||
## Checklist
|
||||
|
||||
- [ ] I'm reporting a bug in Maigret functionality
|
||||
- [ ] I've checked for similar bug reports including closed ones
|
||||
- [ ] I've checked for pull requests that attempt to fix this bug
|
||||
|
||||
## Description
|
||||
|
||||
Info about Maigret version you are running and environment (`--version`, operation system, ISP provider):
|
||||
<INSERT VERSION INFO HERE>
|
||||
|
||||
How to reproduce this bug (commandline options / conditions):
|
||||
<INSERT EXAMPLE OF CLI COMMAND HERE>
|
||||
|
||||
<DESCRIPTION>
|
||||
|
||||
<PASTE SCREENSHOT>
|
||||
|
||||
<ATTACH LOG FILE>
|
||||
@@ -0,0 +1,20 @@
|
||||
---
|
||||
name: Report invalid result
|
||||
about: I want to report invalid result of Maigret search
|
||||
title: Invalid result
|
||||
labels: false-result
|
||||
assignees: soxoj
|
||||
|
||||
---
|
||||
|
||||
Invalid link: <INSERT LINK HERE>
|
||||
|
||||
<!--
|
||||
|
||||
Put x into the box
|
||||
|
||||
[ ] ==> [x]
|
||||
|
||||
-->
|
||||
|
||||
- [ ] I'm sure that the link leads to "not found" page
|
||||
@@ -0,0 +1,6 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
@@ -27,6 +27,7 @@ jobs:
|
||||
with:
|
||||
push: true
|
||||
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
|
||||
platforms: linux/amd64,linux/arm64
|
||||
-
|
||||
name: Image digest
|
||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
# For most projects, this workflow file will not need changing; you simply need
|
||||
# to commit it to your repository.
|
||||
#
|
||||
# You may wish to alter this file to override the set of languages analyzed,
|
||||
# or to provide custom queries or build logic.
|
||||
#
|
||||
# ******** NOTE ********
|
||||
# We have attempted to detect the languages in your repository. Please check
|
||||
# the `language` matrix defined below to confirm you have the correct set of
|
||||
# supported CodeQL languages.
|
||||
#
|
||||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
schedule:
|
||||
- cron: '23 6 * * 6'
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'python' ]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
||||
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v1
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v1
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v1
|
||||
@@ -0,0 +1,55 @@
|
||||
name: Package exe with PyInstaller - Windows
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, dev ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: PyInstaller Windows Build
|
||||
uses: JackMcKew/pyinstaller-action-windows@main
|
||||
with:
|
||||
path: pyinstaller
|
||||
|
||||
- name: Upload PyInstaller Binary to Workflow as Artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: maigret_standalone_win32
|
||||
path: pyinstaller/dist/windows
|
||||
|
||||
- name: Download PyInstaller Binary
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: maigret_standalone_win32
|
||||
|
||||
- name: Create New Release and Upload PyInstaller Binary to Release
|
||||
uses: ncipollo/release-action@v1.14.0
|
||||
id: create_release
|
||||
with:
|
||||
allowUpdates: true
|
||||
draft: false
|
||||
prerelease: false
|
||||
artifactErrorsFailBuild: true
|
||||
makeLatest: true
|
||||
replacesArtifacts: true
|
||||
artifacts: maigret_standalone.exe
|
||||
name: Development Windows Release [${{ github.ref_name }}]
|
||||
tag: ${{ github.ref_name }}
|
||||
body: |
|
||||
This is a development release built from the **${{ github.ref_name }}** branch.
|
||||
|
||||
Take into account that `dev` releases may be unstable.
|
||||
Please, use [the development release](https://github.com/soxoj/maigret/releases/tag/main) build from the **main** branch.
|
||||
|
||||
Instructions:
|
||||
- Download the attached file `maigret_standalone.exe` to get the Windows executable.
|
||||
- Video guide on how to run it: https://youtu.be/qIgwTZOmMmM
|
||||
- For detailed documentation, visit: https://maigret.readthedocs.io/en/latest/
|
||||
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
@@ -1,13 +1,11 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Python package
|
||||
name: Linting and testing
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
types: [opened, synchronize, reopened]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -15,10 +13,11 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.6.9, 3.7, 3.8, 3.9]
|
||||
python-version: ["3.10", "3.11", "3.12"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
@@ -26,8 +25,15 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install -r test-requirements.txt
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Test with pytest
|
||||
python -m pip install poetry
|
||||
python -m poetry install --with dev
|
||||
- name: Test with Coverage and Pytest (Fail if coverage is low)
|
||||
run: |
|
||||
pytest --reruns 3 --reruns-delay 5
|
||||
poetry run coverage run --source=./maigret -m pytest --reruns 3 --reruns-delay 5 tests
|
||||
poetry run coverage report --fail-under=60
|
||||
poetry run coverage html
|
||||
- name: Upload coverage report
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: htmlcov
|
||||
path: htmlcov
|
||||
@@ -1,31 +1,30 @@
|
||||
# This workflow will upload a Python Package using Twine when a release is created
|
||||
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
|
||||
|
||||
name: Upload Python Package
|
||||
name: Upload Python Package to PyPI when a Release is Created
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
|
||||
pypi-publish:
|
||||
name: Publish release to PyPI
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/maigret
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install setuptools wheel twine
|
||||
- name: Build and publish
|
||||
env:
|
||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
run: |
|
||||
python setup.py sdist bdist_wheel
|
||||
twine upload dist/*
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.x"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install setuptools wheel
|
||||
- name: Build package
|
||||
run: |
|
||||
python setup.py sdist bdist_wheel # Could also be python -m build
|
||||
- name: Publish package distributions to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
@@ -0,0 +1,34 @@
|
||||
name: Update sites rating and statistics
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ dev ]
|
||||
types: [opened, synchronize]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2.3.2
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository.
|
||||
|
||||
- name: build application
|
||||
run: |
|
||||
pip3 install .
|
||||
python3 ./utils/update_site_data.py --empty-only
|
||||
|
||||
- name: Commit and push changes
|
||||
run: |
|
||||
git config --global user.name "Maigret autoupdate"
|
||||
git config --global user.email "soxoj@protonmail.com"
|
||||
echo `git name-rev ${{ github.event.pull_request.head.sha }} --name-only`
|
||||
export BRANCH=`git name-rev ${{ github.event.pull_request.head.sha }} --name-only | sed 's/remotes\/origin\///'`
|
||||
echo $BRANCH
|
||||
git remote -v
|
||||
git checkout $BRANCH
|
||||
git add sites.md
|
||||
git commit -m "Updated site list and statistics"
|
||||
git push origin $BRANCH
|
||||
@@ -1,5 +1,6 @@
|
||||
# Virtual Environment
|
||||
venv/
|
||||
.venv/
|
||||
|
||||
# Editor Configurations
|
||||
.vscode/
|
||||
@@ -15,6 +16,10 @@ src/
|
||||
.ipynb_checkpoints
|
||||
*.ipynb
|
||||
|
||||
# Logs and backups
|
||||
*.log
|
||||
*.bak
|
||||
|
||||
# Output files, except requirements.txt
|
||||
*.txt
|
||||
!requirements.txt
|
||||
@@ -30,4 +35,11 @@ src/
|
||||
.coverage
|
||||
dist/
|
||||
htmlcov/
|
||||
/test_*
|
||||
/test_*
|
||||
|
||||
# Maigret files
|
||||
settings.json
|
||||
|
||||
# other
|
||||
*.egg-info
|
||||
build
|
||||
@@ -0,0 +1,16 @@
|
||||
version: 2
|
||||
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.10"
|
||||
|
||||
sphinx:
|
||||
configuration: docs/source/conf.py
|
||||
|
||||
formats:
|
||||
- pdf
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/requirements.txt
|
||||
@@ -2,6 +2,223 @@
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.4.4] - 2022-09-03
|
||||
* Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433
|
||||
* Drop Python 3.6 support by @soxoj in https://github.com/soxoj/maigret/pull/434
|
||||
* Bump xhtml2pdf from 0.2.5 to 0.2.7 by @dependabot in https://github.com/soxoj/maigret/pull/409
|
||||
* Bump reportlab from 3.6.6 to 3.6.9 by @dependabot in https://github.com/soxoj/maigret/pull/403
|
||||
* Bump markupsafe from 2.0.1 to 2.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/389
|
||||
* Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/384
|
||||
* Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/438
|
||||
* Update GH actions by @soxoj in https://github.com/soxoj/maigret/pull/439
|
||||
* Bump tqdm from 4.63.0 to 4.64.0 by @dependabot in https://github.com/soxoj/maigret/pull/440
|
||||
* Bump jinja2 from 3.0.3 to 3.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/441
|
||||
* Bump soupsieve from 2.3.1 to 2.3.2 by @dependabot in https://github.com/soxoj/maigret/pull/436
|
||||
* Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/442
|
||||
* Bump pyvis from 0.1.9 to 0.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/443
|
||||
* Bump pypdf2 from 1.27.4 to 1.27.6 by @dependabot in https://github.com/soxoj/maigret/pull/448
|
||||
* Bump typing-extensions from 4.1.1 to 4.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/447
|
||||
* Bump soupsieve from 2.3.2 to 2.3.2.post1 by @dependabot in https://github.com/soxoj/maigret/pull/444
|
||||
* Bump pypdf2 from 1.27.6 to 1.27.7 by @dependabot in https://github.com/soxoj/maigret/pull/449
|
||||
* Bump pypdf2 from 1.27.7 to 1.27.8 by @dependabot in https://github.com/soxoj/maigret/pull/450
|
||||
* XMind 8 report warning and some docs update by @soxoj in https://github.com/soxoj/maigret/pull/452
|
||||
* False positive fixes 24.04.22 by @soxoj in https://github.com/soxoj/maigret/pull/455
|
||||
* Bump pypdf2 from 1.27.8 to 1.27.9 by @dependabot in https://github.com/soxoj/maigret/pull/456
|
||||
* Bump pytest from 7.0.1 to 7.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/457
|
||||
* Bump jinja2 from 3.1.1 to 3.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/460
|
||||
* Ubisoft forums addition by @fen0s in https://github.com/soxoj/maigret/pull/461
|
||||
* Add BYOND, Figma, BeatStars by @fen0s in https://github.com/soxoj/maigret/pull/462
|
||||
* fix Figma username definition, add a bunch of sites by @fen0s in https://github.com/soxoj/maigret/pull/464
|
||||
* Bump pypdf2 from 1.27.9 to 1.27.10 by @dependabot in https://github.com/soxoj/maigret/pull/465
|
||||
* Bump pypdf2 from 1.27.10 to 1.27.12 by @dependabot in https://github.com/soxoj/maigret/pull/466
|
||||
* Sites fixes 05 05 22 by @soxoj in https://github.com/soxoj/maigret/pull/469
|
||||
* Bump pyvis from 0.2.0 to 0.2.1 by @dependabot in https://github.com/soxoj/maigret/pull/472
|
||||
* Social analyzer websites, also fixing presense strs by @fen0s in https://github.com/soxoj/maigret/pull/471
|
||||
* Updated logic of false positive risk estimating by @soxoj in https://github.com/soxoj/maigret/pull/475
|
||||
* Improved usability of external progressbar func by @soxoj in https://github.com/soxoj/maigret/pull/476
|
||||
* New sites added, some tags/rank update by @soxoj in https://github.com/soxoj/maigret/pull/477
|
||||
* Added new sites by @soxoj in https://github.com/soxoj/maigret/pull/480
|
||||
* Added new forums, updated ranks, some utils improvements by @soxoj in https://github.com/soxoj/maigret/pull/481
|
||||
* Disabled sites with false positives results by @soxoj in https://github.com/soxoj/maigret/pull/482
|
||||
* Bump certifi from 2021.10.8 to 2022.5.18.1 by @dependabot in https://github.com/soxoj/maigret/pull/488
|
||||
* Bump psutil from 5.9.0 to 5.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/490
|
||||
* Bump pypdf2 from 1.27.12 to 1.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/491
|
||||
* Bump pypdf2 from 1.28.1 to 1.28.2 by @dependabot in https://github.com/soxoj/maigret/pull/493
|
||||
* added and fixed some websites in data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/494
|
||||
* Bump pypdf2 from 1.28.2 to 2.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/504
|
||||
* Bump pefile from 2021.9.3 to 2022.5.30 by @dependabot in https://github.com/soxoj/maigret/pull/499
|
||||
* Updated sites list, added disabled Anilist by @soxoj in https://github.com/soxoj/maigret/pull/502
|
||||
* Bump lxml from 4.8.0 to 4.9.0 by @dependabot in https://github.com/soxoj/maigret/pull/503
|
||||
* Compatibility with Python 10 by @soxoj in https://github.com/soxoj/maigret/pull/509
|
||||
* feat: add .log & .bak files to gitignore in https://github.com/soxoj/maigret/pull/511
|
||||
* fix some sites and delete abandoned by @fen0s in https://github.com/soxoj/maigret/pull/526
|
||||
* Fixesjulyfirst by @fen0s in https://github.com/soxoj/maigret/pull/533
|
||||
* yazbel, aboutcar, zhihu by @fen0s in https://github.com/soxoj/maigret/pull/531
|
||||
* Fixes july third by @fen0s in https://github.com/soxoj/maigret/pull/535
|
||||
* Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/539
|
||||
* Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/540
|
||||
* Bump reportlab from 3.6.9 to 3.6.11 by @dependabot in https://github.com/soxoj/maigret/pull/543
|
||||
* Bump requests from 2.27.1 to 2.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/530
|
||||
* Bump pypdf2 from 2.0.0 to 2.5.0 by @dependabot in https://github.com/soxoj/maigret/pull/542
|
||||
* Bump xhtml2pdf from 0.2.7 to 0.2.8 by @dependabot in https://github.com/soxoj/maigret/pull/522
|
||||
* Bump lxml from 4.9.0 to 4.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/538
|
||||
* disable yandex music + set utf8 encoding by @fen0s in https://github.com/soxoj/maigret/pull/562
|
||||
* fix false positives by @fen0s in https://github.com/soxoj/maigret/pull/577
|
||||
* disable Instagram, fix two false positives by @fen0s in https://github.com/soxoj/maigret/pull/578
|
||||
* Bump certifi from 2022.5.18.1 to 2022.6.15 by @dependabot in https://github.com/soxoj/maigret/pull/551
|
||||
* August15 by @fen0s in https://github.com/soxoj/maigret/pull/591
|
||||
* Bump pytest-httpserver from 1.0.4 to 1.0.5 by @dependabot in https://github.com/soxoj/maigret/pull/583
|
||||
* Bump typing-extensions from 4.2.0 to 4.3.0 by @dependabot in https://github.com/soxoj/maigret/pull/549
|
||||
* Bump colorama from 0.4.4 to 0.4.5 by @dependabot in https://github.com/soxoj/maigret/pull/548
|
||||
* Bump chardet from 4.0.0 to 5.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/550
|
||||
* Bump cloudscraper from 1.2.60 to 1.2.63 by @dependabot in https://github.com/soxoj/maigret/pull/600
|
||||
* Bump flake8 from 4.0.1 to 5.0.4 by @dependabot in https://github.com/soxoj/maigret/pull/598
|
||||
* Bump attrs from 21.4.0 to 22.1.0 by @dependabot in https://github.com/soxoj/maigret/pull/597
|
||||
* Bump pytest-asyncio from 0.18.2 to 0.19.0 by @dependabot in https://github.com/soxoj/maigret/pull/601
|
||||
* Bump pypdf2 from 2.5.0 to 2.10.4 by @dependabot in https://github.com/soxoj/maigret/pull/606
|
||||
* Bump pytest from 7.1.2 to 7.1.3 by @dependabot in https://github.com/soxoj/maigret/pull/613
|
||||
* Update sites.md -Gitmemory.com suppression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/610
|
||||
* Bump cloudscraper from 1.2.63 to 1.2.64 by @dependabot in https://github.com/soxoj/maigret/pull/614
|
||||
* Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/607
|
||||
* add ProtonMail, disable 3 broken sites by @fen0s in https://github.com/soxoj/maigret/pull/619
|
||||
* Bump tqdm from 4.64.0 to 4.64.1 by @dependabot in https://github.com/soxoj/maigret/pull/618
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.3...v0.4.4
|
||||
|
||||
## [0.4.3] - 2022-04-13
|
||||
* Added Sites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/386
|
||||
* added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/390
|
||||
* Skipped broken tests by @soxoj in https://github.com/soxoj/maigret/pull/397
|
||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/401
|
||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/404
|
||||
* Updated statistics by @soxoj in https://github.com/soxoj/maigret/pull/406
|
||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/413
|
||||
* Disabled houzz.com, updated sites statistics by @soxoj in https://github.com/soxoj/maigret/pull/422
|
||||
* Fixed last false positives by @soxoj in https://github.com/soxoj/maigret/pull/424
|
||||
* Fixed actual false positives by @soxoj in https://github.com/soxoj/maigret/pull/431
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.2...v0.4.3
|
||||
|
||||
## [0.4.2] - 2022-03-07
|
||||
* [ImgBot] Optimize images by @imgbot in https://github.com/soxoj/maigret/pull/319
|
||||
* Bump pytest-asyncio from 0.17.0 to 0.17.1 by @dependabot in https://github.com/soxoj/maigret/pull/321
|
||||
* Bump pytest-asyncio from 0.17.1 to 0.17.2 by @dependabot in https://github.com/soxoj/maigret/pull/323
|
||||
* Disabled Ruboard by @soxoj in https://github.com/soxoj/maigret/pull/327
|
||||
* Disable kinooh, sites list update workflow added by @soxoj in https://github.com/soxoj/maigret/pull/329
|
||||
* Bump multidict from 5.2.0 to 6.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/332
|
||||
* Bump multidict from 6.0.1 to 6.0.2 by @dependabot in https://github.com/soxoj/maigret/pull/333
|
||||
* Bump pytest-httpserver from 1.0.3 to 1.0.4 by @dependabot in https://github.com/soxoj/maigret/pull/334
|
||||
* Bump pytest from 6.2.5 to 7.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/339
|
||||
* Bump pytest-asyncio from 0.17.2 to 0.18.0 by @dependabot in https://github.com/soxoj/maigret/pull/340
|
||||
* Bump pytest-asyncio from 0.18.0 to 0.18.1 by @dependabot in https://github.com/soxoj/maigret/pull/343
|
||||
* Bump pytest from 7.0.0 to 7.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/345
|
||||
* Bump typing-extensions from 4.0.1 to 4.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/346
|
||||
* Bump lxml from 4.7.1 to 4.8.0 by @dependabot in https://github.com/soxoj/maigret/pull/350
|
||||
* Pin reportlab version by @cyb3rk0tik in https://github.com/soxoj/maigret/pull/351
|
||||
* Fix reportlab not only for testing by @cyb3rk0tik in https://github.com/soxoj/maigret/pull/352
|
||||
* Added some scripts by @soxoj in https://github.com/soxoj/maigret/pull/355
|
||||
* Added package publishing instruction by @soxoj in https://github.com/soxoj/maigret/pull/356
|
||||
* Added DB statistics autoupdate and write to sites.md by @soxoj in https://github.com/soxoj/maigret/pull/357
|
||||
* CI autoupdate by @soxoj in https://github.com/soxoj/maigret/pull/359
|
||||
* Op.gg fixes by @soxoj in https://github.com/soxoj/maigret/pull/363
|
||||
* Wikipedia fix by @soxoj in https://github.com/soxoj/maigret/pull/365
|
||||
* Disabled Netvibes and LeetCode by @soxoj in https://github.com/soxoj/maigret/pull/366
|
||||
* Fixed several false positives, improved statistics info by @soxoj in https://github.com/soxoj/maigret/pull/368
|
||||
* Fix false positives by @soxoj in https://github.com/soxoj/maigret/pull/370
|
||||
* Fixed the rest of false positives for now by @soxoj in https://github.com/soxoj/maigret/pull/371
|
||||
* Fix false positive and CI by @soxoj in https://github.com/soxoj/maigret/pull/372
|
||||
* Added new sites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/375
|
||||
* Fixed issue with str alexaRank by @soxoj in https://github.com/soxoj/maigret/pull/382
|
||||
* Bump tqdm from 4.62.3 to 4.63.0 by @dependabot in https://github.com/soxoj/maigret/pull/374
|
||||
* Bump pytest-asyncio from 0.18.1 to 0.18.2 by @dependabot in https://github.com/soxoj/maigret/pull/380
|
||||
* @imgbot made their first contribution in https://github.com/soxoj/maigret/pull/319
|
||||
* @kustermariocoding made their first contribution in https://github.com/soxoj/maigret/pull/375
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.1...v0.4.2
|
||||
|
||||
## [0.4.1] - 2022-01-15
|
||||
* Added dozen of sites, improved submit mode by @soxoj in https://github.com/soxoj/maigret/pull/288
|
||||
* Bump requests from 2.26.0 to 2.27.0 by @dependabot in https://github.com/soxoj/maigret/pull/292
|
||||
* changed Bayoushooter to use XenForo and foursquare to use correct checkType by @antomarsi in https://github.com/soxoj/maigret/pull/289
|
||||
* Bump requests from 2.27.0 to 2.27.1 by @dependabot in https://github.com/soxoj/maigret/pull/293
|
||||
* Added aparat.com by @soxoj in https://github.com/soxoj/maigret/pull/294
|
||||
* Fixed BongaCams, links parsing improved by @soxoj in https://github.com/soxoj/maigret/pull/297
|
||||
* Temporary fix for Twitter (#299) by @soxoj in https://github.com/soxoj/maigret/pull/300
|
||||
* Fixed TikTok checks (#303) by @soxoj in https://github.com/soxoj/maigret/pull/306
|
||||
* Bump pycountry from 20.7.3 to 22.1.10 by @dependabot in https://github.com/soxoj/maigret/pull/313
|
||||
* Pornhub search improved by @soxoj in https://github.com/soxoj/maigret/pull/315
|
||||
* Codacademy fixed by @soxoj in https://github.com/soxoj/maigret/pull/316
|
||||
* Bump pytest-asyncio from 0.16.0 to 0.17.0 by @dependabot in https://github.com/soxoj/maigret/pull/314
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.0...v0.4.1
|
||||
|
||||
## [0.4.0] - 2022-01-03
|
||||
* Delayed import of requests module, speed check command, reqs updated by @soxoj in https://github.com/soxoj/maigret/pull/189
|
||||
* Snapcraft yaml added by @soxoj in https://github.com/soxoj/maigret/pull/190
|
||||
* Create codeql-analysis.yml by @soxoj in https://github.com/soxoj/maigret/pull/191
|
||||
* Move wiki pages to ReadTheDocs by @egornagornov in https://github.com/soxoj/maigret/pull/194
|
||||
* Created ReadTheDocs requirements file by @soxoj in https://github.com/soxoj/maigret/pull/195
|
||||
* Fix incompatible version requirements by @JasperJuergensen in https://github.com/soxoj/maigret/pull/196
|
||||
* Added link to documentation by @soxoj in https://github.com/soxoj/maigret/pull/198
|
||||
* Upgraded base docker image by @soxoj in https://github.com/soxoj/maigret/pull/199
|
||||
* Run CodeQL only aflter merge and each Saturday by @soxoj in https://github.com/soxoj/maigret/pull/201
|
||||
* Added cascade settings loading from /.maigret/settings.json and ./settings.json by @soxoj in https://github.com/soxoj/maigret/pull/200
|
||||
* Documentation and settings improved by @soxoj in https://github.com/soxoj/maigret/pull/203
|
||||
* New config options added by @soxoj in https://github.com/soxoj/maigret/pull/204
|
||||
* Added export of cli entrypoint by @soxoj in https://github.com/soxoj/maigret/pull/207
|
||||
* Removed redundant logging by @soxoj in https://github.com/soxoj/maigret/pull/210
|
||||
* PyInstaller workflow by @soxoj in https://github.com/soxoj/maigret/pull/206
|
||||
* Create bug.md by @soxoj in https://github.com/soxoj/maigret/pull/213
|
||||
* Fixed path and names of report files by @soxoj in https://github.com/soxoj/maigret/pull/216
|
||||
* Box drawing logic improved, added new settings by @soxoj in https://github.com/soxoj/maigret/pull/217
|
||||
* Fixes for win32 release by @soxoj in https://github.com/soxoj/maigret/pull/218
|
||||
* Bump six from 1.15.0 to 1.16.0 by @dependabot in https://github.com/soxoj/maigret/pull/221
|
||||
* Bump flake8 from 3.8.4 to 4.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/219
|
||||
* Bump aiohttp from 3.7.4 to 3.8.0 by @dependabot in https://github.com/soxoj/maigret/pull/220
|
||||
* Bump aiohttp-socks from 0.5.5 to 0.6.0 by @dependabot in https://github.com/soxoj/maigret/pull/222
|
||||
* Bump typing-extensions from 3.7.4.3 to 3.10.0.2 by @dependabot in https://github.com/soxoj/maigret/pull/224
|
||||
* Bump multidict from 5.1.0 to 5.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/225
|
||||
* Bump idna from 2.10 to 3.3 by @dependabot in https://github.com/soxoj/maigret/pull/228
|
||||
* Bump pytest-cov from 2.10.1 to 3.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/227
|
||||
* Bump mock from 4.0.2 to 4.0.3 by @dependabot in https://github.com/soxoj/maigret/pull/226
|
||||
* Bump certifi from 2020.12.5 to 2021.10.8 by @dependabot in https://github.com/soxoj/maigret/pull/233
|
||||
* Bump pytest-httpserver from 1.0.0 to 1.0.2 by @dependabot in https://github.com/soxoj/maigret/pull/232
|
||||
* Bump lxml from 4.6.3 to 4.6.4 by @dependabot in https://github.com/soxoj/maigret/pull/231
|
||||
* Bump pefile from 2019.4.18 to 2021.9.3 by @dependabot in https://github.com/soxoj/maigret/pull/229
|
||||
* Bump pytest-rerunfailures from 9.1.1 to 10.2 by @dependabot in https://github.com/soxoj/maigret/pull/230
|
||||
* Bump yarl from 1.6.3 to 1.7.2 by @dependabot in https://github.com/soxoj/maigret/pull/237
|
||||
* Bump async-timeout from 4.0.0 to 4.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/236
|
||||
* Bump psutil from 5.7.0 to 5.8.0 by @dependabot in https://github.com/soxoj/maigret/pull/234
|
||||
* Bump jinja2 from 3.0.2 to 3.0.3 by @dependabot in https://github.com/soxoj/maigret/pull/235
|
||||
* Bump pytest from 6.2.4 to 6.2.5 by @dependabot in https://github.com/soxoj/maigret/pull/238
|
||||
* Bump tqdm from 4.55.0 to 4.62.3 by @dependabot in https://github.com/soxoj/maigret/pull/242
|
||||
* Bump arabic-reshaper from 2.1.1 to 2.1.3 by @dependabot in https://github.com/soxoj/maigret/pull/243
|
||||
* Bump pytest-asyncio from 0.14.0 to 0.16.0 by @dependabot in https://github.com/soxoj/maigret/pull/240
|
||||
* Bump chardet from 3.0.4 to 4.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/241
|
||||
* Bump soupsieve from 2.1 to 2.3.1 by @dependabot in https://github.com/soxoj/maigret/pull/239
|
||||
* Bump aiohttp from 3.8.0 to 3.8.1 by @dependabot in https://github.com/soxoj/maigret/pull/246
|
||||
* Bump typing-extensions from 3.10.0.2 to 4.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/245
|
||||
* Bump aiohttp-socks from 0.6.0 to 0.6.1 by @dependabot in https://github.com/soxoj/maigret/pull/249
|
||||
* Bump aiohttp-socks from 0.6.1 to 0.7.1 by @dependabot in https://github.com/soxoj/maigret/pull/250
|
||||
* Bump typing-extensions from 4.0.0 to 4.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/253
|
||||
* Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/254
|
||||
* Disabled non-working sites by @soxoj in https://github.com/soxoj/maigret/pull/255
|
||||
* Added false results buttons to reports, fixed some falses by @soxoj in https://github.com/soxoj/maigret/pull/256
|
||||
* Fixed xHamster, added support of proxies to self-check mode by @soxoj in https://github.com/soxoj/maigret/pull/259
|
||||
* Disabled non-working sites, updated public sites list by @soxoj in https://github.com/soxoj/maigret/pull/263
|
||||
* Bump lxml from 4.6.4 to 4.6.5 by @dependabot in https://github.com/soxoj/maigret/pull/266
|
||||
* Bump lxml from 4.6.5 to 4.7.1 by @dependabot in https://github.com/soxoj/maigret/pull/269
|
||||
* Bump pytest-httpserver from 1.0.2 to 1.0.3 by @dependabot in https://github.com/soxoj/maigret/pull/270
|
||||
* Fixed failed tests (thx to Meta aka Facebook) by @soxoj in https://github.com/soxoj/maigret/pull/273
|
||||
* Fixed votetags, updated issue template by @soxoj in https://github.com/soxoj/maigret/pull/278
|
||||
* Bump async-timeout from 4.0.1 to 4.0.2 by @dependabot in https://github.com/soxoj/maigret/pull/275
|
||||
* Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/280
|
||||
* Bump attrs from 21.2.0 to 21.3.0 by @dependabot in https://github.com/soxoj/maigret/pull/281
|
||||
* Bump psutil from 5.8.0 to 5.9.0 by @dependabot in https://github.com/soxoj/maigret/pull/282
|
||||
* Bump attrs from 21.3.0 to 21.4.0 by @dependabot in https://github.com/soxoj/maigret/pull/283
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.3.1...v0.4.0
|
||||
|
||||
## [0.3.1] - 2021-10-31
|
||||
* fixed false positives
|
||||
* accelerated maigret start time by 3 times
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
|
||||
Hey! I'm really glad you're reading this. Maigret contains a lot of sites, and it is very hard to keep all the sites operational. That's why any fix is important.
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
Please read and follow the [Code of Conduct](CODE_OF_CONDUCT.md) to foster a welcoming and inclusive community.
|
||||
|
||||
## How to add a new site
|
||||
|
||||
#### Beginner level
|
||||
@@ -27,4 +31,23 @@ Always write a clear log message for your commits. One-line messages are fine fo
|
||||
|
||||
## Coding conventions
|
||||
|
||||
Start reading the code and you'll get the hang of it. ;)
|
||||
### General Guidelines
|
||||
|
||||
- Try to follow [PEP 8](https://www.python.org/dev/peps/pep-0008/) for Python code style.
|
||||
- Ensure your code passes all tests before submitting a pull request.
|
||||
|
||||
### Code Style
|
||||
|
||||
- **Indentation**: Use 4 spaces per indentation level.
|
||||
- **Imports**:
|
||||
- Standard library imports should be placed at the top.
|
||||
- Third-party imports should follow.
|
||||
- Group imports logically.
|
||||
|
||||
### Naming Conventions
|
||||
|
||||
- **Variables and Functions**: Use `snake_case`.
|
||||
- **Classes**: Use `CamelCase`.
|
||||
- **Constants**: Use `UPPER_CASE`.
|
||||
|
||||
Start reading the code and you'll get the hang of it. ;)
|
||||
@@ -1,16 +1,16 @@
|
||||
FROM python:3.9
|
||||
MAINTAINER Soxoj <soxoj@protonmail.com>
|
||||
FROM python:3.10-slim
|
||||
LABEL maintainer="Soxoj <soxoj@protonmail.com>"
|
||||
WORKDIR /app
|
||||
RUN pip install --upgrade pip
|
||||
RUN apt update && \
|
||||
apt install -y \
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y \
|
||||
gcc \
|
||||
musl-dev \
|
||||
libxml2 \
|
||||
libxml2-dev \
|
||||
libxslt-dev
|
||||
RUN apt clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/*
|
||||
ADD . .
|
||||
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install .
|
||||
libxslt-dev \
|
||||
&& \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/*
|
||||
COPY . .
|
||||
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install --no-cache-dir .
|
||||
ENTRYPOINT ["maigret"]
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
@echo off
|
||||
|
||||
REM check if running as admin
|
||||
|
||||
goto check_Permissions
|
||||
|
||||
:check_Permissions
|
||||
echo Administrative permissions required. Detecting permissions...
|
||||
|
||||
net session >nul 2>&1
|
||||
if %errorLevel% == 0 (
|
||||
goto 1
|
||||
) else (
|
||||
cls
|
||||
echo Failure: You MUST run this as administator, otherwise commands will fail.
|
||||
)
|
||||
|
||||
pause >nul
|
||||
|
||||
|
||||
|
||||
REM Step 2: Check if Python and pip3 are installed
|
||||
python --version >nul 2>&1
|
||||
if %errorlevel% neq 0 (
|
||||
echo Python is not installed. Please install Python 3.8 or higher.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
|
||||
pip3 --version >nul 2>&1
|
||||
if %errorlevel% neq 0 (
|
||||
echo pip3 is not installed. Please install pip3.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
|
||||
REM Step 3: Check Python version
|
||||
python -c "import sys; exit(0) if sys.version_info >= (3,8) else exit(1)"
|
||||
if %errorlevel% neq 0 (
|
||||
echo Python version 3.8 or higher is required.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
|
||||
|
||||
:1
|
||||
cls
|
||||
:::===============================================================
|
||||
::: ______ __ __ _ _
|
||||
::: | ____| | \/ | (_) | |
|
||||
::: | |__ __ _ ___ _ _ | \ / | __ _ _ __ _ _ __ ___| |_
|
||||
::: | __| / _` / __| | | | | |\/| |/ _` | |/ _` | '__/ _ \ __|
|
||||
::: | |___| (_| \__ \ |_| | | | | | (_| | | (_| | | | __/ |_
|
||||
::: |______\__,_|___/\__, | |_| |_|\__,_|_|\__, |_| \___|\__|
|
||||
::: __/ | __/ |
|
||||
::: |___/ |___/
|
||||
:::
|
||||
:::===============================================================
|
||||
echo.
|
||||
for /f "delims=: tokens=*" %%A in ('findstr /b ::: "%~f0"') do @echo(%%A
|
||||
echo.
|
||||
echo ----------------------------------------------------------------
|
||||
echo Python 3.8 or higher and pip3 required.
|
||||
echo ----------------------------------------------------------------
|
||||
echo Press [I] to begin installation.
|
||||
echo Press [R] If already installed.
|
||||
echo ----------------------------------------------------------------
|
||||
choice /c IR
|
||||
if %errorlevel%==1 goto install1
|
||||
if %errorlevel%==2 goto after
|
||||
|
||||
:install1
|
||||
cls
|
||||
echo ========================================================
|
||||
echo Maigret Installation Script
|
||||
echo ========================================================
|
||||
echo.
|
||||
echo --------------------------------------------------------
|
||||
echo If your pip installation is outdated, it could cause
|
||||
echo cryptography to fail on installation.
|
||||
echo --------------------------------------------------------
|
||||
echo check for and install pip updates now?
|
||||
echo --------------------------------------------------------
|
||||
choice /c YN
|
||||
if %errorlevel%==1 goto install2
|
||||
if %errorlevel%==2 goto install3
|
||||
|
||||
:install2
|
||||
cls
|
||||
python -m pip install --upgrade pip
|
||||
goto:install3
|
||||
|
||||
:install3
|
||||
cls
|
||||
echo ========================================================
|
||||
echo Maigret Installation Script
|
||||
echo ========================================================
|
||||
echo.
|
||||
echo --------------------------------------------------------
|
||||
echo Install requirements and maigret?
|
||||
echo --------------------------------------------------------
|
||||
choice /c YN
|
||||
if %errorlevel%==1 goto install4
|
||||
if %errorlevel%==2 goto 1
|
||||
|
||||
:install4
|
||||
cls
|
||||
pip install .
|
||||
pip install maigret
|
||||
goto:after
|
||||
|
||||
:after
|
||||
cls
|
||||
echo ========================================================
|
||||
echo Maigret Background Search
|
||||
echo ========================================================
|
||||
echo.
|
||||
echo --------------------------------------------------------
|
||||
echo Please Enter Username / Email
|
||||
echo --------------------------------------------------------
|
||||
set /p input=
|
||||
maigret %input%
|
||||
echo.
|
||||
echo.
|
||||
echo.
|
||||
echo.
|
||||
pause
|
||||
goto:after
|
||||
@@ -1,7 +1,7 @@
|
||||
LINT_FILES=maigret wizard.py tests
|
||||
|
||||
test:
|
||||
coverage run --source=./maigret -m pytest tests
|
||||
coverage run --source=./maigret,./maigret/web -m pytest tests
|
||||
coverage report -m
|
||||
coverage html
|
||||
|
||||
@@ -10,13 +10,19 @@ rerun-tests:
|
||||
|
||||
lint:
|
||||
@echo 'syntax errors or undefined names'
|
||||
flake8 --count --select=E9,F63,F7,F82 --show-source --statistics ${LINT_FILES} maigret.py
|
||||
flake8 --count --select=E9,F63,F7,F82 --show-source --statistics ${LINT_FILES}
|
||||
|
||||
@echo 'warning'
|
||||
flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503 ${LINT_FILES} maigret.py
|
||||
flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503,E501 ${LINT_FILES}
|
||||
|
||||
@echo 'mypy'
|
||||
mypy ${LINT_FILES}
|
||||
mypy --check-untyped-defs ${LINT_FILES}
|
||||
|
||||
speed:
|
||||
time python3 -m maigret --version
|
||||
python3 -c "import timeit; t = timeit.Timer('import maigret'); print(t.timeit(number = 1000000))"
|
||||
python3 -X importtime -c "import maigret" 2> maigret-import.log
|
||||
python3 -m tuna maigret-import.log
|
||||
|
||||
format:
|
||||
@echo 'black'
|
||||
|
||||
@@ -3,61 +3,84 @@
|
||||
<p align="center">
|
||||
<p align="center">
|
||||
<a href="https://pypi.org/project/maigret/">
|
||||
<img alt="PyPI" src="https://img.shields.io/pypi/v/maigret?style=flat-square">
|
||||
<img alt="PyPI version badge for Maigret" src="https://img.shields.io/pypi/v/maigret?style=flat-square" />
|
||||
</a>
|
||||
<a href="https://pypi.org/project/maigret/">
|
||||
<img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dw/maigret?style=flat-square">
|
||||
<a href="https://pypi.org/project/maigret/">
|
||||
<img alt="PyPI download count for Maigret" src="https://img.shields.io/pypi/dw/maigret?style=flat-square" />
|
||||
</a>
|
||||
<a href="https://pypi.org/project/maigret/">
|
||||
<img alt="Views" src="https://komarev.com/ghpvc/?username=maigret&color=brightgreen&label=views&style=flat-square">
|
||||
<a href="https://github.com/soxoj/maigret">
|
||||
<img alt="Minimum Python version required: 3.10+" src="https://img.shields.io/badge/Python-3.10%2B-brightgreen?style=flat-square" />
|
||||
</a>
|
||||
<a href="https://github.com/soxoj/maigret/blob/main/LICENSE">
|
||||
<img alt="License badge for Maigret" src="https://img.shields.io/github/license/soxoj/maigret?style=flat-square" />
|
||||
</a>
|
||||
<a href="https://github.com/soxoj/maigret">
|
||||
<img alt="View count for Maigret project" src="https://komarev.com/ghpvc/?username=maigret&color=brightgreen&label=views&style=flat-square" />
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<img src="https://raw.githubusercontent.com/soxoj/maigret/main/static/maigret.png" height="200"/>
|
||||
<img src="https://raw.githubusercontent.com/soxoj/maigret/main/static/maigret.png" height="300"/>
|
||||
</p>
|
||||
</p>
|
||||
|
||||
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
||||
|
||||
<b>👉👉👉 [Online Telegram bot](https://t.me/osint_maigret_bot)</b>
|
||||
|
||||
## About
|
||||
|
||||
**Maigret** collect a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys are required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||
|
||||
Currently supported more than 2000 sites ([full list](https://raw.githubusercontent.com/soxoj/maigret/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
|
||||
Currently supports more than 3000 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking Tor sites, I2P sites, and domains (via DNS resolving).
|
||||
|
||||
## Powered By Maigret
|
||||
|
||||
These are professional tools for social media content analysis and OSINT investigations that use Maigret (banners are clickable).
|
||||
|
||||
<a href="https://github.com/SocialLinks-IO/sociallinks-api"><img height="60" alt="Social Links API" src="https://github.com/user-attachments/assets/789747b2-d7a0-4d4e-8868-ffc4427df660"></a>
|
||||
<a href="https://sociallinks.io/products/sl-crimewall"><img height="60" alt="Social Links Crimewall" src="https://github.com/user-attachments/assets/0b18f06c-2f38-477b-b946-1be1a632a9d1"></a>
|
||||
<a href="https://usersearch.ai/"><img height="60" alt="UserSearch" src="https://github.com/user-attachments/assets/66daa213-cf7d-40cf-9267-42f97cf77580"></a>
|
||||
|
||||
## Main features
|
||||
|
||||
* Profile pages parsing, [extraction](https://github.com/soxoj/socid_extractor) of personal info, links to other profiles, etc.
|
||||
* Recursive search by new usernames and other ids found
|
||||
* Profile page parsing, [extraction](https://github.com/soxoj/socid_extractor) of personal info, links to other profiles, etc.
|
||||
* Recursive search by new usernames and other IDs found
|
||||
* Search by tags (site categories, countries)
|
||||
* Censorship and captcha detection
|
||||
* Requests retries
|
||||
|
||||
See full description of Maigret features [in the Wiki](https://github.com/soxoj/maigret/wiki/Features).
|
||||
See the full description of Maigret features [in the documentation](https://maigret.readthedocs.io/en/latest/features.html).
|
||||
|
||||
## Installation
|
||||
|
||||
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
|
||||
Also you can run Maigret using cloud shells and Jupyter notebooks (see buttons below).
|
||||
‼️ Maigret is available online via [official Telegram bot](https://t.me/osint_maigret_bot). Consider using it if you don't want to install anything.
|
||||
|
||||
### Windows
|
||||
|
||||
Standalone EXE-binaries for Windows are located in [Releases section](https://github.com/soxoj/maigret/releases) of GitHub repository.
|
||||
|
||||
Video guide on how to run it: https://youtu.be/qIgwTZOmMmM.
|
||||
|
||||
### Installation in Cloud Shells
|
||||
|
||||
You can launch Maigret using cloud shells and Jupyter notebooks. Press one of the buttons below and follow the instructions to launch it in your browser.
|
||||
|
||||
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
|
||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://user-images.githubusercontent.com/27065646/92304596-bf719b00-ef7f-11ea-987f-2c1f3c323088.png" alt="Run on Repl.it" height="50"></a>
|
||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
||||
|
||||
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
||||
<a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
|
||||
|
||||
### Package installing
|
||||
### Local installation
|
||||
|
||||
**NOTE**: Python 3.6 or higher and pip is required, **Python 3.8 is recommended.**
|
||||
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
|
||||
|
||||
**NOTE**: Python 3.10 or higher and pip is required, **Python 3.11 is recommended.**
|
||||
|
||||
```bash
|
||||
# install from pypi
|
||||
pip3 install maigret
|
||||
|
||||
# or clone and install manually
|
||||
git clone https://github.com/soxoj/maigret && cd maigret
|
||||
pip3 install .
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
```
|
||||
@@ -65,11 +88,14 @@ maigret username
|
||||
### Cloning a repository
|
||||
|
||||
```bash
|
||||
# or clone and install manually
|
||||
git clone https://github.com/soxoj/maigret && cd maigret
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
# build and install
|
||||
pip3 install .
|
||||
|
||||
# usage
|
||||
./maigret.py username
|
||||
maigret username
|
||||
```
|
||||
|
||||
### Docker
|
||||
@@ -79,7 +105,7 @@ pip3 install -r requirements.txt
|
||||
docker pull soxoj/maigret
|
||||
|
||||
# usage
|
||||
docker run soxoj/maigret:latest username
|
||||
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
|
||||
|
||||
# manual build
|
||||
docker build -t maigret .
|
||||
@@ -88,32 +114,66 @@ docker build -t maigret .
|
||||
## Usage examples
|
||||
|
||||
```bash
|
||||
# make HTML and PDF reports
|
||||
maigret user --html --pdf
|
||||
# make HTML, PDF, and Xmind8 reports
|
||||
maigret user --html
|
||||
maigret user --pdf
|
||||
maigret user --xmind #Output not compatible with xmind 2022+
|
||||
|
||||
# search on sites marked with tags photo & dating
|
||||
maigret user --tags photo,dating
|
||||
|
||||
# search on sites marked with tag us
|
||||
maigret user --tags us
|
||||
|
||||
# search for three usernames on all available sites
|
||||
maigret user1 user2 user3 -a
|
||||
```
|
||||
|
||||
Use `maigret --help` to get full options description. Also options are documented in [the Maigret Wiki](https://github.com/soxoj/maigret/wiki/Command-line-options).
|
||||
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
||||
|
||||
## Contributing
|
||||
|
||||
Maigret has open-source code, so you may contribute your own sites by adding them to `data.json` file, or bring changes to it's code!
|
||||
|
||||
For more information about development and contribution, please read the [development documentation](https://maigret.readthedocs.io/en/latest/development.html).
|
||||
|
||||
## Demo with page parsing and recursive username search
|
||||
|
||||
[PDF report](https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
|
||||
### Video (asciinema)
|
||||
|
||||

|
||||
<a href="https://asciinema.org/a/Ao0y7N0TTxpS0pisoprQJdylZ">
|
||||
<img src="https://asciinema.org/a/Ao0y7N0TTxpS0pisoprQJdylZ.svg" alt="asciicast" width="600">
|
||||
</a>
|
||||
|
||||
### Reports
|
||||
|
||||
[PDF report](https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
[Full console output](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.md)
|
||||
|
||||
## Disclaimer
|
||||
|
||||
**This tool is intended for educational and lawful purposes only.** The developers do not endorse or encourage any illegal activities or misuse of this tool. Regulations regarding the collection and use of personal data vary by country and region, including but not limited to GDPR in the EU, CCPA in the USA, and similar laws worldwide.
|
||||
|
||||
It is your sole responsibility to ensure that your use of this tool complies with all applicable laws and regulations in your jurisdiction. Any illegal use of this tool is strictly prohibited, and you are fully accountable for your actions.
|
||||
|
||||
The authors and developers of this tool bear no responsibility for any misuse or unlawful activities conducted by its users.
|
||||
|
||||
## Feedback
|
||||
|
||||
If you have any questions, suggestions, or feedback, please feel free to [open an issue](https://github.com/soxoj/maigret/issues), create a [GitHub discussion](https://github.com/soxoj/maigret/discussions), or contact the author directly via [Telegram](https://t.me/soxoj).
|
||||
|
||||
## SOWEL classification
|
||||
|
||||
This tool uses the following OSINT techniques:
|
||||
- [SOTL-2.2. Search For Accounts On Other Platforms](https://sowel.soxoj.com/other-platform-accounts)
|
||||
- [SOTL-6.1. Check Logins Reuse To Find Another Account](https://sowel.soxoj.com/logins-reuse)
|
||||
- [SOTL-6.2. Check Nicknames Reuse To Find Another Account](https://sowel.soxoj.com/nicknames-reuse)
|
||||
|
||||
## License
|
||||
|
||||
MIT © [Maigret](https://github.com/soxoj/maigret)<br/>
|
||||
|
||||
@@ -10,4 +10,4 @@
|
||||
pixabay.com FALSE / FALSE 0 anonymous_user_id c1e4ee09-5674-4252-aa94-8c47b1ea80ab
|
||||
pixabay.com FALSE / FALSE 1647214439 csrftoken vfetTSvIul7gBlURt6s985JNM18GCdEwN5MWMKqX4yI73xoPgEj42dbNefjGx5fr
|
||||
pixabay.com FALSE / FALSE 1647300839 client_width 1680
|
||||
pixabay.com FALSE / FALSE 748111764839 is_human 1
|
||||
pixabay.com FALSE / FALSE 748111764839 is_human 1
|
||||
@@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
@@ -0,0 +1,35 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.http://sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
||||
@@ -0,0 +1,2 @@
|
||||
sphinx-copybutton
|
||||
sphinx_rtd_theme
|
||||
@@ -0,0 +1,127 @@
|
||||
.. _command-line-options:
|
||||
|
||||
Command line options
|
||||
====================
|
||||
|
||||
Usernames
|
||||
---------
|
||||
|
||||
``maigret username1 username2 ...``
|
||||
|
||||
You can specify several usernames separated by space. Usernames are
|
||||
**not** mandatory as there are other operations modes (see below).
|
||||
|
||||
Parsing of account pages and online documents
|
||||
---------------------------------------------
|
||||
|
||||
``maigret --parse URL``
|
||||
|
||||
Maigret will try to extract information about the document/account owner
|
||||
(including username and other ids) and will make a search by the
|
||||
extracted username and ids. See examples in the :ref:`extracting-information-from-pages` section.
|
||||
|
||||
Main options
|
||||
------------
|
||||
|
||||
Options are also configurable through settings files, see
|
||||
:doc:`settings section <settings>`.
|
||||
|
||||
``--tags`` - Filter sites for searching by tags: sites categories and
|
||||
two-letter country codes (**not a language!**). E.g. photo, dating, sport; jp, us, global.
|
||||
Multiple tags can be associated with one site. **Warning**: tags markup is
|
||||
not stable now. Read more :doc:`in the separate section <tags>`.
|
||||
|
||||
``-n``, ``--max-connections`` - Allowed number of concurrent connections
|
||||
**(default: 100)**.
|
||||
|
||||
``-a``, ``--all-sites`` - Use all sites for scan **(default: top 500)**.
|
||||
|
||||
``--top-sites`` - Count of sites for scan ranked by Alexa Top
|
||||
**(default: top 500)**.
|
||||
|
||||
``--timeout`` - Time (in seconds) to wait for responses from sites
|
||||
**(default: 30)**. A longer timeout will be more likely to get results
|
||||
from slow sites. On the other hand, this may cause a long delay to
|
||||
gather all results. The choice of the right timeout should be carried
|
||||
out taking into account the bandwidth of the Internet connection.
|
||||
|
||||
``--cookies-jar-file`` - File with custom cookies in Netscape format
|
||||
(aka cookies.txt). You can install an extension to your browser to
|
||||
download own cookies (`Chrome <https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid>`_, `Firefox <https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/>`_).
|
||||
|
||||
``--no-recursion`` - Disable parsing pages for other usernames and
|
||||
recursive search by them.
|
||||
|
||||
``--use-disabled-sites`` - Use disabled sites to search (may cause many
|
||||
false positives).
|
||||
|
||||
``--id-type`` - Specify identifier(s) type (default: username).
|
||||
Supported types: gaia_id, vk_id, yandex_public_id, ok_id, wikimapia_uid.
|
||||
Currently, you must add ``-a`` flag to run a scan on sites with custom
|
||||
id types, sites will be filtered automatically.
|
||||
|
||||
``--ignore-ids`` - Do not make search by the specified username or other
|
||||
ids. Useful for repeated scanning with found known irrelevant usernames.
|
||||
|
||||
``--db`` - Load Maigret database from a JSON file or an online, valid,
|
||||
JSON file.
|
||||
|
||||
``--retries RETRIES`` - Count of attempts to restart temporarily failed
|
||||
requests.
|
||||
|
||||
Reports
|
||||
-------
|
||||
|
||||
``-P``, ``--pdf`` - Generate a PDF report (general report on all
|
||||
usernames).
|
||||
|
||||
``-H``, ``--html`` - Generate an HTML report file (general report on all
|
||||
usernames).
|
||||
|
||||
``-X``, ``--xmind`` - Generate an XMind 8 mindmap (one report per
|
||||
username).
|
||||
|
||||
``-C``, ``--csv`` - Generate a CSV report (one report per username).
|
||||
|
||||
``-T``, ``--txt`` - Generate a TXT report (one report per username).
|
||||
|
||||
``-J``, ``--json`` - Generate a JSON report of specific type: simple,
|
||||
ndjson (one report per username). E.g. ``--json ndjson``
|
||||
|
||||
``-fo``, ``--folderoutput`` - Results will be saved to this folder,
|
||||
``results`` by default. Will be created if doesn’t exist.
|
||||
|
||||
Output options
|
||||
--------------
|
||||
|
||||
``-v``, ``--verbose`` - Display extra information and metrics.
|
||||
*(loglevel=WARNING)*
|
||||
|
||||
``-vv``, ``--info`` - Display service information. *(loglevel=INFO)*
|
||||
|
||||
``-vvv``, ``--debug``, ``-d`` - Display debugging information and site
|
||||
responses. *(loglevel=DEBUG)*
|
||||
|
||||
``--print-not-found`` - Print sites where the username was not found.
|
||||
|
||||
``--print-errors`` - Print errors messages: connection, captcha, site
|
||||
country ban, etc.
|
||||
|
||||
Other operations modes
|
||||
----------------------
|
||||
|
||||
``--version`` - Display version information and dependencies.
|
||||
|
||||
``--self-check`` - Do self-checking for sites and database and disable
|
||||
non-working ones **for current search session** by default. It’s useful
|
||||
for testing new internet connection (it depends on provider/hosting on
|
||||
which sites there will be censorship stub or captcha display). After
|
||||
checking Maigret asks if you want to save updates, answering y/Y will
|
||||
rewrite the local database.
|
||||
|
||||
``--submit URL`` - Do an automatic analysis of the given account URL or
|
||||
site main page URL to determine the site engine and methods to check
|
||||
account presence. After checking Maigret asks if you want to add the
|
||||
site, answering y/Y will rewrite the local database.
|
||||
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
|
||||
# -- Project information
|
||||
|
||||
project = 'Maigret'
|
||||
copyright = '2024, soxoj'
|
||||
author = 'soxoj'
|
||||
|
||||
release = '0.5.0a1'
|
||||
version = '0.5'
|
||||
|
||||
# -- General configuration
|
||||
|
||||
extensions = [
|
||||
'sphinx.ext.duration',
|
||||
'sphinx.ext.doctest',
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.autosummary',
|
||||
'sphinx.ext.intersphinx',
|
||||
'sphinx_copybutton'
|
||||
]
|
||||
|
||||
intersphinx_mapping = {
|
||||
'python': ('https://docs.python.org/3/', None),
|
||||
'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
|
||||
}
|
||||
intersphinx_disabled_domains = ['std']
|
||||
|
||||
templates_path = ['_templates']
|
||||
|
||||
# -- Options for HTML output
|
||||
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
|
||||
# -- Options for EPUB output
|
||||
epub_show_urls = 'footnote'
|
||||
@@ -0,0 +1,262 @@
|
||||
.. _development:
|
||||
|
||||
Development
|
||||
==============
|
||||
|
||||
Frequently Asked Questions
|
||||
--------------------------
|
||||
|
||||
1. Where to find the list of supported sites?
|
||||
|
||||
The human-readable list of supported sites is available in the `sites.md <https://github.com/soxoj/maigret/blob/main/sites.md>`_ file in the repository.
|
||||
It's been generated automatically from the main JSON file with the list of supported sites.
|
||||
|
||||
The machine-readable JSON file with the list of supported sites is available in the
|
||||
`data.json <https://github.com/soxoj/maigret/blob/main/maigret/resources/data.json>`_ file in the directory `resources`.
|
||||
|
||||
2. Which methods to check the account presence are supported?
|
||||
|
||||
The supported methods (``checkType`` values in ``data.json``) are:
|
||||
|
||||
- ``message`` - the most reliable method, checks if any string from ``presenceStrs`` is present and none of the strings from ``absenceStrs`` are present in the HTML response
|
||||
- ``status_code`` - checks that status code of the response is 2XX
|
||||
- ``response_url`` - check if there is not redirect and the response is 2XX
|
||||
|
||||
See the details of check mechanisms in the `checking.py <https://github.com/soxoj/maigret/blob/main/maigret/checking.py#L339>`_ file.
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
||||
It is recommended use Python 3.10 for testing.
|
||||
|
||||
Install test requirements:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
poetry install --with dev
|
||||
|
||||
|
||||
Use the following commands to check Maigret:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# run linter and typing checks
|
||||
# order of checks:
|
||||
# - critical syntax errors or undefined names
|
||||
# - flake checks
|
||||
# - mypy checks
|
||||
make lint
|
||||
|
||||
# run black formatter
|
||||
make format
|
||||
|
||||
# run testing with coverage html report
|
||||
# current test coverage is 58%
|
||||
make test
|
||||
|
||||
# open html report
|
||||
open htmlcov/index.html
|
||||
|
||||
# get flamechart of imports to estimate startup time
|
||||
make speed
|
||||
|
||||
|
||||
How to fix false-positives
|
||||
-----------------------------------------------
|
||||
|
||||
If you want to work with sites database, don't forget to activate statistics update git hook, command for it would look like this: ``git config --local core.hooksPath .githooks/``.
|
||||
|
||||
You should make your git commits from your maigret git repo folder, or else the hook wouldn't find the statistics update script.
|
||||
|
||||
1. Determine the problematic site.
|
||||
|
||||
If you already know which site has a false-positive and want to fix it specifically, go to the next step.
|
||||
|
||||
Otherwise, simply run a search with a random username (e.g. `laiuhi3h4gi3u4hgt`) and check the results.
|
||||
Alternatively, you can use `the Telegram bot <https://t.me/osint_maigret_bot>`_.
|
||||
|
||||
2. Open the account link in your browser and check:
|
||||
|
||||
- If the site is completely gone, remove it from the list
|
||||
- If the site still works but looks different, update in data.json how we check it
|
||||
- If the site requires login to view profiles, disable checking it
|
||||
|
||||
3. Find the site in the `data.json <https://github.com/soxoj/maigret/blob/main/maigret/resources/data.json>`_ file.
|
||||
|
||||
If the ``checkType`` method is not ``message`` and you are going to fix check, update it:
|
||||
- put ``message`` in ``checkType``
|
||||
- put in ``absenceStrs`` a keyword that is present in the HTML response for an non-existing account
|
||||
- put in ``presenceStrs`` a keyword that is present in the HTML response for an existing account
|
||||
|
||||
If you have trouble determining the right keywords, you can use automatic detection by passing the account URL with the ``--submit`` option:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret --submit https://my.mail.ru/bk/alex
|
||||
|
||||
To disable checking, set ``disabled`` to ``true`` or simply run:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret --self-check --site My.Mail.ru@bk.ru
|
||||
|
||||
To debug the check method using the response HTML, you can run:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret soxoj --site My.Mail.ru@bk.ru -d 2> response.txt
|
||||
|
||||
There are few options for sites data.json helpful in various cases:
|
||||
|
||||
- ``engine`` - a predefined check for the sites of certain type (e.g. forums), see the ``engines`` section in the JSON file
|
||||
- ``headers`` - a dictionary of additional headers to be sent to the site
|
||||
- ``requestHeadOnly`` - set to ``true`` if it's enough to make a HEAD request to the site
|
||||
- ``regexCheck`` - a regex to check if the username is valid, in case of frequent false-positives
|
||||
|
||||
.. _activation-mechanism:
|
||||
|
||||
Activation mechanism
|
||||
--------------------
|
||||
|
||||
The activation mechanism helps make requests to sites requiring additional authentication like cookies, JWT tokens, or custom headers.
|
||||
|
||||
Let's study the Vimeo site check record from the Maigret database:
|
||||
|
||||
.. code-block:: json
|
||||
|
||||
"Vimeo": {
|
||||
"tags": [
|
||||
"us",
|
||||
"video"
|
||||
],
|
||||
"headers": {
|
||||
"Authorization": "jwt eyJ0..."
|
||||
},
|
||||
"activation": {
|
||||
"url": "https://vimeo.com/_rv/viewer",
|
||||
"marks": [
|
||||
"Something strange occurred. Please get in touch with the app's creator."
|
||||
],
|
||||
"method": "vimeo"
|
||||
},
|
||||
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name...",
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 148,
|
||||
"urlMain": "https://vimeo.com/",
|
||||
"url": "https://vimeo.com/{username}",
|
||||
"usernameClaimed": "blue",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
|
||||
The activation method is:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def vimeo(site, logger, cookies={}):
|
||||
headers = dict(site.headers)
|
||||
if "Authorization" in headers:
|
||||
del headers["Authorization"]
|
||||
import requests
|
||||
|
||||
r = requests.get(site.activation["url"], headers=headers)
|
||||
jwt_token = r.json()["jwt"]
|
||||
site.headers["Authorization"] = "jwt " + jwt_token
|
||||
|
||||
Here's how the activation process works when a JWT token becomes invalid:
|
||||
|
||||
1. The site check makes an HTTP request to ``urlProbe`` with the invalid token
|
||||
2. The response contains an error message specified in the ``activation``/``marks`` field
|
||||
3. When this error is detected, the ``vimeo`` activation function is triggered
|
||||
4. The activation function obtains a new JWT token and updates it in the site check record
|
||||
5. On the next site check (either through retry or a new Maigret run), the valid token is used and the check succeeds
|
||||
|
||||
Examples of activation mechanism implementation are available in `activation.py <https://github.com/soxoj/maigret/blob/main/maigret/activation.py>`_ file.
|
||||
|
||||
How to publish new version of Maigret
|
||||
-------------------------------------
|
||||
|
||||
**Collaborats rights are requires, write Soxoj to get them**.
|
||||
|
||||
For new version publishing you must create a new branch in repository
|
||||
with a bumped version number and actual changelog first. After it you
|
||||
must create a release, and GitHub action automatically create a new
|
||||
PyPi package.
|
||||
|
||||
- New branch example: https://github.com/soxoj/maigret/commit/e520418f6a25d7edacde2d73b41a8ae7c80ddf39
|
||||
- Release example: https://github.com/soxoj/maigret/releases/tag/v0.4.1
|
||||
|
||||
1. Make a new branch locally with a new version name. Check the current version number here: https://pypi.org/project/maigret/.
|
||||
**Increase only patch version (third number)** if there are no breaking changes.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
git checkout -b 0.4.0
|
||||
|
||||
2. Update Maigret version in three files manually:
|
||||
|
||||
- setup.py
|
||||
- maigret/__version__.py
|
||||
- docs/source/conf.py
|
||||
|
||||
3. Create a new empty text section in the beginning of the file `CHANGELOG.md` with a current date:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
## [0.4.0] - 2022-01-03
|
||||
|
||||
4. Get auto-generate release notes:
|
||||
|
||||
- Open https://github.com/soxoj/maigret/releases/new
|
||||
- Click `Choose a tag`, enter `v0.4.0` (your version)
|
||||
- Click `Create new tag`
|
||||
- Press `+ Auto-generate release notes`
|
||||
- Copy all the text from description text field below
|
||||
- Paste it to empty text section in `CHANGELOG.txt`
|
||||
- Remove redundant lines `## What's Changed` and `## New Contributors` section if it exists
|
||||
- *Close the new release page*
|
||||
|
||||
5. Commit all the changes, push, make pull request
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
git add -p
|
||||
git commit -m 'Bump to YOUR VERSION'
|
||||
git push origin head
|
||||
|
||||
|
||||
6. Merge pull request
|
||||
|
||||
7. Create new release
|
||||
|
||||
- Open https://github.com/soxoj/maigret/releases/new again
|
||||
- Click `Choose a tag`
|
||||
- Enter actual version in format `v0.4.0`
|
||||
- Also enter actual version in the field `Release title`
|
||||
- Click `Create new tag`
|
||||
- Press `+ Auto-generate release notes`
|
||||
- **Press "Publish release" button**
|
||||
|
||||
8. That's all, now you can simply wait push to PyPi. You can monitor it in Action page: https://github.com/soxoj/maigret/actions/workflows/python-publish.yml
|
||||
|
||||
Documentation updates
|
||||
---------------------
|
||||
|
||||
Documentations is auto-generated and auto-deployed from the ``docs`` directory.
|
||||
|
||||
To manually update documentation:
|
||||
|
||||
1. Change something in the ``.rst`` files in the ``docs/source`` directory.
|
||||
2. Install ``pip install -r requirements.txt`` in the docs directory.
|
||||
3. Run ``make singlehtml`` in the terminal in the docs directory.
|
||||
4. Open ``build/singlehtml/index.html`` in your browser to see the result.
|
||||
5. If everything is ok, commit and push your changes to GitHub.
|
||||
|
||||
Roadmap
|
||||
-------
|
||||
|
||||
.. warning::
|
||||
This roadmap requires updating to reflect the current project status and future plans.
|
||||
|
||||
.. figure:: https://i.imgur.com/kk8cFdR.png
|
||||
:target: https://i.imgur.com/kk8cFdR.png
|
||||
:align: center
|
||||
@@ -0,0 +1,213 @@
|
||||
.. _features:
|
||||
|
||||
Features
|
||||
========
|
||||
|
||||
This is the list of Maigret features.
|
||||
|
||||
Personal info gathering
|
||||
-----------------------
|
||||
|
||||
Maigret does the `parsing of accounts webpages and extraction <https://github.com/soxoj/socid-extractor>`_ of personal info, links to other profiles, etc.
|
||||
Extracted info displayed as an additional result in CLI output and as tables in HTML and PDF reports.
|
||||
Also, Maigret use found ids and usernames from links to start a recursive search.
|
||||
|
||||
Enabled by default, can be disabled with ``--no extracting``.
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
$ python3 -m maigret soxoj --timeout 5
|
||||
[-] Starting a search on top 500 sites from the Maigret database...
|
||||
[!] You can run search by full list of sites with flag `-a`
|
||||
[*] Checking username soxoj on:
|
||||
...
|
||||
[+] GitHub: https://github.com/soxoj
|
||||
├─uid: 31013580
|
||||
├─image: https://avatars.githubusercontent.com/u/31013580?v=4
|
||||
├─created_at: 2017-08-14T17:03:07Z
|
||||
├─location: Amsterdam, Netherlands
|
||||
├─follower_count: 1304
|
||||
├─following_count: 54
|
||||
├─fullname: Soxoj
|
||||
├─public_gists_count: 3
|
||||
├─public_repos_count: 88
|
||||
├─twitter_username: sox0j
|
||||
├─bio: Head of OSINT Center of Excellence in @SocialLinks-IO
|
||||
├─is_company: Social Links
|
||||
└─blog_url: soxoj.com
|
||||
...
|
||||
|
||||
Recursive search
|
||||
----------------
|
||||
|
||||
Maigret has the ability to scan account pages for :ref:`common identifiers <supported-identifier-types>` and usernames found in links.
|
||||
When people include links to their other social media accounts, Maigret can automatically detect and initiate new searches for those profiles.
|
||||
Any information discovered through this process will be shown in both the command-line interface output and generated reports.
|
||||
|
||||
Enabled by default, can be disabled with ``--no-recursion``.
|
||||
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
$ python3 -m maigret soxoj --timeout 5
|
||||
[-] Starting a search on top 500 sites from the Maigret database...
|
||||
[!] You can run search by full list of sites with flag `-a`
|
||||
[*] Checking username soxoj on:
|
||||
...
|
||||
[+] GitHub: https://github.com/soxoj
|
||||
├─uid: 31013580
|
||||
├─image: https://avatars.githubusercontent.com/u/31013580?v=4
|
||||
├─created_at: 2017-08-14T17:03:07Z
|
||||
├─location: Amsterdam, Netherlands
|
||||
├─follower_count: 1304
|
||||
├─following_count: 54
|
||||
├─fullname: Soxoj
|
||||
├─public_gists_count: 3
|
||||
├─public_repos_count: 88
|
||||
├─twitter_username: sox0j <===== another username found here
|
||||
├─bio: Head of OSINT Center of Excellence in @SocialLinks-IO
|
||||
├─is_company: Social Links
|
||||
└─blog_url: soxoj.com
|
||||
...
|
||||
Searching |████████████████████████████████████████| 500/500 [100%] in 9.1s (54.85/s)
|
||||
[-] You can see detailed site check errors with a flag `--print-errors`
|
||||
[*] Checking username sox0j on:
|
||||
[+] Telegram: https://t.me/sox0j
|
||||
├─fullname: @Sox0j
|
||||
...
|
||||
|
||||
Username permutations
|
||||
---------------------
|
||||
|
||||
Maigret can generate permutations of usernames. Just pass a few usernames in the CLI and use ``--permute`` flag.
|
||||
Thanks to `@balestek <https://github.com/balestek>`_ for the idea and implementation.
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
$ python3 -m maigret --permute hope dream --timeout 5
|
||||
[-] 12 permutations from hope dream to check...
|
||||
├─ hopedream
|
||||
├─ _hopedream
|
||||
├─ hopedream_
|
||||
├─ hope_dream
|
||||
├─ hope-dream
|
||||
├─ hope.dream
|
||||
├─ dreamhope
|
||||
├─ _dreamhope
|
||||
├─ dreamhope_
|
||||
├─ dream_hope
|
||||
├─ dream-hope
|
||||
└─ dream.hope
|
||||
[-] Starting a search on top 500 sites from the Maigret database...
|
||||
[!] You can run search by full list of sites with flag `-a`
|
||||
[*] Checking username hopedream on:
|
||||
...
|
||||
|
||||
Reports
|
||||
-------
|
||||
|
||||
Maigret currently supports HTML, PDF, TXT, XMind 8 mindmap, and JSON reports.
|
||||
|
||||
HTML/PDF reports contain:
|
||||
|
||||
- profile photo
|
||||
- all the gathered personal info
|
||||
- additional information about supposed personal data (full name, gender, location), resulting from statistics of all found accounts
|
||||
|
||||
Also, there is a short text report in the CLI output after the end of a searching phase.
|
||||
|
||||
.. warning::
|
||||
XMind 8 mindmaps are incompatible with XMind 2022!
|
||||
|
||||
Tags
|
||||
----
|
||||
|
||||
The Maigret sites database very big (and will be bigger), and it is maybe an overhead to run a search for all the sites.
|
||||
Also, it is often hard to understand, what sites more interesting for us in the case of a certain person.
|
||||
|
||||
Tags markup allows selecting a subset of sites by interests (photo, messaging, finance, etc.) or by country. Tags of found accounts grouped and displayed in the reports.
|
||||
|
||||
See full description :doc:`in the Tags Wiki page <tags>`.
|
||||
|
||||
Censorship and captcha detection
|
||||
--------------------------------
|
||||
|
||||
Maigret can detect common errors such as censorship stub pages, CloudFlare captcha pages, and others.
|
||||
If you get more them 3% errors of a certain type in a session, you've got a warning message in the CLI output with recommendations to improve performance and avoid problems.
|
||||
|
||||
Retries
|
||||
-------
|
||||
|
||||
Maigret will do retries of the requests with temporary errors got (connection failures, proxy errors, etc.).
|
||||
|
||||
One attempt by default, can be changed with option ``--retries N``.
|
||||
|
||||
Archives and mirrors checking
|
||||
-----------------------------
|
||||
|
||||
The Maigret database contains not only the original websites, but also mirrors, archives, and aggregators. For example:
|
||||
|
||||
- `Picuki <https://www.picuki.com/>`_, Instagram mirror
|
||||
- (no longer available) `Reddit BigData search <https://camas.github.io/reddit-search/>`_
|
||||
- (no longer available) `Twitter shadowban <https://shadowban.eu/>`_ checker
|
||||
|
||||
It allows getting additional info about the person and checking the existence of the account even if the main site is unavailable (bot protection, captcha, etc.)
|
||||
|
||||
Activation
|
||||
----------
|
||||
The activation mechanism helps make requests to sites requiring additional authentication like cookies, JWT tokens, or custom headers.
|
||||
|
||||
It works by implementing a custom function that:
|
||||
|
||||
1. Makes a specialized HTTP request to a specific website endpoint
|
||||
2. Processes the response
|
||||
3. Updates the headers/cookies for that site in the local Maigret database
|
||||
|
||||
Since activation only triggers after encountering specific errors, a retry (or another Maigret run) is needed to obtain a valid response with the updated authentication.
|
||||
|
||||
The activation mechanism is enabled by default, and cannot be disabled at the moment.
|
||||
|
||||
See for more details in Development section :ref:`activation-mechanism`.
|
||||
|
||||
.. _extracting-information-from-pages:
|
||||
|
||||
Extraction of information from account pages
|
||||
--------------------------------------------
|
||||
|
||||
Maigret can parse URLs and content of web pages by URLs to extract info about account owner and other meta information.
|
||||
|
||||
You must specify the URL with the option ``--parse``, it's can be a link to an account or an online document. List of supported sites `see here <https://github.com/soxoj/socid-extractor#sites>`_.
|
||||
|
||||
After the end of the parsing phase, Maigret will start the search phase by :doc:`supported identifiers <supported-identifier-types>` found (usernames, ids, etc.).
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ maigret --parse https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit\#gid\=0
|
||||
|
||||
Scanning webpage by URL https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit#gid=0...
|
||||
┣╸org_name: Gooten
|
||||
┗╸mime_type: application/vnd.google-apps.ritz
|
||||
Scanning webpage by URL https://clients6.google.com/drive/v2beta/files/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw?fields=alternateLink%2CcopyRequiresWriterPermission%2CcreatedDate%2Cdescription%2CdriveId%2CfileSize%2CiconLink%2Cid%2Clabels(starred%2C%20trashed)%2ClastViewedByMeDate%2CmodifiedDate%2Cshared%2CteamDriveId%2CuserPermission(id%2Cname%2CemailAddress%2Cdomain%2Crole%2CadditionalRoles%2CphotoLink%2Ctype%2CwithLink)%2Cpermissions(id%2Cname%2CemailAddress%2Cdomain%2Crole%2CadditionalRoles%2CphotoLink%2Ctype%2CwithLink)%2Cparents(id)%2Ccapabilities(canMoveItemWithinDrive%2CcanMoveItemOutOfDrive%2CcanMoveItemOutOfTeamDrive%2CcanAddChildren%2CcanEdit%2CcanDownload%2CcanComment%2CcanMoveChildrenWithinDrive%2CcanRename%2CcanRemoveChildren%2CcanMoveItemIntoTeamDrive)%2Ckind&supportsTeamDrives=true&enforceSingleParent=true&key=AIzaSyC1eQ1xj69IdTMeii5r7brs3R90eck-m7k...
|
||||
┣╸created_at: 2016-02-16T18:51:52.021Z
|
||||
┣╸updated_at: 2019-10-23T17:15:47.157Z
|
||||
┣╸gaia_id: 15696155517366416778
|
||||
┣╸fullname: Nadia Burgess
|
||||
┣╸email: nadia@gooten.com
|
||||
┣╸image: https://lh3.googleusercontent.com/a-/AOh14GheZe1CyNa3NeJInWAl70qkip4oJ7qLsD8vDy6X=s64
|
||||
┗╸email_username: nadia
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ maigret.py --parse https://steamcommunity.com/profiles/76561199113454789
|
||||
Scanning webpage by URL https://steamcommunity.com/profiles/76561199113454789...
|
||||
┣╸steam_id: 76561199113454789
|
||||
┣╸nickname: Pok
|
||||
┗╸username: Machine42
|
||||
|
||||
|
||||
Simple API
|
||||
----------
|
||||
|
||||
Maigret can be easily integrated with the use of Python package `maigret <https://pypi.org/project/maigret/>`_.
|
||||
|
||||
Example: the official `Telegram bot <https://github.com/soxoj/maigret-tg-bot>`_
|
||||
@@ -0,0 +1,46 @@
|
||||
.. _index:
|
||||
|
||||
Welcome to the Maigret docs!
|
||||
============================
|
||||
|
||||
**Maigret** is an easy-to-use and powerful OSINT tool for collecting a dossier on a person by a username (alias) only.
|
||||
|
||||
This is achieved by checking for accounts on a huge number of sites and gathering all the available information from web pages.
|
||||
|
||||
The project's main goal — give to OSINT researchers and pentesters a **universal tool** to get maximum information
|
||||
about a person of interest by a username and integrate it with other tools in automatization pipelines.
|
||||
|
||||
.. warning::
|
||||
**This tool is intended for educational and lawful purposes only.**
|
||||
The developers do not endorse or encourage any illegal activities or misuse of this tool.
|
||||
Regulations regarding the collection and use of personal data vary by country and region,
|
||||
including but not limited to GDPR in the EU, CCPA in the USA, and similar laws worldwide.
|
||||
|
||||
It is your sole responsibility to ensure that your use of this tool complies with all applicable laws
|
||||
and regulations in your jurisdiction. Any illegal use of this tool is strictly prohibited,
|
||||
and you are fully accountable for your actions.
|
||||
|
||||
The authors and developers of this tool bear no responsibility for any misuse
|
||||
or unlawful activities conducted by its users.
|
||||
|
||||
You may be interested in:
|
||||
-------------------------
|
||||
- :doc:`Quick start <quick-start>`
|
||||
- :doc:`Usage examples <usage-examples>`
|
||||
- :doc:`Command line options <command-line-options>`
|
||||
- :doc:`Features list <features>`
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:caption: Sections
|
||||
|
||||
quick-start
|
||||
installation
|
||||
usage-examples
|
||||
command-line-options
|
||||
features
|
||||
philosophy
|
||||
supported-identifier-types
|
||||
tags
|
||||
settings
|
||||
development
|
||||
@@ -0,0 +1,92 @@
|
||||
.. _installation:
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
|
||||
Also, it is available online via `official Telegram bot <https://t.me/osint_maigret_bot>`_,
|
||||
source code of a bot is `available on GitHub <https://github.com/soxoj/maigret-tg-bot>`_.
|
||||
|
||||
Windows Standalone EXE-binaries
|
||||
-------------------------------
|
||||
|
||||
Standalone EXE-binaries for Windows are located in the `Releases section <https://github.com/soxoj/maigret/releases>`_ of GitHub repository.
|
||||
|
||||
Currently, the new binary is created automatically after each commit to **main** and **dev** branches.
|
||||
|
||||
Video guide on how to run it: https://youtu.be/qIgwTZOmMmM.
|
||||
|
||||
|
||||
Cloud Shells and Jupyter notebooks
|
||||
----------------------------------
|
||||
|
||||
In case you don't want to install Maigret locally, you can use cloud shells and Jupyter notebooks.
|
||||
Press one of the buttons below and follow the instructions to launch it in your browser.
|
||||
|
||||
.. image:: https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png
|
||||
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md
|
||||
:alt: Open in Cloud Shell
|
||||
|
||||
.. image:: https://replit.com/badge/github/soxoj/maigret
|
||||
:target: https://repl.it/github/soxoj/maigret
|
||||
:alt: Run on Replit
|
||||
:height: 50
|
||||
|
||||
.. image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb
|
||||
:alt: Open In Colab
|
||||
:height: 45
|
||||
|
||||
.. image:: https://mybinder.org/badge_logo.svg
|
||||
:target: https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD
|
||||
:alt: Open In Binder
|
||||
:height: 45
|
||||
|
||||
Local installation from PyPi
|
||||
----------------------------
|
||||
|
||||
Please note that the sites database in the PyPI package may be outdated.
|
||||
If you encounter frequent false positive results, we recommend installing the latest development version from GitHub instead.
|
||||
|
||||
.. note::
|
||||
Python 3.10 or higher and pip is required, **Python 3.11 is recommended.**
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# install from pypi
|
||||
pip3 install maigret
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
|
||||
Development version (GitHub)
|
||||
----------------------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
git clone https://github.com/soxoj/maigret && cd maigret
|
||||
pip3 install .
|
||||
|
||||
# OR
|
||||
pip3 install git+https://github.com/soxoj/maigret.git
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
|
||||
# OR use poetry in case you plan to develop Maigret
|
||||
pip3 install poetry
|
||||
poetry run maigret
|
||||
|
||||
Docker
|
||||
------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# official image of the development version, updated from the github repo
|
||||
docker pull soxoj/maigret
|
||||
|
||||
# usage
|
||||
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
|
||||
|
||||
# manual build
|
||||
docker build -t maigret .
|
||||
|
After Width: | Height: | Size: 375 KiB |
@@ -0,0 +1,17 @@
|
||||
.. _philosophy:
|
||||
|
||||
Philosophy
|
||||
==========
|
||||
|
||||
TL;DR: Username => Dossier
|
||||
|
||||
Maigret is designed to gather all the available information about person by his username.
|
||||
|
||||
What kind of information is this? First, links to person accounts. Secondly, all the machine-extractable
|
||||
pieces of info, such as: other usernames, full name, URLs to people's images, birthday, location (country,
|
||||
city, etc.), gender.
|
||||
|
||||
All this information forms some dossier, but it also useful for other tools and analytical purposes.
|
||||
Each collected piece of data has a label of a certain format (for example, ``follower_count`` for the number
|
||||
of subscribers or ``created_at`` for account creation time) so that it can be parsed and analyzed by various
|
||||
systems and stored in databases.
|
||||
@@ -0,0 +1,15 @@
|
||||
.. _quick-start:
|
||||
|
||||
Quick start
|
||||
===========
|
||||
|
||||
After :doc:`installing Maigret <installation>`, you can begin searching by providing one or more usernames to look up:
|
||||
|
||||
``maigret username1 username2 ...``
|
||||
|
||||
Maigret will search for accounts with the specified usernames across a vast number of websites. It will provide you with a list
|
||||
of URLs to any discovered accounts, along with relevant information extracted from those profiles.
|
||||
|
||||
.. image:: maigret_screenshot.png
|
||||
:alt: Maigret search results screenshot
|
||||
:align: center
|
||||
@@ -0,0 +1,29 @@
|
||||
.. _settings:
|
||||
|
||||
Settings
|
||||
==============
|
||||
|
||||
.. warning::
|
||||
The settings system is under development and may be subject to change.
|
||||
|
||||
Options are also configurable through settings files. See
|
||||
`settings JSON file <https://github.com/soxoj/maigret/blob/main/maigret/resources/settings.json>`_
|
||||
for the list of currently supported options.
|
||||
|
||||
After start Maigret tries to load configuration from the following sources in exactly the same order:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# relative path, based on installed package path
|
||||
resources/settings.json
|
||||
|
||||
# absolute path, configuration file in home directory
|
||||
~/.maigret/settings.json
|
||||
|
||||
# relative path, based on current working directory
|
||||
settings.json
|
||||
|
||||
Missing any of these files is not an error.
|
||||
If the next settings file contains already known option,
|
||||
this option will be rewrited. So it is possible to make
|
||||
custom configuration for different users and directories.
|
||||
@@ -0,0 +1,15 @@
|
||||
.. _supported-identifier-types:
|
||||
|
||||
Supported identifier types
|
||||
==========================
|
||||
|
||||
Maigret can search against not only ordinary usernames, but also through certain common identifiers. There is a list of all currently supported identifiers.
|
||||
|
||||
- **gaia_id** - Google inner numeric user identifier, in former times was placed in a Google Plus account URL.
|
||||
- **steam_id** - Steam inner numeric user identifier.
|
||||
- **wikimapia_uid** - Wikimapia.org inner numeric user identifier.
|
||||
- **uidme_uguid** - uID.me inner numeric user identifier.
|
||||
- **yandex_public_id** - Yandex sites inner letter user identifier. See also: `YaSeeker <https://github.com/HowToFind-bot/YaSeeker>`_.
|
||||
- **vk_id** - VK.com inner numeric user identifier.
|
||||
- **ok_id** - OK.ru inner numeric user identifier.
|
||||
- **yelp_userid** - Yelp inner user identifier.
|
||||
@@ -0,0 +1,25 @@
|
||||
.. _tags:
|
||||
|
||||
Tags
|
||||
====
|
||||
|
||||
The use of tags allows you to select a subset of the sites from big Maigret DB for search.
|
||||
|
||||
.. warning::
|
||||
Tags markup is still not stable.
|
||||
|
||||
There are several types of tags:
|
||||
|
||||
1. **Country codes**: ``us``, ``jp``, ``br``... (`ISO 3166-1 alpha-2 <https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2>`_). These tags reflect the site language and regional origin of its users and are then used to locate the owner of a username. If the regional origin is difficult to establish or a site is positioned as worldwide, `no country code is given`. There could be multiple country code tags for one site.
|
||||
|
||||
2. **Site engines**. Most of them are forum engines now: ``uCoz``, ``vBulletin``, ``XenForo`` et al. Full list of engines stored in the Maigret database.
|
||||
|
||||
3. **Sites' subject/type and interests of its users**. Full list of "standard" tags is `present in the source code <https://github.com/soxoj/maigret/blob/main/maigret/sites.py#L13>`_ only for a moment.
|
||||
|
||||
Usage
|
||||
-----
|
||||
``--tags us,jp`` -- search on US and Japanese sites (actually marked as such in the Maigret database)
|
||||
|
||||
``--tags coding`` -- search on sites related to software development.
|
||||
|
||||
``--tags ucoz`` -- search on uCoz sites only (mostly CIS countries)
|
||||
@@ -0,0 +1,70 @@
|
||||
.. _usage-examples:
|
||||
|
||||
Usage examples
|
||||
==============
|
||||
|
||||
1. Search for accounts with username ``machine42`` on top 500 sites (by default, according to Alexa rank) from the Maigret DB.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42
|
||||
|
||||
2. Search for accounts with username ``machine42`` on **all sites** from the Maigret DB.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 -a
|
||||
|
||||
.. note::
|
||||
Maigret will search for accounts on a huge number of sites,
|
||||
and some of them may return false positive results. At the moment, we are working on autorepair mode to deliver
|
||||
the most accurate results.
|
||||
|
||||
If you experience many false positives, you can do the following:
|
||||
|
||||
- Install the last development version of Maigret from GitHub
|
||||
- Run Maigret with ``--self-check`` flag and agree on disabling of problematic sites
|
||||
|
||||
3. Search for accounts with username ``machine42`` and generate HTML and PDF reports.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 -HP
|
||||
|
||||
or
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 -a --html --pdf
|
||||
|
||||
|
||||
4. Search for accounts with username ``machine42`` on Facebook only.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 --site Facebook
|
||||
|
||||
5. Extract information from the Steam page by URL and start a search for accounts with found username ``machine42``.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret --parse https://steamcommunity.com/profiles/76561199113454789
|
||||
|
||||
6. Search for accounts with username ``machine42`` only on US and Japanese sites.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 --tags us,jp
|
||||
|
||||
7. Search for accounts with username ``machine42`` only on sites related to software development.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 --tags coding
|
||||
|
||||
8. Search for accounts with username ``machine42`` on uCoz sites only (mostly CIS countries).
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 --tags ucoz
|
||||
|
||||
@@ -1,68 +1,43 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "8v6PEfyXb0Gx"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# clone the repo\n",
|
||||
"!git clone https://github.com/soxoj/maigret\n",
|
||||
"!pip3 install -r maigret/requirements.txt"
|
||||
]
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "cXOQUAhDchkl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# help\n",
|
||||
"!python3 maigret/maigret.py --help"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "SjDmpN4QGnJu"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# search\n",
|
||||
"!python3 maigret/maigret.py user"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"collapsed_sections": [],
|
||||
"include_colab_link": true,
|
||||
"name": "maigret.ipynb",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "acxNWJOUmLc4"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!git clone https://github.com/soxoj/maigret\n",
|
||||
"!pip3 install ./maigret/\n",
|
||||
"from IPython.display import clear_output\n",
|
||||
"clear_output()\n",
|
||||
"username = str(input(\"Username >> \"))\n",
|
||||
"!maigret {username} -a -n 10"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"id": "S3SmapMHmOoD"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from maigret.maigret import main
|
||||
|
||||
|
||||
def run():
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
except KeyboardInterrupt:
|
||||
print('Maigret is interrupted.')
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
@@ -8,5 +8,6 @@ __author_email__ = 'soxoj@protonmail.com'
|
||||
|
||||
from .__version__ import __version__
|
||||
from .checking import maigret as search
|
||||
from .maigret import main as cli
|
||||
from .sites import MaigretEngine, MaigretSite, MaigretDatabase
|
||||
from .notify import QueryNotifyPrint as Notifier
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""Maigret version file"""
|
||||
|
||||
__version__ = '0.3.1'
|
||||
__version__ = '0.5.0a1'
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import json
|
||||
from http.cookiejar import MozillaCookieJar
|
||||
from http.cookies import Morsel
|
||||
|
||||
import requests
|
||||
from aiohttp import CookieJar
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@ class ParsingActivator:
|
||||
def twitter(site, logger, cookies={}):
|
||||
headers = dict(site.headers)
|
||||
del headers["x-guest-token"]
|
||||
import requests
|
||||
|
||||
r = requests.post(site.activation["url"], headers=headers)
|
||||
logger.info(r)
|
||||
j = r.json()
|
||||
@@ -21,7 +23,10 @@ class ParsingActivator:
|
||||
headers = dict(site.headers)
|
||||
if "Authorization" in headers:
|
||||
del headers["Authorization"]
|
||||
import requests
|
||||
|
||||
r = requests.get(site.activation["url"], headers=headers)
|
||||
logger.debug(f"Vimeo viewer activation: {json.dumps(r.json(), indent=4)}")
|
||||
jwt_token = r.json()["jwt"]
|
||||
site.headers["Authorization"] = "jwt " + jwt_token
|
||||
|
||||
@@ -30,10 +35,47 @@ class ParsingActivator:
|
||||
headers = dict(site.headers)
|
||||
if "Authorization" in headers:
|
||||
del headers["Authorization"]
|
||||
import requests
|
||||
|
||||
r = requests.get(site.activation["url"])
|
||||
bearer_token = r.json()["accessToken"]
|
||||
site.headers["authorization"] = f"Bearer {bearer_token}"
|
||||
|
||||
@staticmethod
|
||||
def weibo(site, logger):
|
||||
headers = dict(site.headers)
|
||||
import requests
|
||||
|
||||
session = requests.Session()
|
||||
# 1 stage: get the redirect URL
|
||||
r = session.get(
|
||||
"https://weibo.com/clairekuo", headers=headers, allow_redirects=False
|
||||
)
|
||||
logger.debug(
|
||||
f"1 stage: {'success' if r.status_code == 302 else 'no 302 redirect, fail!'}"
|
||||
)
|
||||
location = r.headers.get("Location")
|
||||
|
||||
# 2 stage: go to passport visitor page
|
||||
headers["Referer"] = location
|
||||
r = session.get(location, headers=headers)
|
||||
logger.debug(
|
||||
f"2 stage: {'success' if r.status_code == 200 else 'no 200 response, fail!'}"
|
||||
)
|
||||
|
||||
# 3 stage: gen visitor token
|
||||
headers["Referer"] = location
|
||||
r = session.post(
|
||||
"https://passport.weibo.com/visitor/genvisitor2",
|
||||
headers=headers,
|
||||
data={'cb': 'visitor_gray_callback', 'tid': '', 'from': 'weibo'},
|
||||
)
|
||||
cookies = r.headers.get('set-cookie')
|
||||
logger.debug(
|
||||
f"3 stage: {'success' if r.status_code == 200 and cookies else 'no 200 response and cookies, fail!'}"
|
||||
)
|
||||
site.headers["Cookie"] = cookies
|
||||
|
||||
|
||||
def import_aiohttp_cookies(cookiestxt_filename):
|
||||
cookies_obj = MozillaCookieJar(cookiestxt_filename)
|
||||
|
||||
@@ -1,37 +1,40 @@
|
||||
# Standard library imports
|
||||
import ast
|
||||
import asyncio
|
||||
import logging
|
||||
import random
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from urllib.parse import quote
|
||||
|
||||
# Third party imports
|
||||
import aiodns
|
||||
from alive_progress import alive_bar
|
||||
from aiohttp import ClientSession, TCPConnector, http_exceptions
|
||||
from aiohttp.client_exceptions import ClientConnectorError, ServerDisconnectedError
|
||||
from python_socks import _errors as proxy_errors
|
||||
from socid_extractor import extract
|
||||
|
||||
try:
|
||||
from mock import Mock
|
||||
except ImportError:
|
||||
from unittest.mock import Mock
|
||||
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
import tqdm
|
||||
from typing import Tuple, Optional, Dict, List
|
||||
from urllib.parse import quote
|
||||
|
||||
import aiodns
|
||||
import tqdm.asyncio
|
||||
from python_socks import _errors as proxy_errors
|
||||
from socid_extractor import extract
|
||||
from aiohttp import TCPConnector, ClientSession, http_exceptions
|
||||
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
|
||||
|
||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||
# Local imports
|
||||
from . import errors
|
||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||
from .errors import CheckError
|
||||
from .executors import (
|
||||
AsyncExecutor,
|
||||
AsyncioSimpleExecutor,
|
||||
AsyncioProgressbarQueueExecutor,
|
||||
)
|
||||
from .result import QueryResult, QueryStatus
|
||||
from .result import MaigretCheckResult, MaigretCheckStatus
|
||||
from .sites import MaigretDatabase, MaigretSite
|
||||
from .types import QueryOptions, QueryResultWrapper
|
||||
from .utils import get_random_user_agent, ascii_data_display
|
||||
from .utils import ascii_data_display, get_random_user_agent
|
||||
|
||||
|
||||
SUPPORTED_IDS = (
|
||||
@@ -55,118 +58,128 @@ class CheckerBase:
|
||||
|
||||
class SimpleAiohttpChecker(CheckerBase):
|
||||
def __init__(self, *args, **kwargs):
|
||||
proxy = kwargs.get('proxy')
|
||||
cookie_jar = kwargs.get('cookie_jar')
|
||||
self.proxy = kwargs.get('proxy')
|
||||
self.cookie_jar = kwargs.get('cookie_jar')
|
||||
self.logger = kwargs.get('logger', Mock())
|
||||
|
||||
# moved here to speed up the launch of Maigret
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
# make http client session
|
||||
connector = (
|
||||
ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
|
||||
)
|
||||
connector.verify_ssl = False
|
||||
self.session = ClientSession(
|
||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||
)
|
||||
self.url = None
|
||||
self.headers = None
|
||||
self.allow_redirects = True
|
||||
self.timeout = 0
|
||||
self.method = 'get'
|
||||
|
||||
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||
if method == 'get':
|
||||
request_method = self.session.get
|
||||
else:
|
||||
request_method = self.session.head
|
||||
|
||||
future = request_method(
|
||||
url=url,
|
||||
headers=headers,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
return future
|
||||
self.url = url
|
||||
self.headers = headers
|
||||
self.allow_redirects = allow_redirects
|
||||
self.timeout = timeout
|
||||
self.method = method
|
||||
return None
|
||||
|
||||
async def close(self):
|
||||
await self.session.close()
|
||||
|
||||
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
||||
html_text = None
|
||||
status_code = 0
|
||||
error: Optional[CheckError] = CheckError("Unknown")
|
||||
pass
|
||||
|
||||
async def _make_request(
|
||||
self, session, url, headers, allow_redirects, timeout, method, logger
|
||||
) -> Tuple[str, int, Optional[CheckError]]:
|
||||
try:
|
||||
response = await future
|
||||
request_method = session.get if method == 'get' else session.head
|
||||
async with request_method(
|
||||
url=url,
|
||||
headers=headers,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
) as response:
|
||||
status_code = response.status
|
||||
response_content = await response.content.read()
|
||||
charset = response.charset or "utf-8"
|
||||
decoded_content = response_content.decode(charset, "ignore")
|
||||
|
||||
status_code = response.status
|
||||
response_content = await response.content.read()
|
||||
charset = response.charset or "utf-8"
|
||||
decoded_content = response_content.decode(charset, "ignore")
|
||||
html_text = decoded_content
|
||||
error = CheckError("Connection lost") if status_code == 0 else None
|
||||
logger.debug(decoded_content)
|
||||
|
||||
error = None
|
||||
if status_code == 0:
|
||||
error = CheckError("Connection lost")
|
||||
|
||||
self.logger.debug(html_text)
|
||||
return decoded_content, status_code, error
|
||||
|
||||
except asyncio.TimeoutError as e:
|
||||
error = CheckError("Request timeout", str(e))
|
||||
return None, 0, CheckError("Request timeout", str(e))
|
||||
except ClientConnectorError as e:
|
||||
error = CheckError("Connecting failure", str(e))
|
||||
return None, 0, CheckError("Connecting failure", str(e))
|
||||
except ServerDisconnectedError as e:
|
||||
error = CheckError("Server disconnected", str(e))
|
||||
return None, 0, CheckError("Server disconnected", str(e))
|
||||
except http_exceptions.BadHttpMessage as e:
|
||||
error = CheckError("HTTP", str(e))
|
||||
return None, 0, CheckError("HTTP", str(e))
|
||||
except proxy_errors.ProxyError as e:
|
||||
error = CheckError("Proxy", str(e))
|
||||
return None, 0, CheckError("Proxy", str(e))
|
||||
except KeyboardInterrupt:
|
||||
error = CheckError("Interrupted")
|
||||
return None, 0, CheckError("Interrupted")
|
||||
except Exception as e:
|
||||
# python-specific exceptions
|
||||
if sys.version_info.minor > 6 and (
|
||||
isinstance(e, ssl.SSLCertVerificationError)
|
||||
or isinstance(e, ssl.SSLError)
|
||||
):
|
||||
error = CheckError("SSL", str(e))
|
||||
return None, 0, CheckError("SSL", str(e))
|
||||
else:
|
||||
self.logger.debug(e, exc_info=True)
|
||||
error = CheckError("Unexpected", str(e))
|
||||
logger.debug(e, exc_info=True)
|
||||
return None, 0, CheckError("Unexpected", str(e))
|
||||
|
||||
return str(html_text), status_code, error
|
||||
async def check(self) -> Tuple[str, int, Optional[CheckError]]:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
connector = (
|
||||
ProxyConnector.from_url(self.proxy)
|
||||
if self.proxy
|
||||
else TCPConnector(ssl=False)
|
||||
)
|
||||
connector.verify_ssl = False
|
||||
|
||||
async with ClientSession(
|
||||
connector=connector,
|
||||
trust_env=True,
|
||||
# TODO: tests
|
||||
cookie_jar=self.cookie_jar if self.cookie_jar else None,
|
||||
) as session:
|
||||
html_text, status_code, error = await self._make_request(
|
||||
session,
|
||||
self.url,
|
||||
self.headers,
|
||||
self.allow_redirects,
|
||||
self.timeout,
|
||||
self.method,
|
||||
self.logger,
|
||||
)
|
||||
|
||||
if error and str(error) == "Invalid proxy response":
|
||||
self.logger.debug(error, exc_info=True)
|
||||
|
||||
return str(html_text) if html_text else '', status_code, error
|
||||
|
||||
|
||||
class ProxiedAiohttpChecker(SimpleAiohttpChecker):
|
||||
def __init__(self, *args, **kwargs):
|
||||
proxy = kwargs.get('proxy')
|
||||
cookie_jar = kwargs.get('cookie_jar')
|
||||
self.proxy = kwargs.get('proxy')
|
||||
self.cookie_jar = kwargs.get('cookie_jar')
|
||||
self.logger = kwargs.get('logger', Mock())
|
||||
|
||||
# moved here to speed up the launch of Maigret
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
connector = ProxyConnector.from_url(proxy)
|
||||
connector.verify_ssl = False
|
||||
self.session = ClientSession(
|
||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||
)
|
||||
|
||||
|
||||
class AiodnsDomainResolver(CheckerBase):
|
||||
if sys.platform == 'win32': # Temporary workaround for Windows
|
||||
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
loop = asyncio.get_event_loop()
|
||||
self.logger = kwargs.get('logger', Mock())
|
||||
self.resolver = aiodns.DNSResolver(loop=loop)
|
||||
|
||||
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||
return self.resolver.query(url, 'A')
|
||||
self.url = url
|
||||
return None
|
||||
|
||||
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
||||
async def check(self) -> Tuple[str, int, Optional[CheckError]]:
|
||||
status = 404
|
||||
error = None
|
||||
text = ''
|
||||
|
||||
try:
|
||||
res = await future
|
||||
res = await self.resolver.query(self.url, 'A')
|
||||
text = str(res[0].host)
|
||||
status = 200
|
||||
except aiodns.error.DNSError:
|
||||
@@ -185,7 +198,7 @@ class CheckerMock:
|
||||
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||
return None
|
||||
|
||||
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
||||
async def check(self) -> Tuple[str, int, Optional[CheckError]]:
|
||||
await asyncio.sleep(0)
|
||||
return '', 0, None
|
||||
|
||||
@@ -272,14 +285,16 @@ def process_site_result(
|
||||
)
|
||||
|
||||
if site.activation and html_text and is_need_activation:
|
||||
logger.debug(f"Activation for {site.name}")
|
||||
method = site.activation["method"]
|
||||
try:
|
||||
activate_fun = getattr(ParsingActivator(), method)
|
||||
# TODO: async call
|
||||
activate_fun(site, logger)
|
||||
except AttributeError:
|
||||
except AttributeError as e:
|
||||
logger.warning(
|
||||
f"Activation method {method} for site {site.name} not found!"
|
||||
f"Activation method {method} for site {site.name} not found!",
|
||||
exc_info=True,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
@@ -307,7 +322,7 @@ def process_site_result(
|
||||
break
|
||||
|
||||
def build_result(status, **kwargs):
|
||||
return QueryResult(
|
||||
return MaigretCheckResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
@@ -319,11 +334,11 @@ def process_site_result(
|
||||
|
||||
if check_error:
|
||||
logger.warning(check_error)
|
||||
result = QueryResult(
|
||||
result = MaigretCheckResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.UNKNOWN,
|
||||
MaigretCheckStatus.UNKNOWN,
|
||||
query_time=response_time,
|
||||
error=check_error,
|
||||
context=str(CheckError),
|
||||
@@ -335,15 +350,15 @@ def process_site_result(
|
||||
[(absence_flag in html_text) for absence_flag in site.absence_strs]
|
||||
)
|
||||
if not is_absence_detected and is_presense_detected:
|
||||
result = build_result(QueryStatus.CLAIMED)
|
||||
result = build_result(MaigretCheckStatus.CLAIMED)
|
||||
else:
|
||||
result = build_result(QueryStatus.AVAILABLE)
|
||||
result = build_result(MaigretCheckStatus.AVAILABLE)
|
||||
elif check_type in "status_code":
|
||||
# Checks if the status code of the response is 2XX
|
||||
if 200 <= status_code < 300:
|
||||
result = build_result(QueryStatus.CLAIMED)
|
||||
result = build_result(MaigretCheckStatus.CLAIMED)
|
||||
else:
|
||||
result = build_result(QueryStatus.AVAILABLE)
|
||||
result = build_result(MaigretCheckStatus.AVAILABLE)
|
||||
elif check_type == "response_url":
|
||||
# For this detection method, we have turned off the redirect.
|
||||
# So, there is no need to check the response URL: it will always
|
||||
@@ -351,9 +366,9 @@ def process_site_result(
|
||||
# code indicates that the request was successful (i.e. no 404, or
|
||||
# forward to some odd redirect).
|
||||
if 200 <= status_code < 300 and is_presense_detected:
|
||||
result = build_result(QueryStatus.CLAIMED)
|
||||
result = build_result(MaigretCheckStatus.CLAIMED)
|
||||
else:
|
||||
result = build_result(QueryStatus.AVAILABLE)
|
||||
result = build_result(MaigretCheckStatus.AVAILABLE)
|
||||
else:
|
||||
# It should be impossible to ever get here...
|
||||
raise ValueError(
|
||||
@@ -362,25 +377,13 @@ def process_site_result(
|
||||
|
||||
extracted_ids_data = {}
|
||||
|
||||
if is_parsing_enabled and result.status == QueryStatus.CLAIMED:
|
||||
try:
|
||||
extracted_ids_data = extract(html_text)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error while parsing {site.name}: {e}", exc_info=True)
|
||||
|
||||
if is_parsing_enabled and result.status == MaigretCheckStatus.CLAIMED:
|
||||
extracted_ids_data = extract_ids_data(html_text, logger, site)
|
||||
if extracted_ids_data:
|
||||
new_usernames = {}
|
||||
for k, v in extracted_ids_data.items():
|
||||
if "username" in k:
|
||||
new_usernames[v] = "username"
|
||||
if k in SUPPORTED_IDS:
|
||||
new_usernames[v] = k
|
||||
|
||||
results_info["ids_usernames"] = new_usernames
|
||||
links = ascii_data_display(extracted_ids_data.get("links", "[]"))
|
||||
if "website" in extracted_ids_data:
|
||||
links.append(extracted_ids_data["website"])
|
||||
results_info["ids_links"] = links
|
||||
new_usernames = parse_usernames(extracted_ids_data, logger)
|
||||
results_info = update_results_info(
|
||||
results_info, extracted_ids_data, new_usernames
|
||||
)
|
||||
result.ids_data = extracted_ids_data
|
||||
|
||||
# Save status of request
|
||||
@@ -395,7 +398,7 @@ def process_site_result(
|
||||
|
||||
|
||||
def make_site_result(
|
||||
site: MaigretSite, username: str, options: QueryOptions, logger
|
||||
site: MaigretSite, username: str, options: QueryOptions, logger, *args, **kwargs
|
||||
) -> QueryResultWrapper:
|
||||
results_site: QueryResultWrapper = {}
|
||||
|
||||
@@ -412,6 +415,8 @@ def make_site_result(
|
||||
|
||||
headers = {
|
||||
"User-Agent": get_random_user_agent(),
|
||||
# tell server that we want to close connection after request
|
||||
"Connection": "close",
|
||||
}
|
||||
|
||||
headers.update(site.headers)
|
||||
@@ -419,6 +424,10 @@ def make_site_result(
|
||||
if "url" not in site.__dict__:
|
||||
logger.error("No URL for site %s", site.name)
|
||||
|
||||
if kwargs.get('retry') and hasattr(site, "mirrors"):
|
||||
site.url_main = random.choice(site.mirrors)
|
||||
logger.info(f"Use {site.url_main} as a main url of site {site}")
|
||||
|
||||
# URL of user on site (if it exists)
|
||||
url = site.url.format(
|
||||
urlMain=site.url_main, urlSubpath=site.url_subpath, username=quote(username)
|
||||
@@ -433,29 +442,29 @@ def make_site_result(
|
||||
# site check is disabled
|
||||
if site.disabled and not options['forced']:
|
||||
logger.debug(f"Site {site.name} is disabled, skipping...")
|
||||
results_site["status"] = QueryResult(
|
||||
results_site["status"] = MaigretCheckResult(
|
||||
username,
|
||||
site.name,
|
||||
url,
|
||||
QueryStatus.ILLEGAL,
|
||||
MaigretCheckStatus.ILLEGAL,
|
||||
error=CheckError("Check is disabled"),
|
||||
)
|
||||
# current username type could not be applied
|
||||
elif site.type != options["id_type"]:
|
||||
results_site["status"] = QueryResult(
|
||||
results_site["status"] = MaigretCheckResult(
|
||||
username,
|
||||
site.name,
|
||||
url,
|
||||
QueryStatus.ILLEGAL,
|
||||
MaigretCheckStatus.ILLEGAL,
|
||||
error=CheckError('Unsupported identifier type', f'Want "{site.type}"'),
|
||||
)
|
||||
# username is not allowed.
|
||||
elif site.regex_check and re.search(site.regex_check, username) is None:
|
||||
results_site["status"] = QueryResult(
|
||||
results_site["status"] = MaigretCheckResult(
|
||||
username,
|
||||
site.name,
|
||||
url,
|
||||
QueryStatus.ILLEGAL,
|
||||
MaigretCheckStatus.ILLEGAL,
|
||||
error=CheckError(
|
||||
'Unsupported username format', f'Want "{site.regex_check}"'
|
||||
),
|
||||
@@ -514,7 +523,8 @@ def make_site_result(
|
||||
|
||||
# Store future request object in the results object
|
||||
results_site["future"] = future
|
||||
results_site["checker"] = checker
|
||||
|
||||
results_site["checker"] = checker
|
||||
|
||||
return results_site
|
||||
|
||||
@@ -522,14 +532,19 @@ def make_site_result(
|
||||
async def check_site_for_username(
|
||||
site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
|
||||
) -> Tuple[str, QueryResultWrapper]:
|
||||
default_result = make_site_result(site, username, options, logger)
|
||||
future = default_result.get("future")
|
||||
if not future:
|
||||
default_result = make_site_result(
|
||||
site, username, options, logger, retry=kwargs.get('retry')
|
||||
)
|
||||
# future = default_result.get("future")
|
||||
# if not future:
|
||||
# return site.name, default_result
|
||||
|
||||
checker = default_result.get("checker")
|
||||
if not checker:
|
||||
print(f"error, no checker for {site.name}")
|
||||
return site.name, default_result
|
||||
|
||||
checker = default_result["checker"]
|
||||
|
||||
response = await checker.check(future=future)
|
||||
response = await checker.check()
|
||||
|
||||
response_result = process_site_result(
|
||||
response, query_notify, logger, default_result, site
|
||||
@@ -541,8 +556,8 @@ async def check_site_for_username(
|
||||
|
||||
|
||||
async def debug_ip_request(checker, logger):
|
||||
future = checker.prepare(url="https://icanhazip.com")
|
||||
ip, status, check_error = await checker.check(future)
|
||||
checker.prepare(url="https://icanhazip.com")
|
||||
ip, status, check_error = await checker.check()
|
||||
if ip:
|
||||
logger.debug(f"My IP is: {ip.strip()}")
|
||||
else:
|
||||
@@ -578,6 +593,8 @@ async def maigret(
|
||||
cookies=None,
|
||||
retries=0,
|
||||
check_domains=False,
|
||||
*args,
|
||||
**kwargs,
|
||||
) -> QueryResultWrapper:
|
||||
"""Main search func
|
||||
|
||||
@@ -595,7 +612,7 @@ async def maigret(
|
||||
is_parsing_enabled -- Extract additional info from account pages.
|
||||
id_type -- Type of username to search.
|
||||
Default is 'username', see all supported here:
|
||||
https://github.com/soxoj/maigret/wiki/Supported-identifier-types
|
||||
https://maigret.readthedocs.io/en/latest/supported-identifier-types.html
|
||||
max_connections -- Maximum number of concurrent connections allowed.
|
||||
Default is 100.
|
||||
no_progressbar -- Displaying of ASCII progressbar during scanner.
|
||||
@@ -655,10 +672,15 @@ async def maigret(
|
||||
# setup parallel executor
|
||||
executor: Optional[AsyncExecutor] = None
|
||||
if no_progressbar:
|
||||
# TODO: switch to AsyncioProgressbarQueueExecutor with progress object mock
|
||||
executor = AsyncioSimpleExecutor(logger=logger)
|
||||
else:
|
||||
executor = AsyncioProgressbarQueueExecutor(
|
||||
logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
|
||||
logger=logger,
|
||||
in_parallel=max_connections,
|
||||
timeout=timeout + 0.5,
|
||||
*args,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# make options objects for all the requests
|
||||
@@ -689,18 +711,21 @@ async def maigret(
|
||||
continue
|
||||
default_result: QueryResultWrapper = {
|
||||
'site': site,
|
||||
'status': QueryResult(
|
||||
'status': MaigretCheckResult(
|
||||
username,
|
||||
sitename,
|
||||
'',
|
||||
QueryStatus.UNKNOWN,
|
||||
MaigretCheckStatus.UNKNOWN,
|
||||
error=CheckError('Request failed'),
|
||||
),
|
||||
}
|
||||
tasks_dict[sitename] = (
|
||||
check_site_for_username,
|
||||
[site, username, options, logger, query_notify],
|
||||
{'default': (sitename, default_result)},
|
||||
{
|
||||
'default': (sitename, default_result),
|
||||
'retry': retries - attempts + 1,
|
||||
},
|
||||
)
|
||||
|
||||
cur_results = await executor.run(tasks_dict.values())
|
||||
@@ -723,10 +748,8 @@ async def maigret(
|
||||
|
||||
# closing http client session
|
||||
await clearweb_checker.close()
|
||||
if tor_proxy:
|
||||
await tor_checker.close()
|
||||
if i2p_proxy:
|
||||
await i2p_checker.close()
|
||||
await tor_checker.close()
|
||||
await i2p_checker.close()
|
||||
|
||||
# notify caller that all queries are finished
|
||||
query_notify.finish()
|
||||
@@ -761,20 +784,23 @@ def timeout_check(value):
|
||||
|
||||
async def site_self_check(
|
||||
site: MaigretSite,
|
||||
logger,
|
||||
logger: logging.Logger,
|
||||
semaphore,
|
||||
db: MaigretDatabase,
|
||||
silent=False,
|
||||
proxy=None,
|
||||
tor_proxy=None,
|
||||
i2p_proxy=None,
|
||||
skip_errors=False,
|
||||
cookies=None,
|
||||
):
|
||||
changes = {
|
||||
"disabled": False,
|
||||
}
|
||||
|
||||
check_data = [
|
||||
(site.username_claimed, QueryStatus.CLAIMED),
|
||||
(site.username_unclaimed, QueryStatus.AVAILABLE),
|
||||
(site.username_claimed, MaigretCheckStatus.CLAIMED),
|
||||
(site.username_unclaimed, MaigretCheckStatus.AVAILABLE),
|
||||
]
|
||||
|
||||
logger.info(f"Checking {site.name}...")
|
||||
@@ -790,8 +816,10 @@ async def site_self_check(
|
||||
forced=True,
|
||||
no_progressbar=True,
|
||||
retries=1,
|
||||
proxy=proxy,
|
||||
tor_proxy=tor_proxy,
|
||||
i2p_proxy=i2p_proxy,
|
||||
cookies=cookies,
|
||||
)
|
||||
|
||||
# don't disable entries with other ids types
|
||||
@@ -805,19 +833,27 @@ async def site_self_check(
|
||||
|
||||
result = results_dict[site.name]["status"]
|
||||
|
||||
if result.error and 'Cannot connect to host' in result.error.desc:
|
||||
changes["disabled"] = True
|
||||
|
||||
site_status = result.status
|
||||
|
||||
if site_status != status:
|
||||
if site_status == QueryStatus.UNKNOWN:
|
||||
if site_status == MaigretCheckStatus.UNKNOWN:
|
||||
msgs = site.absence_strs
|
||||
etype = site.check_type
|
||||
logger.warning(
|
||||
f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}"
|
||||
)
|
||||
# don't disable sites after the error
|
||||
# meaning that the site could be available, but returned error for the check
|
||||
# e.g. many sites protected by cloudflare and available in general
|
||||
if skip_errors:
|
||||
pass
|
||||
# don't disable in case of available username
|
||||
if status == QueryStatus.CLAIMED:
|
||||
elif status == MaigretCheckStatus.CLAIMED:
|
||||
changes["disabled"] = True
|
||||
elif status == QueryStatus.CLAIMED:
|
||||
elif status == MaigretCheckStatus.CLAIMED:
|
||||
logger.warning(
|
||||
f"Not found `{username}` in {site.name}, must be claimed"
|
||||
)
|
||||
@@ -832,20 +868,27 @@ async def site_self_check(
|
||||
|
||||
if changes["disabled"] != site.disabled:
|
||||
site.disabled = changes["disabled"]
|
||||
logger.info(f"Switching property 'disabled' for {site.name} to {site.disabled}")
|
||||
db.update_site(site)
|
||||
if not silent:
|
||||
action = "Disabled" if site.disabled else "Enabled"
|
||||
print(f"{action} site {site.name}...")
|
||||
|
||||
# remove service tag "unchecked"
|
||||
if "unchecked" in site.tags:
|
||||
site.tags.remove("unchecked")
|
||||
db.update_site(site)
|
||||
|
||||
return changes
|
||||
|
||||
|
||||
async def self_check(
|
||||
db: MaigretDatabase,
|
||||
site_data: dict,
|
||||
logger,
|
||||
logger: logging.Logger,
|
||||
silent=False,
|
||||
max_connections=10,
|
||||
proxy=None,
|
||||
tor_proxy=None,
|
||||
i2p_proxy=None,
|
||||
) -> bool:
|
||||
@@ -856,31 +899,79 @@ async def self_check(
|
||||
def disabled_count(lst):
|
||||
return len(list(filter(lambda x: x.disabled, lst)))
|
||||
|
||||
unchecked_old_count = len(
|
||||
[site for site in all_sites.values() if "unchecked" in site.tags]
|
||||
)
|
||||
disabled_old_count = disabled_count(all_sites.values())
|
||||
|
||||
for _, site in all_sites.items():
|
||||
check_coro = site_self_check(
|
||||
site, logger, sem, db, silent, tor_proxy, i2p_proxy
|
||||
site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy, skip_errors=True
|
||||
)
|
||||
future = asyncio.ensure_future(check_coro)
|
||||
tasks.append(future)
|
||||
|
||||
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
||||
await f
|
||||
if tasks:
|
||||
with alive_bar(len(tasks), title='Self-checking', force_tty=True) as progress:
|
||||
for f in asyncio.as_completed(tasks):
|
||||
await f
|
||||
progress() # Update the progress bar
|
||||
|
||||
unchecked_new_count = len(
|
||||
[site for site in all_sites.values() if "unchecked" in site.tags]
|
||||
)
|
||||
disabled_new_count = disabled_count(all_sites.values())
|
||||
total_disabled = disabled_new_count - disabled_old_count
|
||||
|
||||
if total_disabled >= 0:
|
||||
message = "Disabled"
|
||||
else:
|
||||
message = "Enabled"
|
||||
total_disabled *= -1
|
||||
if total_disabled:
|
||||
if total_disabled >= 0:
|
||||
message = "Disabled"
|
||||
else:
|
||||
message = "Enabled"
|
||||
total_disabled *= -1
|
||||
|
||||
if not silent:
|
||||
print(
|
||||
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
||||
"Run with `--info` flag to get more information"
|
||||
)
|
||||
if not silent:
|
||||
print(
|
||||
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
||||
"Run with `--info` flag to get more information"
|
||||
)
|
||||
|
||||
return total_disabled != 0
|
||||
if unchecked_new_count != unchecked_old_count:
|
||||
print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}")
|
||||
|
||||
return total_disabled != 0 or unchecked_new_count != unchecked_old_count
|
||||
|
||||
|
||||
def extract_ids_data(html_text, logger, site) -> Dict:
|
||||
try:
|
||||
return extract(html_text)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error while parsing {site.name}: {e}", exc_info=True)
|
||||
return {}
|
||||
|
||||
|
||||
def parse_usernames(extracted_ids_data, logger) -> Dict:
|
||||
new_usernames = {}
|
||||
for k, v in extracted_ids_data.items():
|
||||
if "username" in k and not "usernames" in k:
|
||||
new_usernames[v] = "username"
|
||||
elif "usernames" in k:
|
||||
try:
|
||||
tree = ast.literal_eval(v)
|
||||
if type(tree) == list:
|
||||
for n in tree:
|
||||
new_usernames[n] = "username"
|
||||
except Exception as e:
|
||||
logger.warning(e)
|
||||
if k in SUPPORTED_IDS:
|
||||
new_usernames[v] = k
|
||||
return new_usernames
|
||||
|
||||
|
||||
def update_results_info(results_info, extracted_ids_data, new_usernames):
|
||||
results_info["ids_usernames"] = new_usernames
|
||||
links = ascii_data_display(extracted_ids_data.get("links", "[]"))
|
||||
if "website" in extracted_ids_data:
|
||||
links.append(extracted_ids_data["website"])
|
||||
results_info["ids_links"] = links
|
||||
return results_info
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import Dict, List, Any
|
||||
from typing import Dict, List, Any, Tuple
|
||||
|
||||
from .result import QueryResult
|
||||
from .result import MaigretCheckResult
|
||||
from .types import QueryResultWrapper
|
||||
|
||||
|
||||
@@ -58,13 +58,18 @@ COMMON_ERRORS = {
|
||||
'Сайт заблокирован хостинг-провайдером': CheckError(
|
||||
'Site-specific', 'Site is disabled (Beget)'
|
||||
),
|
||||
'Generated by cloudfront (CloudFront)': CheckError('Request blocked', 'Cloudflare'),
|
||||
'/cdn-cgi/challenge-platform/h/b/orchestrate/chl_page': CheckError(
|
||||
'Just a moment: bot redirect challenge', 'Cloudflare'
|
||||
),
|
||||
}
|
||||
|
||||
ERRORS_TYPES = {
|
||||
'Captcha': 'Try to switch to another IP address or to use service cookies',
|
||||
'Bot protection': 'Try to switch to another IP address',
|
||||
'Censorship': 'switch to another internet service provider',
|
||||
'Censorship': 'Switch to another internet service provider',
|
||||
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
||||
'Connecting failure': 'Try to decrease number of parallel connections (e.g. -n 10)',
|
||||
}
|
||||
|
||||
# TODO: checking for reason
|
||||
@@ -109,7 +114,7 @@ def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]:
|
||||
errors_counts: Dict[str, int] = {}
|
||||
for r in search_res.values():
|
||||
if r and isinstance(r, dict) and r.get('status'):
|
||||
if not isinstance(r['status'], QueryResult):
|
||||
if not isinstance(r['status'], MaigretCheckResult):
|
||||
continue
|
||||
|
||||
err = r['status'].error
|
||||
@@ -128,3 +133,45 @@ def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]:
|
||||
)
|
||||
|
||||
return counts
|
||||
|
||||
|
||||
def notify_about_errors(
|
||||
search_results: QueryResultWrapper, query_notify, show_statistics=False
|
||||
) -> List[Tuple]:
|
||||
"""
|
||||
Prepare error notifications in search results, text + symbol,
|
||||
to be displayed by notify object.
|
||||
|
||||
Example:
|
||||
[
|
||||
("Too many errors of type "timeout" (50.0%)", "!")
|
||||
("Verbose error statistics:", "-")
|
||||
]
|
||||
"""
|
||||
results = []
|
||||
|
||||
errs = extract_and_group(search_results)
|
||||
was_errs_displayed = False
|
||||
for e in errs:
|
||||
if not is_important(e):
|
||||
continue
|
||||
text = f'Too many errors of type "{e["err"]}" ({round(e["perc"],2)}%)'
|
||||
solution = solution_of(e['err'])
|
||||
if solution:
|
||||
text = '. '.join([text, solution.capitalize()])
|
||||
|
||||
results.append((text, '!'))
|
||||
was_errs_displayed = True
|
||||
|
||||
if show_statistics:
|
||||
results.append(('Verbose error statistics:', '-'))
|
||||
for e in errs:
|
||||
text = f'{e["err"]}: {round(e["perc"],2)}%'
|
||||
results.append((text, '!'))
|
||||
|
||||
if was_errs_displayed:
|
||||
results.append(
|
||||
('You can see detailed site check errors with a flag `--print-errors`', '-')
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import asyncio
|
||||
import time
|
||||
import tqdm
|
||||
import sys
|
||||
from typing import Iterable, Any, List
|
||||
import time
|
||||
from typing import Any, Iterable, List
|
||||
|
||||
import alive_progress
|
||||
from alive_progress import alive_bar
|
||||
|
||||
from .types import QueryDraft
|
||||
|
||||
@@ -34,9 +36,14 @@ class AsyncExecutor:
|
||||
class AsyncioSimpleExecutor(AsyncExecutor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 100))
|
||||
|
||||
async def _run(self, tasks: Iterable[QueryDraft]):
|
||||
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
||||
async def sem_task(f, args, kwargs):
|
||||
async with self.semaphore:
|
||||
return await f(*args, **kwargs)
|
||||
|
||||
futures = [sem_task(f, args, kwargs) for f, args, kwargs in tasks]
|
||||
return await asyncio.gather(*futures)
|
||||
|
||||
|
||||
@@ -46,9 +53,20 @@ class AsyncioProgressbarExecutor(AsyncExecutor):
|
||||
|
||||
async def _run(self, tasks: Iterable[QueryDraft]):
|
||||
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
||||
total_tasks = len(futures)
|
||||
results = []
|
||||
for f in tqdm.asyncio.tqdm.as_completed(futures):
|
||||
results.append(await f)
|
||||
|
||||
# Use alive_bar for progress tracking
|
||||
with alive_bar(total_tasks, title='Searching', force_tty=True) as progress:
|
||||
# Chunk progress updates for efficiency
|
||||
async def track_task(task):
|
||||
result = await task
|
||||
progress() # Update progress bar once task completes
|
||||
return result
|
||||
|
||||
# Use gather to run tasks concurrently and track progress
|
||||
results = await asyncio.gather(*(track_task(f) for f in futures))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
@@ -66,8 +84,12 @@ class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor):
|
||||
async def semaphore_gather(tasks: Iterable[QueryDraft]):
|
||||
coros = [_wrap_query(q) for q in tasks]
|
||||
results = []
|
||||
for f in tqdm.asyncio.tqdm.as_completed(coros):
|
||||
results.append(await f)
|
||||
|
||||
# Use alive_bar correctly as a context manager
|
||||
with alive_bar(len(coros), title='Searching', force_tty=True) as progress:
|
||||
for f in asyncio.as_completed(coros):
|
||||
results.append(await f)
|
||||
progress() # Update the progress bar
|
||||
return results
|
||||
|
||||
return await semaphore_gather(tasks)
|
||||
@@ -77,11 +99,35 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.workers_count = kwargs.get('in_parallel', 10)
|
||||
self.progress_func = kwargs.get('progress_func', tqdm.tqdm)
|
||||
self.queue = asyncio.Queue(self.workers_count)
|
||||
self.timeout = kwargs.get('timeout')
|
||||
# Pass a progress function; alive_bar by default
|
||||
self.progress_func = kwargs.get('progress_func', alive_bar)
|
||||
self.progress = None
|
||||
|
||||
# TODO: tests
|
||||
async def increment_progress(self, count):
|
||||
"""Update progress by calling the provided progress function."""
|
||||
if self.progress:
|
||||
if asyncio.iscoroutinefunction(self.progress):
|
||||
await self.progress(count)
|
||||
else:
|
||||
self.progress(count)
|
||||
await asyncio.sleep(0)
|
||||
|
||||
# TODO: tests
|
||||
async def stop_progress(self):
|
||||
"""Stop the progress tracking."""
|
||||
if hasattr(self.progress, "close") and self.progress:
|
||||
close_func = self.progress.close
|
||||
if asyncio.iscoroutinefunction(close_func):
|
||||
await close_func()
|
||||
else:
|
||||
close_func()
|
||||
await asyncio.sleep(0)
|
||||
|
||||
async def worker(self):
|
||||
"""Consume tasks from the queue and process them."""
|
||||
while True:
|
||||
try:
|
||||
f, args, kwargs = self.queue.get_nowait()
|
||||
@@ -96,23 +142,35 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
result = kwargs.get('default')
|
||||
|
||||
self.results.append(result)
|
||||
self.progress.update(1)
|
||||
|
||||
if self.progress:
|
||||
await self.increment_progress(1)
|
||||
|
||||
self.queue.task_done()
|
||||
|
||||
async def _run(self, queries: Iterable[QueryDraft]):
|
||||
"""Main runner function to execute tasks with progress tracking."""
|
||||
self.results: List[Any] = []
|
||||
|
||||
queries_list = list(queries)
|
||||
|
||||
min_workers = min(len(queries_list), self.workers_count)
|
||||
|
||||
workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
|
||||
|
||||
self.progress = self.progress_func(total=len(queries_list))
|
||||
for t in queries_list:
|
||||
await self.queue.put(t)
|
||||
await self.queue.join()
|
||||
for w in workers:
|
||||
w.cancel()
|
||||
self.progress.close()
|
||||
# Initialize the progress bar
|
||||
if self.progress_func:
|
||||
with self.progress_func(
|
||||
len(queries_list), title="Searching", force_tty=True
|
||||
) as bar:
|
||||
self.progress = bar # Assign alive_bar's callable to self.progress
|
||||
|
||||
# Add tasks to the queue
|
||||
for t in queries_list:
|
||||
await self.queue.put(t)
|
||||
|
||||
# Wait for tasks to complete
|
||||
await self.queue.join()
|
||||
|
||||
# Cancel any remaining workers
|
||||
for w in workers:
|
||||
w.cancel()
|
||||
|
||||
return self.results
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
"""
|
||||
Maigret main module
|
||||
"""
|
||||
|
||||
import ast
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import platform
|
||||
import re
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
from typing import List, Tuple
|
||||
import os.path as path
|
||||
|
||||
from socid_extractor import extract, parse
|
||||
|
||||
@@ -39,26 +43,7 @@ from .submit import Submitter
|
||||
from .types import QueryResultWrapper
|
||||
from .utils import get_dict_ascii_tree
|
||||
from .settings import Settings
|
||||
|
||||
|
||||
def notify_about_errors(search_results: QueryResultWrapper, query_notify):
|
||||
errs = errors.extract_and_group(search_results)
|
||||
was_errs_displayed = False
|
||||
for e in errs:
|
||||
if not errors.is_important(e):
|
||||
continue
|
||||
text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
|
||||
solution = errors.solution_of(e['err'])
|
||||
if solution:
|
||||
text = '. '.join([text, solution.capitalize()])
|
||||
|
||||
query_notify.warning(text, '!')
|
||||
was_errs_displayed = True
|
||||
|
||||
if was_errs_displayed:
|
||||
query_notify.warning(
|
||||
'You can see detailed site check errors with a flag `--print-errors`'
|
||||
)
|
||||
from .permutator import Permute
|
||||
|
||||
|
||||
def extract_ids_from_page(url, logger, timeout=5) -> dict:
|
||||
@@ -84,8 +69,17 @@ def extract_ids_from_page(url, logger, timeout=5) -> dict:
|
||||
else:
|
||||
print(get_dict_ascii_tree(info.items(), new_line=False), ' ')
|
||||
for k, v in info.items():
|
||||
if 'username' in k:
|
||||
# TODO: merge with the same functionality in checking module
|
||||
if 'username' in k and not 'usernames' in k:
|
||||
results[v] = 'username'
|
||||
elif 'usernames' in k:
|
||||
try:
|
||||
tree = ast.literal_eval(v)
|
||||
if type(tree) == list:
|
||||
for n in tree:
|
||||
results[n] = 'username'
|
||||
except Exception as e:
|
||||
logger.warning(e)
|
||||
if k in SUPPORTED_IDS:
|
||||
results[v] = k
|
||||
|
||||
@@ -111,7 +105,7 @@ def extract_ids_from_results(results: QueryResultWrapper, db: MaigretDatabase) -
|
||||
return ids_results
|
||||
|
||||
|
||||
def setup_arguments_parser():
|
||||
def setup_arguments_parser(settings: Settings):
|
||||
from aiohttp import __version__ as aiohttp_version
|
||||
from requests import __version__ as requests_version
|
||||
from socid_extractor import __version__ as socid_version
|
||||
@@ -128,7 +122,9 @@ def setup_arguments_parser():
|
||||
|
||||
parser = ArgumentParser(
|
||||
formatter_class=RawDescriptionHelpFormatter,
|
||||
description=f"Maigret v{__version__}",
|
||||
description=f"Maigret v{__version__}\n"
|
||||
"Documentation: https://maigret.readthedocs.io/\n"
|
||||
"All settings are also configurable through files, see docs.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"username",
|
||||
@@ -148,9 +144,9 @@ def setup_arguments_parser():
|
||||
metavar='TIMEOUT',
|
||||
dest="timeout",
|
||||
type=timeout_check,
|
||||
default=30,
|
||||
help="Time in seconds to wait for response to requests. "
|
||||
"Default timeout of 30.0s. "
|
||||
default=settings.timeout,
|
||||
help="Time in seconds to wait for response to requests "
|
||||
f"(default {settings.timeout}s). "
|
||||
"A longer timeout will be more likely to get results from slow sites. "
|
||||
"On the other hand, this may cause a long delay to gather all results. ",
|
||||
)
|
||||
@@ -159,7 +155,7 @@ def setup_arguments_parser():
|
||||
action="store",
|
||||
type=int,
|
||||
metavar='RETRIES',
|
||||
default=1,
|
||||
default=settings.retries_count,
|
||||
help="Attempts to restart temporarily failed requests.",
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -168,21 +164,21 @@ def setup_arguments_parser():
|
||||
action="store",
|
||||
type=int,
|
||||
dest="connections",
|
||||
default=100,
|
||||
help="Allowed number of concurrent connections.",
|
||||
default=settings.max_connections,
|
||||
help=f"Allowed number of concurrent connections (default {settings.max_connections}).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-recursion",
|
||||
action="store_true",
|
||||
dest="disable_recursive_search",
|
||||
default=False,
|
||||
default=(not settings.recursive_search),
|
||||
help="Disable recursive search by additional data extracted from pages.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-extracting",
|
||||
action="store_true",
|
||||
dest="disable_extracting",
|
||||
default=False,
|
||||
default=(not settings.info_extracting),
|
||||
help="Disable parsing pages for additional data and other usernames.",
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -192,18 +188,24 @@ def setup_arguments_parser():
|
||||
choices=SUPPORTED_IDS,
|
||||
help="Specify identifier(s) type (default: username).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--permute",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Permute at least 2 usernames to generate more possible usernames.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--db",
|
||||
metavar="DB_FILE",
|
||||
dest="db_file",
|
||||
default=None,
|
||||
default=settings.sites_db_path,
|
||||
help="Load Maigret database from a JSON file or HTTP web resource.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cookies-jar-file",
|
||||
metavar="COOKIE_FILE",
|
||||
dest="cookie_file",
|
||||
default=None,
|
||||
default=settings.cookie_jar_file,
|
||||
help="File with cookies.",
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -211,7 +213,7 @@ def setup_arguments_parser():
|
||||
action="append",
|
||||
metavar='IGNORED_IDS',
|
||||
dest="ignore_ids_list",
|
||||
default=[],
|
||||
default=settings.ignore_ids_list,
|
||||
help="Do not make search by the specified username or other ids.",
|
||||
)
|
||||
# reports options
|
||||
@@ -219,7 +221,7 @@ def setup_arguments_parser():
|
||||
"--folderoutput",
|
||||
"-fo",
|
||||
dest="folderoutput",
|
||||
default="reports",
|
||||
default=settings.reports_path,
|
||||
metavar="PATH",
|
||||
help="If using multiple usernames, the output of the results will be saved to this folder.",
|
||||
)
|
||||
@@ -229,27 +231,27 @@ def setup_arguments_parser():
|
||||
metavar='PROXY_URL',
|
||||
action="store",
|
||||
dest="proxy",
|
||||
default=None,
|
||||
default=settings.proxy_url,
|
||||
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tor-proxy",
|
||||
metavar='TOR_PROXY_URL',
|
||||
action="store",
|
||||
default='socks5://127.0.0.1:9050',
|
||||
default=settings.tor_proxy_url,
|
||||
help="Specify URL of your Tor gateway. Default is socks5://127.0.0.1:9050",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--i2p-proxy",
|
||||
metavar='I2P_PROXY_URL',
|
||||
action="store",
|
||||
default='http://127.0.0.1:4444',
|
||||
default=settings.i2p_proxy_url,
|
||||
help="Specify URL of your I2P gateway. Default is http://127.0.0.1:4444",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--with-domains",
|
||||
action="store_true",
|
||||
default=False,
|
||||
default=settings.domain_search,
|
||||
help="Enable (experimental) feature of checking domains on usernames.",
|
||||
)
|
||||
|
||||
@@ -261,13 +263,13 @@ def setup_arguments_parser():
|
||||
"--all-sites",
|
||||
action="store_true",
|
||||
dest="all_sites",
|
||||
default=False,
|
||||
default=settings.scan_all_sites,
|
||||
help="Use all sites for scan.",
|
||||
)
|
||||
filter_group.add_argument(
|
||||
"--top-sites",
|
||||
action="store",
|
||||
default=500,
|
||||
default=settings.top_sites_count,
|
||||
metavar="N",
|
||||
type=int,
|
||||
help="Count of sites for scan ranked by Alexa Top (default: 500).",
|
||||
@@ -280,13 +282,13 @@ def setup_arguments_parser():
|
||||
action="append",
|
||||
metavar='SITE_NAME',
|
||||
dest="site_list",
|
||||
default=[],
|
||||
default=settings.scan_sites_list,
|
||||
help="Limit analysis to just the specified sites (multiple option).",
|
||||
)
|
||||
filter_group.add_argument(
|
||||
"--use-disabled-sites",
|
||||
action="store_true",
|
||||
default=False,
|
||||
default=settings.scan_disabled_sites,
|
||||
help="Use disabled sites to search (may cause many false positives).",
|
||||
)
|
||||
|
||||
@@ -313,7 +315,7 @@ def setup_arguments_parser():
|
||||
modes_group.add_argument(
|
||||
"--self-check",
|
||||
action="store_true",
|
||||
default=False,
|
||||
default=settings.self_check_enabled,
|
||||
help="Do self check for sites and database and disable non-working ones.",
|
||||
)
|
||||
modes_group.add_argument(
|
||||
@@ -322,7 +324,15 @@ def setup_arguments_parser():
|
||||
default=False,
|
||||
help="Show database statistics (most frequent sites engines and tags).",
|
||||
)
|
||||
|
||||
modes_group.add_argument(
|
||||
"--web",
|
||||
metavar='PORT',
|
||||
type=int,
|
||||
nargs='?',
|
||||
const=5000, # default if --web is provided without a port
|
||||
default=settings.web_interface_port,
|
||||
help="Launches the web interface on the specified port (default: 5000 if no PORT is provided).",
|
||||
)
|
||||
output_group = parser.add_argument_group(
|
||||
'Output options', 'Options to change verbosity and view of the console output'
|
||||
)
|
||||
@@ -330,14 +340,14 @@ def setup_arguments_parser():
|
||||
"--print-not-found",
|
||||
action="store_true",
|
||||
dest="print_not_found",
|
||||
default=False,
|
||||
default=settings.print_not_found,
|
||||
help="Print sites where the username was not found.",
|
||||
)
|
||||
output_group.add_argument(
|
||||
"--print-errors",
|
||||
action="store_true",
|
||||
dest="print_check_errors",
|
||||
default=False,
|
||||
default=settings.print_check_errors,
|
||||
help="Print errors messages: connection, captcha, site country ban, etc.",
|
||||
)
|
||||
output_group.add_argument(
|
||||
@@ -369,14 +379,14 @@ def setup_arguments_parser():
|
||||
"--no-color",
|
||||
action="store_true",
|
||||
dest="no_color",
|
||||
default=False,
|
||||
default=(not settings.colored_print),
|
||||
help="Don't color terminal output",
|
||||
)
|
||||
output_group.add_argument(
|
||||
"--no-progressbar",
|
||||
action="store_true",
|
||||
dest="no_progressbar",
|
||||
default=False,
|
||||
default=(not settings.show_progressbar),
|
||||
help="Don't show progressbar.",
|
||||
)
|
||||
|
||||
@@ -388,7 +398,7 @@ def setup_arguments_parser():
|
||||
"--txt",
|
||||
action="store_true",
|
||||
dest="txt",
|
||||
default=False,
|
||||
default=settings.txt_report,
|
||||
help="Create a TXT report (one report per username).",
|
||||
)
|
||||
report_group.add_argument(
|
||||
@@ -396,7 +406,7 @@ def setup_arguments_parser():
|
||||
"--csv",
|
||||
action="store_true",
|
||||
dest="csv",
|
||||
default=False,
|
||||
default=settings.csv_report,
|
||||
help="Create a CSV report (one report per username).",
|
||||
)
|
||||
report_group.add_argument(
|
||||
@@ -404,7 +414,7 @@ def setup_arguments_parser():
|
||||
"--html",
|
||||
action="store_true",
|
||||
dest="html",
|
||||
default=False,
|
||||
default=settings.html_report,
|
||||
help="Create an HTML report file (general report on all usernames).",
|
||||
)
|
||||
report_group.add_argument(
|
||||
@@ -412,7 +422,7 @@ def setup_arguments_parser():
|
||||
"--xmind",
|
||||
action="store_true",
|
||||
dest="xmind",
|
||||
default=False,
|
||||
default=settings.xmind_report,
|
||||
help="Generate an XMind 8 mindmap report (one report per username).",
|
||||
)
|
||||
report_group.add_argument(
|
||||
@@ -420,7 +430,7 @@ def setup_arguments_parser():
|
||||
"--pdf",
|
||||
action="store_true",
|
||||
dest="pdf",
|
||||
default=False,
|
||||
default=settings.pdf_report,
|
||||
help="Generate a PDF report (general report on all usernames).",
|
||||
)
|
||||
report_group.add_argument(
|
||||
@@ -428,7 +438,7 @@ def setup_arguments_parser():
|
||||
"--graph",
|
||||
action="store_true",
|
||||
dest="graph",
|
||||
default=False,
|
||||
default=settings.graph_report,
|
||||
help="Generate a graph report (general report on all usernames).",
|
||||
)
|
||||
report_group.add_argument(
|
||||
@@ -437,7 +447,7 @@ def setup_arguments_parser():
|
||||
action="store",
|
||||
metavar='TYPE',
|
||||
dest="json",
|
||||
default='',
|
||||
default=settings.json_report_type,
|
||||
choices=SUPPORTED_JSON_REPORT_FORMATS,
|
||||
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
|
||||
" (one report per username).",
|
||||
@@ -445,7 +455,7 @@ def setup_arguments_parser():
|
||||
|
||||
parser.add_argument(
|
||||
"--reports-sorting",
|
||||
default='default',
|
||||
default=settings.report_sorting,
|
||||
choices=('default', 'data'),
|
||||
help="Method of results sorting in reports (default: in order of getting the result)",
|
||||
)
|
||||
@@ -453,9 +463,6 @@ def setup_arguments_parser():
|
||||
|
||||
|
||||
async def main():
|
||||
arg_parser = setup_arguments_parser()
|
||||
args = arg_parser.parse_args()
|
||||
|
||||
# Logging
|
||||
log_level = logging.ERROR
|
||||
logging.basicConfig(
|
||||
@@ -463,23 +470,47 @@ async def main():
|
||||
datefmt='%H:%M:%S',
|
||||
level=log_level,
|
||||
)
|
||||
logger = logging.getLogger('maigret')
|
||||
logger.setLevel(log_level)
|
||||
|
||||
# Load settings
|
||||
settings = Settings()
|
||||
settings_loaded, err = settings.load()
|
||||
|
||||
if not settings_loaded:
|
||||
logger.error(err)
|
||||
sys.exit(3)
|
||||
|
||||
arg_parser = setup_arguments_parser(settings)
|
||||
args = arg_parser.parse_args()
|
||||
|
||||
# Re-set logging level based on args
|
||||
if args.debug:
|
||||
log_level = logging.DEBUG
|
||||
elif args.info:
|
||||
log_level = logging.INFO
|
||||
elif args.verbose:
|
||||
log_level = logging.WARNING
|
||||
|
||||
logger = logging.getLogger('maigret')
|
||||
logger.setLevel(log_level)
|
||||
|
||||
if args.web is not None:
|
||||
from maigret.web.app import app
|
||||
|
||||
port = (
|
||||
args.web if args.web else 5000
|
||||
) # args.web is either the specified port or 5000 by const
|
||||
app.run(port=port)
|
||||
|
||||
# Usernames initial list
|
||||
usernames = {
|
||||
u: args.id_type
|
||||
for u in args.username
|
||||
if u and u not in ['-'] and u not in args.ignore_ids_list
|
||||
}
|
||||
original_usernames = ""
|
||||
if args.permute and len(usernames) > 1 and args.id_type == 'username':
|
||||
original_usernames = " ".join(usernames.keys())
|
||||
usernames = Permute(usernames).gather(method='strict')
|
||||
|
||||
parsing_enabled = not args.disable_extracting
|
||||
recursive_search_enabled = not args.disable_recursive_search
|
||||
@@ -497,16 +528,7 @@ async def main():
|
||||
if args.tags:
|
||||
args.tags = list(set(str(args.tags).split(',')))
|
||||
|
||||
settings = Settings(
|
||||
os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)), "resources/settings.json"
|
||||
)
|
||||
)
|
||||
|
||||
if args.db_file is None:
|
||||
args.db_file = os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)), "resources/data.json"
|
||||
)
|
||||
db_file = path.join(path.dirname(path.realpath(__file__)), args.db_file)
|
||||
|
||||
if args.top_sites == 0 or args.all_sites:
|
||||
args.top_sites = sys.maxsize
|
||||
@@ -521,7 +543,7 @@ async def main():
|
||||
)
|
||||
|
||||
# Create object with all information about sites we are aware of.
|
||||
db = MaigretDatabase().load_from_path(args.db_file)
|
||||
db = MaigretDatabase().load_from_path(db_file)
|
||||
get_top_sites_for_id = lambda x: db.ranked_sites_dict(
|
||||
top=args.top_sites,
|
||||
tags=args.tags,
|
||||
@@ -533,18 +555,28 @@ async def main():
|
||||
site_data = get_top_sites_for_id(args.id_type)
|
||||
|
||||
if args.new_site_to_submit:
|
||||
submitter = Submitter(db=db, logger=logger, settings=settings)
|
||||
submitter = Submitter(db=db, logger=logger, settings=settings, args=args)
|
||||
is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
|
||||
if is_submitted:
|
||||
db.save_to_file(args.db_file)
|
||||
db.save_to_file(db_file)
|
||||
await submitter.close()
|
||||
|
||||
# Database self-checking
|
||||
if args.self_check:
|
||||
print('Maigret sites database self-checking...')
|
||||
if len(site_data) == 0:
|
||||
query_notify.warning(
|
||||
'No sites to self-check with the current filters! Exiting...'
|
||||
)
|
||||
return
|
||||
|
||||
query_notify.success(
|
||||
f'Maigret sites database self-check started for {len(site_data)} sites...'
|
||||
)
|
||||
is_need_update = await self_check(
|
||||
db,
|
||||
site_data,
|
||||
logger,
|
||||
proxy=args.proxy,
|
||||
max_connections=args.connections,
|
||||
tor_proxy=args.tor_proxy,
|
||||
i2p_proxy=args.i2p_proxy,
|
||||
@@ -554,27 +586,39 @@ async def main():
|
||||
'y',
|
||||
'',
|
||||
):
|
||||
db.save_to_file(args.db_file)
|
||||
db.save_to_file(db_file)
|
||||
print('Database was successfully updated.')
|
||||
else:
|
||||
print('Updates will be applied only for current search session.')
|
||||
print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
|
||||
|
||||
if args.verbose or args.debug:
|
||||
query_notify.info(
|
||||
'Scan sessions flags stats: ' + str(db.get_scan_stats(site_data))
|
||||
)
|
||||
|
||||
# Database statistics
|
||||
if args.stats:
|
||||
print(db.get_db_stats(db.sites_dict))
|
||||
print(db.get_db_stats())
|
||||
|
||||
report_dir = path.join(os.getcwd(), args.folderoutput)
|
||||
|
||||
# Make reports folder is not exists
|
||||
os.makedirs(args.folderoutput, exist_ok=True)
|
||||
os.makedirs(report_dir, exist_ok=True)
|
||||
|
||||
# Define one report filename template
|
||||
report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
|
||||
report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
|
||||
|
||||
if usernames == {}:
|
||||
# magic params to exit after init
|
||||
query_notify.warning('No usernames to check, exiting.')
|
||||
sys.exit(0)
|
||||
|
||||
if len(usernames) > 1 and args.permute and args.id_type == 'username':
|
||||
query_notify.warning(
|
||||
f"{len(usernames)} permutations from {original_usernames} to check..."
|
||||
+ get_dict_ascii_tree(usernames, prepend="\t")
|
||||
)
|
||||
|
||||
if not site_data:
|
||||
query_notify.warning('No sites to check, exiting!')
|
||||
sys.exit(2)
|
||||
@@ -638,7 +682,11 @@ async def main():
|
||||
check_domains=args.with_domains,
|
||||
)
|
||||
|
||||
notify_about_errors(results, query_notify)
|
||||
errs = errors.notify_about_errors(
|
||||
results, query_notify, show_statistics=args.verbose
|
||||
)
|
||||
for e in errs:
|
||||
query_notify.warning(*e)
|
||||
|
||||
if args.reports_sorting == "data":
|
||||
results = sort_report_by_data_points(results)
|
||||
@@ -648,25 +696,30 @@ async def main():
|
||||
# TODO: tests
|
||||
if recursive_search_enabled:
|
||||
extracted_ids = extract_ids_from_results(results, db)
|
||||
query_notify.warning(f'Extracted IDs: {extracted_ids}')
|
||||
usernames.update(extracted_ids)
|
||||
|
||||
# reporting for a one username
|
||||
if args.xmind:
|
||||
username = username.replace('/', '_')
|
||||
filename = report_filepath_tpl.format(username=username, postfix='.xmind')
|
||||
save_xmind_report(filename, username, results)
|
||||
query_notify.warning(f'XMind report for {username} saved in {filename}')
|
||||
|
||||
if args.csv:
|
||||
username = username.replace('/', '_')
|
||||
filename = report_filepath_tpl.format(username=username, postfix='.csv')
|
||||
save_csv_report(filename, username, results)
|
||||
query_notify.warning(f'CSV report for {username} saved in {filename}')
|
||||
|
||||
if args.txt:
|
||||
username = username.replace('/', '_')
|
||||
filename = report_filepath_tpl.format(username=username, postfix='.txt')
|
||||
save_txt_report(filename, username, results)
|
||||
query_notify.warning(f'TXT report for {username} saved in {filename}')
|
||||
|
||||
if args.json:
|
||||
username = username.replace('/', '_')
|
||||
filename = report_filepath_tpl.format(
|
||||
username=username, postfix=f'_{args.json}.json'
|
||||
)
|
||||
@@ -684,17 +737,24 @@ async def main():
|
||||
username = report_context['username']
|
||||
|
||||
if args.html:
|
||||
filename = report_filepath_tpl.format(username=username, postfix='.html')
|
||||
username = username.replace('/', '_')
|
||||
filename = report_filepath_tpl.format(
|
||||
username=username, postfix='_plain.html'
|
||||
)
|
||||
save_html_report(filename, report_context)
|
||||
query_notify.warning(f'HTML report on all usernames saved in {filename}')
|
||||
|
||||
if args.pdf:
|
||||
username = username.replace('/', '_')
|
||||
filename = report_filepath_tpl.format(username=username, postfix='.pdf')
|
||||
save_pdf_report(filename, report_context)
|
||||
query_notify.warning(f'PDF report on all usernames saved in {filename}')
|
||||
|
||||
if args.graph:
|
||||
filename = report_filepath_tpl.format(username=username, postfix='.html')
|
||||
username = username.replace('/', '_')
|
||||
filename = report_filepath_tpl.format(
|
||||
username=username, postfix='_graph.html'
|
||||
)
|
||||
save_graph_report(filename, general_results, db)
|
||||
query_notify.warning(f'Graph report on all usernames saved in {filename}')
|
||||
|
||||
@@ -704,13 +764,16 @@ async def main():
|
||||
print(text_report)
|
||||
|
||||
# update database
|
||||
db.save_to_file(args.db_file)
|
||||
db.save_to_file(db_file)
|
||||
|
||||
|
||||
def run():
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
if sys.version_info.minor >= 10:
|
||||
asyncio.run(main())
|
||||
else:
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
except KeyboardInterrupt:
|
||||
print('Maigret is interrupted.')
|
||||
sys.exit(1)
|
||||
|
||||
@@ -3,11 +3,12 @@
|
||||
This module defines the objects for notifying the caller about the
|
||||
results of queries.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
from colorama import Fore, Style, init
|
||||
|
||||
from .result import QueryStatus
|
||||
from .result import MaigretCheckStatus
|
||||
from .utils import get_dict_ascii_tree
|
||||
|
||||
|
||||
@@ -211,6 +212,10 @@ class QueryNotifyPrint(QueryNotify):
|
||||
else:
|
||||
print(msg)
|
||||
|
||||
def success(self, message, symbol="+"):
|
||||
msg = f"[{symbol}] {message}"
|
||||
self._colored_print(Fore.GREEN, msg)
|
||||
|
||||
def warning(self, message, symbol="-"):
|
||||
msg = f"[{symbol}] {message}"
|
||||
self._colored_print(Fore.YELLOW, msg)
|
||||
@@ -240,7 +245,7 @@ class QueryNotifyPrint(QueryNotify):
|
||||
ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ")
|
||||
|
||||
# Output to the terminal is desired.
|
||||
if result.status == QueryStatus.CLAIMED:
|
||||
if result.status == MaigretCheckStatus.CLAIMED:
|
||||
color = Fore.BLUE if is_similar else Fore.GREEN
|
||||
status = "?" if is_similar else "+"
|
||||
notify = self.make_terminal_notify(
|
||||
@@ -250,7 +255,7 @@ class QueryNotifyPrint(QueryNotify):
|
||||
color,
|
||||
result.site_url_user + ids_data_text,
|
||||
)
|
||||
elif result.status == QueryStatus.AVAILABLE:
|
||||
elif result.status == MaigretCheckStatus.AVAILABLE:
|
||||
if not self.print_found_only:
|
||||
notify = self.make_terminal_notify(
|
||||
"-",
|
||||
@@ -259,7 +264,7 @@ class QueryNotifyPrint(QueryNotify):
|
||||
Fore.YELLOW,
|
||||
"Not found!" + ids_data_text,
|
||||
)
|
||||
elif result.status == QueryStatus.UNKNOWN:
|
||||
elif result.status == MaigretCheckStatus.UNKNOWN:
|
||||
if not self.skip_check_errors:
|
||||
notify = self.make_terminal_notify(
|
||||
"?",
|
||||
@@ -268,7 +273,7 @@ class QueryNotifyPrint(QueryNotify):
|
||||
Fore.RED,
|
||||
str(self.result.error) + ids_data_text,
|
||||
)
|
||||
elif result.status == QueryStatus.ILLEGAL:
|
||||
elif result.status == MaigretCheckStatus.ILLEGAL:
|
||||
if not self.print_found_only:
|
||||
text = "Illegal Username Format For This Site!"
|
||||
notify = self.make_terminal_notify(
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
# License MIT. by balestek https://github.com/balestek
|
||||
from itertools import permutations
|
||||
|
||||
|
||||
class Permute:
|
||||
def __init__(self, elements: dict):
|
||||
self.separators = ["", "_", "-", "."]
|
||||
self.elements = elements
|
||||
|
||||
def gather(self, method: str = "strict" or "all") -> dict:
|
||||
permutations_dict = {}
|
||||
for i in range(1, len(self.elements) + 1):
|
||||
for subset in permutations(self.elements, i):
|
||||
if i == 1:
|
||||
if method == "all":
|
||||
permutations_dict[subset[0]] = self.elements[subset[0]]
|
||||
permutations_dict["_" + subset[0]] = self.elements[subset[0]]
|
||||
permutations_dict[subset[0] + "_"] = self.elements[subset[0]]
|
||||
else:
|
||||
for separator in self.separators:
|
||||
perm = separator.join(subset)
|
||||
permutations_dict[perm] = self.elements[subset[0]]
|
||||
if separator == "":
|
||||
permutations_dict["_" + perm] = self.elements[subset[0]]
|
||||
permutations_dict[perm + "_"] = self.elements[subset[0]]
|
||||
return permutations_dict
|
||||
@@ -8,14 +8,17 @@ from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
|
||||
import xmind
|
||||
from dateutil.tz import gettz
|
||||
from dateutil.parser import parse as parse_datetime_str
|
||||
from jinja2 import Template
|
||||
|
||||
from .checking import SUPPORTED_IDS
|
||||
from .result import QueryStatus
|
||||
from .result import MaigretCheckStatus
|
||||
from .sites import MaigretDatabase
|
||||
from .utils import is_country_tag, CaseConverter, enrich_link_str
|
||||
|
||||
|
||||
ADDITIONAL_TZINFO = {"CDT": gettz("America/Chicago")}
|
||||
SUPPORTED_JSON_REPORT_FORMATS = [
|
||||
"simple",
|
||||
"ndjson",
|
||||
@@ -67,7 +70,7 @@ def save_txt_report(filename: str, username: str, results: dict):
|
||||
def save_html_report(filename: str, context: dict):
|
||||
template, _ = generate_report_template(is_pdf=False)
|
||||
filled_template = template.render(**context)
|
||||
with open(filename, "w") as f:
|
||||
with open(filename, "w", encoding="utf-8") as f:
|
||||
f.write(filled_template)
|
||||
|
||||
|
||||
@@ -139,7 +142,7 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
|
||||
if not status: # FIXME: currently in case of timeout
|
||||
continue
|
||||
|
||||
if dictionary["status"].status != QueryStatus.CLAIMED:
|
||||
if dictionary["status"].status != MaigretCheckStatus.CLAIMED:
|
||||
continue
|
||||
|
||||
site_fallback_name = dictionary.get(
|
||||
@@ -292,8 +295,12 @@ def generate_report_context(username_results: list):
|
||||
first_seen = created_at
|
||||
else:
|
||||
try:
|
||||
known_time = parse_datetime_str(first_seen)
|
||||
new_time = parse_datetime_str(created_at)
|
||||
known_time = parse_datetime_str(
|
||||
first_seen, tzinfos=ADDITIONAL_TZINFO
|
||||
)
|
||||
new_time = parse_datetime_str(
|
||||
created_at, tzinfos=ADDITIONAL_TZINFO
|
||||
)
|
||||
if new_time < known_time:
|
||||
first_seen = created_at
|
||||
except Exception as e:
|
||||
@@ -302,6 +309,7 @@ def generate_report_context(username_results: list):
|
||||
first_seen,
|
||||
created_at,
|
||||
str(e),
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
for k, v in status.ids_data.items():
|
||||
@@ -333,7 +341,7 @@ def generate_report_context(username_results: list):
|
||||
new_ids.append((u, utype))
|
||||
usernames[u] = {"type": utype}
|
||||
|
||||
if status.status == QueryStatus.CLAIMED:
|
||||
if status.status == MaigretCheckStatus.CLAIMED:
|
||||
found_accounts += 1
|
||||
dictionary["found"] = True
|
||||
else:
|
||||
@@ -413,7 +421,7 @@ def generate_txt_report(username: str, results: dict, file):
|
||||
continue
|
||||
if (
|
||||
dictionary.get("status")
|
||||
and dictionary["status"].status == QueryStatus.CLAIMED
|
||||
and dictionary["status"].status == MaigretCheckStatus.CLAIMED
|
||||
):
|
||||
exists_counter += 1
|
||||
file.write(dictionary["url_user"] + "\n")
|
||||
@@ -430,7 +438,7 @@ def generate_json_report(username: str, results: dict, file, report_type):
|
||||
if not site_result or not site_result.get("status"):
|
||||
continue
|
||||
|
||||
if site_result["status"].status != QueryStatus.CLAIMED:
|
||||
if site_result["status"].status != MaigretCheckStatus.CLAIMED:
|
||||
continue
|
||||
|
||||
data = dict(site_result)
|
||||
@@ -491,7 +499,7 @@ def design_xmind_sheet(sheet, username, results):
|
||||
continue
|
||||
result_status = dictionary.get("status")
|
||||
# TODO: fix the reason
|
||||
if not result_status or result_status.status != QueryStatus.CLAIMED:
|
||||
if not result_status or result_status.status != MaigretCheckStatus.CLAIMED:
|
||||
continue
|
||||
|
||||
stripped_tags = list(map(lambda x: x.strip(), result_status.tags))
|
||||
|
||||
@@ -1,17 +1,58 @@
|
||||
{
|
||||
"presence_strings": [
|
||||
"user not found",
|
||||
"404",
|
||||
"Page not found",
|
||||
"error 404",
|
||||
"username",
|
||||
"not found",
|
||||
"пользователь",
|
||||
"profile",
|
||||
"lastname",
|
||||
"firstname",
|
||||
"DisplayName",
|
||||
"biography",
|
||||
"title",
|
||||
"birthday",
|
||||
"репутация",
|
||||
"информация",
|
||||
"e-mail"
|
||||
"e-mail",
|
||||
"body",
|
||||
"html",
|
||||
"style"
|
||||
],
|
||||
"supposed_usernames": [
|
||||
"alex", "god", "admin", "red", "blue", "john"]
|
||||
"alex", "god", "admin", "red", "blue", "john"
|
||||
],
|
||||
"retries_count": 0,
|
||||
"sites_db_path": "resources/data.json",
|
||||
"timeout": 30,
|
||||
"max_connections": 100,
|
||||
"recursive_search": true,
|
||||
"info_extracting": true,
|
||||
"cookie_jar_file": null,
|
||||
"ignore_ids_list": [],
|
||||
"reports_path": "reports",
|
||||
"proxy_url": null,
|
||||
"tor_proxy_url": "socks5://127.0.0.1:9050",
|
||||
"i2p_proxy_url": "http://127.0.0.1:4444",
|
||||
"domain_search": false,
|
||||
"scan_all_sites": false,
|
||||
"top_sites_count": 500,
|
||||
"scan_disabled_sites": false,
|
||||
"scan_sites_list": [],
|
||||
"self_check_enabled": false,
|
||||
"print_not_found": false,
|
||||
"print_check_errors": false,
|
||||
"colored_print": true,
|
||||
"show_progressbar": true,
|
||||
"report_sorting": "default",
|
||||
"json_report_type": "",
|
||||
"txt_report": false,
|
||||
"csv_report": false,
|
||||
"xmind_report": false,
|
||||
"graph_report": false,
|
||||
"pdf_report": false,
|
||||
"html_report": false,
|
||||
"web_interface_port": 5000
|
||||
}
|
||||
@@ -38,4 +38,8 @@ div {
|
||||
border-bottom-color: #3e3e3e;
|
||||
border-bottom-width: 1px;
|
||||
border-bottom-style: solid;
|
||||
}
|
||||
.invalid-button {
|
||||
position: absolute;
|
||||
left: 10px;
|
||||
}
|
||||
@@ -2,10 +2,11 @@
|
||||
|
||||
This module defines various objects for recording the results of queries.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class QueryStatus(Enum):
|
||||
class MaigretCheckStatus(Enum):
|
||||
"""Query Status Enumeration.
|
||||
|
||||
Describes status of query about a given username.
|
||||
@@ -28,10 +29,9 @@ class QueryStatus(Enum):
|
||||
return self.value
|
||||
|
||||
|
||||
class QueryResult:
|
||||
"""Query Result Object.
|
||||
|
||||
Describes result of query about a given username.
|
||||
class MaigretCheckResult:
|
||||
"""
|
||||
Describes result of checking a given username on a given site
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -46,11 +46,7 @@ class QueryResult:
|
||||
error=None,
|
||||
tags=[],
|
||||
):
|
||||
"""Create Query Result Object.
|
||||
|
||||
Contains information about a specific method of detecting usernames on
|
||||
a given type of web sites.
|
||||
|
||||
"""
|
||||
Keyword Arguments:
|
||||
self -- This object.
|
||||
username -- String indicating username that query result
|
||||
@@ -97,7 +93,10 @@ class QueryResult:
|
||||
}
|
||||
|
||||
def is_found(self):
|
||||
return self.status == QueryStatus.CLAIMED
|
||||
return self.status == MaigretCheckStatus.CLAIMED
|
||||
|
||||
def __repr__(self):
|
||||
return f"<{self.__str__()}>"
|
||||
|
||||
def __str__(self):
|
||||
"""Convert Object To String.
|
||||
|
||||
@@ -1,28 +1,85 @@
|
||||
import os
|
||||
import os.path as path
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
SETTINGS_FILES_PATHS = [
|
||||
path.join(path.dirname(path.realpath(__file__)), "resources/settings.json"),
|
||||
'~/.maigret/settings.json',
|
||||
path.join(os.getcwd(), 'settings.json'),
|
||||
]
|
||||
|
||||
|
||||
class Settings:
|
||||
# main maigret setting
|
||||
retries_count: int
|
||||
sites_db_path: str
|
||||
timeout: int
|
||||
max_connections: int
|
||||
recursive_search: bool
|
||||
info_extracting: bool
|
||||
cookie_jar_file: str
|
||||
ignore_ids_list: List
|
||||
reports_path: str
|
||||
proxy_url: str
|
||||
tor_proxy_url: str
|
||||
i2p_proxy_url: str
|
||||
domain_search: bool
|
||||
scan_all_sites: bool
|
||||
top_sites_count: int
|
||||
scan_disabled_sites: bool
|
||||
scan_sites_list: List
|
||||
self_check_enabled: bool
|
||||
print_not_found: bool
|
||||
print_check_errors: bool
|
||||
colored_print: bool
|
||||
show_progressbar: bool
|
||||
report_sorting: str
|
||||
json_report_type: str
|
||||
txt_report: bool
|
||||
csv_report: bool
|
||||
xmind_report: bool
|
||||
pdf_report: bool
|
||||
html_report: bool
|
||||
graph_report: bool
|
||||
web_interface_port: int
|
||||
|
||||
# submit mode settings
|
||||
presence_strings: list
|
||||
supposed_usernames: list
|
||||
|
||||
def __init__(self, filename):
|
||||
data = {}
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
try:
|
||||
with open(filename, "r", encoding="utf-8") as file:
|
||||
try:
|
||||
def load(self, paths=None):
|
||||
was_inited = False
|
||||
|
||||
if not paths:
|
||||
paths = SETTINGS_FILES_PATHS
|
||||
|
||||
for filename in paths:
|
||||
data = {}
|
||||
|
||||
try:
|
||||
with open(filename, "r", encoding="utf-8") as file:
|
||||
data = json.load(file)
|
||||
except Exception as error:
|
||||
raise ValueError(
|
||||
f"Problem with parsing json contents of "
|
||||
f"settings file '{filename}': {str(error)}."
|
||||
)
|
||||
except FileNotFoundError as error:
|
||||
raise FileNotFoundError(
|
||||
f"Problem while attempting to access settings file '{filename}'."
|
||||
) from error
|
||||
except FileNotFoundError:
|
||||
# treast as a normal situation
|
||||
pass
|
||||
except Exception as error:
|
||||
return False, ValueError(
|
||||
f"Problem with parsing json contents of "
|
||||
f"settings file '{filename}': {str(error)}."
|
||||
)
|
||||
|
||||
self.__dict__.update(data)
|
||||
self.__dict__.update(data)
|
||||
if data:
|
||||
was_inited = True
|
||||
|
||||
return (
|
||||
was_inited,
|
||||
f'None of the default settings files found: {", ".join(paths)}',
|
||||
)
|
||||
|
||||
@property
|
||||
def json(self):
|
||||
|
||||
@@ -5,8 +5,6 @@ import json
|
||||
import sys
|
||||
from typing import Optional, List, Dict, Any, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from .utils import CaseConverter, URLMatcher, is_country_tag
|
||||
|
||||
|
||||
@@ -23,6 +21,7 @@ class MaigretEngine:
|
||||
|
||||
|
||||
class MaigretSite:
|
||||
# Fields that should not be serialized when converting site to JSON
|
||||
NOT_SERIALIZABLE_FIELDS = [
|
||||
"name",
|
||||
"engineData",
|
||||
@@ -33,37 +32,65 @@ class MaigretSite:
|
||||
"urlRegexp",
|
||||
]
|
||||
|
||||
# Username known to exist on the site
|
||||
username_claimed = ""
|
||||
# Username known to not exist on the site
|
||||
username_unclaimed = ""
|
||||
# Additional URL path component, e.g. /forum in https://example.com/forum/users/{username}
|
||||
url_subpath = ""
|
||||
# Main site URL (the main page)
|
||||
url_main = ""
|
||||
# Full URL pattern for username page, e.g. https://example.com/forum/users/{username}
|
||||
url = ""
|
||||
# Whether site is disabled. Not used by Maigret without --use-disabled argument
|
||||
disabled = False
|
||||
# Whether a positive result indicates accounts with similar usernames rather than exact matches
|
||||
similar_search = False
|
||||
# Whether to ignore 403 status codes
|
||||
ignore403 = False
|
||||
# Site category tags
|
||||
tags: List[str] = []
|
||||
|
||||
# Type of identifier (username, gaia_id etc); see SUPPORTED_IDS in checking.py
|
||||
type = "username"
|
||||
# Custom HTTP headers
|
||||
headers: Dict[str, str] = {}
|
||||
# Error message substrings
|
||||
errors: Dict[str, str] = {}
|
||||
# Site activation requirements
|
||||
activation: Dict[str, Any] = {}
|
||||
# Regular expression for username validation
|
||||
regex_check = None
|
||||
# URL to probe site status
|
||||
url_probe = None
|
||||
# Type of check to perform
|
||||
check_type = ""
|
||||
# Whether to only send HEAD requests (GET by default)
|
||||
request_head_only = ""
|
||||
# GET parameters to include in requests
|
||||
get_params: Dict[str, Any] = {}
|
||||
|
||||
# Substrings in HTML response that indicate profile exists
|
||||
presense_strs: List[str] = []
|
||||
# Substrings in HTML response that indicate profile doesn't exist
|
||||
absence_strs: List[str] = []
|
||||
# Site statistics
|
||||
stats: Dict[str, Any] = {}
|
||||
|
||||
# Site engine name
|
||||
engine = None
|
||||
# Engine-specific configuration
|
||||
engine_data: Dict[str, Any] = {}
|
||||
# Engine instance
|
||||
engine_obj: Optional["MaigretEngine"] = None
|
||||
# Future for async requests
|
||||
request_future = None
|
||||
# Alexa traffic rank
|
||||
alexa_rank = None
|
||||
# Source (in case a site is a mirror of another site)
|
||||
source = None
|
||||
|
||||
# URL protocol (http/https)
|
||||
protocol = ''
|
||||
|
||||
def __init__(self, name, information):
|
||||
@@ -82,6 +109,54 @@ class MaigretSite:
|
||||
def __str__(self):
|
||||
return f"{self.name} ({self.url_main})"
|
||||
|
||||
def __is_equal_by_url_or_name(self, url_or_name_str: str):
|
||||
lower_url_or_name_str = url_or_name_str.lower()
|
||||
lower_url = self.url.lower()
|
||||
lower_name = self.name.lower()
|
||||
lower_url_main = self.url_main.lower()
|
||||
|
||||
return (
|
||||
lower_name == lower_url_or_name_str
|
||||
or (lower_url_main and lower_url_main == lower_url_or_name_str)
|
||||
or (lower_url_main and lower_url_main in lower_url_or_name_str)
|
||||
or (lower_url_main and lower_url_or_name_str in lower_url_main)
|
||||
or (lower_url and lower_url_or_name_str in lower_url)
|
||||
)
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, MaigretSite):
|
||||
# Compare only relevant attributes, not internal state like request_future
|
||||
attrs_to_compare = [
|
||||
'name',
|
||||
'url_main',
|
||||
'url_subpath',
|
||||
'type',
|
||||
'headers',
|
||||
'errors',
|
||||
'activation',
|
||||
'regex_check',
|
||||
'url_probe',
|
||||
'check_type',
|
||||
'request_head_only',
|
||||
'get_params',
|
||||
'presense_strs',
|
||||
'absence_strs',
|
||||
'stats',
|
||||
'engine',
|
||||
'engine_data',
|
||||
'alexa_rank',
|
||||
'source',
|
||||
'protocol',
|
||||
]
|
||||
|
||||
return all(
|
||||
getattr(self, attr) == getattr(other, attr) for attr in attrs_to_compare
|
||||
)
|
||||
elif isinstance(other, str):
|
||||
# Compare only by name (exactly) or url_main (partial similarity)
|
||||
return self.__is_equal_by_url_or_name(other)
|
||||
return False
|
||||
|
||||
def update_detectors(self):
|
||||
if "url" in self.__dict__:
|
||||
url = self.url
|
||||
@@ -103,6 +178,10 @@ class MaigretSite:
|
||||
return None
|
||||
|
||||
def extract_id_from_url(self, url: str) -> Optional[Tuple[str, str]]:
|
||||
"""
|
||||
Extracts username from url.
|
||||
It's outdated, detects only a format of https://example.com/{username}
|
||||
"""
|
||||
if not self.url_regexp:
|
||||
return None
|
||||
|
||||
@@ -225,6 +304,15 @@ class MaigretDatabase:
|
||||
def sites_dict(self):
|
||||
return {site.name: site for site in self._sites}
|
||||
|
||||
def has_site(self, site: MaigretSite):
|
||||
for s in self._sites:
|
||||
if site == s:
|
||||
return True
|
||||
return False
|
||||
|
||||
def __contains__(self, site):
|
||||
return self.has_site(site)
|
||||
|
||||
def ranked_sites_dict(
|
||||
self,
|
||||
reverse=False,
|
||||
@@ -236,6 +324,17 @@ class MaigretDatabase:
|
||||
):
|
||||
"""
|
||||
Ranking and filtering of the sites list
|
||||
|
||||
Args:
|
||||
reverse (bool, optional): Reverse the sorting order. Defaults to False.
|
||||
top (int, optional): Maximum number of sites to return. Defaults to sys.maxsize.
|
||||
tags (list, optional): List of tags to filter sites by. Defaults to empty list.
|
||||
names (list, optional): List of site names (or urls, see MaigretSite.__eq__) to filter by. Defaults to empty list.
|
||||
disabled (bool, optional): Whether to include disabled sites. Defaults to True.
|
||||
id_type (str, optional): Type of identifier to filter by. Defaults to "username".
|
||||
|
||||
Returns:
|
||||
dict: Dictionary of filtered and ranked sites, with site names as keys and MaigretSite objects as values
|
||||
"""
|
||||
normalized_names = list(map(str.lower, names))
|
||||
normalized_tags = list(map(str.lower, tags))
|
||||
@@ -359,6 +458,8 @@ class MaigretDatabase:
|
||||
if not is_url_valid:
|
||||
raise FileNotFoundError(f"Invalid data file URL '{url}'.")
|
||||
|
||||
import requests
|
||||
|
||||
try:
|
||||
response = requests.get(url=url)
|
||||
except Exception as error:
|
||||
@@ -419,41 +520,92 @@ class MaigretDatabase:
|
||||
results[_id] = _type
|
||||
return results
|
||||
|
||||
def get_db_stats(self, sites_dict):
|
||||
if not sites_dict:
|
||||
sites_dict = self.sites_dict()
|
||||
|
||||
def get_db_stats(self, is_markdown=False):
|
||||
# Initialize counters
|
||||
sites_dict = self.sites_dict
|
||||
urls = {}
|
||||
tags = {}
|
||||
output = ""
|
||||
disabled_count = 0
|
||||
total_count = len(sites_dict)
|
||||
message_checks_one_factor = 0
|
||||
status_checks = 0
|
||||
|
||||
for _, site in sites_dict.items():
|
||||
# Collect statistics
|
||||
for site in sites_dict.values():
|
||||
# Count disabled sites
|
||||
if site.disabled:
|
||||
disabled_count += 1
|
||||
|
||||
# Count URL types
|
||||
url_type = site.get_url_template()
|
||||
urls[url_type] = urls.get(url_type, 0) + 1
|
||||
|
||||
# Count check types for enabled sites
|
||||
if not site.disabled:
|
||||
if site.check_type == 'message':
|
||||
if not (site.absence_strs and site.presense_strs):
|
||||
message_checks_one_factor += 1
|
||||
elif site.check_type == 'status_code':
|
||||
status_checks += 1
|
||||
|
||||
# Count tags
|
||||
if not site.tags:
|
||||
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
||||
|
||||
for tag in filter(lambda x: not is_country_tag(x), site.tags):
|
||||
tags[tag] = tags.get(tag, 0) + 1
|
||||
|
||||
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
|
||||
output += "Top profile URLs:\n"
|
||||
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
|
||||
# Calculate percentages
|
||||
total_count = len(sites_dict)
|
||||
enabled_count = total_count - disabled_count
|
||||
enabled_perc = round(100 * enabled_count / total_count, 2)
|
||||
checks_perc = round(100 * message_checks_one_factor / enabled_count, 2)
|
||||
status_checks_perc = round(100 * status_checks / enabled_count, 2)
|
||||
|
||||
# Sites with probing and activation (kinda special cases, let's watch them)
|
||||
site_with_probing = []
|
||||
site_with_activation = []
|
||||
for site in sites_dict.values():
|
||||
|
||||
def get_site_label(site):
|
||||
return f"{site.name}{' (disabled)' if site.disabled else ''}"
|
||||
|
||||
if site.url_probe:
|
||||
site_with_probing.append(get_site_label(site))
|
||||
if site.activation:
|
||||
site_with_activation.append(get_site_label(site))
|
||||
|
||||
# Format output
|
||||
separator = "\n\n"
|
||||
output = [
|
||||
f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%",
|
||||
f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)",
|
||||
f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)",
|
||||
f"False positive risk (total): {checks_perc + status_checks_perc:.2f}%",
|
||||
f"Sites with probing: {', '.join(sorted(site_with_probing))}",
|
||||
f"Sites with activation: {', '.join(sorted(site_with_activation))}",
|
||||
self._format_top_items("profile URLs", urls, 20, is_markdown),
|
||||
self._format_top_items("tags", tags, 20, is_markdown, self._tags),
|
||||
]
|
||||
|
||||
return separator.join(output)
|
||||
|
||||
def _format_top_items(
|
||||
self, title, items_dict, limit, is_markdown, valid_items=None
|
||||
):
|
||||
"""Helper method to format top items lists"""
|
||||
output = f"Top {limit} {title}:\n"
|
||||
for item, count in sorted(items_dict.items(), key=lambda x: x[1], reverse=True)[
|
||||
:limit
|
||||
]:
|
||||
if count == 1:
|
||||
break
|
||||
output += f"{count}\t{url}\n"
|
||||
|
||||
output += "Top tags:\n"
|
||||
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
|
||||
mark = ""
|
||||
if tag not in self._tags:
|
||||
mark = " (non-standard)"
|
||||
output += f"{count}\t{tag}{mark}\n"
|
||||
|
||||
mark = (
|
||||
" (non-standard)"
|
||||
if valid_items is not None and item not in valid_items
|
||||
else ""
|
||||
)
|
||||
output += (
|
||||
f"- ({count})\t`{item}`{mark}\n"
|
||||
if is_markdown
|
||||
else f"{count}\t{item}{mark}\n"
|
||||
)
|
||||
return output
|
||||
|
||||
@@ -1,16 +1,44 @@
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from typing import List
|
||||
import xml.etree.ElementTree as ET
|
||||
import requests
|
||||
import os
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from aiohttp import ClientSession, TCPConnector
|
||||
from aiohttp_socks import ProxyConnector
|
||||
import cloudscraper
|
||||
from colorama import Fore, Style
|
||||
|
||||
from .activation import import_aiohttp_cookies
|
||||
from .checking import maigret
|
||||
from .result import QueryStatus
|
||||
from .result import MaigretCheckResult
|
||||
from .settings import Settings
|
||||
from .sites import MaigretDatabase, MaigretSite, MaigretEngine
|
||||
from .utils import get_random_user_agent, get_match_ratio
|
||||
from .sites import MaigretDatabase, MaigretEngine, MaigretSite
|
||||
from .utils import get_random_user_agent
|
||||
from .checking import site_self_check
|
||||
from .utils import get_match_ratio, generate_random_username
|
||||
|
||||
|
||||
class CloudflareSession:
|
||||
def __init__(self):
|
||||
self.scraper = cloudscraper.create_scraper()
|
||||
|
||||
async def get(self, *args, **kwargs):
|
||||
await asyncio.sleep(0)
|
||||
res = self.scraper.get(*args, **kwargs)
|
||||
self.last_text = res.text
|
||||
self.status = res.status_code
|
||||
return self
|
||||
|
||||
def status_code(self):
|
||||
return self.status
|
||||
|
||||
async def text(self):
|
||||
await asyncio.sleep(0)
|
||||
return self.last_text
|
||||
|
||||
async def close(self):
|
||||
pass
|
||||
|
||||
|
||||
class Submitter:
|
||||
@@ -18,22 +46,45 @@ class Submitter:
|
||||
"User-Agent": get_random_user_agent(),
|
||||
}
|
||||
|
||||
SEPARATORS = "\"'"
|
||||
SEPARATORS = "\"'\n"
|
||||
|
||||
RATIO = 0.6
|
||||
TOP_FEATURES = 5
|
||||
URL_RE = re.compile(r"https?://(www\.)?")
|
||||
|
||||
def __init__(self, db: MaigretDatabase, settings: Settings, logger):
|
||||
def __init__(self, db: MaigretDatabase, settings: Settings, logger, args):
|
||||
self.settings = settings
|
||||
self.args = args
|
||||
self.db = db
|
||||
self.logger = logger
|
||||
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
proxy = self.args.proxy
|
||||
cookie_jar = None
|
||||
if args.cookie_file:
|
||||
if not os.path.exists(args.cookie_file):
|
||||
logger.error(f"Cookie file {args.cookie_file} does not exist!")
|
||||
else:
|
||||
cookie_jar = import_aiohttp_cookies(args.cookie_file)
|
||||
|
||||
connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
|
||||
connector.verify_ssl = False
|
||||
self.session = ClientSession(
|
||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||
)
|
||||
|
||||
async def close(self):
|
||||
await self.session.close()
|
||||
|
||||
@staticmethod
|
||||
def get_alexa_rank(site_url_main):
|
||||
import requests
|
||||
import xml.etree.ElementTree as ElementTree
|
||||
|
||||
url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
|
||||
xml_data = requests.get(url).text
|
||||
root = ET.fromstring(xml_data)
|
||||
root = ElementTree.fromstring(xml_data)
|
||||
alexa_rank = 0
|
||||
|
||||
try:
|
||||
@@ -48,69 +99,18 @@ class Submitter:
|
||||
return "/".join(url.split("/", 3)[:3])
|
||||
|
||||
async def site_self_check(self, site, semaphore, silent=False):
|
||||
changes = {
|
||||
"disabled": False,
|
||||
}
|
||||
|
||||
check_data = [
|
||||
(site.username_claimed, QueryStatus.CLAIMED),
|
||||
(site.username_unclaimed, QueryStatus.AVAILABLE),
|
||||
]
|
||||
|
||||
self.logger.info(f"Checking {site.name}...")
|
||||
|
||||
for username, status in check_data:
|
||||
results_dict = await maigret(
|
||||
username=username,
|
||||
site_dict={site.name: site},
|
||||
logger=self.logger,
|
||||
timeout=30,
|
||||
id_type=site.type,
|
||||
forced=True,
|
||||
no_progressbar=True,
|
||||
)
|
||||
|
||||
# don't disable entries with other ids types
|
||||
# TODO: make normal checking
|
||||
if site.name not in results_dict:
|
||||
self.logger.info(results_dict)
|
||||
changes["disabled"] = True
|
||||
continue
|
||||
|
||||
result = results_dict[site.name]["status"]
|
||||
|
||||
site_status = result.status
|
||||
|
||||
if site_status != status:
|
||||
if site_status == QueryStatus.UNKNOWN:
|
||||
msgs = site.absence_strs
|
||||
etype = site.check_type
|
||||
self.logger.warning(
|
||||
"Error while searching '%s' in %s: %s, %s, check type %s",
|
||||
username,
|
||||
site.name,
|
||||
result.context,
|
||||
msgs,
|
||||
etype,
|
||||
)
|
||||
# don't disable in case of available username
|
||||
if status == QueryStatus.CLAIMED:
|
||||
changes["disabled"] = True
|
||||
elif status == QueryStatus.CLAIMED:
|
||||
self.logger.warning(
|
||||
f"Not found `{username}` in {site.name}, must be claimed"
|
||||
)
|
||||
self.logger.info(results_dict[site.name])
|
||||
changes["disabled"] = True
|
||||
else:
|
||||
self.logger.warning(
|
||||
f"Found `{username}` in {site.name}, must be available"
|
||||
)
|
||||
self.logger.info(results_dict[site.name])
|
||||
changes["disabled"] = True
|
||||
|
||||
self.logger.info(f"Site {site.name} checking is finished")
|
||||
|
||||
# Call the general function from the checking.py
|
||||
changes = await site_self_check(
|
||||
site=site,
|
||||
logger=self.logger,
|
||||
semaphore=semaphore,
|
||||
db=self.db,
|
||||
silent=silent,
|
||||
proxy=self.args.proxy,
|
||||
cookies=self.args.cookie_file,
|
||||
# Don't skip errors in submit mode - we need check both false positives/true negatives
|
||||
skip_errors=False,
|
||||
)
|
||||
return changes
|
||||
|
||||
def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog):
|
||||
@@ -125,21 +125,21 @@ class Submitter:
|
||||
fields['urlSubpath'] = f'/{subpath}'
|
||||
return fields
|
||||
|
||||
async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
|
||||
try:
|
||||
r = requests.get(url_mainpage)
|
||||
self.logger.debug(r.text)
|
||||
except Exception as e:
|
||||
self.logger.warning(e)
|
||||
print("Some error while checking main page")
|
||||
return []
|
||||
async def detect_known_engine(
|
||||
self, url_exists, url_mainpage, session, follow_redirects, headers
|
||||
) -> [List[MaigretSite], str]:
|
||||
|
||||
session = session or self.session
|
||||
resp_text, _ = await self.get_html_response_to_compare(
|
||||
url_exists, session, follow_redirects, headers
|
||||
)
|
||||
|
||||
for engine in self.db.engines:
|
||||
strs_to_check = engine.__dict__.get("presenseStrs")
|
||||
if strs_to_check and r and r.text:
|
||||
if strs_to_check and resp_text:
|
||||
all_strs_in_response = True
|
||||
for s in strs_to_check:
|
||||
if s not in r.text:
|
||||
if s not in resp_text:
|
||||
all_strs_in_response = False
|
||||
sites = []
|
||||
if all_strs_in_response:
|
||||
@@ -159,7 +159,7 @@ class Submitter:
|
||||
for u in usernames_to_check:
|
||||
site_data = {
|
||||
"urlMain": url_mainpage,
|
||||
"name": url_mainpage.split("//")[1],
|
||||
"name": url_mainpage.split("//")[1].split("/")[0],
|
||||
"engine": engine_name,
|
||||
"usernameClaimed": u,
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
@@ -175,123 +175,234 @@ class Submitter:
|
||||
)
|
||||
sites.append(maigret_site)
|
||||
|
||||
return sites
|
||||
return sites, resp_text
|
||||
|
||||
return []
|
||||
return [], resp_text
|
||||
|
||||
def extract_username_dialog(self, url):
|
||||
@staticmethod
|
||||
def extract_username_dialog(url):
|
||||
url_parts = url.rstrip("/").split("/")
|
||||
supposed_username = url_parts[-1].strip('@')
|
||||
entered_username = input(
|
||||
f'Is "{supposed_username}" a valid username? If not, write it manually: '
|
||||
f"{Fore.GREEN}[?] Is \"{supposed_username}\" a valid username? If not, write it manually: {Style.RESET_ALL}"
|
||||
)
|
||||
return entered_username if entered_username else supposed_username
|
||||
|
||||
async def check_features_manually(
|
||||
self, url_exists, url_mainpage, cookie_file, redirects=False
|
||||
@staticmethod
|
||||
async def get_html_response_to_compare(
|
||||
url: str, session: ClientSession = None, redirects=False, headers: Dict = None
|
||||
):
|
||||
custom_headers = {}
|
||||
while True:
|
||||
header_key = input(
|
||||
'Specify custom header if you need or just press Enter to skip. Header name: '
|
||||
async with session.get(
|
||||
url, allow_redirects=redirects, headers=headers
|
||||
) as response:
|
||||
# Try different encodings or fallback to 'ignore' errors
|
||||
try:
|
||||
html_response = await response.text(encoding='utf-8')
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
html_response = await response.text(encoding='latin1')
|
||||
except UnicodeDecodeError:
|
||||
html_response = await response.text(errors='ignore')
|
||||
return html_response, response.status
|
||||
|
||||
async def check_features_manually(
|
||||
self,
|
||||
username: str,
|
||||
url_exists: str,
|
||||
cookie_filename="", # TODO: use cookies
|
||||
session: ClientSession = None,
|
||||
follow_redirects=False,
|
||||
headers: dict = None,
|
||||
) -> Tuple[List[str], List[str], str, str]:
|
||||
|
||||
random_username = generate_random_username()
|
||||
url_of_non_existing_account = url_exists.lower().replace(
|
||||
username.lower(), random_username
|
||||
)
|
||||
|
||||
try:
|
||||
session = session or self.session
|
||||
first_html_response, first_status = await self.get_html_response_to_compare(
|
||||
url_exists, session, follow_redirects, headers
|
||||
)
|
||||
if not header_key:
|
||||
break
|
||||
header_value = input('Header value: ')
|
||||
custom_headers[header_key.strip()] = header_value.strip()
|
||||
second_html_response, second_status = (
|
||||
await self.get_html_response_to_compare(
|
||||
url_of_non_existing_account, session, follow_redirects, headers
|
||||
)
|
||||
)
|
||||
await session.close()
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
f"Error while getting HTTP response for username {username}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
return None, None, str(e), random_username
|
||||
|
||||
supposed_username = self.extract_username_dialog(url_exists)
|
||||
non_exist_username = "noonewouldeverusethis7"
|
||||
|
||||
url_user = url_exists.replace(supposed_username, "{username}")
|
||||
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
|
||||
|
||||
headers = dict(self.HEADERS)
|
||||
headers.update(custom_headers)
|
||||
|
||||
# cookies
|
||||
cookie_dict = None
|
||||
if cookie_file:
|
||||
self.logger.info(f'Use {cookie_file} for cookies')
|
||||
cookie_jar = import_aiohttp_cookies(cookie_file)
|
||||
cookie_dict = {c.key: c.value for c in cookie_jar}
|
||||
|
||||
exists_resp = requests.get(
|
||||
url_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
|
||||
self.logger.info(f"URL with existing account: {url_exists}")
|
||||
self.logger.info(
|
||||
f"HTTP response status for URL with existing account: {first_status}"
|
||||
)
|
||||
self.logger.debug(url_exists)
|
||||
self.logger.debug(exists_resp.status_code)
|
||||
self.logger.debug(exists_resp.text)
|
||||
|
||||
non_exists_resp = requests.get(
|
||||
url_not_exists,
|
||||
cookies=cookie_dict,
|
||||
headers=headers,
|
||||
allow_redirects=redirects,
|
||||
self.logger.info(
|
||||
f"HTTP response length URL with existing account: {len(first_html_response)}"
|
||||
)
|
||||
self.logger.debug(url_not_exists)
|
||||
self.logger.debug(non_exists_resp.status_code)
|
||||
self.logger.debug(non_exists_resp.text)
|
||||
self.logger.debug(first_html_response)
|
||||
|
||||
a = exists_resp.text
|
||||
b = non_exists_resp.text
|
||||
self.logger.info(f"URL with existing account: {url_of_non_existing_account}")
|
||||
self.logger.info(
|
||||
f"HTTP response status for URL with non-existing account: {second_status}"
|
||||
)
|
||||
self.logger.info(
|
||||
f"HTTP response length URL with non-existing account: {len(second_html_response)}"
|
||||
)
|
||||
self.logger.debug(second_html_response)
|
||||
|
||||
tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
|
||||
tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
|
||||
# TODO: filter by errors, move to dialog function
|
||||
if (
|
||||
"/cdn-cgi/challenge-platform" in first_html_response
|
||||
or "\t\t\t\tnow: " in first_html_response
|
||||
or "Sorry, you have been blocked" in first_html_response
|
||||
):
|
||||
self.logger.info("Cloudflare detected, skipping")
|
||||
return None, None, "Cloudflare detected, skipping", random_username
|
||||
|
||||
tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response))
|
||||
tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response))
|
||||
|
||||
a_minus_b = tokens_a.difference(tokens_b)
|
||||
b_minus_a = tokens_b.difference(tokens_a)
|
||||
|
||||
if len(a_minus_b) == len(b_minus_a) == 0:
|
||||
print("The pages for existing and non-existing account are the same!")
|
||||
a_minus_b = list(map(lambda x: x.strip('\\'), a_minus_b))
|
||||
b_minus_a = list(map(lambda x: x.strip('\\'), b_minus_a))
|
||||
|
||||
top_features_count = int(
|
||||
input(
|
||||
f"Specify count of features to extract [default {self.TOP_FEATURES}]: "
|
||||
# Filter out strings containing usernames
|
||||
a_minus_b = [s for s in a_minus_b if username.lower() not in s.lower()]
|
||||
b_minus_a = [s for s in b_minus_a if random_username.lower() not in s.lower()]
|
||||
|
||||
def filter_tokens(token: str, html_response: str) -> bool:
|
||||
is_in_html = token in html_response
|
||||
is_long_str = len(token) >= 50
|
||||
is_number = re.match(r'^\d\.?\d+$', token) or re.match(r':^\d+$', token)
|
||||
is_whitelisted_number = token in ['200', '404', '403']
|
||||
|
||||
return not (
|
||||
is_in_html or is_long_str or (is_number and not is_whitelisted_number)
|
||||
)
|
||||
or self.TOP_FEATURES
|
||||
|
||||
a_minus_b = list(
|
||||
filter(lambda t: filter_tokens(t, second_html_response), a_minus_b)
|
||||
)
|
||||
b_minus_a = list(
|
||||
filter(lambda t: filter_tokens(t, first_html_response), b_minus_a)
|
||||
)
|
||||
|
||||
if len(a_minus_b) == len(b_minus_a) == 0:
|
||||
return (
|
||||
None,
|
||||
None,
|
||||
"HTTP responses for pages with existing and non-existing accounts are the same",
|
||||
random_username,
|
||||
)
|
||||
|
||||
match_fun = get_match_ratio(self.settings.presence_strings)
|
||||
|
||||
presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[
|
||||
:top_features_count
|
||||
: self.TOP_FEATURES
|
||||
]
|
||||
|
||||
print("Detected text features of existing account: " + ", ".join(presence_list))
|
||||
features = input("If features was not detected correctly, write it manually: ")
|
||||
|
||||
if features:
|
||||
presence_list = list(map(str.strip, features.split(",")))
|
||||
|
||||
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
|
||||
:top_features_count
|
||||
: self.TOP_FEATURES
|
||||
]
|
||||
|
||||
self.logger.info(f"Detected presence features: {presence_list}")
|
||||
self.logger.info(f"Detected absence features: {absence_list}")
|
||||
|
||||
return presence_list, absence_list, "Found", random_username
|
||||
|
||||
async def add_site(self, site):
|
||||
sem = asyncio.Semaphore(1)
|
||||
print(
|
||||
"Detected text features of non-existing account: " + ", ".join(absence_list)
|
||||
f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}"
|
||||
)
|
||||
features = input("If features was not detected correctly, write it manually: ")
|
||||
|
||||
if features:
|
||||
absence_list = list(map(str.strip, features.split(",")))
|
||||
result = await self.site_self_check(site, sem)
|
||||
if result["disabled"]:
|
||||
print(f"Checks failed for {site.name}, please, verify them manually.")
|
||||
return {
|
||||
"valid": False,
|
||||
"reason": "checks_failed",
|
||||
}
|
||||
|
||||
site_data = {
|
||||
"absenceStrs": absence_list,
|
||||
"presenseStrs": presence_list,
|
||||
"url": url_user,
|
||||
"urlMain": url_mainpage,
|
||||
"usernameClaimed": supposed_username,
|
||||
"usernameUnclaimed": non_exist_username,
|
||||
"checkType": "message",
|
||||
while True:
|
||||
print("\nAvailable fields to edit:")
|
||||
editable_fields = {
|
||||
'1': 'name',
|
||||
'2': 'tags',
|
||||
'3': 'url',
|
||||
'4': 'url_main',
|
||||
'5': 'username_claimed',
|
||||
'6': 'username_unclaimed',
|
||||
'7': 'presense_strs',
|
||||
'8': 'absence_strs',
|
||||
}
|
||||
|
||||
for num, field in editable_fields.items():
|
||||
current_value = getattr(site, field)
|
||||
print(f"{num}. {field} (current: {current_value})")
|
||||
|
||||
print("0. finish editing")
|
||||
print("10. reject and block domain")
|
||||
print("11. invalid params, remove")
|
||||
|
||||
choice = input("\nSelect field number to edit (0-8): ").strip()
|
||||
|
||||
if choice == '0':
|
||||
break
|
||||
|
||||
if choice == '10':
|
||||
return {
|
||||
"valid": False,
|
||||
"reason": "manual block",
|
||||
}
|
||||
|
||||
if choice == '11':
|
||||
return {
|
||||
"valid": False,
|
||||
"reason": "remove",
|
||||
}
|
||||
|
||||
if choice in editable_fields:
|
||||
field = editable_fields[choice]
|
||||
current_value = getattr(site, field)
|
||||
new_value = input(
|
||||
f"Enter new value for {field} (current: {current_value}): "
|
||||
).strip()
|
||||
|
||||
if field in ['tags', 'presense_strs', 'absence_strs']:
|
||||
new_value = list(map(str.strip, new_value.split(',')))
|
||||
|
||||
if new_value:
|
||||
setattr(site, field, new_value)
|
||||
print(f"Updated {field} to: {new_value}")
|
||||
|
||||
self.logger.info(site.json)
|
||||
self.db.update_site(site)
|
||||
return {
|
||||
"valid": True,
|
||||
}
|
||||
|
||||
if headers != self.HEADERS:
|
||||
site_data['headers'] = headers
|
||||
|
||||
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
||||
return site
|
||||
|
||||
async def dialog(self, url_exists, cookie_file):
|
||||
"""
|
||||
An implementation of the submit mode:
|
||||
- User provides a URL of a existing social media account
|
||||
- Maigret tries to detect the site engine and understand how to check
|
||||
for account presence with HTTP responses analysis
|
||||
- If detection succeeds, Maigret generates a new site entry/replace old one in the database
|
||||
"""
|
||||
old_site = None
|
||||
additional_options_enabled = self.logger.level in (
|
||||
logging.DEBUG,
|
||||
logging.WARNING,
|
||||
)
|
||||
|
||||
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
|
||||
domain_raw = domain_raw.split("/")[0]
|
||||
self.logger.info('Domain is %s', domain_raw)
|
||||
@@ -302,9 +413,11 @@ class Submitter:
|
||||
)
|
||||
|
||||
if matched_sites:
|
||||
# TODO: update the existing site
|
||||
print(
|
||||
f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
|
||||
f"{Fore.YELLOW}[!] Sites with domain \"{domain_raw}\" already exists in the Maigret database!{Style.RESET_ALL}"
|
||||
)
|
||||
|
||||
status = lambda s: "(disabled)" if s.disabled else ""
|
||||
url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
|
||||
print(
|
||||
@@ -316,31 +429,130 @@ class Submitter:
|
||||
)
|
||||
)
|
||||
|
||||
if input("Do you want to continue? [yN] ").lower() in "n":
|
||||
if (
|
||||
input(
|
||||
f"{Fore.GREEN}[?] Do you want to continue? [yN] {Style.RESET_ALL}"
|
||||
).lower()
|
||||
in "n"
|
||||
):
|
||||
return False
|
||||
|
||||
site_names = [site.name for site in matched_sites]
|
||||
site_name = (
|
||||
input(
|
||||
f"{Fore.GREEN}[?] Which site do you want to update in case of success? 1st by default. [{', '.join(site_names)}] {Style.RESET_ALL}"
|
||||
)
|
||||
or matched_sites[0].name
|
||||
)
|
||||
old_site = next(
|
||||
(site for site in matched_sites if site.name == site_name), None
|
||||
)
|
||||
print(
|
||||
f'{Fore.GREEN}[+] We will update site "{old_site.name}" in case of success.{Style.RESET_ALL}'
|
||||
)
|
||||
|
||||
# Check if the site check is ordinary or not
|
||||
if old_site and (old_site.url_probe or old_site.activation):
|
||||
skip = input(
|
||||
f"{Fore.RED}[!] The site check depends on activation / probing mechanism! Consider to update it manually. Continue? [yN]{Style.RESET_ALL}"
|
||||
)
|
||||
if skip.lower() in ['n', '']:
|
||||
return False
|
||||
|
||||
# TODO: urlProbe support
|
||||
# TODO: activation support
|
||||
|
||||
url_mainpage = self.extract_mainpage_url(url_exists)
|
||||
|
||||
# headers update
|
||||
custom_headers = dict(self.HEADERS)
|
||||
while additional_options_enabled:
|
||||
header_key = input(
|
||||
f'{Fore.GREEN}[?] Specify custom header if you need or just press Enter to skip. Header name: {Style.RESET_ALL}'
|
||||
)
|
||||
if not header_key:
|
||||
break
|
||||
header_value = input(f'{Fore.GREEN}[?] Header value: {Style.RESET_ALL}')
|
||||
custom_headers[header_key.strip()] = header_value.strip()
|
||||
|
||||
# redirects settings update
|
||||
redirects = False
|
||||
if additional_options_enabled:
|
||||
redirects = (
|
||||
'y'
|
||||
in input(
|
||||
f'{Fore.GREEN}[?] Should we do redirects automatically? [yN] {Style.RESET_ALL}'
|
||||
).lower()
|
||||
)
|
||||
|
||||
print('Detecting site engine, please wait...')
|
||||
sites = []
|
||||
text = None
|
||||
try:
|
||||
sites = await self.detect_known_engine(url_exists, url_mainpage)
|
||||
sites, text = await self.detect_known_engine(
|
||||
url_exists,
|
||||
url_exists,
|
||||
session=None,
|
||||
follow_redirects=redirects,
|
||||
headers=custom_headers,
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
print('Engine detect process is interrupted.')
|
||||
|
||||
if 'cloudflare' in text.lower():
|
||||
print(
|
||||
'Cloudflare protection detected. I will use cloudscraper for further work'
|
||||
)
|
||||
# self.session = CloudflareSession()
|
||||
|
||||
if not sites:
|
||||
print("Unable to detect site engine, lets generate checking features")
|
||||
sites = [
|
||||
|
||||
supposed_username = self.extract_username_dialog(url_exists)
|
||||
self.logger.info(f"Supposed username: {supposed_username}")
|
||||
|
||||
# TODO: pass status_codes
|
||||
# check it here and suggest to enable / auto-enable redirects
|
||||
presence_list, absence_list, status, non_exist_username = (
|
||||
await self.check_features_manually(
|
||||
url_exists, url_mainpage, cookie_file
|
||||
username=supposed_username,
|
||||
url_exists=url_exists,
|
||||
cookie_filename=cookie_file,
|
||||
follow_redirects=redirects,
|
||||
headers=custom_headers,
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
if status == "Found":
|
||||
site_data = {
|
||||
"absenceStrs": absence_list,
|
||||
"presenseStrs": presence_list,
|
||||
"url": url_exists.replace(supposed_username, '{username}'),
|
||||
"urlMain": url_mainpage,
|
||||
"usernameClaimed": supposed_username,
|
||||
"usernameUnclaimed": non_exist_username,
|
||||
"headers": custom_headers,
|
||||
"checkType": "message",
|
||||
}
|
||||
self.logger.info(json.dumps(site_data, indent=4))
|
||||
|
||||
if custom_headers != self.HEADERS:
|
||||
site_data['headers'] = custom_headers
|
||||
|
||||
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
||||
sites.append(site)
|
||||
|
||||
else:
|
||||
print(
|
||||
f"{Fore.RED}[!] The check for site failed! Reason: {status}{Style.RESET_ALL}"
|
||||
)
|
||||
return False
|
||||
|
||||
self.logger.debug(sites[0].__dict__)
|
||||
|
||||
sem = asyncio.Semaphore(1)
|
||||
|
||||
print("Checking, please wait...")
|
||||
print(f"{Fore.GREEN}[*] Checking, please wait...{Style.RESET_ALL}")
|
||||
found = False
|
||||
chosen_site = None
|
||||
for s in sites:
|
||||
@@ -352,7 +564,7 @@ class Submitter:
|
||||
|
||||
if not found:
|
||||
print(
|
||||
f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}."
|
||||
f"{Fore.RED}[!] The check for site '{chosen_site.name}' failed!{Style.RESET_ALL}"
|
||||
)
|
||||
print(
|
||||
"Try to run this mode again and increase features count or choose others."
|
||||
@@ -362,22 +574,90 @@ class Submitter:
|
||||
else:
|
||||
if (
|
||||
input(
|
||||
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
|
||||
f"{Fore.GREEN}[?] Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] {Style.RESET_ALL}"
|
||||
)
|
||||
.lower()
|
||||
.strip("y")
|
||||
):
|
||||
return False
|
||||
|
||||
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
||||
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
|
||||
rank = Submitter.get_alexa_rank(chosen_site.url_main)
|
||||
if rank:
|
||||
print(f'New alexa rank: {rank}')
|
||||
chosen_site.alexa_rank = rank
|
||||
if self.args.verbose:
|
||||
self.logger.info(
|
||||
"Verbose mode is enabled, additional settings are available"
|
||||
)
|
||||
source = input(
|
||||
f"{Fore.GREEN}[?] Name the source site if it is mirror: {Style.RESET_ALL}"
|
||||
)
|
||||
if source:
|
||||
chosen_site.source = source
|
||||
|
||||
self.logger.debug(chosen_site.json)
|
||||
default_site_name = old_site.name if old_site else chosen_site.name
|
||||
new_name = (
|
||||
input(
|
||||
f"{Fore.GREEN}[?] Change site name if you want [{default_site_name}]: {Style.RESET_ALL}"
|
||||
)
|
||||
or default_site_name
|
||||
)
|
||||
if new_name != default_site_name:
|
||||
self.logger.info(f"New site name is {new_name}")
|
||||
chosen_site.name = new_name
|
||||
|
||||
default_tags_str = ""
|
||||
if old_site:
|
||||
default_tags_str = f' [{", ".join(old_site.tags)}]'
|
||||
|
||||
new_tags = input(
|
||||
f"{Fore.GREEN}[?] Site tags{default_tags_str}: {Style.RESET_ALL}"
|
||||
)
|
||||
if new_tags:
|
||||
chosen_site.tags = list(map(str.strip, new_tags.split(',')))
|
||||
else:
|
||||
chosen_site.tags = []
|
||||
self.logger.info(f"Site tags are: {', '.join(chosen_site.tags)}")
|
||||
# rank = Submitter.get_alexa_rank(chosen_site.url_main)
|
||||
# if rank:
|
||||
# print(f'New alexa rank: {rank}')
|
||||
# chosen_site.alexa_rank = rank
|
||||
|
||||
self.logger.info(chosen_site.json)
|
||||
site_data = chosen_site.strip_engine_data()
|
||||
self.logger.debug(site_data.json)
|
||||
self.db.update_site(site_data)
|
||||
self.logger.info(site_data.json)
|
||||
|
||||
if old_site:
|
||||
# Update old site with new values and log changes
|
||||
fields_to_check = {
|
||||
'url': 'URL',
|
||||
'url_main': 'Main URL',
|
||||
'username_claimed': 'Username claimed',
|
||||
'username_unclaimed': 'Username unclaimed',
|
||||
'check_type': 'Check type',
|
||||
'presense_strs': 'Presence strings',
|
||||
'absence_strs': 'Absence strings',
|
||||
'tags': 'Tags',
|
||||
'source': 'Source',
|
||||
'headers': 'Headers',
|
||||
}
|
||||
|
||||
for field, display_name in fields_to_check.items():
|
||||
old_value = getattr(old_site, field)
|
||||
new_value = getattr(site_data, field)
|
||||
if field == 'tags' and not new_tags:
|
||||
continue
|
||||
if str(old_value) != str(new_value):
|
||||
print(
|
||||
f"{Fore.YELLOW}[*] '{display_name}' updated: {Fore.RED}{old_value} {Fore.YELLOW}to {Fore.GREEN}{new_value}{Style.RESET_ALL}"
|
||||
)
|
||||
old_site.__dict__[field] = new_value
|
||||
|
||||
# update the site
|
||||
final_site = old_site if old_site else site_data
|
||||
self.db.update_site(final_site)
|
||||
|
||||
# save the db in file
|
||||
if self.args.db_file != self.settings.sites_db_path:
|
||||
print(
|
||||
f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}"
|
||||
)
|
||||
self.db.save_to_file(self.args.db)
|
||||
|
||||
return True
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
# coding: utf8
|
||||
import ast
|
||||
import difflib
|
||||
import re
|
||||
import random
|
||||
import string
|
||||
from typing import Any
|
||||
|
||||
|
||||
@@ -41,7 +43,7 @@ def enrich_link_str(link: str) -> str:
|
||||
|
||||
|
||||
class URLMatcher:
|
||||
_HTTP_URL_RE_STR = "^https?://(www.)?(.+)$"
|
||||
_HTTP_URL_RE_STR = "^https?://(www.|m.)?(.+)$"
|
||||
HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
|
||||
UNSAFE_SYMBOLS = ".?"
|
||||
|
||||
@@ -65,7 +67,7 @@ class URLMatcher:
|
||||
)
|
||||
regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
|
||||
|
||||
return re.compile(regexp_str)
|
||||
return re.compile(regexp_str, re.IGNORECASE)
|
||||
|
||||
|
||||
def ascii_data_display(data: str) -> Any:
|
||||
@@ -73,15 +75,22 @@ def ascii_data_display(data: str) -> Any:
|
||||
|
||||
|
||||
def get_dict_ascii_tree(items, prepend="", new_line=True):
|
||||
new_result = b'\xe2\x94\x9c'.decode()
|
||||
new_line = b'\xe2\x94\x80'.decode()
|
||||
last_result = b'\xe2\x94\x94'.decode()
|
||||
skip_result = b'\xe2\x94\x82'.decode()
|
||||
|
||||
text = ""
|
||||
for num, item in enumerate(items):
|
||||
box_symbol = "┣╸" if num != len(items) - 1 else "┗╸"
|
||||
box_symbol = (
|
||||
new_result + new_line if num != len(items) - 1 else last_result + new_line
|
||||
)
|
||||
|
||||
if type(item) == tuple:
|
||||
field_name, field_value = item
|
||||
if field_value.startswith("['"):
|
||||
is_last_item = num == len(items) - 1
|
||||
prepend_symbols = " " * 3 if is_last_item else " ┃ "
|
||||
prepend_symbols = " " * 3 if is_last_item else f" {skip_result} "
|
||||
data = ascii_data_display(field_value)
|
||||
field_value = get_dict_ascii_tree(data, prepend_symbols)
|
||||
text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
|
||||
@@ -111,3 +120,7 @@ def get_match_ratio(base_strs: list):
|
||||
)
|
||||
|
||||
return get_match_inner
|
||||
|
||||
|
||||
def generate_random_username():
|
||||
return ''.join(random.choices(string.ascii_lowercase, k=10))
|
||||
|
||||
@@ -0,0 +1,280 @@
|
||||
# app.py
|
||||
from flask import (
|
||||
Flask,
|
||||
render_template,
|
||||
request,
|
||||
send_file,
|
||||
Response,
|
||||
flash,
|
||||
redirect,
|
||||
url_for,
|
||||
)
|
||||
import logging
|
||||
import os
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from threading import Thread
|
||||
import maigret
|
||||
import maigret.settings
|
||||
from maigret.sites import MaigretDatabase
|
||||
from maigret.report import generate_report_context
|
||||
|
||||
app = Flask(__name__)
|
||||
app.secret_key = 'your-secret-key-here'
|
||||
|
||||
# Add background job tracking
|
||||
background_jobs = {}
|
||||
job_results = {}
|
||||
|
||||
# Configuration
|
||||
MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json')
|
||||
COOKIES_FILE = "cookies.txt"
|
||||
UPLOAD_FOLDER = 'uploads'
|
||||
REPORTS_FOLDER = os.path.abspath('/tmp/maigret_reports')
|
||||
|
||||
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
||||
os.makedirs(REPORTS_FOLDER, exist_ok=True)
|
||||
|
||||
|
||||
def setup_logger(log_level, name):
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(log_level)
|
||||
return logger
|
||||
|
||||
|
||||
async def maigret_search(username, options):
|
||||
logger = setup_logger(logging.WARNING, 'maigret')
|
||||
try:
|
||||
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
|
||||
sites = db.ranked_sites_dict(top=int(options.get('top_sites', 500)))
|
||||
|
||||
results = await maigret.search(
|
||||
username=username,
|
||||
site_dict=sites,
|
||||
timeout=int(options.get('timeout', 30)),
|
||||
logger=logger,
|
||||
id_type=options.get('id_type', 'username'),
|
||||
cookies=COOKIES_FILE if options.get('use_cookies') else None,
|
||||
)
|
||||
return results
|
||||
except Exception as e:
|
||||
logger.error(f"Error during search: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
async def search_multiple_usernames(usernames, options):
|
||||
results = []
|
||||
for username in usernames:
|
||||
try:
|
||||
search_results = await maigret_search(username.strip(), options)
|
||||
results.append((username.strip(), options['id_type'], search_results))
|
||||
except Exception as e:
|
||||
logging.error(f"Error searching username {username}: {str(e)}")
|
||||
return results
|
||||
|
||||
|
||||
def process_search_task(usernames, options, timestamp):
|
||||
try:
|
||||
# Setup event loop for async operations
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
# Run the search
|
||||
general_results = loop.run_until_complete(
|
||||
search_multiple_usernames(usernames, options)
|
||||
)
|
||||
|
||||
# Create session folder
|
||||
session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}")
|
||||
os.makedirs(session_folder, exist_ok=True)
|
||||
|
||||
# Save the combined graph
|
||||
graph_path = os.path.join(session_folder, "combined_graph.html")
|
||||
maigret.report.save_graph_report(
|
||||
graph_path,
|
||||
general_results,
|
||||
MaigretDatabase().load_from_path(MAIGRET_DB_FILE),
|
||||
)
|
||||
|
||||
# Save individual reports
|
||||
individual_reports = []
|
||||
for username, id_type, results in general_results:
|
||||
report_base = os.path.join(session_folder, f"report_{username}")
|
||||
|
||||
csv_path = f"{report_base}.csv"
|
||||
json_path = f"{report_base}.json"
|
||||
pdf_path = f"{report_base}.pdf"
|
||||
html_path = f"{report_base}.html"
|
||||
|
||||
context = generate_report_context(general_results)
|
||||
|
||||
maigret.report.save_csv_report(csv_path, username, results)
|
||||
maigret.report.save_json_report(
|
||||
json_path, username, results, report_type='ndjson'
|
||||
)
|
||||
maigret.report.save_pdf_report(pdf_path, context)
|
||||
maigret.report.save_html_report(html_path, context)
|
||||
|
||||
claimed_profiles = []
|
||||
for site_name, site_data in results.items():
|
||||
if (
|
||||
site_data.get('status')
|
||||
and site_data['status'].status
|
||||
== maigret.result.MaigretCheckStatus.CLAIMED
|
||||
):
|
||||
claimed_profiles.append(
|
||||
{
|
||||
'site_name': site_name,
|
||||
'url': site_data.get('url_user', ''),
|
||||
'tags': (
|
||||
site_data.get('status').tags
|
||||
if site_data.get('status')
|
||||
else []
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
individual_reports.append(
|
||||
{
|
||||
'username': username,
|
||||
'csv_file': os.path.join(
|
||||
f"search_{timestamp}", f"report_{username}.csv"
|
||||
),
|
||||
'json_file': os.path.join(
|
||||
f"search_{timestamp}", f"report_{username}.json"
|
||||
),
|
||||
'pdf_file': os.path.join(
|
||||
f"search_{timestamp}", f"report_{username}.pdf"
|
||||
),
|
||||
'html_file': os.path.join(
|
||||
f"search_{timestamp}", f"report_{username}.html"
|
||||
),
|
||||
'claimed_profiles': claimed_profiles,
|
||||
}
|
||||
)
|
||||
|
||||
# Save results and mark job as complete
|
||||
job_results[timestamp] = {
|
||||
'status': 'completed',
|
||||
'session_folder': f"search_{timestamp}",
|
||||
'graph_file': os.path.join(f"search_{timestamp}", "combined_graph.html"),
|
||||
'usernames': usernames,
|
||||
'individual_reports': individual_reports,
|
||||
}
|
||||
except Exception as e:
|
||||
job_results[timestamp] = {'status': 'failed', 'error': str(e)}
|
||||
finally:
|
||||
background_jobs[timestamp]['completed'] = True
|
||||
|
||||
|
||||
@app.route('/')
|
||||
def index():
|
||||
return render_template('index.html')
|
||||
|
||||
|
||||
@app.route('/search', methods=['POST'])
|
||||
def search():
|
||||
usernames_input = request.form.get('usernames', '').strip()
|
||||
if not usernames_input:
|
||||
flash('At least one username is required', 'danger')
|
||||
return redirect(url_for('index'))
|
||||
|
||||
usernames = [
|
||||
u.strip() for u in usernames_input.replace(',', ' ').split() if u.strip()
|
||||
]
|
||||
|
||||
# Create timestamp for this search session
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
logging.info(f"Starting search for usernames: {usernames}")
|
||||
|
||||
options = {
|
||||
'top_sites': request.form.get('top_sites', '500'),
|
||||
'timeout': request.form.get('timeout', '30'),
|
||||
'id_type': 'username', # fixed as username
|
||||
'use_cookies': 'use_cookies' in request.form,
|
||||
}
|
||||
|
||||
# Start background job
|
||||
background_jobs[timestamp] = {
|
||||
'completed': False,
|
||||
'thread': Thread(
|
||||
target=process_search_task, args=(usernames, options, timestamp)
|
||||
),
|
||||
}
|
||||
background_jobs[timestamp]['thread'].start()
|
||||
|
||||
logging.info(f"Search job started with timestamp: {timestamp}")
|
||||
|
||||
# Redirect to status page
|
||||
return redirect(url_for('status', timestamp=timestamp))
|
||||
|
||||
|
||||
@app.route('/status/<timestamp>')
|
||||
def status(timestamp):
|
||||
logging.info(f"Status check for timestamp: {timestamp}")
|
||||
|
||||
# Validate timestamp
|
||||
if timestamp not in background_jobs:
|
||||
flash('Invalid search session', 'danger')
|
||||
return redirect(url_for('index'))
|
||||
|
||||
# Check if job is completed
|
||||
if background_jobs[timestamp]['completed']:
|
||||
result = job_results.get(timestamp)
|
||||
if not result:
|
||||
flash('No results found for this search session', 'warning')
|
||||
return redirect(url_for('index'))
|
||||
|
||||
if result['status'] == 'completed':
|
||||
# Redirect to results page once done
|
||||
return redirect(url_for('results', session_id=result['session_folder']))
|
||||
else:
|
||||
error_msg = result.get('error', 'Unknown error occurred')
|
||||
flash(f'Search failed: {error_msg}', 'danger')
|
||||
return redirect(url_for('index'))
|
||||
|
||||
# If job is still running, show status page with a simple spinner
|
||||
return render_template('status.html', timestamp=timestamp)
|
||||
|
||||
|
||||
@app.route('/results/<session_id>')
|
||||
def results(session_id):
|
||||
if not session_id.startswith('search_'):
|
||||
flash('Invalid results session format', 'danger')
|
||||
return redirect(url_for('index'))
|
||||
|
||||
result_data = next(
|
||||
(
|
||||
r
|
||||
for r in job_results.values()
|
||||
if r.get('status') == 'completed' and r['session_folder'] == session_id
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
return render_template(
|
||||
'results.html',
|
||||
usernames=result_data['usernames'],
|
||||
graph_file=result_data['graph_file'],
|
||||
individual_reports=result_data['individual_reports'],
|
||||
timestamp=session_id.replace('search_', ''),
|
||||
)
|
||||
|
||||
|
||||
@app.route('/reports/<path:filename>')
|
||||
def download_report(filename):
|
||||
try:
|
||||
file_path = os.path.join(REPORTS_FOLDER, filename)
|
||||
return send_file(file_path)
|
||||
except Exception as e:
|
||||
logging.error(f"Error serving file {filename}: {str(e)}")
|
||||
return "File not found", 404
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
)
|
||||
app.run(debug=True)
|
||||
@@ -0,0 +1,44 @@
|
||||
<!-- templates/base.html -->
|
||||
<!DOCTYPE html>
|
||||
<html lang="en" data-bs-theme="dark">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Maigret Web Interface</title>
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
||||
<style>
|
||||
body {
|
||||
padding-top: 2rem;
|
||||
}
|
||||
.form-container {
|
||||
max-width: auto;
|
||||
margin: auto;
|
||||
}
|
||||
[data-bs-theme="dark"] {
|
||||
--bs-body-bg: #212529;
|
||||
--bs-body-color: #dee2e6;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="mb-3">
|
||||
<button class="btn btn-outline-secondary" id="theme-toggle">
|
||||
Toggle Dark/Light Mode
|
||||
</button>
|
||||
</div>
|
||||
{% block content %}{% endblock %}
|
||||
</div>
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
|
||||
<script>
|
||||
document.getElementById('theme-toggle').addEventListener('click', function() {
|
||||
const html = document.documentElement;
|
||||
if (html.getAttribute('data-bs-theme') === 'dark') {
|
||||
html.setAttribute('data-bs-theme', 'light');
|
||||
} else {
|
||||
html.setAttribute('data-bs-theme', 'dark');
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,35 @@
|
||||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<div class="form-container">
|
||||
<h1 class="mb-4">Maigret Web Interface</h1>
|
||||
|
||||
{% if error %}
|
||||
<div class="alert alert-danger">{{ error }}</div>
|
||||
{% endif %}
|
||||
|
||||
<form method="POST" action="{{ url_for('search') }}" class="mb-4">
|
||||
<div class="mb-3">
|
||||
<label for="usernames" class="form-label">Usernames to Search</label>
|
||||
<textarea class="form-control" id="usernames" name="usernames" rows="3" required
|
||||
placeholder="Enter one or more usernames (separated by spaces or commas)"></textarea>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="top_sites" class="form-label">Number of Top Sites to Check</label>
|
||||
<input type="number" class="form-control" id="top_sites" name="top_sites" value="500" min="1" max="10000">
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="timeout" class="form-label">Timeout (seconds)</label>
|
||||
<input type="number" class="form-control" id="timeout" name="timeout" value="30" min="1" max="120">
|
||||
</div>
|
||||
|
||||
<div class="mb-3 form-check">
|
||||
<input type="checkbox" class="form-check-input" id="use_cookies" name="use_cookies">
|
||||
<label class="form-check-label" for="use_cookies">Use Cookies File</label>
|
||||
</div>
|
||||
|
||||
<button type="submit" class="btn btn-primary">Search</button>
|
||||
</form>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,56 @@
|
||||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<div class="form-container">
|
||||
<h1 class="mb-4">Search Results</h1>
|
||||
|
||||
{% with messages = get_flashed_messages() %}
|
||||
{% if messages %}
|
||||
{% for message in messages %}
|
||||
<div class="alert alert-info">{{ message }}</div>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
{% endwith %}
|
||||
|
||||
<p>The search has completed. Below are the results:</p>
|
||||
|
||||
<!-- Display the combined graph if available -->
|
||||
{% if graph_file %}
|
||||
<h3>Combined Graph</h3>
|
||||
<iframe src="{{ url_for('download_report', filename=graph_file) }}" style="width:100%; height:600px; border:none;"></iframe>
|
||||
{% endif %}
|
||||
|
||||
<hr>
|
||||
|
||||
<!-- Display individual reports -->
|
||||
{% if individual_reports %}
|
||||
<h3>Individual Reports</h3>
|
||||
<ul class="list-group">
|
||||
{% for report in individual_reports %}
|
||||
<li class="list-group-item">
|
||||
<h5>{{ report.username }}</h5>
|
||||
<p>
|
||||
<a href="{{ url_for('download_report', filename=report.csv_file) }}">CSV Report</a> |
|
||||
<a href="{{ url_for('download_report', filename=report.json_file) }}">JSON Report</a> |
|
||||
<a href="{{ url_for('download_report', filename=report.pdf_file) }}">PDF Report</a> |
|
||||
<a href="{{ url_for('download_report', filename=report.html_file) }}">HTML Report</a>
|
||||
</p>
|
||||
{% if report.claimed_profiles %}
|
||||
<strong>Claimed Profiles:</strong>
|
||||
<ul>
|
||||
{% for profile in report.claimed_profiles %}
|
||||
<li>
|
||||
<a href="{{ profile.url }}" target="_blank">{{ profile.site_name }}</a> (Tags: {{ profile.tags|join(', ') }})
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>No claimed profiles found.</p>
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>No individual reports available.</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,16 @@
|
||||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<div class="container mt-4 text-center">
|
||||
<h2>Search in progress...</h2>
|
||||
<p>Your request is being processed in the background. This page will automatically redirect once the results are ready.</p>
|
||||
<div class="spinner-border text-primary" role="status">
|
||||
<span class="visually-hidden">Loading...</span>
|
||||
</div>
|
||||
<script>
|
||||
// Auto-refresh the page every 5 seconds to check completion
|
||||
setTimeout(function() {
|
||||
window.location.reload();
|
||||
}, 5000);
|
||||
</script>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,47 @@
|
||||
# Download this first to avoid compatibility issues:
|
||||
#
|
||||
# sudo zypper in python3-devel
|
||||
# sudo zypper in python3-dev
|
||||
#
|
||||
# Then run 'pip3 install -r opensuse.txt' as usual.
|
||||
#
|
||||
aiodns>=3.0.0
|
||||
aiohttp>=3.8.6
|
||||
aiohttp-socks>=0.7.1
|
||||
arabic-reshaper~=3.0.0
|
||||
async-timeout
|
||||
attrs>=22.2.0
|
||||
certifi>=2023.7.22
|
||||
chardet>=5.0.0
|
||||
colorama
|
||||
future>=0.18.3
|
||||
future-annotations>=1.0.0
|
||||
html5lib>=1.1
|
||||
idna>=3.4
|
||||
Jinja2
|
||||
lxml>=4.9.2
|
||||
MarkupSafe
|
||||
mock>=4.0.3
|
||||
multidict
|
||||
pycountry>=22.3.5
|
||||
PyPDF2>=3.0.1
|
||||
PySocks>=1.7.1
|
||||
python-bidi>=0.4.2
|
||||
requests
|
||||
requests-futures>=1.0.0
|
||||
six>=1.16.0
|
||||
socid-extractor>=0.0.24
|
||||
soupsieve>=2.3.2.post1
|
||||
stem>=1.8.1
|
||||
torrequest>=0.1.0
|
||||
tqdm
|
||||
typing-extensions
|
||||
webencodings>=0.5.1
|
||||
svglib
|
||||
xhtml2pdf~=0.2.11
|
||||
XMind>=1.2.0
|
||||
yarl
|
||||
networkx
|
||||
pyvis>=0.2.1
|
||||
reportlab
|
||||
cloudscraper>=1.2.71
|
||||
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
import asyncio
|
||||
|
||||
import maigret
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(maigret.cli())
|
||||
@@ -0,0 +1,55 @@
|
||||
# -*- mode: python ; coding: utf-8 -*-
|
||||
from PyInstaller.utils.hooks import collect_all
|
||||
|
||||
datas = []
|
||||
binaries = []
|
||||
hiddenimports = []
|
||||
|
||||
full_import_modules = ['maigret', 'socid_extractor', 'arabic_reshaper', 'pyvis', 'reportlab.graphics.barcode']
|
||||
|
||||
for module in full_import_modules:
|
||||
tmp_ret = collect_all(module)
|
||||
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
|
||||
|
||||
hiddenimports += ['PySocks', 'beautifulsoup4', 'python-dateutil',
|
||||
'future-annotations', 'six', 'python-bidi',
|
||||
'typing-extensions', 'attrs', 'torrequest']
|
||||
|
||||
block_cipher = None
|
||||
|
||||
|
||||
a = Analysis(['maigret_standalone.py'],
|
||||
pathex=[],
|
||||
binaries=binaries,
|
||||
datas=datas,
|
||||
hiddenimports=hiddenimports,
|
||||
hookspath=[],
|
||||
hooksconfig={},
|
||||
runtime_hooks=[],
|
||||
excludes=[],
|
||||
win_no_prefer_redirects=False,
|
||||
win_private_assemblies=False,
|
||||
cipher=block_cipher,
|
||||
noarchive=False)
|
||||
|
||||
pyz = PYZ(a.pure, a.zipped_data,
|
||||
cipher=block_cipher)
|
||||
|
||||
exe = EXE(pyz,
|
||||
a.scripts,
|
||||
a.binaries,
|
||||
a.zipfiles,
|
||||
a.datas,
|
||||
[],
|
||||
name='maigret_standalone',
|
||||
debug=False,
|
||||
bootloader_ignore_signals=False,
|
||||
strip=False,
|
||||
upx=True,
|
||||
upx_exclude=[],
|
||||
runtime_tmpdir=None,
|
||||
console=True,
|
||||
disable_windowed_traceback=False,
|
||||
target_arch=None,
|
||||
codesign_identity=None,
|
||||
entitlements_file=None )
|
||||
@@ -0,0 +1,5 @@
|
||||
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
|
||||
pefile==2023.2.7 # do not bump while pyinstaller is 6.11.1, there is a conflict
|
||||
psutil==6.1.0
|
||||
pyinstaller==6.11.1
|
||||
pywin32-ctypes==0.2.3
|
||||
@@ -0,0 +1,96 @@
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "maigret"
|
||||
version = "0.5.0a1"
|
||||
description = "🕵️♂️ Collect a dossier on a person by username from thousands of sites."
|
||||
authors = ["Soxoj <soxoj@protonmail.com>"]
|
||||
readme = "README.md"
|
||||
license = "MIT License"
|
||||
homepage = "https://pypi.org/project/maigret"
|
||||
documentation = "https://maigret.readthedocs.io"
|
||||
repository = "https://github.com/soxoj/maigret"
|
||||
classifiers = [
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Intended Audience :: Information Technology",
|
||||
"Operating System :: OS Independent",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Natural Language :: English"
|
||||
]
|
||||
|
||||
[tool.poetry.urls]
|
||||
"Bug Tracker" = "https://github.com/soxoj/maigret/issues"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
# poetry install
|
||||
# Install only production dependencies:
|
||||
# poetry install --without dev
|
||||
# Install with dev dependencies:
|
||||
# poetry install --with dev
|
||||
python = "^3.10"
|
||||
aiodns = "^3.0.0"
|
||||
aiohttp = "^3.11.10"
|
||||
aiohttp-socks = "^0.9.1"
|
||||
arabic-reshaper = "^3.0.0"
|
||||
async-timeout = "^5.0.1"
|
||||
attrs = "^24.2.0"
|
||||
certifi = "^2024.8.30"
|
||||
chardet = "^5.0.0"
|
||||
colorama = "^0.4.6"
|
||||
future = "^1.0.0"
|
||||
future-annotations= "^1.0.0"
|
||||
html5lib = "^1.1"
|
||||
idna = "^3.4"
|
||||
Jinja2 = "^3.1.3"
|
||||
lxml = "^5.3.0"
|
||||
MarkupSafe = "^3.0.2"
|
||||
mock = "^5.1.0"
|
||||
multidict = "^6.0.4"
|
||||
pycountry = "^24.6.1"
|
||||
PyPDF2 = "^3.0.1"
|
||||
PySocks = "^1.7.1"
|
||||
python-bidi = "^0.6.3"
|
||||
requests = "^2.31.0"
|
||||
requests-futures = "^1.0.2"
|
||||
six = "^1.17.0"
|
||||
socid-extractor = "^0.0.27"
|
||||
soupsieve = "^2.6"
|
||||
stem = "^1.8.1"
|
||||
torrequest = "^0.1.0"
|
||||
alive_progress = "^3.2.0"
|
||||
typing-extensions = "^4.8.0"
|
||||
webencodings = "^0.5.1"
|
||||
xhtml2pdf = "^0.2.11"
|
||||
XMind = "^1.2.0"
|
||||
yarl = "^1.18.3"
|
||||
networkx = "^2.6.3"
|
||||
pyvis = "^0.3.2"
|
||||
reportlab = "^4.2.0"
|
||||
cloudscraper = "^1.2.71"
|
||||
flask = {extras = ["async"], version = "^3.1.0"}
|
||||
asgiref = "^3.8.1"
|
||||
platformdirs = "^4.3.6"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
# How to add a new dev dependency: poetry add black --group dev
|
||||
# Install dev dependencies with: poetry install --with dev
|
||||
flake8 = "^7.1.1"
|
||||
pytest = "^8.3.4"
|
||||
pytest-asyncio = "^0.25.0"
|
||||
pytest-cov = "^6.0.0"
|
||||
pytest-httpserver = "^1.0.0"
|
||||
pytest-rerunfailures = "^15.0"
|
||||
reportlab = "^4.2.0"
|
||||
mypy = "^1.13.0"
|
||||
tuna = "^0.5.11"
|
||||
coverage = "^7.6.9"
|
||||
black = "^24.10.0"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
# Run with: poetry run maigret <username>
|
||||
maigret = "maigret.maigret:run"
|
||||
update_sitesmd = "utils.update_site_data:main"
|
||||
@@ -3,3 +3,4 @@
|
||||
filterwarnings =
|
||||
error
|
||||
ignore::UserWarning
|
||||
asyncio_mode=auto
|
||||
@@ -1,41 +0,0 @@
|
||||
aiodns==3.0.0
|
||||
aiohttp==3.7.4
|
||||
aiohttp-socks==0.5.5
|
||||
arabic-reshaper==2.1.1
|
||||
async-timeout==3.0.1
|
||||
attrs==20.3.0
|
||||
beautifulsoup4==4.9.3
|
||||
bs4==0.0.1
|
||||
certifi==2020.12.5
|
||||
chardet==3.0.4
|
||||
colorama==0.4.4
|
||||
python-dateutil==2.8.1
|
||||
future==0.18.2
|
||||
future-annotations==1.0.0
|
||||
html5lib==1.1
|
||||
idna==2.10
|
||||
Jinja2==2.11.3
|
||||
lxml==4.6.3
|
||||
MarkupSafe==1.1.1
|
||||
mock==4.0.2
|
||||
multidict==5.1.0
|
||||
pycountry==20.7.3
|
||||
PyPDF2==1.26.0
|
||||
PySocks==1.7.1
|
||||
python-bidi==0.4.2
|
||||
python-socks==1.1.2
|
||||
requests>=2.24.0
|
||||
requests-futures==1.0.0
|
||||
six==1.15.0
|
||||
socid-extractor>=0.0.21
|
||||
soupsieve==2.1
|
||||
stem==1.8.0
|
||||
torrequest==0.1.0
|
||||
tqdm==4.55.0
|
||||
typing-extensions==3.7.4.3
|
||||
webencodings==0.5.1
|
||||
xhtml2pdf==0.2.5
|
||||
XMind==1.2.0
|
||||
yarl==1.6.3
|
||||
networkx==2.5.1
|
||||
pyvis==0.1.9
|
||||
@@ -1,9 +0,0 @@
|
||||
[egg_info]
|
||||
tag_build =
|
||||
tag_date = 0
|
||||
|
||||
[flake8]
|
||||
per-file-ignores = __init__.py:F401
|
||||
|
||||
[mypy]
|
||||
ignore_missing_imports = True
|
||||
@@ -1,26 +0,0 @@
|
||||
from setuptools import (
|
||||
setup,
|
||||
find_packages,
|
||||
)
|
||||
|
||||
|
||||
with open('README.md') as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
with open('requirements.txt') as rf:
|
||||
requires = rf.read().splitlines()
|
||||
|
||||
setup(name='maigret',
|
||||
version='0.3.1',
|
||||
description='Collect a dossier on a person by username from a huge number of sites',
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url='https://github.com/soxoj/maigret',
|
||||
install_requires=requires,
|
||||
entry_points={'console_scripts': ['maigret = maigret.maigret:run']},
|
||||
packages=find_packages(),
|
||||
include_package_data=True,
|
||||
author='Soxoj',
|
||||
author_email='soxoj@protonmail.com',
|
||||
license='MIT',
|
||||
zip_safe=False)
|
||||
@@ -0,0 +1,32 @@
|
||||
title: Maigret
|
||||
icon: static/maigret.png
|
||||
name: maigret
|
||||
summary: 🕵️♂️ Collect a dossier on a person by username from thousands of sites.
|
||||
description: |
|
||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of Sherlock.
|
||||
|
||||
Currently supported more than 3000 sites, search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
|
||||
|
||||
version: 0.5.0a1
|
||||
license: MIT
|
||||
base: core22
|
||||
confinement: strict
|
||||
|
||||
source-code: https://github.com/soxoj/maigret
|
||||
issues:
|
||||
- https://github.com/soxoj/maigret/issues
|
||||
donation:
|
||||
- https://patreon.com/soxoj
|
||||
contact:
|
||||
- mailto:soxoj@protonmail.com
|
||||
|
||||
parts:
|
||||
maigret:
|
||||
plugin: python
|
||||
source: .
|
||||
|
||||
type: app
|
||||
apps:
|
||||
maigret:
|
||||
command: bin/maigret
|
||||
plugs: [ network, network-bind, home ]
|
||||
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 45 KiB |
|
Before Width: | Height: | Size: 44 KiB After Width: | Height: | Size: 1.6 MiB |
|
Before Width: | Height: | Size: 607 KiB After Width: | Height: | Size: 451 KiB |
|
Before Width: | Height: | Size: 773 KiB After Width: | Height: | Size: 351 KiB |
@@ -1,6 +0,0 @@
|
||||
flake8==3.8.4
|
||||
pytest==6.2.4
|
||||
pytest-asyncio==0.14.0
|
||||
pytest-cov==2.10.1
|
||||
pytest-httpserver==1.0.0
|
||||
pytest-rerunfailures==9.1.1
|
||||
@@ -7,17 +7,42 @@ from _pytest.mark import Mark
|
||||
|
||||
from maigret.sites import MaigretDatabase
|
||||
from maigret.maigret import setup_arguments_parser
|
||||
from maigret.settings import Settings
|
||||
from aiohttp import web
|
||||
|
||||
|
||||
LOCAL_SERVER_PORT = 8080
|
||||
|
||||
CUR_PATH = os.path.dirname(os.path.realpath(__file__))
|
||||
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
|
||||
SETTINGS_FILE = os.path.join(CUR_PATH, '../maigret/resources/settings.json')
|
||||
TEST_JSON_FILE = os.path.join(CUR_PATH, 'db.json')
|
||||
LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
|
||||
empty_mark = Mark('', (), {})
|
||||
|
||||
|
||||
RESULTS_EXAMPLE = {
|
||||
'Reddit': {
|
||||
'cookies': None,
|
||||
'parsing_enabled': False,
|
||||
'url_main': 'https://www.reddit.com/',
|
||||
'username': 'Skyeng',
|
||||
},
|
||||
'GooglePlayStore': {
|
||||
'cookies': None,
|
||||
'http_status': 200,
|
||||
'is_similar': False,
|
||||
'parsing_enabled': False,
|
||||
'rank': 1,
|
||||
'url_main': 'https://play.google.com/store',
|
||||
'url_user': 'https://play.google.com/store/apps/developer?id=Skyeng',
|
||||
'username': 'Skyeng',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def by_slow_marker(item):
|
||||
return item.get_closest_marker('slow', default=empty_mark)
|
||||
return item.get_closest_marker('slow', default=empty_mark).name
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(items):
|
||||
@@ -57,11 +82,37 @@ def reports_autoclean():
|
||||
remove_test_reports()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
def settings():
|
||||
settings = Settings()
|
||||
settings.load([SETTINGS_FILE])
|
||||
return settings
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
def argparser():
|
||||
return setup_arguments_parser()
|
||||
settings = Settings()
|
||||
settings.load([SETTINGS_FILE])
|
||||
return setup_arguments_parser(settings)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def httpserver_listen_address():
|
||||
return ("localhost", 8989)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def cookie_test_server():
|
||||
async def handle_cookies(request):
|
||||
print(f"Received cookies: {request.cookies}")
|
||||
cookies_dict = {k: v for k, v in request.cookies.items()}
|
||||
return web.json_response({'cookies': cookies_dict})
|
||||
|
||||
app = web.Application()
|
||||
app.router.add_get('/cookies', handle_cookies)
|
||||
runner = web.AppRunner(app)
|
||||
await runner.setup()
|
||||
server = web.TCPSite(runner, port=LOCAL_SERVER_PORT)
|
||||
await server.start()
|
||||
yield server
|
||||
await runner.cleanup()
|
||||
|
||||
@@ -1,25 +1,62 @@
|
||||
{
|
||||
"engines": {},
|
||||
"engines": {
|
||||
"Discourse": {
|
||||
"name": "Discourse",
|
||||
"site": {
|
||||
"presenseStrs": [
|
||||
"<meta name=\"generator\" content=\"Discourse"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"Oops! That page doesn\u2019t exist or is private.",
|
||||
"wrap not-found-container"
|
||||
],
|
||||
"checkType": "message",
|
||||
"url": "{urlMain}/u/{username}/summary"
|
||||
},
|
||||
"presenseStrs": [
|
||||
"<meta name=\"generator\" content=\"Discourse"
|
||||
]
|
||||
}
|
||||
},
|
||||
"sites": {
|
||||
"GooglePlayStore": {
|
||||
"ValidActive": {
|
||||
"tags": ["global", "us"],
|
||||
"disabled": false,
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 1,
|
||||
"url": "https://play.google.com/store/apps/developer?id={username}",
|
||||
"urlMain": "https://play.google.com/store",
|
||||
"usernameClaimed": "Facebook_nosuchname",
|
||||
"usernameClaimed": "KONAMI",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Reddit": {
|
||||
"tags": ["news", "social", "us"],
|
||||
"InvalidActive": {
|
||||
"tags": ["global", "us"],
|
||||
"disabled": false,
|
||||
"checkType": "status_code",
|
||||
"presenseStrs": ["totalKarma"],
|
||||
"alexaRank": 1,
|
||||
"url": "https://play.google.com/store/apps/dev?id={username}",
|
||||
"urlMain": "https://play.google.com/store",
|
||||
"usernameClaimed": "KONAMI",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"ValidInactive": {
|
||||
"tags": ["global", "us"],
|
||||
"disabled": true,
|
||||
"alexaRank": 17,
|
||||
"url": "https://www.reddit.com/user/{username}",
|
||||
"urlMain": "https://www.reddit.com/",
|
||||
"usernameClaimed": "blue",
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 1,
|
||||
"url": "https://play.google.com/store/apps/developer?id={username}",
|
||||
"urlMain": "https://play.google.com/store",
|
||||
"usernameClaimed": "KONAMI",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"InvalidInactive": {
|
||||
"tags": ["global", "us"],
|
||||
"disabled": true,
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 1,
|
||||
"url": "https://play.google.com/store/apps/dev?id={username}",
|
||||
"urlMain": "https://play.google.com/store",
|
||||
"usernameClaimed": "KONAMI",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
"""Maigret activation test functions"""
|
||||
|
||||
import json
|
||||
import yarl
|
||||
|
||||
import aiohttp
|
||||
import pytest
|
||||
from mock import Mock
|
||||
|
||||
from tests.conftest import LOCAL_SERVER_PORT
|
||||
from maigret.activation import ParsingActivator, import_aiohttp_cookies
|
||||
|
||||
COOKIES_TXT = """# HTTP Cookie File downloaded with cookies.txt by Genuinous @genuinous
|
||||
@@ -18,39 +21,38 @@ xss.is FALSE / TRUE 0 xf_csrf test
|
||||
xss.is FALSE / TRUE 1642709308 xf_user tset
|
||||
.xss.is TRUE / FALSE 0 muchacho_cache test
|
||||
.xss.is TRUE / FALSE 1924905600 132_evc test
|
||||
httpbin.org FALSE / FALSE 0 a b
|
||||
localhost FALSE / FALSE 0 a b
|
||||
"""
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="periodically fails")
|
||||
@pytest.mark.skip("captcha")
|
||||
@pytest.mark.slow
|
||||
def test_twitter_activation(default_db):
|
||||
twitter_site = default_db.sites_dict['Twitter']
|
||||
token1 = twitter_site.headers['x-guest-token']
|
||||
def test_vimeo_activation(default_db):
|
||||
vimeo_site = default_db.sites_dict['Vimeo']
|
||||
token1 = vimeo_site.headers['Authorization']
|
||||
|
||||
ParsingActivator.twitter(twitter_site, Mock())
|
||||
token2 = twitter_site.headers['x-guest-token']
|
||||
ParsingActivator.vimeo(vimeo_site, Mock())
|
||||
token2 = vimeo_site.headers['Authorization']
|
||||
|
||||
assert token1 != token2
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.asyncio
|
||||
async def test_import_aiohttp_cookies():
|
||||
async def test_import_aiohttp_cookies(cookie_test_server):
|
||||
cookies_filename = 'cookies_test.txt'
|
||||
with open(cookies_filename, 'w') as f:
|
||||
f.write(COOKIES_TXT)
|
||||
|
||||
cookie_jar = import_aiohttp_cookies(cookies_filename)
|
||||
assert list(cookie_jar._cookies.keys()) == ['xss.is', 'httpbin.org']
|
||||
url = f'http://localhost:{LOCAL_SERVER_PORT}/cookies'
|
||||
|
||||
url = 'https://httpbin.org/cookies'
|
||||
connector = aiohttp.TCPConnector(ssl=False)
|
||||
session = aiohttp.ClientSession(
|
||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||
)
|
||||
cookies = cookie_jar.filter_cookies(yarl.URL(url))
|
||||
assert cookies['a'].value == 'b'
|
||||
|
||||
response = await session.get(url=url)
|
||||
result = json.loads(await response.content.read())
|
||||
await session.close()
|
||||
async with aiohttp.ClientSession(cookie_jar=cookie_jar) as session:
|
||||
async with session.get(url=url) as response:
|
||||
result = await response.json()
|
||||
print(f"Server response: {result}")
|
||||
|
||||
assert result == {'cookies': {'a': 'b'}}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""Maigret command-line arguments parsing tests"""
|
||||
|
||||
from argparse import Namespace
|
||||
from typing import Dict, Any
|
||||
|
||||
@@ -7,7 +8,7 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
||||
'connections': 100,
|
||||
'cookie_file': None,
|
||||
'csv': False,
|
||||
'db_file': None,
|
||||
'db_file': 'resources/data.json',
|
||||
'debug': False,
|
||||
'disable_extracting': False,
|
||||
'disable_recursive_search': False,
|
||||
@@ -23,11 +24,12 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
||||
'no_progressbar': False,
|
||||
'parse_url': '',
|
||||
'pdf': False,
|
||||
'permute': False,
|
||||
'print_check_errors': False,
|
||||
'print_not_found': False,
|
||||
'proxy': None,
|
||||
'reports_sorting': 'default',
|
||||
'retries': 1,
|
||||
'retries': 0,
|
||||
'self_check': False,
|
||||
'site_list': [],
|
||||
'stats': False,
|
||||
@@ -40,6 +42,7 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
||||
'use_disabled_sites': False,
|
||||
'username': [],
|
||||
'verbose': False,
|
||||
'web': 5000,
|
||||
'with_domains': False,
|
||||
'xmind': False,
|
||||
}
|
||||
@@ -53,7 +56,8 @@ def test_args_search_mode(argparser):
|
||||
want_args = dict(DEFAULT_ARGS)
|
||||
want_args.update({'username': ['username']})
|
||||
|
||||
assert args == Namespace(**want_args)
|
||||
for arg in vars(args):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
|
||||
def test_args_search_mode_several_usernames(argparser):
|
||||
@@ -64,7 +68,8 @@ def test_args_search_mode_several_usernames(argparser):
|
||||
want_args = dict(DEFAULT_ARGS)
|
||||
want_args.update({'username': ['username1', 'username2']})
|
||||
|
||||
assert args == Namespace(**want_args)
|
||||
for arg in vars(args):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
|
||||
def test_args_self_check_mode(argparser):
|
||||
@@ -79,7 +84,8 @@ def test_args_self_check_mode(argparser):
|
||||
}
|
||||
)
|
||||
|
||||
assert args == Namespace(**want_args)
|
||||
for arg in vars(args):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
|
||||
def test_args_multiple_sites(argparser):
|
||||
@@ -95,4 +101,5 @@ def test_args_multiple_sites(argparser):
|
||||
}
|
||||
)
|
||||
|
||||
assert args == Namespace(**want_args)
|
||||
for arg in vars(args):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
"""Maigret data test functions"""
|
||||
|
||||
import pytest
|
||||
from maigret.utils import is_country_tag
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_tags_validity(default_db):
|
||||
unknown_tags = set()
|
||||
|
||||
@@ -13,4 +15,7 @@ def test_tags_validity(default_db):
|
||||
if tag not in tags:
|
||||
unknown_tags.add(tag)
|
||||
|
||||
# make sure all tags are known
|
||||
# if you see "unchecked" tag error, please, do
|
||||
# maigret --db `pwd`/maigret/resources/data.json --self-check --tag unchecked --use-disabled-sites
|
||||
assert unknown_tags == set()
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
import pytest
|
||||
from maigret.errors import notify_about_errors, CheckError
|
||||
from maigret.types import QueryResultWrapper
|
||||
from maigret.result import MaigretCheckResult, MaigretCheckStatus
|
||||
|
||||
|
||||
def test_notify_about_errors():
|
||||
results = {
|
||||
'site1': {
|
||||
'status': MaigretCheckResult(
|
||||
'', '', '', MaigretCheckStatus.UNKNOWN, error=CheckError('Captcha')
|
||||
)
|
||||
},
|
||||
'site2': {
|
||||
'status': MaigretCheckResult(
|
||||
'',
|
||||
'',
|
||||
'',
|
||||
MaigretCheckStatus.UNKNOWN,
|
||||
error=CheckError('Bot protection'),
|
||||
)
|
||||
},
|
||||
'site3': {
|
||||
'status': MaigretCheckResult(
|
||||
'',
|
||||
'',
|
||||
'',
|
||||
MaigretCheckStatus.UNKNOWN,
|
||||
error=CheckError('Access denied'),
|
||||
)
|
||||
},
|
||||
'site4': {
|
||||
'status': MaigretCheckResult(
|
||||
'', '', '', MaigretCheckStatus.CLAIMED, error=None
|
||||
)
|
||||
},
|
||||
}
|
||||
|
||||
results = notify_about_errors(results, query_notify=None, show_statistics=True)
|
||||
|
||||
# Check the output
|
||||
expected_output = [
|
||||
(
|
||||
'Too many errors of type "Captcha" (25.0%). Try to switch to another ip address or to use service cookies',
|
||||
'!',
|
||||
),
|
||||
(
|
||||
'Too many errors of type "Bot protection" (25.0%). Try to switch to another ip address',
|
||||
'!',
|
||||
),
|
||||
('Too many errors of type "Access denied" (25.0%)', '!'),
|
||||
('Verbose error statistics:', '-'),
|
||||
('Captcha: 25.0%', '!'),
|
||||
('Bot protection: 25.0%', '!'),
|
||||
('Access denied: 25.0%', '!'),
|
||||
('You can see detailed site check errors with a flag `--print-errors`', '-'),
|
||||
]
|
||||
assert results == expected_output
|
||||
@@ -1,4 +1,5 @@
|
||||
"""Maigret checking logic test functions"""
|
||||
|
||||
import pytest
|
||||
import asyncio
|
||||
import logging
|
||||
@@ -48,6 +49,7 @@ async def test_asyncio_progressbar_semaphore_executor():
|
||||
assert executor.execution_time < 0.4
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.asyncio
|
||||
async def test_asyncio_progressbar_queue_executor():
|
||||
tasks = [(func, [n], {}) for n in range(10)]
|
||||
@@ -55,19 +57,22 @@ async def test_asyncio_progressbar_queue_executor():
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=2)
|
||||
assert await executor.run(tasks) == [0, 1, 3, 2, 4, 6, 7, 5, 9, 8]
|
||||
assert executor.execution_time > 0.5
|
||||
assert executor.execution_time < 0.6
|
||||
assert executor.execution_time < 0.7
|
||||
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=3)
|
||||
assert await executor.run(tasks) == [0, 3, 1, 4, 6, 2, 7, 9, 5, 8]
|
||||
assert executor.execution_time > 0.4
|
||||
assert executor.execution_time < 0.5
|
||||
assert executor.execution_time < 0.6
|
||||
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=5)
|
||||
assert await executor.run(tasks) == [0, 3, 6, 1, 4, 7, 9, 2, 5, 8]
|
||||
assert await executor.run(tasks) in (
|
||||
[0, 3, 6, 1, 4, 7, 9, 2, 5, 8],
|
||||
[0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
|
||||
)
|
||||
assert executor.execution_time > 0.3
|
||||
assert executor.execution_time < 0.4
|
||||
assert executor.execution_time < 0.5
|
||||
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=10)
|
||||
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
|
||||
assert executor.execution_time > 0.2
|
||||
assert executor.execution_time < 0.3
|
||||
assert executor.execution_time < 0.4
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""Maigret main module test functions"""
|
||||
|
||||
import asyncio
|
||||
import copy
|
||||
|
||||
@@ -11,94 +12,35 @@ from maigret.maigret import (
|
||||
extract_ids_from_results,
|
||||
)
|
||||
from maigret.sites import MaigretSite
|
||||
from maigret.result import QueryResult, QueryStatus
|
||||
|
||||
|
||||
RESULTS_EXAMPLE = {
|
||||
'Reddit': {
|
||||
'cookies': None,
|
||||
'parsing_enabled': False,
|
||||
'url_main': 'https://www.reddit.com/',
|
||||
'username': 'Facebook',
|
||||
},
|
||||
'GooglePlayStore': {
|
||||
'cookies': None,
|
||||
'http_status': 200,
|
||||
'is_similar': False,
|
||||
'parsing_enabled': False,
|
||||
'rank': 1,
|
||||
'url_main': 'https://play.google.com/store',
|
||||
'url_user': 'https://play.google.com/store/apps/developer?id=Facebook',
|
||||
'username': 'Facebook',
|
||||
},
|
||||
}
|
||||
from maigret.result import MaigretCheckResult, MaigretCheckStatus
|
||||
from tests.conftest import RESULTS_EXAMPLE
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_self_check_db_positive_disable(test_db):
|
||||
logger = Mock()
|
||||
assert test_db.sites[0].disabled is False
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(
|
||||
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
||||
)
|
||||
|
||||
assert test_db.sites[0].disabled is True
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_self_check_db_positive_enable(test_db):
|
||||
@pytest.mark.asyncio
|
||||
async def test_self_check_db(test_db):
|
||||
# initalize logger to debug
|
||||
logger = Mock()
|
||||
|
||||
test_db.sites[0].disabled = True
|
||||
test_db.sites[0].username_claimed = 'Facebook'
|
||||
assert test_db.sites[0].disabled is True
|
||||
assert test_db.sites_dict['InvalidActive'].disabled is False
|
||||
assert test_db.sites_dict['ValidInactive'].disabled is True
|
||||
assert test_db.sites_dict['ValidActive'].disabled is False
|
||||
assert test_db.sites_dict['InvalidInactive'].disabled is True
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(
|
||||
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
||||
)
|
||||
await self_check(test_db, test_db.sites_dict, logger, silent=False)
|
||||
|
||||
assert test_db.sites[0].disabled is False
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_self_check_db_negative_disabled(test_db):
|
||||
logger = Mock()
|
||||
|
||||
test_db.sites[0].disabled = True
|
||||
assert test_db.sites[0].disabled is True
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(
|
||||
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
||||
)
|
||||
|
||||
assert test_db.sites[0].disabled is True
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_self_check_db_negative_enabled(test_db):
|
||||
logger = Mock()
|
||||
|
||||
test_db.sites[0].disabled = False
|
||||
test_db.sites[0].username_claimed = 'Facebook'
|
||||
assert test_db.sites[0].disabled is False
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(
|
||||
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
||||
)
|
||||
|
||||
assert test_db.sites[0].disabled is False
|
||||
assert test_db.sites_dict['InvalidActive'].disabled is True
|
||||
assert test_db.sites_dict['ValidInactive'].disabled is False
|
||||
assert test_db.sites_dict['ValidActive'].disabled is False
|
||||
assert test_db.sites_dict['InvalidInactive'].disabled is True
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.skip(reason="broken, fixme")
|
||||
def test_maigret_results(test_db):
|
||||
logger = Mock()
|
||||
|
||||
username = 'Facebook'
|
||||
username = 'Skyeng'
|
||||
loop = asyncio.get_event_loop()
|
||||
results = loop.run_until_complete(
|
||||
maigret(username, site_dict=test_db.sites_dict, logger=logger, timeout=30)
|
||||
@@ -125,12 +67,12 @@ def test_maigret_results(test_db):
|
||||
del results['GooglePlayStore']['site']
|
||||
|
||||
reddit_status = results['Reddit']['status']
|
||||
assert isinstance(reddit_status, QueryResult)
|
||||
assert reddit_status.status == QueryStatus.ILLEGAL
|
||||
assert isinstance(reddit_status, MaigretCheckResult)
|
||||
assert reddit_status.status == MaigretCheckStatus.ILLEGAL
|
||||
|
||||
playstore_status = results['GooglePlayStore']['status']
|
||||
assert isinstance(playstore_status, QueryResult)
|
||||
assert playstore_status.status == QueryStatus.CLAIMED
|
||||
assert isinstance(playstore_status, MaigretCheckResult)
|
||||
assert playstore_status.status == MaigretCheckStatus.CLAIMED
|
||||
|
||||
del results['Reddit']['status']
|
||||
del results['GooglePlayStore']['status']
|
||||
@@ -142,6 +84,7 @@ def test_maigret_results(test_db):
|
||||
assert results == RESULTS_EXAMPLE
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_extract_ids_from_url(default_db):
|
||||
assert default_db.extract_ids_from_url('https://www.reddit.com/user/test') == {
|
||||
'test': 'username'
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from maigret.errors import CheckError
|
||||
from maigret.notify import QueryNotifyPrint
|
||||
from maigret.result import QueryStatus, QueryResult
|
||||
from maigret.result import MaigretCheckStatus, MaigretCheckResult
|
||||
|
||||
|
||||
def test_notify_illegal():
|
||||
@@ -8,9 +8,9 @@ def test_notify_illegal():
|
||||
|
||||
assert (
|
||||
n.update(
|
||||
QueryResult(
|
||||
MaigretCheckResult(
|
||||
username="test",
|
||||
status=QueryStatus.ILLEGAL,
|
||||
status=MaigretCheckStatus.ILLEGAL,
|
||||
site_name="TEST_SITE",
|
||||
site_url_user="http://example.com/test",
|
||||
)
|
||||
@@ -24,9 +24,9 @@ def test_notify_claimed():
|
||||
|
||||
assert (
|
||||
n.update(
|
||||
QueryResult(
|
||||
MaigretCheckResult(
|
||||
username="test",
|
||||
status=QueryStatus.CLAIMED,
|
||||
status=MaigretCheckStatus.CLAIMED,
|
||||
site_name="TEST_SITE",
|
||||
site_url_user="http://example.com/test",
|
||||
)
|
||||
@@ -40,9 +40,9 @@ def test_notify_available():
|
||||
|
||||
assert (
|
||||
n.update(
|
||||
QueryResult(
|
||||
MaigretCheckResult(
|
||||
username="test",
|
||||
status=QueryStatus.AVAILABLE,
|
||||
status=MaigretCheckStatus.AVAILABLE,
|
||||
site_name="TEST_SITE",
|
||||
site_url_user="http://example.com/test",
|
||||
)
|
||||
@@ -53,9 +53,9 @@ def test_notify_available():
|
||||
|
||||
def test_notify_unknown():
|
||||
n = QueryNotifyPrint(color=False)
|
||||
result = QueryResult(
|
||||
result = MaigretCheckResult(
|
||||
username="test",
|
||||
status=QueryStatus.UNKNOWN,
|
||||
status=MaigretCheckStatus.UNKNOWN,
|
||||
site_name="TEST_SITE",
|
||||
site_url_user="http://example.com/test",
|
||||
)
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
import pytest
|
||||
from maigret.permutator import Permute
|
||||
|
||||
|
||||
def test_gather_strict():
|
||||
elements = {'a': 1, 'b': 2}
|
||||
permute = Permute(elements)
|
||||
result = permute.gather(method="strict")
|
||||
expected = {
|
||||
'a_b': 1,
|
||||
'b_a': 2,
|
||||
'a-b': 1,
|
||||
'b-a': 2,
|
||||
'a.b': 1,
|
||||
'b.a': 2,
|
||||
'ab': 1,
|
||||
'ba': 2,
|
||||
'_ab': 1,
|
||||
'ab_': 1,
|
||||
'_ba': 2,
|
||||
'ba_': 2,
|
||||
}
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_gather_all():
|
||||
elements = {'a': 1, 'b': 2}
|
||||
permute = Permute(elements)
|
||||
result = permute.gather(method="all")
|
||||
expected = {
|
||||
'a': 1,
|
||||
'_a': 1,
|
||||
'a_': 1,
|
||||
'b': 2,
|
||||
'_b': 2,
|
||||
'b_': 2,
|
||||
'a_b': 1,
|
||||
'b_a': 2,
|
||||
'a-b': 1,
|
||||
'b-a': 2,
|
||||
'a.b': 1,
|
||||
'b.a': 2,
|
||||
'ab': 1,
|
||||
'ba': 2,
|
||||
'_ab': 1,
|
||||
'ab_': 1,
|
||||
'_ba': 2,
|
||||
'ba_': 2,
|
||||
}
|
||||
assert result == expected
|
||||
@@ -1,7 +1,9 @@
|
||||
"""Maigret reports test functions"""
|
||||
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
from io import StringIO
|
||||
|
||||
import xmind
|
||||
@@ -18,12 +20,12 @@ from maigret.report import (
|
||||
generate_json_report,
|
||||
get_plaintext_report,
|
||||
)
|
||||
from maigret.result import QueryResult, QueryStatus
|
||||
from maigret.result import MaigretCheckResult, MaigretCheckStatus
|
||||
from maigret.sites import MaigretSite
|
||||
|
||||
|
||||
GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
|
||||
BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
|
||||
GOOD_RESULT = MaigretCheckResult('', '', '', MaigretCheckStatus.CLAIMED)
|
||||
BAD_RESULT = MaigretCheckResult('', '', '', MaigretCheckStatus.AVAILABLE)
|
||||
|
||||
EXAMPLE_RESULTS = {
|
||||
'GitHub': {
|
||||
@@ -31,11 +33,11 @@ EXAMPLE_RESULTS = {
|
||||
'parsing_enabled': True,
|
||||
'url_main': 'https://www.github.com/',
|
||||
'url_user': 'https://www.github.com/test',
|
||||
'status': QueryResult(
|
||||
'status': MaigretCheckResult(
|
||||
'test',
|
||||
'GitHub',
|
||||
'https://www.github.com/test',
|
||||
QueryStatus.CLAIMED,
|
||||
MaigretCheckStatus.CLAIMED,
|
||||
tags=['test_tag'],
|
||||
),
|
||||
'http_status': 200,
|
||||
@@ -424,6 +426,7 @@ def test_html_report_broken():
|
||||
assert SUPPOSED_BROKEN_INTERESTS in report_text
|
||||
|
||||
|
||||
@pytest.mark.skip(reason='connection reset, fixme')
|
||||
def test_pdf_report():
|
||||
report_name = 'report_test.pdf'
|
||||
context = generate_report_context(TEST)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Maigret Database test functions"""
|
||||
|
||||
from maigret.sites import MaigretDatabase, MaigretSite
|
||||
from maigret.utils import URLMatcher
|
||||
|
||||
EXAMPLE_DB = {
|
||||
'engines': {
|
||||
@@ -116,7 +116,7 @@ def test_site_url_detector():
|
||||
|
||||
assert (
|
||||
db.sites[0].url_regexp.pattern
|
||||
== r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
|
||||
== r'^https?://(www.|m.)?forum\.amperka\.ru/members/\?username=(.+?)$'
|
||||
)
|
||||
assert (
|
||||
db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
|
||||
@@ -203,3 +203,20 @@ def test_get_url_template():
|
||||
},
|
||||
)
|
||||
assert site.get_url_template() == "SUBDOMAIN"
|
||||
|
||||
|
||||
def test_has_site_url_or_name(default_db):
|
||||
# by the same url or partial match
|
||||
assert default_db.has_site("https://aback.com.ua/user/") == True
|
||||
assert default_db.has_site("https://aback.com.ua") == True
|
||||
|
||||
# acceptable partial match
|
||||
assert default_db.has_site("https://aback.com.ua/use") == True
|
||||
assert default_db.has_site("https://aback.com") == True
|
||||
|
||||
# by name
|
||||
assert default_db.has_site("Aback") == True
|
||||
|
||||
# false
|
||||
assert default_db.has_site("https://aeifgoai3h4g8a3u4g5") == False
|
||||
assert default_db.has_site("aeifgoai3h4g8a3u4g5") == False
|
||||
|
||||
@@ -0,0 +1,278 @@
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from maigret.submit import Submitter, MaigretSite, MaigretEngine
|
||||
from aiohttp import ClientSession
|
||||
from maigret.sites import MaigretDatabase
|
||||
from maigret.settings import Settings
|
||||
import logging
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.asyncio
|
||||
async def test_detect_known_engine(test_db, local_test_db):
|
||||
# Use the database fixture instead of mocking
|
||||
mock_db = test_db
|
||||
mock_settings = MagicMock()
|
||||
mock_logger = MagicMock()
|
||||
mock_args = MagicMock()
|
||||
mock_args.cookie_file = ""
|
||||
mock_args.proxy = ""
|
||||
|
||||
# Mock the supposed usernames
|
||||
mock_settings.supposed_usernames = ["adam"]
|
||||
# Create the Submitter instance
|
||||
submitter = Submitter(test_db, mock_settings, mock_logger, mock_args)
|
||||
|
||||
# Call the method with test URLs
|
||||
url_exists = "https://devforum.zoom.us/u/adam"
|
||||
url_mainpage = "https://devforum.zoom.us/"
|
||||
# Mock extract_username_dialog to return "adam"
|
||||
submitter.extract_username_dialog = MagicMock(return_value="adam")
|
||||
|
||||
sites, resp_text = await submitter.detect_known_engine(
|
||||
url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
|
||||
)
|
||||
|
||||
# Assertions
|
||||
assert len(sites) == 2
|
||||
assert sites[0].name == "devforum.zoom.us"
|
||||
assert sites[0].url_main == "https://devforum.zoom.us/"
|
||||
assert sites[0].engine == "Discourse"
|
||||
assert sites[0].username_claimed == "adam"
|
||||
assert sites[0].username_unclaimed == "noonewouldeverusethis7"
|
||||
assert resp_text != ""
|
||||
|
||||
await submitter.close()
|
||||
|
||||
# Create the Submitter instance without engines
|
||||
submitter = Submitter(local_test_db, mock_settings, mock_logger, mock_args)
|
||||
sites, resp_text = await submitter.detect_known_engine(
|
||||
url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
|
||||
)
|
||||
assert len(sites) == 0
|
||||
|
||||
await submitter.close()
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.asyncio
|
||||
async def test_check_features_manually_success(settings):
|
||||
# Setup
|
||||
db = MaigretDatabase()
|
||||
logger = logging.getLogger("test_logger")
|
||||
args = type(
|
||||
'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
|
||||
)()
|
||||
|
||||
submitter = Submitter(db, settings, logger, args)
|
||||
|
||||
username = "KONAMI"
|
||||
url_exists = "https://play.google.com/store/apps/developer?id=KONAMI"
|
||||
|
||||
# Execute
|
||||
presence_list, absence_list, status, random_username = (
|
||||
await submitter.check_features_manually(
|
||||
username=username,
|
||||
url_exists=url_exists,
|
||||
session=ClientSession(),
|
||||
follow_redirects=False,
|
||||
headers=None,
|
||||
)
|
||||
)
|
||||
await submitter.close()
|
||||
# Assert
|
||||
assert status == "Found", "Expected status to be 'Found'"
|
||||
assert isinstance(presence_list, list), "Presence list should be a list"
|
||||
assert isinstance(absence_list, list), "Absence list should be a list"
|
||||
assert isinstance(random_username, str), "Random username should be a string"
|
||||
assert (
|
||||
random_username != username
|
||||
), "Random username should not be the same as the input username"
|
||||
assert sorted(presence_list) == sorted(
|
||||
[
|
||||
' title=',
|
||||
'og:title',
|
||||
'display: none;',
|
||||
'4;0',
|
||||
'main-title',
|
||||
]
|
||||
)
|
||||
assert sorted(absence_list) == sorted(
|
||||
[
|
||||
' body {',
|
||||
' </style>',
|
||||
'><title>Not Found</title>',
|
||||
' <style nonce=',
|
||||
' .rounded {',
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.asyncio
|
||||
async def test_check_features_manually_success(settings):
|
||||
# Setup
|
||||
db = MaigretDatabase()
|
||||
logger = logging.getLogger("test_logger")
|
||||
args = type(
|
||||
'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
|
||||
)()
|
||||
|
||||
submitter = Submitter(db, settings, logger, args)
|
||||
|
||||
username = "abel"
|
||||
url_exists = "https://community.cloudflare.com/badges/1/basic?username=abel"
|
||||
|
||||
# Execute
|
||||
presence_list, absence_list, status, random_username = (
|
||||
await submitter.check_features_manually(
|
||||
username=username,
|
||||
url_exists=url_exists,
|
||||
session=ClientSession(),
|
||||
follow_redirects=False,
|
||||
headers=None,
|
||||
)
|
||||
)
|
||||
await submitter.close()
|
||||
|
||||
# Assert
|
||||
assert status == "Cloudflare detected, skipping"
|
||||
assert presence_list is None
|
||||
assert absence_list is None
|
||||
assert random_username != username
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.asyncio
|
||||
async def test_dialog_adds_site_positive(settings):
|
||||
# Initialize necessary objects
|
||||
db = MaigretDatabase()
|
||||
logger = logging.getLogger("test_logger")
|
||||
logger.setLevel(logging.INFO)
|
||||
args = type(
|
||||
'Args',
|
||||
(object,),
|
||||
{
|
||||
'proxy': None,
|
||||
'cookie_file': None,
|
||||
'verbose': False,
|
||||
'db_file': 'test_db.json',
|
||||
'db': 'test_db.json',
|
||||
},
|
||||
)()
|
||||
|
||||
submitter = Submitter(db, settings, logger, args)
|
||||
|
||||
# Mock user inputs
|
||||
user_inputs = [
|
||||
'KONAMI', # Manually input username
|
||||
'y', # Save the site in the Maigret DB
|
||||
'GooglePlayStore', # Custom site name
|
||||
'', # no custom tags
|
||||
]
|
||||
|
||||
with patch('builtins.input', side_effect=user_inputs):
|
||||
result = await submitter.dialog(
|
||||
"https://play.google.com/store/apps/developer?id=KONAMI", None
|
||||
)
|
||||
await submitter.close()
|
||||
|
||||
assert result is True
|
||||
assert len(db.sites) == 1
|
||||
|
||||
site = db.sites[0]
|
||||
assert site.url_main == "https://play.google.com"
|
||||
assert site.name == "GooglePlayStore"
|
||||
assert site.tags == []
|
||||
assert site.presense_strs != []
|
||||
assert site.absence_strs != []
|
||||
assert site.username_claimed == "KONAMI"
|
||||
assert site.check_type == "message"
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.asyncio
|
||||
async def test_dialog_replace_site(settings, test_db):
|
||||
# Initialize necessary objects
|
||||
db = test_db
|
||||
logger = logging.getLogger("test_logger")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
args = type(
|
||||
'Args',
|
||||
(object,),
|
||||
{
|
||||
'proxy': None,
|
||||
'cookie_file': None,
|
||||
'verbose': False,
|
||||
'db_file': 'test_db.json',
|
||||
'db': 'test_db.json',
|
||||
},
|
||||
)()
|
||||
|
||||
assert len(db.sites) == 4
|
||||
|
||||
submitter = Submitter(db, settings, logger, args)
|
||||
|
||||
# Mock user inputs
|
||||
user_inputs = [
|
||||
'y', # Similar sites found, continue
|
||||
'InvalidActive', # Choose site to replace
|
||||
'', # Custom headers
|
||||
'y', # Should we do redirects automatically?
|
||||
'KONAMI', # Manually input username
|
||||
'y', # Save the site in the Maigret DB
|
||||
'', # Custom site name
|
||||
'', # no custom tags
|
||||
]
|
||||
|
||||
with patch('builtins.input', side_effect=user_inputs):
|
||||
result = await submitter.dialog(
|
||||
"https://play.google.com/store/apps/developer?id=KONAMI", None
|
||||
)
|
||||
await submitter.close()
|
||||
|
||||
assert result is True
|
||||
assert len(db.sites) == 4
|
||||
|
||||
site = db.sites_dict["InvalidActive"]
|
||||
assert site.name == "InvalidActive"
|
||||
assert site.url_main == "https://play.google.com"
|
||||
assert site.tags == ['global', 'us']
|
||||
assert site.presense_strs != []
|
||||
assert site.absence_strs != []
|
||||
assert site.username_claimed == "KONAMI"
|
||||
assert site.check_type == "message"
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.asyncio
|
||||
async def test_dialog_adds_site_negative(settings):
|
||||
# Initialize necessary objects
|
||||
db = MaigretDatabase()
|
||||
logger = logging.getLogger("test_logger")
|
||||
logger.setLevel(logging.INFO)
|
||||
args = type(
|
||||
'Args',
|
||||
(object,),
|
||||
{
|
||||
'proxy': None,
|
||||
'cookie_file': None,
|
||||
'verbose': False,
|
||||
'db_file': 'test_db.json',
|
||||
'db': 'test_db.json',
|
||||
},
|
||||
)()
|
||||
|
||||
submitter = Submitter(db, settings, logger, args)
|
||||
|
||||
# Mock user inputs
|
||||
user_inputs = [
|
||||
'sokrat', # Manually input username
|
||||
'y', # Save the site in the Maigret DB
|
||||
]
|
||||
|
||||
with patch('builtins.input', side_effect=user_inputs):
|
||||
result = await submitter.dialog("https://icq.im/sokrat", None)
|
||||
await submitter.close()
|
||||
|
||||
assert result is False
|
||||
@@ -1,4 +1,5 @@
|
||||
"""Maigret utils test functions"""
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
@@ -73,7 +74,7 @@ def test_url_extract_main_part():
|
||||
['/', ''],
|
||||
]
|
||||
|
||||
url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
|
||||
url_regexp = re.compile(r'^https?://(www\.)?flickr.com/photos/(.+?)$')
|
||||
# combine parts variations
|
||||
for url_parts in itertools.product(*parts):
|
||||
url = ''.join(url_parts)
|
||||
@@ -98,7 +99,7 @@ def test_url_make_profile_url_regexp():
|
||||
# ensure all combinations match pattern
|
||||
assert (
|
||||
URLMatcher.make_profile_url_regexp(url).pattern
|
||||
== r'^https?://(www.)?flickr\.com/photos/(.+?)$'
|
||||
== r'^https?://(www.|m.)?flickr\.com/photos/(.+?)$'
|
||||
)
|
||||
|
||||
|
||||
@@ -123,19 +124,19 @@ def test_get_dict_ascii_tree():
|
||||
assert (
|
||||
ascii_tree
|
||||
== """
|
||||
┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
|
||||
┣╸legacy_id: 26403415
|
||||
┣╸username: alexaimephotographycars
|
||||
┣╸name: Alex Aimé
|
||||
┣╸links:
|
||||
┃ ┗╸ www.instagram.com/street.reality.photography/
|
||||
┣╸created_at: 2018-05-04T10:17:01.000+0000
|
||||
┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
|
||||
┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
|
||||
┣╸website: www.instagram.com/street.reality.photography/
|
||||
┣╸facebook_link: www.instagram.com/street.reality.photography/
|
||||
┣╸instagram_username: Street.Reality.Photography
|
||||
┗╸twitter_username: Alexaimephotogr"""
|
||||
├─uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
|
||||
├─legacy_id: 26403415
|
||||
├─username: alexaimephotographycars
|
||||
├─name: Alex Aimé
|
||||
├─links:
|
||||
│ └─ www.instagram.com/street.reality.photography/
|
||||
├─created_at: 2018-05-04T10:17:01.000+0000
|
||||
├─image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
|
||||
├─image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
|
||||
├─website: www.instagram.com/street.reality.photography/
|
||||
├─facebook_link: www.instagram.com/street.reality.photography/
|
||||
├─instagram_username: Street.Reality.Photography
|
||||
└─twitter_username: Alexaimephotogr"""
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ import random
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
from maigret.maigret import MaigretDatabase
|
||||
from maigret.submit import get_alexa_rank
|
||||
from maigret.submit import Submitter
|
||||
|
||||
|
||||
def update_tags(site):
|
||||
@@ -22,7 +22,7 @@ def update_tags(site):
|
||||
site.disabled = True
|
||||
|
||||
print(f'Old alexa rank: {site.alexa_rank}')
|
||||
rank = get_alexa_rank(site.url_main)
|
||||
rank = Submitter.get_alexa_rank(site.url_main)
|
||||
if rank:
|
||||
print(f'New alexa rank: {rank}')
|
||||
site.alexa_rank = rank
|
||||
@@ -36,6 +36,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
||||
dest="base_file", default="maigret/resources/data.json",
|
||||
help="JSON file with sites data to update.")
|
||||
parser.add_argument("--name", help="Name of site to check")
|
||||
|
||||
pool = list()
|
||||
|
||||
@@ -45,12 +46,17 @@ if __name__ == '__main__':
|
||||
db.load_from_file(args.base_file).sites
|
||||
|
||||
while True:
|
||||
site = random.choice(db.sites)
|
||||
if args.name:
|
||||
sites = list(db.ranked_sites_dict(names=[args.name]).values())
|
||||
site = random.choice(sites)
|
||||
else:
|
||||
site = random.choice(db.sites)
|
||||
|
||||
if site.engine == 'uCoz':
|
||||
continue
|
||||
|
||||
if not 'in' in site.tags:
|
||||
continue
|
||||
# if not 'in' in site.tags:
|
||||
# continue
|
||||
|
||||
update_tags(site)
|
||||
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Maigret: Supported Site Listing with Alexa ranking and country tags
|
||||
This module generates the listing of supported sites in file `SITES.md`
|
||||
and pretty prints file with sites data.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
from maigret.maigret import get_response
|
||||
from maigret.sites import MaigretDatabase, MaigretEngine
|
||||
|
||||
async def check_engine_of_site(site_name, sites_with_engines, future, engine_name, semaphore, logger):
|
||||
async with semaphore:
|
||||
response = await get_response(request_future=future,
|
||||
site_name=site_name,
|
||||
logger=logger)
|
||||
|
||||
html_text, status_code, error_text, expection_text = response
|
||||
|
||||
if html_text and engine_name in html_text:
|
||||
sites_with_engines.append(site_name)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
|
||||
)
|
||||
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
||||
dest="base_file", default="maigret/resources/data.json",
|
||||
help="JSON file with sites data to update.")
|
||||
|
||||
parser.add_argument('--engine', '-e', help='check only selected engine', type=str)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
log_level = logging.INFO
|
||||
logging.basicConfig(
|
||||
format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
|
||||
datefmt='%H:%M:%S',
|
||||
level=log_level
|
||||
)
|
||||
logger = logging.getLogger('engines-check')
|
||||
logger.setLevel(log_level)
|
||||
|
||||
db = MaigretDatabase()
|
||||
sites_subset = db.load_from_file(args.base_file).sites
|
||||
sites = {site.name: site for site in sites_subset}
|
||||
|
||||
with open(args.base_file, "r", encoding="utf-8") as data_file:
|
||||
sites_info = json.load(data_file)
|
||||
engines = sites_info['engines']
|
||||
|
||||
for engine_name, engine_data in engines.items():
|
||||
if args.engine and args.engine != engine_name:
|
||||
continue
|
||||
|
||||
if not 'presenseStrs' in engine_data:
|
||||
print(f'No features to automatically detect sites on engine {engine_name}')
|
||||
continue
|
||||
|
||||
engine_obj = MaigretEngine(engine_name, engine_data)
|
||||
|
||||
# setup connections for checking both engine and usernames
|
||||
connector = aiohttp.TCPConnector(ssl=False)
|
||||
connector.verify_ssl=False
|
||||
session = aiohttp.ClientSession(connector=connector)
|
||||
|
||||
sem = asyncio.Semaphore(100)
|
||||
loop = asyncio.get_event_loop()
|
||||
tasks = []
|
||||
|
||||
# check sites without engine if they look like sites on this engine
|
||||
new_engine_sites = []
|
||||
for site_name, site_data in sites.items():
|
||||
if site_data.engine:
|
||||
continue
|
||||
|
||||
future = session.get(url=site_data.url_main,
|
||||
allow_redirects=True,
|
||||
timeout=10,
|
||||
)
|
||||
|
||||
check_engine_coro = check_engine_of_site(site_name, new_engine_sites, future, engine_name, sem, logger)
|
||||
future = asyncio.ensure_future(check_engine_coro)
|
||||
tasks.append(future)
|
||||
|
||||
# progress bar
|
||||
with alive_progress(len(tasks), title='Checking sites') as progress:
|
||||
for f in asyncio.as_completed(tasks):
|
||||
loop.run_until_complete(f)
|
||||
progress()
|
||||
|
||||
print(f'Total detected {len(new_engine_sites)} sites on engine {engine_name}')
|
||||
# dict with new found engine sites
|
||||
new_sites = {site_name: sites[site_name] for site_name in new_engine_sites}
|
||||
|
||||
# update sites obj from engine
|
||||
for site_name, site in new_sites.items():
|
||||
site.request_future = None
|
||||
site.engine = engine_name
|
||||
site.update_from_engine(engine_obj)
|
||||
|
||||
async def update_site_data(site_name, site_data, all_sites, logger, no_progressbar):
|
||||
updates = await site_self_check(site_name, site_data, logger, no_progressbar)
|
||||
all_sites[site_name].update(updates)
|
||||
|
||||
tasks = []
|
||||
# for new_site_name, new_site_data in new_sites.items():
|
||||
# coro = update_site_data(new_site_name, new_site_data, new_sites, logger)
|
||||
# future = asyncio.ensure_future(coro)
|
||||
# tasks.append(future)
|
||||
|
||||
# asyncio.gather(*tasks)
|
||||
for new_site_name, new_site_data in new_sites.items():
|
||||
coro = update_site_data(new_site_name, new_site_data, new_sites, logger, no_progressbar=True)
|
||||
loop.run_until_complete(coro)
|
||||
|
||||
updated_sites_count = 0
|
||||
|
||||
for s in new_sites:
|
||||
site = new_sites[s]
|
||||
site.request_future = None
|
||||
|
||||
if site.disabled:
|
||||
print(f'{site.name} failed username checking of engine {engine_name}')
|
||||
continue
|
||||
|
||||
site = site.strip_engine_data()
|
||||
|
||||
db.update_site(site)
|
||||
updated_sites_count += 1
|
||||
db.save_to_file(args.base_file)
|
||||
|
||||
print(f'Site "{s}": ' + json.dumps(site.json, indent=4))
|
||||
|
||||
print(f'Updated total {updated_sites_count} sites!')
|
||||
print(f'Checking all sites on engine {engine_name}')
|
||||
|
||||
loop.run_until_complete(session.close())
|
||||
|
||||
print("\nFinished updating supported site listing!")
|
||||
@@ -0,0 +1,282 @@
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
import alive_progress
|
||||
from mock import Mock
|
||||
import requests
|
||||
|
||||
from maigret.maigret import *
|
||||
from maigret.result import MaigretCheckStatus
|
||||
from maigret.sites import MaigretSite
|
||||
|
||||
URL_RE = re.compile(r"https?://(www\.)?")
|
||||
TIMEOUT = 200
|
||||
|
||||
|
||||
async def maigret_check(site, site_data, username, status, logger):
|
||||
query_notify = Mock()
|
||||
logger.debug(f'Checking {site}...')
|
||||
|
||||
for username, status in [(username, status)]:
|
||||
results = await maigret(
|
||||
username,
|
||||
{site: site_data},
|
||||
logger,
|
||||
query_notify,
|
||||
timeout=TIMEOUT,
|
||||
forced=True,
|
||||
no_progressbar=True,
|
||||
)
|
||||
|
||||
if results[site]['status'].status != status:
|
||||
if results[site]['status'].status == MaigretCheckStatus.UNKNOWN:
|
||||
msg = site_data.absence_strs
|
||||
etype = site_data.check_type
|
||||
context = results[site]['status'].context
|
||||
|
||||
logger.debug(f'Error while searching {username} in {site}, must be claimed. Context: {context}')
|
||||
# if site_data.get('errors'):
|
||||
# continue
|
||||
return False
|
||||
|
||||
if status == MaigretCheckStatus.CLAIMED:
|
||||
logger.debug(f'Not found {username} in {site}, must be claimed')
|
||||
logger.debug(results[site])
|
||||
pass
|
||||
else:
|
||||
logger.debug(f'Found {username} in {site}, must be available')
|
||||
logger.debug(results[site])
|
||||
pass
|
||||
return False
|
||||
|
||||
return site_data
|
||||
|
||||
|
||||
async def check_and_add_maigret_site(site_data, semaphore, logger, ok_usernames, bad_usernames):
|
||||
async with semaphore:
|
||||
sitename = site_data.name
|
||||
positive = False
|
||||
negative = False
|
||||
|
||||
for ok_username in ok_usernames:
|
||||
site_data.username_claimed = ok_username
|
||||
status = MaigretCheckStatus.CLAIMED
|
||||
if await maigret_check(sitename, site_data, ok_username, status, logger):
|
||||
# print(f'{sitename} positive case is okay')
|
||||
positive = True
|
||||
break
|
||||
|
||||
for bad_username in bad_usernames:
|
||||
site_data.username_unclaimed = bad_username
|
||||
status = MaigretCheckStatus.AVAILABLE
|
||||
if await maigret_check(sitename, site_data, bad_username, status, logger):
|
||||
# print(f'{sitename} negative case is okay')
|
||||
negative = True
|
||||
break
|
||||
|
||||
if positive and negative:
|
||||
site_data = site_data.strip_engine_data()
|
||||
|
||||
db.update_site(site_data)
|
||||
print(site_data.json)
|
||||
try:
|
||||
db.save_to_file(args.base_file)
|
||||
except Exception as e:
|
||||
logging.error(e, exc_info=True)
|
||||
print(f'Saved new site {sitename}...')
|
||||
ok_sites.append(site_data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
|
||||
)
|
||||
parser.add_argument("--base", "-b", metavar="BASE_FILE",
|
||||
dest="base_file", default="maigret/resources/data.json",
|
||||
help="JSON file with sites data to update.")
|
||||
|
||||
parser.add_argument("--add-engine", dest="add_engine", help="Additional engine to check")
|
||||
|
||||
parser.add_argument("--only-engine", dest="only_engine", help="Use only this engine from detected to check")
|
||||
|
||||
parser.add_argument('--check', help='only check sites in database', action='store_true')
|
||||
|
||||
parser.add_argument('--random', help='shuffle list of urls', action='store_true', default=False)
|
||||
|
||||
parser.add_argument('--top', help='top count of records in file', type=int, default=10000)
|
||||
|
||||
parser.add_argument('--filter', help='substring to filter input urls', type=str, default='')
|
||||
|
||||
parser.add_argument('--username', help='preferable username to check with', type=str)
|
||||
|
||||
parser.add_argument(
|
||||
"--info",
|
||||
"-vv",
|
||||
action="store_true",
|
||||
dest="info",
|
||||
default=False,
|
||||
help="Display service information.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="store_true",
|
||||
dest="verbose",
|
||||
default=False,
|
||||
help="Display extra information and metrics.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--debug",
|
||||
"-vvv",
|
||||
action="store_true",
|
||||
dest="debug",
|
||||
default=False,
|
||||
help="Saving debugging information and sites responses in debug.txt.",
|
||||
)
|
||||
|
||||
parser.add_argument("urls_file",
|
||||
metavar='URLS_FILE',
|
||||
action="store",
|
||||
help="File with base site URLs"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
log_level = logging.ERROR
|
||||
if args.debug:
|
||||
log_level = logging.DEBUG
|
||||
elif args.info:
|
||||
log_level = logging.INFO
|
||||
elif args.verbose:
|
||||
log_level = logging.WARNING
|
||||
|
||||
logging.basicConfig(
|
||||
format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
|
||||
datefmt='%H:%M:%S',
|
||||
level=log_level
|
||||
)
|
||||
logger = logging.getLogger('engines-check')
|
||||
logger.setLevel(log_level)
|
||||
|
||||
db = MaigretDatabase()
|
||||
sites_subset = db.load_from_file(args.base_file).sites
|
||||
sites = {site.name: site for site in sites_subset}
|
||||
engines = db.engines
|
||||
|
||||
# TODO: usernames extractors
|
||||
ok_usernames = ['alex', 'god', 'admin', 'red', 'blue', 'john']
|
||||
if args.username:
|
||||
ok_usernames = [args.username] + ok_usernames
|
||||
|
||||
bad_usernames = ['noonewouldeverusethis7']
|
||||
|
||||
with open(args.urls_file, 'r') as urls_file:
|
||||
urls = urls_file.read().splitlines()
|
||||
if args.random:
|
||||
random.shuffle(urls)
|
||||
urls = urls[:args.top]
|
||||
|
||||
raw_maigret_data = json.dumps({site.name: site.json for site in sites_subset})
|
||||
|
||||
new_sites = []
|
||||
for site in alive_progress.alive_it(urls):
|
||||
site_lowercase = site.lower()
|
||||
|
||||
domain_raw = URL_RE.sub('', site_lowercase).strip().strip('/')
|
||||
domain_raw = domain_raw.split('/')[0]
|
||||
|
||||
if args.filter and args.filter not in domain_raw:
|
||||
logger.debug('Site %s skipped due to filtering by "%s"', domain_raw, args.filter)
|
||||
continue
|
||||
|
||||
if domain_raw in raw_maigret_data:
|
||||
logger.debug(f'Site {domain_raw} already exists in the Maigret database!')
|
||||
continue
|
||||
|
||||
if '"' in domain_raw:
|
||||
logger.debug(f'Invalid site {domain_raw}')
|
||||
continue
|
||||
|
||||
main_page_url = '/'.join(site.split('/', 3)[:3])
|
||||
|
||||
site_data = {
|
||||
'url': site,
|
||||
'urlMain': main_page_url,
|
||||
'name': domain_raw,
|
||||
}
|
||||
|
||||
try:
|
||||
r = requests.get(main_page_url, timeout=5)
|
||||
except:
|
||||
r = None
|
||||
pass
|
||||
|
||||
detected_engines = []
|
||||
|
||||
for e in engines:
|
||||
strs_to_check = e.__dict__.get('presenseStrs')
|
||||
if strs_to_check and r and r.text:
|
||||
all_strs_in_response = True
|
||||
for s in strs_to_check:
|
||||
if not s in r.text:
|
||||
all_strs_in_response = False
|
||||
if all_strs_in_response:
|
||||
engine_name = e.__dict__.get('name')
|
||||
detected_engines.append(engine_name)
|
||||
logger.info(f'Detected engine {engine_name} for site {main_page_url}')
|
||||
|
||||
if args.only_engine and args.only_engine in detected_engines:
|
||||
detected_engines = [args.only_engine]
|
||||
elif not detected_engines and args.add_engine:
|
||||
logging.debug('Could not detect any engine, applying default engine %s...', args.add_engine)
|
||||
detected_engines = [args.add_engine]
|
||||
|
||||
def create_site_from_engine(sitename, data, e):
|
||||
site = MaigretSite(sitename, data)
|
||||
site.update_from_engine(db.engines_dict[e])
|
||||
site.engine = e
|
||||
return site
|
||||
|
||||
for engine_name in detected_engines:
|
||||
site = create_site_from_engine(domain_raw, site_data, engine_name)
|
||||
new_sites.append(site)
|
||||
logger.debug(site.json)
|
||||
|
||||
# if engine_name == "phpBB":
|
||||
# site_data_with_subpath = dict(site_data)
|
||||
# site_data_with_subpath["urlSubpath"] = "/forum"
|
||||
# site = create_site_from_engine(domain_raw, site_data_with_subpath, engine_name)
|
||||
# new_sites.append(site)
|
||||
|
||||
# except Exception as e:
|
||||
# print(f'Error: {str(e)}')
|
||||
# pass
|
||||
|
||||
print(f'Found {len(new_sites)}/{len(urls)} new sites')
|
||||
|
||||
if args.check:
|
||||
for s in new_sites:
|
||||
print(s.url_main)
|
||||
sys.exit(0)
|
||||
|
||||
sem = asyncio.Semaphore(20)
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
ok_sites = []
|
||||
tasks = []
|
||||
for site in new_sites:
|
||||
check_coro = check_and_add_maigret_site(site, sem, logger, ok_usernames, bad_usernames)
|
||||
future = asyncio.ensure_future(check_coro)
|
||||
tasks.append(future)
|
||||
|
||||
with alive_progress(len(tasks), title='Checking sites') as progress:
|
||||
for f in asyncio.as_completed(tasks):
|
||||
progress()
|
||||
try:
|
||||
loop.run_until_complete(f)
|
||||
except asyncio.exceptions.TimeoutError:
|
||||
pass
|
||||
|
||||
print(f'Found and saved {len(ok_sites)} sites!')
|
||||
@@ -0,0 +1,36 @@
|
||||
import sys
|
||||
import difflib
|
||||
import requests
|
||||
|
||||
|
||||
a = requests.get(sys.argv[1]).text
|
||||
b = requests.get(sys.argv[2]).text
|
||||
|
||||
|
||||
tokens_a = set(a.split('"'))
|
||||
tokens_b = set(b.split('"'))
|
||||
|
||||
a_minus_b = tokens_a.difference(tokens_b)
|
||||
b_minus_a = tokens_b.difference(tokens_a)
|
||||
|
||||
print(a_minus_b)
|
||||
print(b_minus_a)
|
||||
|
||||
print(len(a_minus_b))
|
||||
print(len(b_minus_a))
|
||||
|
||||
desired_strings = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
|
||||
"birthday", "репутация", "информация", "e-mail"]
|
||||
|
||||
|
||||
def get_match_ratio(x):
|
||||
return round(max([
|
||||
difflib.SequenceMatcher(a=x.lower(), b=y).ratio()
|
||||
for y in desired_strings
|
||||
]), 2)
|
||||
|
||||
|
||||
RATIO = 0.6
|
||||
|
||||
print(sorted(a_minus_b, key=get_match_ratio, reverse=True)[:10])
|
||||
print(sorted(b_minus_a, key=get_match_ratio, reverse=True)[:10])
|
||||
@@ -3,13 +3,12 @@
|
||||
This module generates the listing of supported sites in file `SITES.md`
|
||||
and pretty prints file with sites data.
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import requests
|
||||
import logging
|
||||
import threading
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
from maigret.maigret import MaigretDatabase
|
||||
@@ -25,11 +24,12 @@ RANKS.update({
|
||||
'100000000': '100M',
|
||||
})
|
||||
|
||||
SEMAPHORE = threading.Semaphore(10)
|
||||
SEMAPHORE = threading.Semaphore(20)
|
||||
|
||||
|
||||
def get_rank(domain_to_query, site, print_errors=True):
|
||||
with SEMAPHORE:
|
||||
#Retrieve ranking data via alexa API
|
||||
# Retrieve ranking data via alexa API
|
||||
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
|
||||
xml_data = requests.get(url).text
|
||||
root = ET.fromstring(xml_data)
|
||||
@@ -67,7 +67,7 @@ def get_step_rank(rank):
|
||||
return get_readable_rank(list(filter(lambda x: x >= rank, valid_step_ranks))[0])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
def main():
|
||||
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
|
||||
)
|
||||
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
||||
@@ -86,6 +86,8 @@ if __name__ == '__main__':
|
||||
db = MaigretDatabase()
|
||||
sites_subset = db.load_from_file(args.base_file).sites
|
||||
|
||||
print(f"\nUpdating supported sites list (don't worry, it's needed)...")
|
||||
|
||||
with open("sites.md", "w") as site_file:
|
||||
site_file.write(f"""
|
||||
## List of supported sites (search methods): total {len(sites_subset)}\n
|
||||
@@ -114,7 +116,7 @@ Rank data fetched from Alexa by domains.
|
||||
sys.stdout.flush()
|
||||
index = index + 1
|
||||
|
||||
sites_full_list = [(s, s.alexa_rank) for s in sites_subset]
|
||||
sites_full_list = [(s, int(s.alexa_rank)) for s in sites_subset]
|
||||
|
||||
sites_full_list.sort(reverse=False, key=lambda x: x[1])
|
||||
|
||||
@@ -137,7 +139,15 @@ Rank data fetched from Alexa by domains.
|
||||
site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
|
||||
db.update_site(site)
|
||||
|
||||
site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
|
||||
site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc).date()})\n')
|
||||
db.save_to_file(args.base_file)
|
||||
|
||||
print("\nFinished updating supported site listing!")
|
||||
statistics_text = db.get_db_stats(is_markdown=True)
|
||||
site_file.write('## Statistics\n\n')
|
||||
site_file.write(statistics_text)
|
||||
|
||||
print("Finished updating supported site listing!")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
@@ -1,56 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
import asyncio
|
||||
import logging
|
||||
import maigret
|
||||
|
||||
|
||||
# top popular sites from the Maigret database
|
||||
TOP_SITES_COUNT = 300
|
||||
# Maigret HTTP requests timeout
|
||||
TIMEOUT = 10
|
||||
# max parallel requests
|
||||
MAX_CONNECTIONS = 50
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# setup logging and asyncio
|
||||
def main():
|
||||
logger = logging.getLogger('maigret')
|
||||
logger.setLevel(logging.WARNING)
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
# setup Maigret
|
||||
db = maigret.MaigretDatabase().load_from_file('./maigret/resources/data.json')
|
||||
# also can be downloaded from web
|
||||
# db = MaigretDatabase().load_from_url(MAIGRET_DB_URL)
|
||||
|
||||
# user input
|
||||
username = input('Enter username to search: ')
|
||||
|
||||
sites_count_raw = input(
|
||||
f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): '
|
||||
sites_count = (
|
||||
int(
|
||||
input(
|
||||
f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): '
|
||||
)
|
||||
)
|
||||
or TOP_SITES_COUNT
|
||||
)
|
||||
sites_count = int(sites_count_raw) or TOP_SITES_COUNT
|
||||
|
||||
sites = db.ranked_sites_dict(top=sites_count)
|
||||
|
||||
show_progressbar_raw = input('Do you want to show a progressbar? [Yn] ')
|
||||
show_progressbar = show_progressbar_raw.lower() != 'n'
|
||||
|
||||
extract_info_raw = input(
|
||||
'Do you want to extract additional info from accounts\' pages? [Yn] '
|
||||
show_progressbar = input('Do you want to show a progressbar? [Yn] ').lower() != 'n'
|
||||
extract_info = (
|
||||
input(
|
||||
'Do you want to extract additional info from accounts\' pages? [Yn] '
|
||||
).lower()
|
||||
!= 'n'
|
||||
)
|
||||
extract_info = extract_info_raw.lower() != 'n'
|
||||
|
||||
use_notifier_raw = input(
|
||||
'Do you want to use notifier for displaying results while searching? [Yn] '
|
||||
use_notifier = (
|
||||
input(
|
||||
'Do you want to use notifier for displaying results while searching? [Yn] '
|
||||
).lower()
|
||||
!= 'n'
|
||||
)
|
||||
use_notifier = use_notifier_raw.lower() != 'n'
|
||||
|
||||
notifier = None
|
||||
if use_notifier:
|
||||
notifier = maigret.Notifier(print_found_only=True, skip_check_errors=True)
|
||||
|
||||
# search!
|
||||
search_func = maigret.search(
|
||||
username=username,
|
||||
site_dict=sites,
|
||||
@@ -58,7 +51,7 @@ if __name__ == '__main__':
|
||||
logger=logger,
|
||||
max_connections=MAX_CONNECTIONS,
|
||||
query_notify=notifier,
|
||||
no_progressbar=(not show_progressbar),
|
||||
no_progressbar=not show_progressbar,
|
||||
is_parsing_enabled=extract_info,
|
||||
)
|
||||
|
||||
@@ -69,3 +62,7 @@ if __name__ == '__main__':
|
||||
for sitename, data in results.items():
|
||||
is_found = data['status'].is_found()
|
||||
print(f'{sitename} - {"Found!" if is_found else "Not found"}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||