mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-09 16:14:32 +00:00
Compare commits
432 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 48c9363f6c | |||
| db69eaa290 | |||
| 3e6cad63f0 | |||
| 0ab12d95d8 | |||
| b26a711ace | |||
| 94e1e8e22e | |||
| 0e77ee47b4 | |||
| 003247453b | |||
| 373f40dee8 | |||
| dd485e8d9c | |||
| a57f9734a1 | |||
| 0bdc49f493 | |||
| 1cb25946dd | |||
| e982be4109 | |||
| 1a8bbe7ff8 | |||
| 0ec9fc9027 | |||
| 07a7a474f8 | |||
| ce84f8d046 | |||
| 82f494495c | |||
| 779ec87659 | |||
| d5d4242015 | |||
| 2f93963a0a | |||
| 5073ceff13 | |||
| d15e12750b | |||
| 0c7e3898e8 | |||
| 03089613dc | |||
| 21a8459b18 | |||
| 7f1f349300 | |||
| 258f30ec5c | |||
| e96d09dee7 | |||
| ff06029253 | |||
| 15702bd9f4 | |||
| 909a7e6a91 | |||
| 2e2a47a12b | |||
| 6170f07154 | |||
| 3ad9bb59ce | |||
| c00b864017 | |||
| 404c0376d3 | |||
| 8a98aa9eaa | |||
| 80cf70d151 | |||
| ee25c61fc2 | |||
| 324c118530 | |||
| b370bc4c44 | |||
| f529d16c62 | |||
| 886fdc82d6 | |||
| 10950332a1 | |||
| 4d87adc0c8 | |||
| 13c20afe5b | |||
| d8a05807ba | |||
| 089d33b88b | |||
| b3b84c633a | |||
| 86d51bced0 | |||
| 54b864f167 | |||
| 54fecccbfb | |||
| 3745711b12 | |||
| 25bc88a438 | |||
| 9b0212d7c7 | |||
| ceaf8cd9aa | |||
| 0c3ae98fd1 | |||
| f0f64075ad | |||
| 2fae5bb340 | |||
| 9287734a24 | |||
| ff46d880cb | |||
| f78c93eaca | |||
| 1ff75403cd | |||
| 0dc8e52662 | |||
| 7c1f8a30ad | |||
| 24e545b62c | |||
| 4331b5f532 | |||
| 05db32f28f | |||
| 1cb589eadb | |||
| 6fb0dc1067 | |||
| e02a5571b6 | |||
| b097a49ed5 | |||
| 45f9966b34 | |||
| 46d8d8fc3d | |||
| 034153791b | |||
| 9399737ee6 | |||
| f7f77e587c | |||
| 7a8c077c57 | |||
| 03900b0c26 | |||
| 6be2f409e5 | |||
| 46b13b4f23 | |||
| be58bf0ab4 | |||
| 2ccef4a9f9 | |||
| f1ea12d731 | |||
| 01121d7695 | |||
| 3ed043993f | |||
| a5bdf08c1c | |||
| 88fcf01d8f | |||
| 451a858d6b | |||
| df0a0696a9 | |||
| f7341200bc | |||
| 9f252f6d41 | |||
| 397beebd21 | |||
| 7c5995f165 | |||
| aee1773e0c | |||
| ffca24435b | |||
| 2b588a2003 | |||
| 1978f24fc4 | |||
| 83d5740096 | |||
| 726380ee09 | |||
| 90599ea3c2 | |||
| 72a1f948ba | |||
| 71f22f65c4 | |||
| c9039cfd07 | |||
| f5fe575b6b | |||
| c5c78b2a66 | |||
| 390f3a49ee | |||
| dc9b44bd14 | |||
| b72e9b6a0c | |||
| b8c035e564 | |||
| eb115a1a70 | |||
| f5ca005766 | |||
| 656b9c19ea | |||
| 5855cbfcc9 | |||
| 6caa08902f | |||
| 932e07a8ee | |||
| 71d5368fea | |||
| 9f2f4d5107 | |||
| d6003c93b8 | |||
| 4055fa088d | |||
| 745a70a534 | |||
| 366e9333dd | |||
| fc1f5bfc82 | |||
| bfe33d74d3 | |||
| 9c2746fc28 | |||
| 0ad2cdef2c | |||
| 0064fad85c | |||
| 16f4978b31 | |||
| b0ec08d753 | |||
| fb8952b783 | |||
| 4216f5c028 | |||
| 539a3c5000 | |||
| 064d5707f9 | |||
| fd64f5710f | |||
| 2136a71db1 | |||
| 8308299367 | |||
| 70bed56a8a | |||
| 4c2a21832b | |||
| 356d7d4e49 | |||
| 6020e766ce | |||
| b4e963b2b1 | |||
| aebd8539ed | |||
| fea1c6b552 | |||
| fd8f5f90fd | |||
| b06fd470cc | |||
| ec1aaacb41 | |||
| bc1035c1ec | |||
| 026fd98304 | |||
| f03a4c81a5 | |||
| 79afab11c2 | |||
| 10ef102791 | |||
| 523317e760 | |||
| 82074d77b1 | |||
| 002c8359fe | |||
| 08bba20003 | |||
| 0a628d2b8f | |||
| f1969a12a1 | |||
| 3cb03fe09c | |||
| 5769144ac3 | |||
| 99c9b0a8ca | |||
| 8e9722a285 | |||
| 95276b841c | |||
| 9484d6f05e | |||
| 06f94cd476 | |||
| d4d525647c | |||
| f988c532ec | |||
| e71c8907f0 | |||
| 45ed832ec8 | |||
| a57e5f1d90 | |||
| d9fd6e0b29 | |||
| 827c11f2e1 | |||
| 647a3fabb9 | |||
| efb2a9501e | |||
| 44c009e570 | |||
| eb304b6804 | |||
| e1b9b62c4d | |||
| ad6938f068 | |||
| 1c9ccfe77b | |||
| 1fd1e2c809 | |||
| c5e973bc5b | |||
| b288c37d91 | |||
| 2f76f22202 | |||
| f7c7809d8d | |||
| 80bd7f21eb | |||
| 994d79244e | |||
| 4b2d2c07bd | |||
| 938d05f812 | |||
| 487c4e0dbf | |||
| 09dce2046a | |||
| 65963e5647 | |||
| 69f220a7e4 | |||
| 722d3039dc | |||
| 420c29610d | |||
| 6b53fac424 | |||
| 37c54735f1 | |||
| 2f0a0b49f3 | |||
| 1a8b06385a | |||
| 22d7c204f8 | |||
| a6ae0723f9 | |||
| aa4f94ac01 | |||
| 1153a9bf01 | |||
| 3d878131b9 | |||
| 20746a0fc3 | |||
| ce062d915e | |||
| c057c5c478 | |||
| eab0ec48da | |||
| 5b40eac230 | |||
| 2d782379ab | |||
| 042981d8bb | |||
| 2c2017c7db | |||
| 4aeba4d648 | |||
| de34e29188 | |||
| 0c127a97d5 | |||
| 11f047b1ae | |||
| 43f8adef66 | |||
| 2ffb77823d | |||
| 7ba8af0247 | |||
| 814544e1a0 | |||
| 477e62a5c5 | |||
| 0a629614c2 | |||
| e2d623f0d7 | |||
| 5145bfe820 | |||
| 58f66f5c3c | |||
| 746b74238b | |||
| ae56a927cf | |||
| 40ed0a7535 | |||
| beb4d740c7 | |||
| a47b6a705e | |||
| 3bfb2db6df | |||
| d30ef15a79 | |||
| 1ebf0ca5cf | |||
| eaa545a2c4 | |||
| cbe1f09536 | |||
| 246c770d5c | |||
| e88d71d792 | |||
| 929366cc81 | |||
| bb6ed59e44 | |||
| 6400d83a46 | |||
| 507d0dac3a | |||
| f058ee0daf | |||
| a66c25452a | |||
| bfc682f758 | |||
| aedbe927cb | |||
| 340d8b45fe | |||
| c95f0fdfbb | |||
| a5b73d1108 | |||
| 6157c5ff3d | |||
| e0f0dd5d4d | |||
| 059c8198a1 | |||
| 34073d12f4 | |||
| d24d80ab43 | |||
| 123ec35569 | |||
| 73aa8b649b | |||
| 28aa74d83a | |||
| d4780d2840 | |||
| 4c7b6d82cf | |||
| 37d6b9a949 | |||
| 2664094f65 | |||
| d884fea00b | |||
| 4a4fa69e93 | |||
| 801bc388e4 | |||
| 48fcfcb89b | |||
| 07db3ce463 | |||
| f9f4449079 | |||
| 0d4236e2d4 | |||
| b2db783620 | |||
| b27c53b5b6 | |||
| 6691b26674 | |||
| 131b96ddb3 | |||
| 0803d8ebaa | |||
| 19956f74ca | |||
| dd57019c80 | |||
| 9fb265ea85 | |||
| 0f9fdfc639 | |||
| 0de087d751 | |||
| 600e58f8ef | |||
| 16131c58f9 | |||
| 5106d32342 | |||
| 1456ff6bc1 | |||
| b94fb65809 | |||
| e283d8b561 | |||
| 7cd727bbff | |||
| 5532c00b04 | |||
| 8846b8b225 | |||
| 7307c98029 | |||
| 4d129c2c6b | |||
| 1e772b7dd4 | |||
| 81bb0a01b2 | |||
| 7ae8b58e1a | |||
| dde8bf8af0 | |||
| dc4addd985 | |||
| 803f62f7b7 | |||
| 91596b31ec | |||
| a27fea4ba4 | |||
| ba9a94debc | |||
| ac80d26cab | |||
| e4aea719fa | |||
| 4b18ecbd4b | |||
| c2a4c64640 | |||
| 47045dd653 | |||
| b65a85368b | |||
| daf483b097 | |||
| 838a0c5e0c | |||
| 0ccaccfcde | |||
| d1e7f5c113 | |||
| bfb5b85c41 | |||
| effd753512 | |||
| cfc777d45d | |||
| 422f65afbe | |||
| 135b554030 | |||
| 47edb4427a | |||
| bda6c7c390 | |||
| f0f7334f31 | |||
| 669f92c34b | |||
| b657c1323d | |||
| 692f401043 | |||
| 27f91ddbe3 | |||
| 72fccb2868 | |||
| a959243282 | |||
| 42895e81a8 | |||
| fb9663599e | |||
| 005685e69a | |||
| eb70f91db9 | |||
| a3eaf6130e | |||
| 2ce65ca45a | |||
| 46a14631ea | |||
| 2699cd221f | |||
| 2a7851c814 | |||
| 1356cc8e3a | |||
| 523966eaf2 | |||
| 21f5db5661 | |||
| 6b52c41b97 | |||
| 8c898bd356 | |||
| e725a73c8f | |||
| 645abfe72c | |||
| 17886bb9fa | |||
| 5b6cf4f15a | |||
| ca1d5e3a76 | |||
| 52789abda7 | |||
| 54f1f1feaa | |||
| ea33f4150f | |||
| 7ff52e60a2 | |||
| e5420e4639 | |||
| 393469ddfd | |||
| 0b03a7ab00 | |||
| dd13010bb5 | |||
| e3bd89c9e4 | |||
| 00865db0f6 | |||
| 8635abe79f | |||
| 8fbe6b42de | |||
| db12e7b563 | |||
| 77c9bda3e5 | |||
| 54547c797a | |||
| 7e0b20e8fb | |||
| 85288dccb5 | |||
| d973831dc1 | |||
| 12502c020c | |||
| ce48c317b2 | |||
| 41a277237c | |||
| 721ff2874f | |||
| 3cdca22b9d | |||
| 346611c5da | |||
| a8e538ad29 | |||
| 95ff061cf6 | |||
| 5bb5e29ffb | |||
| ac3e0b16e4 | |||
| 970b75b88d | |||
| 8f6b40c8d0 | |||
| ccebd677e3 | |||
| 75625f72f8 | |||
| f6dbe1a6bd | |||
| a914283a15 | |||
| 2a4f4d47e2 | |||
| 50350972a5 | |||
| cdb69f99a1 | |||
| 4786822e6d | |||
| 9c56f29267 | |||
| 1ee4f4c93b | |||
| 9e302542ed | |||
| 3409f8a726 | |||
| 94bfa4233d | |||
| 9c08c34007 | |||
| 880ffb4bf1 | |||
| d987c681b7 | |||
| 2ef141a5c5 | |||
| 809b97d4f9 | |||
| 4a1342b654 | |||
| fb200875d3 | |||
| 53bc79938c | |||
| 3866c1be9e | |||
| ca65ffe864 | |||
| c9638f704f | |||
| 39c57e7925 | |||
| 1b5c39dc1b | |||
| 379fca8602 | |||
| 9716f40140 | |||
| 61d346dd0a | |||
| 5edfc00b2d | |||
| 5905dcf384 | |||
| 67046273c7 | |||
| b4fd2fe40f | |||
| 7113824c59 | |||
| a2e782d07c | |||
| 4b2d030d7a | |||
| e98c97dbb1 | |||
| fd4d570b59 | |||
| 9892532aae | |||
| 66422332c4 | |||
| 8b1eb15939 | |||
| 06df4661bc | |||
| eaa126906f | |||
| 1c7cbbc27d | |||
| 0eed5ced7d | |||
| 30f3ac4889 | |||
| 0212796696 | |||
| 6c723f8329 | |||
| b1bfbbc371 | |||
| ee8eabc5ed | |||
| cf6bb0bd7a | |||
| 93b542dad2 | |||
| ec6324473a | |||
| 263afb8990 | |||
| 7016161206 | |||
| 7d225750ac | |||
| 286319b6ec | |||
| fef323ab7d | |||
| 05c29c8c77 | |||
| d18d5c96d9 | |||
| 1da4345a50 | |||
| c5b9f4e0fa |
Executable
+2
@@ -0,0 +1,2 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
python3 ./utils/update_site_data.py
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
# These are supported funding model platforms
|
# These are supported funding model platforms
|
||||||
|
|
||||||
patreon: soxoj
|
patreon: soxoj
|
||||||
|
github: soxoj
|
||||||
|
buy_me_a_coffee: soxoj
|
||||||
@@ -15,10 +15,14 @@ assignees: soxoj
|
|||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
||||||
Info about Maigret version you are running and environment (`--version`, operation system, ISP provuder):
|
Info about Maigret version you are running and environment (`--version`, operation system, ISP provider):
|
||||||
<INSERT VERSION INFO HERE>
|
<INSERT VERSION INFO HERE>
|
||||||
|
|
||||||
How to reproduce this bug (commandline options / conditions):
|
How to reproduce this bug (commandline options / conditions):
|
||||||
<INSERT EXAMPLE OF CLI COMMAND HERE>
|
<INSERT EXAMPLE OF CLI COMMAND HERE>
|
||||||
|
|
||||||
<DESCRIPTION>
|
<DESCRIPTION>
|
||||||
|
|
||||||
|
<PASTE SCREENSHOT>
|
||||||
|
|
||||||
|
<ATTACH LOG FILE>
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
|
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
|
||||||
|
platforms: linux/amd64,linux/arm64
|
||||||
-
|
-
|
||||||
name: Image digest
|
name: Image digest
|
||||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||||
|
|||||||
@@ -2,23 +2,69 @@ name: Package exe with PyInstaller - Windows
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ main ]
|
branches: [ main, dev ]
|
||||||
pull_request:
|
|
||||||
branches: [ main ]
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Checkout
|
||||||
- name: PyInstaller Windows
|
uses: actions/checkout@v4
|
||||||
uses: JackMcKew/pyinstaller-action-windows@main
|
|
||||||
with:
|
|
||||||
path: pyinstaller
|
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v2
|
- name: TEST PyInstaller Windows Build
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "test" > maigret_standalone_win32
|
||||||
|
|
||||||
|
- name: TEST Upload PyInstaller Binary to Workflow as Artifact
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: maigret_standalone_win32
|
name: maigret_standalone_win32
|
||||||
path: pyinstaller/dist/windows # or path/to/artifact
|
path: maigret_standalone_win32
|
||||||
|
|
||||||
|
# - name: PyInstaller Windows Build
|
||||||
|
# uses: JackMcKew/pyinstaller-action-windows@main
|
||||||
|
# with:
|
||||||
|
# path: pyinstaller
|
||||||
|
|
||||||
|
# - name: Upload PyInstaller Binary to Workflow as Artifact
|
||||||
|
# uses: actions/upload-artifact@v4
|
||||||
|
# with:
|
||||||
|
# name: maigret_standalone_win32
|
||||||
|
# path: pyinstaller/dist/windows
|
||||||
|
|
||||||
|
- name: Download PyInstaller Binary
|
||||||
|
uses: actions/download-artifact@v4
|
||||||
|
with:
|
||||||
|
name: maigret_standalone_win32
|
||||||
|
|
||||||
|
- name: Remove Previous Release
|
||||||
|
uses: liudonghua123/delete-release-action@v1
|
||||||
|
with:
|
||||||
|
release_name: Windows Release ${{ github.ref_name }}
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ github.token }}
|
||||||
|
|
||||||
|
- name: Create New Release
|
||||||
|
uses: actions/create-release@v1
|
||||||
|
id: create_release
|
||||||
|
with:
|
||||||
|
draft: false
|
||||||
|
prerelease: true
|
||||||
|
release_name: Windows Release [${{ github.ref_name }}]
|
||||||
|
tag_name: ${{ github.ref_name }}-${{ github.run_number }}
|
||||||
|
body: |
|
||||||
|
This is a development release, built from the branch **${{ github.ref_name }}**.
|
||||||
|
Download the attached file "maigret_standalone_win32.zip" to get the Windows executable.
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ github.token }}
|
||||||
|
|
||||||
|
- name: Upload PyInstaller Binary to Release
|
||||||
|
uses: actions/upload-release-asset@v1
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ github.token }}
|
||||||
|
with:
|
||||||
|
upload_url: ${{ steps.create_release.outputs.upload_url }}
|
||||||
|
asset_path: ./maigret_standalone_win32
|
||||||
|
asset_name: maigret_standalone_win32
|
||||||
|
asset_content_type: application/zip
|
||||||
@@ -2,6 +2,7 @@ name: Linting and testing
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
branches: [ main ]
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ main ]
|
branches: [ main ]
|
||||||
types: [opened, synchronize, reopened]
|
types: [opened, synchronize, reopened]
|
||||||
@@ -12,7 +13,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: [3.6.9, 3.7, 3.8, 3.9]
|
python-version: ["3.10", "3.11", "3.12"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
@@ -23,8 +24,8 @@ jobs:
|
|||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
python -m pip install -r test-requirements.txt
|
python -m pip install poetry
|
||||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
python -m poetry install --with dev
|
||||||
- name: Test with pytest
|
- name: Test with pytest
|
||||||
run: |
|
run: |
|
||||||
pytest --reruns 3 --reruns-delay 5
|
poetry run pytest --reruns 3 --reruns-delay 5
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ name: Update sites rating and statistics
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ main ]
|
branches: [ dev ]
|
||||||
types: [opened, synchronize]
|
types: [opened, synchronize]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
# Virtual Environment
|
# Virtual Environment
|
||||||
venv/
|
venv/
|
||||||
|
.venv/
|
||||||
|
|
||||||
# Editor Configurations
|
# Editor Configurations
|
||||||
.vscode/
|
.vscode/
|
||||||
@@ -15,6 +16,10 @@ src/
|
|||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
*.ipynb
|
*.ipynb
|
||||||
|
|
||||||
|
# Logs and backups
|
||||||
|
*.log
|
||||||
|
*.bak
|
||||||
|
|
||||||
# Output files, except requirements.txt
|
# Output files, except requirements.txt
|
||||||
*.txt
|
*.txt
|
||||||
!requirements.txt
|
!requirements.txt
|
||||||
@@ -34,3 +39,7 @@ htmlcov/
|
|||||||
|
|
||||||
# Maigret files
|
# Maigret files
|
||||||
settings.json
|
settings.json
|
||||||
|
|
||||||
|
# other
|
||||||
|
*.egg-info
|
||||||
|
build
|
||||||
|
|||||||
@@ -0,0 +1,16 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
build:
|
||||||
|
os: ubuntu-22.04
|
||||||
|
tools:
|
||||||
|
python: "3.10"
|
||||||
|
|
||||||
|
sphinx:
|
||||||
|
configuration: docs/source/conf.py
|
||||||
|
|
||||||
|
formats:
|
||||||
|
- pdf
|
||||||
|
|
||||||
|
python:
|
||||||
|
install:
|
||||||
|
- requirements: docs/requirements.txt
|
||||||
@@ -2,6 +2,103 @@
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.4.4] - 2022-09-03
|
||||||
|
* Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433
|
||||||
|
* Drop Python 3.6 support by @soxoj in https://github.com/soxoj/maigret/pull/434
|
||||||
|
* Bump xhtml2pdf from 0.2.5 to 0.2.7 by @dependabot in https://github.com/soxoj/maigret/pull/409
|
||||||
|
* Bump reportlab from 3.6.6 to 3.6.9 by @dependabot in https://github.com/soxoj/maigret/pull/403
|
||||||
|
* Bump markupsafe from 2.0.1 to 2.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/389
|
||||||
|
* Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/384
|
||||||
|
* Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/438
|
||||||
|
* Update GH actions by @soxoj in https://github.com/soxoj/maigret/pull/439
|
||||||
|
* Bump tqdm from 4.63.0 to 4.64.0 by @dependabot in https://github.com/soxoj/maigret/pull/440
|
||||||
|
* Bump jinja2 from 3.0.3 to 3.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/441
|
||||||
|
* Bump soupsieve from 2.3.1 to 2.3.2 by @dependabot in https://github.com/soxoj/maigret/pull/436
|
||||||
|
* Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/442
|
||||||
|
* Bump pyvis from 0.1.9 to 0.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/443
|
||||||
|
* Bump pypdf2 from 1.27.4 to 1.27.6 by @dependabot in https://github.com/soxoj/maigret/pull/448
|
||||||
|
* Bump typing-extensions from 4.1.1 to 4.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/447
|
||||||
|
* Bump soupsieve from 2.3.2 to 2.3.2.post1 by @dependabot in https://github.com/soxoj/maigret/pull/444
|
||||||
|
* Bump pypdf2 from 1.27.6 to 1.27.7 by @dependabot in https://github.com/soxoj/maigret/pull/449
|
||||||
|
* Bump pypdf2 from 1.27.7 to 1.27.8 by @dependabot in https://github.com/soxoj/maigret/pull/450
|
||||||
|
* XMind 8 report warning and some docs update by @soxoj in https://github.com/soxoj/maigret/pull/452
|
||||||
|
* False positive fixes 24.04.22 by @soxoj in https://github.com/soxoj/maigret/pull/455
|
||||||
|
* Bump pypdf2 from 1.27.8 to 1.27.9 by @dependabot in https://github.com/soxoj/maigret/pull/456
|
||||||
|
* Bump pytest from 7.0.1 to 7.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/457
|
||||||
|
* Bump jinja2 from 3.1.1 to 3.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/460
|
||||||
|
* Ubisoft forums addition by @fen0s in https://github.com/soxoj/maigret/pull/461
|
||||||
|
* Add BYOND, Figma, BeatStars by @fen0s in https://github.com/soxoj/maigret/pull/462
|
||||||
|
* fix Figma username definition, add a bunch of sites by @fen0s in https://github.com/soxoj/maigret/pull/464
|
||||||
|
* Bump pypdf2 from 1.27.9 to 1.27.10 by @dependabot in https://github.com/soxoj/maigret/pull/465
|
||||||
|
* Bump pypdf2 from 1.27.10 to 1.27.12 by @dependabot in https://github.com/soxoj/maigret/pull/466
|
||||||
|
* Sites fixes 05 05 22 by @soxoj in https://github.com/soxoj/maigret/pull/469
|
||||||
|
* Bump pyvis from 0.2.0 to 0.2.1 by @dependabot in https://github.com/soxoj/maigret/pull/472
|
||||||
|
* Social analyzer websites, also fixing presense strs by @fen0s in https://github.com/soxoj/maigret/pull/471
|
||||||
|
* Updated logic of false positive risk estimating by @soxoj in https://github.com/soxoj/maigret/pull/475
|
||||||
|
* Improved usability of external progressbar func by @soxoj in https://github.com/soxoj/maigret/pull/476
|
||||||
|
* New sites added, some tags/rank update by @soxoj in https://github.com/soxoj/maigret/pull/477
|
||||||
|
* Added new sites by @soxoj in https://github.com/soxoj/maigret/pull/480
|
||||||
|
* Added new forums, updated ranks, some utils improvements by @soxoj in https://github.com/soxoj/maigret/pull/481
|
||||||
|
* Disabled sites with false positives results by @soxoj in https://github.com/soxoj/maigret/pull/482
|
||||||
|
* Bump certifi from 2021.10.8 to 2022.5.18.1 by @dependabot in https://github.com/soxoj/maigret/pull/488
|
||||||
|
* Bump psutil from 5.9.0 to 5.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/490
|
||||||
|
* Bump pypdf2 from 1.27.12 to 1.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/491
|
||||||
|
* Bump pypdf2 from 1.28.1 to 1.28.2 by @dependabot in https://github.com/soxoj/maigret/pull/493
|
||||||
|
* added and fixed some websites in data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/494
|
||||||
|
* Bump pypdf2 from 1.28.2 to 2.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/504
|
||||||
|
* Bump pefile from 2021.9.3 to 2022.5.30 by @dependabot in https://github.com/soxoj/maigret/pull/499
|
||||||
|
* Updated sites list, added disabled Anilist by @soxoj in https://github.com/soxoj/maigret/pull/502
|
||||||
|
* Bump lxml from 4.8.0 to 4.9.0 by @dependabot in https://github.com/soxoj/maigret/pull/503
|
||||||
|
* Compatibility with Python 10 by @soxoj in https://github.com/soxoj/maigret/pull/509
|
||||||
|
* feat: add .log & .bak files to gitignore in https://github.com/soxoj/maigret/pull/511
|
||||||
|
* fix some sites and delete abandoned by @fen0s in https://github.com/soxoj/maigret/pull/526
|
||||||
|
* Fixesjulyfirst by @fen0s in https://github.com/soxoj/maigret/pull/533
|
||||||
|
* yazbel, aboutcar, zhihu by @fen0s in https://github.com/soxoj/maigret/pull/531
|
||||||
|
* Fixes july third by @fen0s in https://github.com/soxoj/maigret/pull/535
|
||||||
|
* Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/539
|
||||||
|
* Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/540
|
||||||
|
* Bump reportlab from 3.6.9 to 3.6.11 by @dependabot in https://github.com/soxoj/maigret/pull/543
|
||||||
|
* Bump requests from 2.27.1 to 2.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/530
|
||||||
|
* Bump pypdf2 from 2.0.0 to 2.5.0 by @dependabot in https://github.com/soxoj/maigret/pull/542
|
||||||
|
* Bump xhtml2pdf from 0.2.7 to 0.2.8 by @dependabot in https://github.com/soxoj/maigret/pull/522
|
||||||
|
* Bump lxml from 4.9.0 to 4.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/538
|
||||||
|
* disable yandex music + set utf8 encoding by @fen0s in https://github.com/soxoj/maigret/pull/562
|
||||||
|
* fix false positives by @fen0s in https://github.com/soxoj/maigret/pull/577
|
||||||
|
* disable Instagram, fix two false positives by @fen0s in https://github.com/soxoj/maigret/pull/578
|
||||||
|
* Bump certifi from 2022.5.18.1 to 2022.6.15 by @dependabot in https://github.com/soxoj/maigret/pull/551
|
||||||
|
* August15 by @fen0s in https://github.com/soxoj/maigret/pull/591
|
||||||
|
* Bump pytest-httpserver from 1.0.4 to 1.0.5 by @dependabot in https://github.com/soxoj/maigret/pull/583
|
||||||
|
* Bump typing-extensions from 4.2.0 to 4.3.0 by @dependabot in https://github.com/soxoj/maigret/pull/549
|
||||||
|
* Bump colorama from 0.4.4 to 0.4.5 by @dependabot in https://github.com/soxoj/maigret/pull/548
|
||||||
|
* Bump chardet from 4.0.0 to 5.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/550
|
||||||
|
* Bump cloudscraper from 1.2.60 to 1.2.63 by @dependabot in https://github.com/soxoj/maigret/pull/600
|
||||||
|
* Bump flake8 from 4.0.1 to 5.0.4 by @dependabot in https://github.com/soxoj/maigret/pull/598
|
||||||
|
* Bump attrs from 21.4.0 to 22.1.0 by @dependabot in https://github.com/soxoj/maigret/pull/597
|
||||||
|
* Bump pytest-asyncio from 0.18.2 to 0.19.0 by @dependabot in https://github.com/soxoj/maigret/pull/601
|
||||||
|
* Bump pypdf2 from 2.5.0 to 2.10.4 by @dependabot in https://github.com/soxoj/maigret/pull/606
|
||||||
|
* Bump pytest from 7.1.2 to 7.1.3 by @dependabot in https://github.com/soxoj/maigret/pull/613
|
||||||
|
* Update sites.md -Gitmemory.com suppression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/610
|
||||||
|
* Bump cloudscraper from 1.2.63 to 1.2.64 by @dependabot in https://github.com/soxoj/maigret/pull/614
|
||||||
|
* Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/607
|
||||||
|
* add ProtonMail, disable 3 broken sites by @fen0s in https://github.com/soxoj/maigret/pull/619
|
||||||
|
* Bump tqdm from 4.64.0 to 4.64.1 by @dependabot in https://github.com/soxoj/maigret/pull/618
|
||||||
|
|
||||||
|
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.3...v0.4.4
|
||||||
|
|
||||||
|
## [0.4.3] - 2022-04-13
|
||||||
|
* Added Sites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/386
|
||||||
|
* added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/390
|
||||||
|
* Skipped broken tests by @soxoj in https://github.com/soxoj/maigret/pull/397
|
||||||
|
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/401
|
||||||
|
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/404
|
||||||
|
* Updated statistics by @soxoj in https://github.com/soxoj/maigret/pull/406
|
||||||
|
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/413
|
||||||
|
* Disabled houzz.com, updated sites statistics by @soxoj in https://github.com/soxoj/maigret/pull/422
|
||||||
|
* Fixed last false positives by @soxoj in https://github.com/soxoj/maigret/pull/424
|
||||||
|
* Fixed actual false positives by @soxoj in https://github.com/soxoj/maigret/pull/431
|
||||||
|
|
||||||
|
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.2...v0.4.3
|
||||||
|
|
||||||
## [0.4.2] - 2022-03-07
|
## [0.4.2] - 2022-03-07
|
||||||
* [ImgBot] Optimize images by @imgbot in https://github.com/soxoj/maigret/pull/319
|
* [ImgBot] Optimize images by @imgbot in https://github.com/soxoj/maigret/pull/319
|
||||||
* Bump pytest-asyncio from 0.17.0 to 0.17.1 by @dependabot in https://github.com/soxoj/maigret/pull/321
|
* Bump pytest-asyncio from 0.17.0 to 0.17.1 by @dependabot in https://github.com/soxoj/maigret/pull/321
|
||||||
|
|||||||
+24
-1
@@ -2,6 +2,10 @@
|
|||||||
|
|
||||||
Hey! I'm really glad you're reading this. Maigret contains a lot of sites, and it is very hard to keep all the sites operational. That's why any fix is important.
|
Hey! I'm really glad you're reading this. Maigret contains a lot of sites, and it is very hard to keep all the sites operational. That's why any fix is important.
|
||||||
|
|
||||||
|
## Code of Conduct
|
||||||
|
|
||||||
|
Please read and follow the [Code of Conduct](CODE_OF_CONDUCT.md) to foster a welcoming and inclusive community.
|
||||||
|
|
||||||
## How to add a new site
|
## How to add a new site
|
||||||
|
|
||||||
#### Beginner level
|
#### Beginner level
|
||||||
@@ -27,4 +31,23 @@ Always write a clear log message for your commits. One-line messages are fine fo
|
|||||||
|
|
||||||
## Coding conventions
|
## Coding conventions
|
||||||
|
|
||||||
Start reading the code and you'll get the hang of it. ;)
|
### General Guidelines
|
||||||
|
|
||||||
|
- Try to follow [PEP 8](https://www.python.org/dev/peps/pep-0008/) for Python code style.
|
||||||
|
- Ensure your code passes all tests before submitting a pull request.
|
||||||
|
|
||||||
|
### Code Style
|
||||||
|
|
||||||
|
- **Indentation**: Use 4 spaces per indentation level.
|
||||||
|
- **Imports**:
|
||||||
|
- Standard library imports should be placed at the top.
|
||||||
|
- Third-party imports should follow.
|
||||||
|
- Group imports logically.
|
||||||
|
|
||||||
|
### Naming Conventions
|
||||||
|
|
||||||
|
- **Variables and Functions**: Use `snake_case`.
|
||||||
|
- **Classes**: Use `CamelCase`.
|
||||||
|
- **Constants**: Use `UPPER_CASE`.
|
||||||
|
|
||||||
|
Start reading the code and you'll get the hang of it. ;)
|
||||||
+10
-10
@@ -1,16 +1,16 @@
|
|||||||
FROM python:3.9-slim
|
FROM python:3.10-slim
|
||||||
MAINTAINER Soxoj <soxoj@protonmail.com>
|
LABEL maintainer="Soxoj <soxoj@protonmail.com>"
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
RUN pip install --upgrade pip
|
RUN pip install --no-cache-dir --upgrade pip
|
||||||
RUN apt update && \
|
RUN apt-get update && \
|
||||||
apt install -y \
|
apt-get install --no-install-recommends -y \
|
||||||
gcc \
|
gcc \
|
||||||
musl-dev \
|
musl-dev \
|
||||||
libxml2 \
|
libxml2 \
|
||||||
libxml2-dev \
|
libxml2-dev \
|
||||||
libxslt-dev
|
libxslt-dev \
|
||||||
RUN apt clean \
|
&& \
|
||||||
&& rm -rf /var/lib/apt/lists/* /tmp/*
|
rm -rf /var/lib/apt/lists/* /tmp/*
|
||||||
ADD . .
|
COPY . .
|
||||||
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install .
|
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install --no-cache-dir .
|
||||||
ENTRYPOINT ["maigret"]
|
ENTRYPOINT ["maigret"]
|
||||||
|
|||||||
+128
@@ -0,0 +1,128 @@
|
|||||||
|
@echo off
|
||||||
|
|
||||||
|
REM check if running as admin
|
||||||
|
|
||||||
|
goto check_Permissions
|
||||||
|
|
||||||
|
:check_Permissions
|
||||||
|
echo Administrative permissions required. Detecting permissions...
|
||||||
|
|
||||||
|
net session >nul 2>&1
|
||||||
|
if %errorLevel% == 0 (
|
||||||
|
goto 1
|
||||||
|
) else (
|
||||||
|
cls
|
||||||
|
echo Failure: You MUST run this as administator, otherwise commands will fail.
|
||||||
|
)
|
||||||
|
|
||||||
|
pause >nul
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
REM Step 2: Check if Python and pip3 are installed
|
||||||
|
python --version >nul 2>&1
|
||||||
|
if %errorlevel% neq 0 (
|
||||||
|
echo Python is not installed. Please install Python 3.8 or higher.
|
||||||
|
pause
|
||||||
|
exit /b
|
||||||
|
)
|
||||||
|
|
||||||
|
pip3 --version >nul 2>&1
|
||||||
|
if %errorlevel% neq 0 (
|
||||||
|
echo pip3 is not installed. Please install pip3.
|
||||||
|
pause
|
||||||
|
exit /b
|
||||||
|
)
|
||||||
|
|
||||||
|
REM Step 3: Check Python version
|
||||||
|
python -c "import sys; exit(0) if sys.version_info >= (3,8) else exit(1)"
|
||||||
|
if %errorlevel% neq 0 (
|
||||||
|
echo Python version 3.8 or higher is required.
|
||||||
|
pause
|
||||||
|
exit /b
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
:1
|
||||||
|
cls
|
||||||
|
:::===============================================================
|
||||||
|
::: ______ __ __ _ _
|
||||||
|
::: | ____| | \/ | (_) | |
|
||||||
|
::: | |__ __ _ ___ _ _ | \ / | __ _ _ __ _ _ __ ___| |_
|
||||||
|
::: | __| / _` / __| | | | | |\/| |/ _` | |/ _` | '__/ _ \ __|
|
||||||
|
::: | |___| (_| \__ \ |_| | | | | | (_| | | (_| | | | __/ |_
|
||||||
|
::: |______\__,_|___/\__, | |_| |_|\__,_|_|\__, |_| \___|\__|
|
||||||
|
::: __/ | __/ |
|
||||||
|
::: |___/ |___/
|
||||||
|
:::
|
||||||
|
:::===============================================================
|
||||||
|
echo.
|
||||||
|
for /f "delims=: tokens=*" %%A in ('findstr /b ::: "%~f0"') do @echo(%%A
|
||||||
|
echo.
|
||||||
|
echo ----------------------------------------------------------------
|
||||||
|
echo Python 3.8 or higher and pip3 required.
|
||||||
|
echo ----------------------------------------------------------------
|
||||||
|
echo Press [I] to begin installation.
|
||||||
|
echo Press [R] If already installed.
|
||||||
|
echo ----------------------------------------------------------------
|
||||||
|
choice /c IR
|
||||||
|
if %errorlevel%==1 goto install1
|
||||||
|
if %errorlevel%==2 goto after
|
||||||
|
|
||||||
|
:install1
|
||||||
|
cls
|
||||||
|
echo ========================================================
|
||||||
|
echo Maigret Installation Script
|
||||||
|
echo ========================================================
|
||||||
|
echo.
|
||||||
|
echo --------------------------------------------------------
|
||||||
|
echo If your pip installation is outdated, it could cause
|
||||||
|
echo cryptography to fail on installation.
|
||||||
|
echo --------------------------------------------------------
|
||||||
|
echo check for and install pip updates now?
|
||||||
|
echo --------------------------------------------------------
|
||||||
|
choice /c YN
|
||||||
|
if %errorlevel%==1 goto install2
|
||||||
|
if %errorlevel%==2 goto install3
|
||||||
|
|
||||||
|
:install2
|
||||||
|
cls
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
goto:install3
|
||||||
|
|
||||||
|
:install3
|
||||||
|
cls
|
||||||
|
echo ========================================================
|
||||||
|
echo Maigret Installation Script
|
||||||
|
echo ========================================================
|
||||||
|
echo.
|
||||||
|
echo --------------------------------------------------------
|
||||||
|
echo Install requirements and maigret?
|
||||||
|
echo --------------------------------------------------------
|
||||||
|
choice /c YN
|
||||||
|
if %errorlevel%==1 goto install4
|
||||||
|
if %errorlevel%==2 goto 1
|
||||||
|
|
||||||
|
:install4
|
||||||
|
cls
|
||||||
|
pip install .
|
||||||
|
pip install maigret
|
||||||
|
goto:after
|
||||||
|
|
||||||
|
:after
|
||||||
|
cls
|
||||||
|
echo ========================================================
|
||||||
|
echo Maigret Background Search
|
||||||
|
echo ========================================================
|
||||||
|
echo.
|
||||||
|
echo --------------------------------------------------------
|
||||||
|
echo Please Enter Username / Email
|
||||||
|
echo --------------------------------------------------------
|
||||||
|
set /p input=
|
||||||
|
maigret %input%
|
||||||
|
echo.
|
||||||
|
echo.
|
||||||
|
echo.
|
||||||
|
echo.
|
||||||
|
pause
|
||||||
|
goto:after
|
||||||
@@ -10,16 +10,16 @@ rerun-tests:
|
|||||||
|
|
||||||
lint:
|
lint:
|
||||||
@echo 'syntax errors or undefined names'
|
@echo 'syntax errors or undefined names'
|
||||||
flake8 --count --select=E9,F63,F7,F82 --show-source --statistics ${LINT_FILES} maigret.py
|
flake8 --count --select=E9,F63,F7,F82 --show-source --statistics ${LINT_FILES}
|
||||||
|
|
||||||
@echo 'warning'
|
@echo 'warning'
|
||||||
flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503,E501 ${LINT_FILES} maigret.py
|
flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503,E501 ${LINT_FILES}
|
||||||
|
|
||||||
@echo 'mypy'
|
@echo 'mypy'
|
||||||
mypy ${LINT_FILES}
|
mypy --check-untyped-defs ${LINT_FILES}
|
||||||
|
|
||||||
speed:
|
speed:
|
||||||
time python3 ./maigret.py --version
|
time python3 -m maigret --version
|
||||||
python3 -c "import timeit; t = timeit.Timer('import maigret'); print(t.timeit(number = 1000000))"
|
python3 -c "import timeit; t = timeit.Timer('import maigret'); print(t.timeit(number = 1000000))"
|
||||||
python3 -X importtime -c "import maigret" 2> maigret-import.log
|
python3 -X importtime -c "import maigret" 2> maigret-import.log
|
||||||
python3 -m tuna maigret-import.log
|
python3 -m tuna maigret-import.log
|
||||||
|
|||||||
@@ -3,27 +3,35 @@
|
|||||||
<p align="center">
|
<p align="center">
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://pypi.org/project/maigret/">
|
<a href="https://pypi.org/project/maigret/">
|
||||||
<img alt="PyPI" src="https://img.shields.io/pypi/v/maigret?style=flat-square">
|
<img alt="PyPI version badge for Maigret" src="https://img.shields.io/pypi/v/maigret?style=flat-square" />
|
||||||
</a>
|
</a>
|
||||||
<a href="https://pypi.org/project/maigret/">
|
<a href="https://pypi.org/project/maigret/">
|
||||||
<img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dw/maigret?style=flat-square">
|
<img alt="PyPI download count for Maigret" src="https://img.shields.io/pypi/dw/maigret?style=flat-square" />
|
||||||
</a>
|
</a>
|
||||||
<a href="https://pypi.org/project/maigret/">
|
<a href="https://github.com/soxoj/maigret">
|
||||||
<img alt="Views" src="https://komarev.com/ghpvc/?username=maigret&color=brightgreen&label=views&style=flat-square">
|
<img alt="Minimum Python version required: 3.10+" src="https://img.shields.io/badge/Python-3.10%2B-brightgreen?style=flat-square" />
|
||||||
|
</a>
|
||||||
|
<a href="https://github.com/soxoj/maigret/blob/main/LICENSE">
|
||||||
|
<img alt="License badge for Maigret" src="https://img.shields.io/github/license/soxoj/maigret?style=flat-square" />
|
||||||
|
</a>
|
||||||
|
<a href="https://github.com/soxoj/maigret">
|
||||||
|
<img alt="View count for Maigret project" src="https://komarev.com/ghpvc/?username=maigret&color=brightgreen&label=views&style=flat-square" />
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img src="https://raw.githubusercontent.com/soxoj/maigret/main/static/maigret.png" height="200"/>
|
<img src="https://raw.githubusercontent.com/soxoj/maigret/main/static/maigret.png" height="300"/>
|
||||||
</p>
|
</p>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
||||||
|
|
||||||
|
<b>👉👉👉 [Online Telegram bot](https://t.me/osint_maigret_bot)</b>
|
||||||
|
|
||||||
## About
|
## About
|
||||||
|
|
||||||
**Maigret** collect a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||||
|
|
||||||
Currently supported more than 2500 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
|
More than 3000 sites are currently supported ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)); by default, the search is launched against the 500 most popular sites in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolving) is also supported.
|
||||||
|
|
||||||
## Main features
|
## Main features
|
||||||
|
|
||||||
@@ -37,30 +45,28 @@ See full description of Maigret features [in the documentation](https://maigret.
|
|||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
‼️ Maigret is available online via [official Telegram bot](https://t.me/osint_maigret_bot).
|
||||||
|
|
||||||
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
|
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
|
||||||
|
|
||||||
Standalone EXE-binaries for Windows are located in [Releases section](https://github.com/soxoj/maigret/releases) of GitHub repository.
|
Standalone EXE-binaries for Windows are located in [Releases section](https://github.com/soxoj/maigret/releases) of GitHub repository.
|
||||||
|
|
||||||
Also you can run Maigret using cloud shells and Jupyter notebooks (see buttons below).
|
Also, you can run Maigret using cloud shells and Jupyter notebooks (see buttons below).
|
||||||
|
|
||||||
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
|
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
|
||||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://user-images.githubusercontent.com/27065646/92304596-bf719b00-ef7f-11ea-987f-2c1f3c323088.png" alt="Run on Repl.it" height="50"></a>
|
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
||||||
|
|
||||||
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
||||||
<a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
|
<a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
|
||||||
|
|
||||||
### Package installing
|
### Package installing
|
||||||
|
|
||||||
**NOTE**: Python 3.6 or higher and pip is required, **Python 3.8 is recommended.**
|
**NOTE**: Python 3.10 or higher and pip are required, **Python 3.11 is recommended.**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# install from pypi
|
# install from pypi
|
||||||
pip3 install maigret
|
pip3 install maigret
|
||||||
|
|
||||||
# or clone and install manually
|
|
||||||
git clone https://github.com/soxoj/maigret && cd maigret
|
|
||||||
pip3 install .
|
|
||||||
|
|
||||||
# usage
|
# usage
|
||||||
maigret username
|
maigret username
|
||||||
```
|
```
|
||||||
@@ -68,11 +74,14 @@ maigret username
|
|||||||
### Cloning a repository
|
### Cloning a repository
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# or clone and install manually
|
||||||
git clone https://github.com/soxoj/maigret && cd maigret
|
git clone https://github.com/soxoj/maigret && cd maigret
|
||||||
pip3 install -r requirements.txt
|
|
||||||
|
# build and install
|
||||||
|
pip3 install .
|
||||||
|
|
||||||
# usage
|
# usage
|
||||||
./maigret.py username
|
maigret username
|
||||||
```
|
```
|
||||||
|
|
||||||
### Docker
|
### Docker
|
||||||
@@ -82,7 +91,7 @@ pip3 install -r requirements.txt
|
|||||||
docker pull soxoj/maigret
|
docker pull soxoj/maigret
|
||||||
|
|
||||||
# usage
|
# usage
|
||||||
docker run soxoj/maigret:latest username
|
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
|
||||||
|
|
||||||
# manual build
|
# manual build
|
||||||
docker build -t maigret .
|
docker build -t maigret .
|
||||||
@@ -91,32 +100,62 @@ docker build -t maigret .
|
|||||||
## Usage examples
|
## Usage examples
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# make HTML and PDF reports
|
# make HTML, PDF, and Xmind8 reports
|
||||||
maigret user --html --pdf
|
maigret user --html
|
||||||
|
maigret user --pdf
|
||||||
|
maigret user --xmind #Output not compatible with xmind 2022+
|
||||||
|
|
||||||
# search on sites marked with tags photo & dating
|
# search on sites marked with tags photo & dating
|
||||||
maigret user --tags photo,dating
|
maigret user --tags photo,dating
|
||||||
|
|
||||||
|
# search on sites marked with tag us
|
||||||
|
maigret user --tags us
|
||||||
|
|
||||||
# search for three usernames on all available sites
|
# search for three usernames on all available sites
|
||||||
maigret user1 user2 user3 -a
|
maigret user1 user2 user3 -a
|
||||||
```
|
```
|
||||||
|
|
||||||
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
Maigret has open-source code, so you may contribute your own sites by adding them to the `data.json` file, or bring changes to its code!
|
||||||
|
|
||||||
|
For more information about development and contribution, please read the [development documentation](https://maigret.readthedocs.io/en/latest/development.html).
|
||||||
|
|
||||||
## Demo with page parsing and recursive username search
|
## Demo with page parsing and recursive username search
|
||||||
|
|
||||||
[PDF report](https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
|
### Video (asciinema)
|
||||||
|
|
||||||

|
<a href="https://asciinema.org/a/Ao0y7N0TTxpS0pisoprQJdylZ">
|
||||||
|
<img src="https://asciinema.org/a/Ao0y7N0TTxpS0pisoprQJdylZ.svg" alt="asciicast" width="600">
|
||||||
|
</a>
|
||||||
|
|
||||||
|
### Reports
|
||||||
|
|
||||||
|
[PDF report](https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
|
||||||
[Full console output](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.md)
|
[Full console output](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.md)
|
||||||
|
|
||||||
|
## Disclaimer
|
||||||
|
|
||||||
|
**This tool is intended for educational and lawful purposes only.** The developers do not endorse or encourage any illegal activities or misuse of this tool. Regulations regarding the collection and use of personal data vary by country and region, including but not limited to GDPR in the EU, CCPA in the USA, and similar laws worldwide.
|
||||||
|
|
||||||
|
It is your sole responsibility to ensure that your use of this tool complies with all applicable laws and regulations in your jurisdiction. Any illegal use of this tool is strictly prohibited, and you are fully accountable for your actions.
|
||||||
|
|
||||||
|
The authors and developers of this tool bear no responsibility for any misuse or unlawful activities conducted by its users.
|
||||||
|
|
||||||
|
## SOWEL classification
|
||||||
|
|
||||||
|
This tool uses the following OSINT techniques:
|
||||||
|
- [SOTL-2.2. Search For Accounts On Other Platforms](https://sowel.soxoj.com/other-platform-accounts)
|
||||||
|
- [SOTL-6.1. Check Logins Reuse To Find Another Account](https://sowel.soxoj.com/logins-reuse)
|
||||||
|
- [SOTL-6.2. Check Nicknames Reuse To Find Another Account](https://sowel.soxoj.com/nicknames-reuse)
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT © [Maigret](https://github.com/soxoj/maigret)<br/>
|
MIT © [Maigret](https://github.com/soxoj/maigret)<br/>
|
||||||
|
|||||||
-18
@@ -1,18 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import asyncio
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from maigret.maigret import main
|
|
||||||
|
|
||||||
|
|
||||||
def run():
|
|
||||||
try:
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.run_until_complete(main())
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print('Maigret is interrupted.')
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
run()
|
|
||||||
+1
-1
@@ -10,4 +10,4 @@
|
|||||||
pixabay.com FALSE / FALSE 0 anonymous_user_id c1e4ee09-5674-4252-aa94-8c47b1ea80ab
|
pixabay.com FALSE / FALSE 0 anonymous_user_id c1e4ee09-5674-4252-aa94-8c47b1ea80ab
|
||||||
pixabay.com FALSE / FALSE 1647214439 csrftoken vfetTSvIul7gBlURt6s985JNM18GCdEwN5MWMKqX4yI73xoPgEj42dbNefjGx5fr
|
pixabay.com FALSE / FALSE 1647214439 csrftoken vfetTSvIul7gBlURt6s985JNM18GCdEwN5MWMKqX4yI73xoPgEj42dbNefjGx5fr
|
||||||
pixabay.com FALSE / FALSE 1647300839 client_width 1680
|
pixabay.com FALSE / FALSE 1647300839 client_width 1680
|
||||||
pixabay.com FALSE / FALSE 748111764839 is_human 1
|
pixabay.com FALSE / FALSE 748111764839 is_human 1
|
||||||
@@ -1 +1,2 @@
|
|||||||
sphinx-copybutton
|
sphinx-copybutton
|
||||||
|
sphinx_rtd_theme
|
||||||
@@ -18,7 +18,7 @@ Parsing of account pages and online documents
|
|||||||
|
|
||||||
Maigret will try to extract information about the document/account owner
|
Maigret will try to extract information about the document/account owner
|
||||||
(including username and other ids) and will make a search by the
|
(including username and other ids) and will make a search by the
|
||||||
extracted username and ids. :doc:`Examples <extracting-information-from-pages>`.
|
extracted username and ids. See examples in the :ref:`extracting-information-from-pages` section.
|
||||||
|
|
||||||
Main options
|
Main options
|
||||||
------------
|
------------
|
||||||
@@ -27,9 +27,9 @@ Options are also configurable through settings files, see
|
|||||||
:doc:`settings section <settings>`.
|
:doc:`settings section <settings>`.
|
||||||
|
|
||||||
``--tags`` - Filter sites for searching by tags: sites categories and
|
``--tags`` - Filter sites for searching by tags: sites categories and
|
||||||
two-letter country codes. E.g. photo, dating, sport; jp, us, global.
|
two-letter country codes (**not a language!**). E.g. photo, dating, sport; jp, us, global.
|
||||||
Multiple tags can be associated with one site. **Warning: tags markup is
|
Multiple tags can be associated with one site. **Warning**: tags markup is
|
||||||
not stable now.**
|
not stable now. Read more :doc:`in the separate section <tags>`.
|
||||||
|
|
||||||
``-n``, ``--max-connections`` - Allowed number of concurrent connections
|
``-n``, ``--max-connections`` - Allowed number of concurrent connections
|
||||||
**(default: 100)**.
|
**(default: 100)**.
|
||||||
|
|||||||
+3
-3
@@ -3,11 +3,11 @@
|
|||||||
# -- Project information
|
# -- Project information
|
||||||
|
|
||||||
project = 'Maigret'
|
project = 'Maigret'
|
||||||
copyright = '2021, soxoj'
|
copyright = '2024, soxoj'
|
||||||
author = 'soxoj'
|
author = 'soxoj'
|
||||||
|
|
||||||
release = '0.4.2'
|
release = '0.4.4'
|
||||||
version = '0.4.2'
|
version = '0.4.4'
|
||||||
|
|
||||||
# -- General configuration
|
# -- General configuration
|
||||||
|
|
||||||
|
|||||||
+108
-9
@@ -3,16 +3,37 @@
|
|||||||
Development
|
Development
|
||||||
==============
|
==============
|
||||||
|
|
||||||
|
Frequently Asked Questions
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
1. Where to find the list of supported sites?
|
||||||
|
|
||||||
|
The human-readable list of supported sites is available in the `sites.md <https://github.com/soxoj/maigret/blob/main/sites.md>`_ file in the repository.
|
||||||
|
It's been generated automatically from the main JSON file with the list of supported sites.
|
||||||
|
|
||||||
|
The machine-readable JSON file with the list of supported sites is available in the
|
||||||
|
`data.json <https://github.com/soxoj/maigret/blob/main/maigret/resources/data.json>`_ file in the directory `resources`.
|
||||||
|
|
||||||
|
2. Which methods to check the account presence are supported?
|
||||||
|
|
||||||
|
The supported methods (``checkType`` values in ``data.json``) are:
|
||||||
|
|
||||||
|
- ``message`` - the most reliable method, checks if any string from ``presenceStrs`` is present and none of the strings from ``absenceStrs`` are present in the HTML response
|
||||||
|
- ``status_code`` - checks that status code of the response is 2XX
|
||||||
|
- ``response_url`` - checks that there is no redirect and the response is 2XX
|
||||||
|
|
||||||
|
See the details of check mechanisms in the `checking.py <https://github.com/soxoj/maigret/blob/main/maigret/checking.py#L339>`_ file.
|
||||||
|
|
||||||
Testing
|
Testing
|
||||||
-------
|
-------
|
||||||
|
|
||||||
It is recommended use Python 3.7/3.8 for test due to some conflicts in 3.9.
|
It is recommended to use Python 3.10 for testing.
|
||||||
|
|
||||||
Install test requirements:
|
Install test requirements:
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
pip install -r test-requirements.txt
|
poetry install --with dev
|
||||||
|
|
||||||
|
|
||||||
Use the following commands to check Maigret:
|
Use the following commands to check Maigret:
|
||||||
@@ -20,19 +41,74 @@ Use the following commands to check Maigret:
|
|||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
# run linter and typing checks
|
# run linter and typing checks
|
||||||
# order of checks%
|
# order of checks:
|
||||||
# - critical syntax errors or undefined names
|
# - critical syntax errors or undefined names
|
||||||
# - flake checks
|
# - flake checks
|
||||||
# - mypy checks
|
# - mypy checks
|
||||||
make lint
|
make lint
|
||||||
|
|
||||||
# run testing with coverage html report
|
# run testing with coverage html report
|
||||||
# current test coverage is 60%
|
# current test coverage is 58%
|
||||||
make text
|
make test
|
||||||
|
|
||||||
# open html report
|
# open html report
|
||||||
open htmlcov/index.html
|
open htmlcov/index.html
|
||||||
|
|
||||||
|
# get flamechart of imports to estimate startup time
|
||||||
|
make speed
|
||||||
|
|
||||||
|
|
||||||
|
How to fix false-positives
|
||||||
|
-----------------------------------------------
|
||||||
|
|
||||||
|
If you want to work with the sites database, don't forget to activate the statistics update git hook; the command for it looks like this: ``git config --local core.hooksPath .githooks/``.
|
||||||
|
|
||||||
|
You should make your git commits from your maigret git repo folder, or else the hook wouldn't find the statistics update script.
|
||||||
|
|
||||||
|
1. Determine the problematic site.
|
||||||
|
|
||||||
|
If you already know which site has a false-positive and want to fix it specifically, go to the next step.
|
||||||
|
|
||||||
|
Otherwise, simply run a search with a random username (e.g. `laiuhi3h4gi3u4hgt`) and check the results.
|
||||||
|
Alternatively, you can use `the Telegram bot <https://t.me/osint_maigret_bot>`_.
|
||||||
|
|
||||||
|
2. Open the account link in your browser and check:
|
||||||
|
|
||||||
|
- If the site is completely gone, remove it from the list
|
||||||
|
- If the site still works but looks different, update in data.json how we check it
|
||||||
|
- If the site requires login to view profiles, disable checking it
|
||||||
|
|
||||||
|
3. Find the site in the `data.json <https://github.com/soxoj/maigret/blob/main/maigret/resources/data.json>`_ file.
|
||||||
|
|
||||||
|
If the ``checkType`` method is not ``message`` and you are going to fix check, update it:
|
||||||
|
- put ``message`` in ``checkType``
|
||||||
|
- put in ``absenceStrs`` a keyword that is present in the HTML response for a non-existing account
|
||||||
|
- put in ``presenceStrs`` a keyword that is present in the HTML response for an existing account
|
||||||
|
|
||||||
|
If you have trouble determining the right keywords, you can use automatic detection by passing the account URL with the ``--submit`` option:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
maigret --submit https://my.mail.ru/bk/alex
|
||||||
|
|
||||||
|
To disable checking, set ``disabled`` to ``true`` or simply run:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
maigret --self-check --site My.Mail.ru@bk.ru
|
||||||
|
|
||||||
|
To debug the check method using the response HTML, you can run:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
maigret soxoj --site My.Mail.ru@bk.ru -d 2> response.txt
|
||||||
|
|
||||||
|
There are a few options for sites in data.json that are helpful in various cases:
|
||||||
|
|
||||||
|
- ``engine`` - a predefined check for the sites of certain type (e.g. forums), see the ``engines`` section in the JSON file
|
||||||
|
- ``headers`` - a dictionary of additional headers to be sent to the site
|
||||||
|
- ``requestHeadOnly`` - set to ``true`` if it's enough to make a HEAD request to the site
|
||||||
|
- ``regexCheck`` - a regex to check if the username is valid, in case of frequent false-positives
|
||||||
|
|
||||||
How to publish new version of Maigret
|
How to publish new version of Maigret
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
@@ -69,7 +145,7 @@ PyPi package.
|
|||||||
4. Get auto-generate release notes:
|
4. Get auto-generate release notes:
|
||||||
|
|
||||||
- Open https://github.com/soxoj/maigret/releases/new
|
- Open https://github.com/soxoj/maigret/releases/new
|
||||||
- Click `Choose a tag`, enter `test`
|
- Click `Choose a tag`, enter `v0.4.0` (your version)
|
||||||
- Click `Create new tag`
|
- Click `Create new tag`
|
||||||
- Press `+ Auto-generate release notes`
|
- Press `+ Auto-generate release notes`
|
||||||
- Copy all the text from description text field below
|
- Copy all the text from description text field below
|
||||||
@@ -81,8 +157,8 @@ PyPi package.
|
|||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
git add ...
|
git add -p
|
||||||
git commit -m 'Bump to 0.4.0'
|
git commit -m 'Bump to YOUR VERSION'
|
||||||
git push origin head
|
git push origin head
|
||||||
|
|
||||||
|
|
||||||
@@ -98,4 +174,27 @@ PyPi package.
|
|||||||
- Press `+ Auto-generate release notes`
|
- Press `+ Auto-generate release notes`
|
||||||
- **Press "Publish release" button**
|
- **Press "Publish release" button**
|
||||||
|
|
||||||
8. That's all, now you can simply wait push to PyPi. You can monitor it in Action page: https://github.com/soxoj/maigret/actions/workflows/python-publish.yml
|
8. That's all, now you can simply wait push to PyPi. You can monitor it in Action page: https://github.com/soxoj/maigret/actions/workflows/python-publish.yml
|
||||||
|
|
||||||
|
Documentation updates
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Documentation is auto-generated and auto-deployed from the ``docs`` directory.
|
||||||
|
|
||||||
|
To manually update documentation:
|
||||||
|
|
||||||
|
1. Change something in the ``.rst`` files in the ``docs/source`` directory.
|
||||||
|
2. Install the dependencies with ``pip install -r requirements.txt`` in the docs directory.
|
||||||
|
3. Run ``make singlehtml`` in the terminal in the docs directory.
|
||||||
|
4. Open ``build/singlehtml/index.html`` in your browser to see the result.
|
||||||
|
5. If everything is ok, commit and push your changes to GitHub.
|
||||||
|
|
||||||
|
Roadmap
|
||||||
|
-------
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
This roadmap requires updating to reflect the current project status and future plans.
|
||||||
|
|
||||||
|
.. figure:: https://i.imgur.com/kk8cFdR.png
|
||||||
|
:target: https://i.imgur.com/kk8cFdR.png
|
||||||
|
:align: center
|
||||||
|
|||||||
@@ -1,35 +0,0 @@
|
|||||||
.. _extracting-information-from-pages:
|
|
||||||
|
|
||||||
Extracting information from pages
|
|
||||||
=================================
|
|
||||||
Maigret can parse URLs and content of web pages by URLs to extract info about account owner and other meta information.
|
|
||||||
|
|
||||||
You must specify the URL with the option ``--parse``, it's can be a link to an account or an online document. List of supported sites `see here <https://github.com/soxoj/socid-extractor#sites>`_.
|
|
||||||
|
|
||||||
After the end of the parsing phase, Maigret will start the search phase by :doc:`supported identifiers <supported-identifier-types>` found (usernames, ids, etc.).
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ maigret --parse https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit\#gid\=0
|
|
||||||
|
|
||||||
Scanning webpage by URL https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit#gid=0...
|
|
||||||
┣╸org_name: Gooten
|
|
||||||
┗╸mime_type: application/vnd.google-apps.ritz
|
|
||||||
Scanning webpage by URL https://clients6.google.com/drive/v2beta/files/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw?fields=alternateLink%2CcopyRequiresWriterPermission%2CcreatedDate%2Cdescription%2CdriveId%2CfileSize%2CiconLink%2Cid%2Clabels(starred%2C%20trashed)%2ClastViewedByMeDate%2CmodifiedDate%2Cshared%2CteamDriveId%2CuserPermission(id%2Cname%2CemailAddress%2Cdomain%2Crole%2CadditionalRoles%2CphotoLink%2Ctype%2CwithLink)%2Cpermissions(id%2Cname%2CemailAddress%2Cdomain%2Crole%2CadditionalRoles%2CphotoLink%2Ctype%2CwithLink)%2Cparents(id)%2Ccapabilities(canMoveItemWithinDrive%2CcanMoveItemOutOfDrive%2CcanMoveItemOutOfTeamDrive%2CcanAddChildren%2CcanEdit%2CcanDownload%2CcanComment%2CcanMoveChildrenWithinDrive%2CcanRename%2CcanRemoveChildren%2CcanMoveItemIntoTeamDrive)%2Ckind&supportsTeamDrives=true&enforceSingleParent=true&key=AIzaSyC1eQ1xj69IdTMeii5r7brs3R90eck-m7k...
|
|
||||||
┣╸created_at: 2016-02-16T18:51:52.021Z
|
|
||||||
┣╸updated_at: 2019-10-23T17:15:47.157Z
|
|
||||||
┣╸gaia_id: 15696155517366416778
|
|
||||||
┣╸fullname: Nadia Burgess
|
|
||||||
┣╸email: nadia@gooten.com
|
|
||||||
┣╸image: https://lh3.googleusercontent.com/a-/AOh14GheZe1CyNa3NeJInWAl70qkip4oJ7qLsD8vDy6X=s64
|
|
||||||
┗╸email_username: nadia
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ maigret.py --parse https://steamcommunity.com/profiles/76561199113454789
|
|
||||||
Scanning webpage by URL https://steamcommunity.com/profiles/76561199113454789...
|
|
||||||
┣╸steam_id: 76561199113454789
|
|
||||||
┣╸nickname: Pok
|
|
||||||
┗╸username: Machine42
|
|
||||||
+124
-3
@@ -14,17 +14,99 @@ Also, Maigret use found ids and usernames from links to start a recursive search
|
|||||||
|
|
||||||
Enabled by default, can be disabled with ``--no extracting``.
|
Enabled by default, can be disabled with ``--no extracting``.
|
||||||
|
|
||||||
|
.. code-block:: text
|
||||||
|
|
||||||
|
$ python3 -m maigret soxoj --timeout 5
|
||||||
|
[-] Starting a search on top 500 sites from the Maigret database...
|
||||||
|
[!] You can run search by full list of sites with flag `-a`
|
||||||
|
[*] Checking username soxoj on:
|
||||||
|
...
|
||||||
|
[+] GitHub: https://github.com/soxoj
|
||||||
|
├─uid: 31013580
|
||||||
|
├─image: https://avatars.githubusercontent.com/u/31013580?v=4
|
||||||
|
├─created_at: 2017-08-14T17:03:07Z
|
||||||
|
├─location: Amsterdam, Netherlands
|
||||||
|
├─follower_count: 1304
|
||||||
|
├─following_count: 54
|
||||||
|
├─fullname: Soxoj
|
||||||
|
├─public_gists_count: 3
|
||||||
|
├─public_repos_count: 88
|
||||||
|
├─twitter_username: sox0j
|
||||||
|
├─bio: Head of OSINT Center of Excellence in @SocialLinks-IO
|
||||||
|
├─is_company: Social Links
|
||||||
|
└─blog_url: soxoj.com
|
||||||
|
...
|
||||||
|
|
||||||
Recursive search
|
Recursive search
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
Maigret can extract some :ref:`common ids <supported-identifier-types>` and usernames from links on the account page (often people placed links to their other accounts) and immediately start new searches. All the gathered information will be displayed in CLI output and reports.
|
Maigret has the ability to scan account pages for :ref:`common identifiers <supported-identifier-types>` and usernames found in links.
|
||||||
|
When people include links to their other social media accounts, Maigret can automatically detect and initiate new searches for those profiles.
|
||||||
|
Any information discovered through this process will be shown in both the command-line interface output and generated reports.
|
||||||
|
|
||||||
Enabled by default, can be disabled with ``--no-recursion``.
|
Enabled by default, can be disabled with ``--no-recursion``.
|
||||||
|
|
||||||
Reports
|
|
||||||
|
.. code-block:: text
|
||||||
|
|
||||||
|
$ python3 -m maigret soxoj --timeout 5
|
||||||
|
[-] Starting a search on top 500 sites from the Maigret database...
|
||||||
|
[!] You can run search by full list of sites with flag `-a`
|
||||||
|
[*] Checking username soxoj on:
|
||||||
|
...
|
||||||
|
[+] GitHub: https://github.com/soxoj
|
||||||
|
├─uid: 31013580
|
||||||
|
├─image: https://avatars.githubusercontent.com/u/31013580?v=4
|
||||||
|
├─created_at: 2017-08-14T17:03:07Z
|
||||||
|
├─location: Amsterdam, Netherlands
|
||||||
|
├─follower_count: 1304
|
||||||
|
├─following_count: 54
|
||||||
|
├─fullname: Soxoj
|
||||||
|
├─public_gists_count: 3
|
||||||
|
├─public_repos_count: 88
|
||||||
|
├─twitter_username: sox0j <===== another username found here
|
||||||
|
├─bio: Head of OSINT Center of Excellence in @SocialLinks-IO
|
||||||
|
├─is_company: Social Links
|
||||||
|
└─blog_url: soxoj.com
|
||||||
|
...
|
||||||
|
Searching |████████████████████████████████████████| 500/500 [100%] in 9.1s (54.85/s)
|
||||||
|
[-] You can see detailed site check errors with a flag `--print-errors`
|
||||||
|
[*] Checking username sox0j on:
|
||||||
|
[+] Telegram: https://t.me/sox0j
|
||||||
|
├─fullname: @Sox0j
|
||||||
|
...
|
||||||
|
|
||||||
|
Username permutations
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Maigret can generate permutations of usernames. Just pass a few usernames in the CLI and use ``--permute`` flag.
|
||||||
|
Thanks to `@balestek <https://github.com/balestek>`_ for the idea and implementation.
|
||||||
|
|
||||||
|
.. code-block:: text
|
||||||
|
|
||||||
|
$ python3 -m maigret --permute hope dream --timeout 5
|
||||||
|
[-] 12 permutations from hope dream to check...
|
||||||
|
├─ hopedream
|
||||||
|
├─ _hopedream
|
||||||
|
├─ hopedream_
|
||||||
|
├─ hope_dream
|
||||||
|
├─ hope-dream
|
||||||
|
├─ hope.dream
|
||||||
|
├─ dreamhope
|
||||||
|
├─ _dreamhope
|
||||||
|
├─ dreamhope_
|
||||||
|
├─ dream_hope
|
||||||
|
├─ dream-hope
|
||||||
|
└─ dream.hope
|
||||||
|
[-] Starting a search on top 500 sites from the Maigret database...
|
||||||
|
[!] You can run search by full list of sites with flag `-a`
|
||||||
|
[*] Checking username hopedream on:
|
||||||
|
...
|
||||||
|
|
||||||
|
Reports
|
||||||
-------
|
-------
|
||||||
|
|
||||||
Maigret currently supports HTML, PDF, TXT, XMind mindmap, and JSON reports.
|
Maigret currently supports HTML, PDF, TXT, XMind 8 mindmap, and JSON reports.
|
||||||
|
|
||||||
HTML/PDF reports contain:
|
HTML/PDF reports contain:
|
||||||
|
|
||||||
@@ -34,6 +116,9 @@ HTML/PDF reports contain:
|
|||||||
|
|
||||||
Also, there is a short text report in the CLI output after the end of a searching phase.
|
Also, there is a short text report in the CLI output after the end of a searching phase.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
XMind 8 mindmaps are incompatible with XMind 2022!
|
||||||
|
|
||||||
Tags
|
Tags
|
||||||
----
|
----
|
||||||
|
|
||||||
@@ -68,6 +153,42 @@ The Maigret database contains not only the original websites, but also mirrors,
|
|||||||
|
|
||||||
It allows getting additional info about the person and checking the existence of the account even if the main site is unavailable (bot protection, captcha, etc.)
|
It allows getting additional info about the person and checking the existence of the account even if the main site is unavailable (bot protection, captcha, etc.)
|
||||||
|
|
||||||
|
.. _extracting-information-from-pages:
|
||||||
|
|
||||||
|
Extraction of information from account pages
|
||||||
|
---------------------------------------------
|
||||||
|
|
||||||
|
Maigret can parse URLs and the content of the web pages they point to in order to extract info about the account owner and other meta information.
|
||||||
|
|
||||||
|
You must specify the URL with the option ``--parse``; it can be a link to an account or an online document. See the `list of supported sites <https://github.com/soxoj/socid-extractor#sites>`_.
|
||||||
|
|
||||||
|
After the end of the parsing phase, Maigret will start the search phase using the :doc:`supported identifiers <supported-identifier-types>` it found (usernames, ids, etc.).
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ maigret --parse https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit\#gid\=0
|
||||||
|
|
||||||
|
Scanning webpage by URL https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit#gid=0...
|
||||||
|
┣╸org_name: Gooten
|
||||||
|
┗╸mime_type: application/vnd.google-apps.ritz
|
||||||
|
Scanning webpage by URL https://clients6.google.com/drive/v2beta/files/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw?fields=alternateLink%2CcopyRequiresWriterPermission%2CcreatedDate%2Cdescription%2CdriveId%2CfileSize%2CiconLink%2Cid%2Clabels(starred%2C%20trashed)%2ClastViewedByMeDate%2CmodifiedDate%2Cshared%2CteamDriveId%2CuserPermission(id%2Cname%2CemailAddress%2Cdomain%2Crole%2CadditionalRoles%2CphotoLink%2Ctype%2CwithLink)%2Cpermissions(id%2Cname%2CemailAddress%2Cdomain%2Crole%2CadditionalRoles%2CphotoLink%2Ctype%2CwithLink)%2Cparents(id)%2Ccapabilities(canMoveItemWithinDrive%2CcanMoveItemOutOfDrive%2CcanMoveItemOutOfTeamDrive%2CcanAddChildren%2CcanEdit%2CcanDownload%2CcanComment%2CcanMoveChildrenWithinDrive%2CcanRename%2CcanRemoveChildren%2CcanMoveItemIntoTeamDrive)%2Ckind&supportsTeamDrives=true&enforceSingleParent=true&key=AIzaSyC1eQ1xj69IdTMeii5r7brs3R90eck-m7k...
|
||||||
|
┣╸created_at: 2016-02-16T18:51:52.021Z
|
||||||
|
┣╸updated_at: 2019-10-23T17:15:47.157Z
|
||||||
|
┣╸gaia_id: 15696155517366416778
|
||||||
|
┣╸fullname: Nadia Burgess
|
||||||
|
┣╸email: nadia@gooten.com
|
||||||
|
┣╸image: https://lh3.googleusercontent.com/a-/AOh14GheZe1CyNa3NeJInWAl70qkip4oJ7qLsD8vDy6X=s64
|
||||||
|
┗╸email_username: nadia
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ maigret.py --parse https://steamcommunity.com/profiles/76561199113454789
|
||||||
|
Scanning webpage by URL https://steamcommunity.com/profiles/76561199113454789...
|
||||||
|
┣╸steam_id: 76561199113454789
|
||||||
|
┣╸nickname: Pok
|
||||||
|
┗╸username: Machine42
|
||||||
|
|
||||||
|
|
||||||
Simple API
|
Simple API
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
|||||||
+22
-7
@@ -3,29 +3,44 @@
|
|||||||
Welcome to the Maigret docs!
|
Welcome to the Maigret docs!
|
||||||
============================
|
============================
|
||||||
|
|
||||||
**Maigret** is an easy-to-use and powerful OSINT tool for collecting a dossier on a person by username only.
|
**Maigret** is an easy-to-use and powerful OSINT tool for collecting a dossier on a person by a username (alias) only.
|
||||||
|
|
||||||
This is achieved by checking for accounts on a huge number of sites and gathering all the available information from web pages.
|
This is achieved by checking for accounts on a huge number of sites and gathering all the available information from web pages.
|
||||||
|
|
||||||
The project's main goal - give to OSINT researchers and pentesters a **universal tool** to get maximum information about a subject and integrate it with other tools in automatization pipelines.
|
The project's main goal is to give OSINT researchers and pentesters a **universal tool** to get maximum information
|
||||||
|
about a person of interest by a username and integrate it with other tools in automation pipelines.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
**This tool is intended for educational and lawful purposes only.**
|
||||||
|
The developers do not endorse or encourage any illegal activities or misuse of this tool.
|
||||||
|
Regulations regarding the collection and use of personal data vary by country and region,
|
||||||
|
including but not limited to GDPR in the EU, CCPA in the USA, and similar laws worldwide.
|
||||||
|
|
||||||
|
It is your sole responsibility to ensure that your use of this tool complies with all applicable laws
|
||||||
|
and regulations in your jurisdiction. Any illegal use of this tool is strictly prohibited,
|
||||||
|
and you are fully accountable for your actions.
|
||||||
|
|
||||||
|
The authors and developers of this tool bear no responsibility for any misuse
|
||||||
|
or unlawful activities conducted by its users.
|
||||||
|
|
||||||
You may be interested in:
|
You may be interested in:
|
||||||
-------------------------
|
-------------------------
|
||||||
- :doc:`Command line options description <command-line-options>` and :doc:`usage examples <usage-examples>`
|
- :doc:`Quick start <quick-start>`
|
||||||
|
- :doc:`Usage examples <usage-examples>`
|
||||||
|
- :doc:`Command line options <command-line-options>`
|
||||||
- :doc:`Features list <features>`
|
- :doc:`Features list <features>`
|
||||||
- :doc:`Project roadmap <roadmap>`
|
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:hidden:
|
:hidden:
|
||||||
:caption: Sections
|
:caption: Sections
|
||||||
|
|
||||||
|
quick-start
|
||||||
|
installation
|
||||||
|
usage-examples
|
||||||
command-line-options
|
command-line-options
|
||||||
extracting-information-from-pages
|
|
||||||
features
|
features
|
||||||
philosophy
|
philosophy
|
||||||
roadmap
|
|
||||||
supported-identifier-types
|
supported-identifier-types
|
||||||
tags
|
tags
|
||||||
usage-examples
|
|
||||||
settings
|
settings
|
||||||
development
|
development
|
||||||
|
|||||||
@@ -0,0 +1,88 @@
|
|||||||
|
.. _installation:
|
||||||
|
|
||||||
|
Installation
|
||||||
|
============
|
||||||
|
|
||||||
|
Maigret can be installed using pip or Docker, or simply launched from the cloned repo.
|
||||||
|
Also, it is available online via `official Telegram bot <https://t.me/osint_maigret_bot>`_,
|
||||||
|
the source code of the bot is `available on GitHub <https://github.com/soxoj/maigret-tg-bot>`_.
|
||||||
|
|
||||||
|
Package installing
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Please note that the sites database in the PyPI package may be outdated.
|
||||||
|
If you encounter frequent false positive results, we recommend installing the latest development version from GitHub instead.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
Python 3.10 or higher and pip are required, **Python 3.11 is recommended.**
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# install from pypi
|
||||||
|
pip3 install maigret
|
||||||
|
|
||||||
|
# usage
|
||||||
|
maigret username
|
||||||
|
|
||||||
|
Development version (GitHub)
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
git clone https://github.com/soxoj/maigret && cd maigret
|
||||||
|
pip3 install .
|
||||||
|
|
||||||
|
# OR
|
||||||
|
pip3 install git+https://github.com/soxoj/maigret.git
|
||||||
|
|
||||||
|
# usage
|
||||||
|
maigret username
|
||||||
|
|
||||||
|
# OR use poetry in case you plan to develop Maigret
|
||||||
|
pip3 install poetry
|
||||||
|
poetry run maigret
|
||||||
|
|
||||||
|
Cloud shells and Jupyter notebooks
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
In case you don't want to install Maigret locally, you can use cloud shells and Jupyter notebooks.
|
||||||
|
|
||||||
|
.. image:: https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png
|
||||||
|
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md
|
||||||
|
:alt: Open in Cloud Shell
|
||||||
|
|
||||||
|
.. image:: https://replit.com/badge/github/soxoj/maigret
|
||||||
|
:target: https://repl.it/github/soxoj/maigret
|
||||||
|
:alt: Run on Replit
|
||||||
|
:height: 50
|
||||||
|
|
||||||
|
.. image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||||
|
:target: https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb
|
||||||
|
:alt: Open In Colab
|
||||||
|
:height: 45
|
||||||
|
|
||||||
|
.. image:: https://mybinder.org/badge_logo.svg
|
||||||
|
:target: https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD
|
||||||
|
:alt: Open In Binder
|
||||||
|
:height: 45
|
||||||
|
|
||||||
|
Windows standalone EXE-binaries
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
Standalone EXE-binaries for Windows are located in the `Releases section <https://github.com/soxoj/maigret/releases>`_ of the GitHub repository.
|
||||||
|
|
||||||
|
Currently, a new binary is created automatically after each commit to the main branch, but is not deployed to the Releases section automatically.
|
||||||
|
|
||||||
|
Docker
|
||||||
|
------
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# official image of the development version, updated from the github repo
|
||||||
|
docker pull soxoj/maigret
|
||||||
|
|
||||||
|
# usage
|
||||||
|
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
|
||||||
|
|
||||||
|
# manual build
|
||||||
|
docker build -t maigret .
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 375 KiB |
@@ -3,4 +3,15 @@
|
|||||||
Philosophy
|
Philosophy
|
||||||
==========
|
==========
|
||||||
|
|
||||||
Username => Dossier
|
TL;DR: Username => Dossier
|
||||||
|
|
||||||
|
Maigret is designed to gather all the available information about a person by their username.
|
||||||
|
|
||||||
|
What kind of information is this? First, links to the person's accounts. Second, all the machine-extractable
|
||||||
|
pieces of info, such as: other usernames, full name, URLs to people's images, birthday, location (country,
|
||||||
|
city, etc.), gender.
|
||||||
|
|
||||||
|
All this information forms a dossier, but it is also useful for other tools and analytical purposes.
|
||||||
|
Each collected piece of data has a label of a certain format (for example, ``follower_count`` for the number
|
||||||
|
of subscribers or ``created_at`` for account creation time) so that it can be parsed and analyzed by various
|
||||||
|
systems and stored in databases.
|
||||||
|
|||||||
@@ -0,0 +1,15 @@
|
|||||||
|
.. _quick-start:
|
||||||
|
|
||||||
|
Quick start
|
||||||
|
===========
|
||||||
|
|
||||||
|
After :doc:`installing Maigret <installation>`, you can begin searching by providing one or more usernames to look up:
|
||||||
|
|
||||||
|
``maigret username1 username2 ...``
|
||||||
|
|
||||||
|
Maigret will search for accounts with the specified usernames across a vast number of websites. It will provide you with a list
|
||||||
|
of URLs to any discovered accounts, along with relevant information extracted from those profiles.
|
||||||
|
|
||||||
|
.. image:: maigret_screenshot.png
|
||||||
|
:alt: Maigret search results screenshot
|
||||||
|
:align: center
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
.. _roadmap:
|
|
||||||
|
|
||||||
Roadmap
|
|
||||||
=======
|
|
||||||
|
|
||||||
.. figure:: https://i.imgur.com/kk8cFdR.png
|
|
||||||
:target: https://i.imgur.com/kk8cFdR.png
|
|
||||||
:align: center
|
|
||||||
|
|
||||||
Current status
|
|
||||||
--------------
|
|
||||||
|
|
||||||
- Sites DB stats - ok
|
|
||||||
- Scan sessions stats - ok
|
|
||||||
- Site engine autodetect - ok
|
|
||||||
- Engines for all the sites - WIP
|
|
||||||
- Unified reporting flow - ok
|
|
||||||
- Retries - ok
|
|
||||||
@@ -3,6 +3,9 @@
|
|||||||
Settings
|
Settings
|
||||||
==============
|
==============
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
The settings system is under development and may be subject to change.
|
||||||
|
|
||||||
Options are also configurable through settings files. See
|
Options are also configurable through settings files. See
|
||||||
`settings JSON file <https://github.com/soxoj/maigret/blob/main/maigret/resources/settings.json>`_
|
`settings JSON file <https://github.com/soxoj/maigret/blob/main/maigret/resources/settings.json>`_
|
||||||
for the list of currently supported options.
|
for the list of currently supported options.
|
||||||
|
|||||||
@@ -5,7 +5,8 @@ Tags
|
|||||||
|
|
||||||
The use of tags allows you to select a subset of the sites from big Maigret DB for search.
|
The use of tags allows you to select a subset of the sites from big Maigret DB for search.
|
||||||
|
|
||||||
**Warning: tags markup is not stable now.**
|
.. warning::
|
||||||
|
Tags markup is still not stable.
|
||||||
|
|
||||||
There are several types of tags:
|
There are several types of tags:
|
||||||
|
|
||||||
@@ -17,7 +18,7 @@ There are several types of tags:
|
|||||||
|
|
||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
``--tags en,jp`` -- search on US and Japanese sites (actually marked as such in the Maigret database)
|
``--tags us,jp`` -- search on US and Japanese sites (actually marked as such in the Maigret database)
|
||||||
|
|
||||||
``--tags coding`` -- search on sites related to software development.
|
``--tags coding`` -- search on sites related to software development.
|
||||||
|
|
||||||
|
|||||||
@@ -3,51 +3,68 @@
|
|||||||
Usage examples
|
Usage examples
|
||||||
==============
|
==============
|
||||||
|
|
||||||
Start a search for accounts with username ``machine42`` on top 500 sites from the Maigret DB.
|
1. Search for accounts with username ``machine42`` on top 500 sites (by default, according to Alexa rank) from the Maigret DB.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret machine42
|
maigret machine42
|
||||||
|
|
||||||
Start a search for accounts with username ``machine42`` on **all sites** from the Maigret DB.
|
2. Search for accounts with username ``machine42`` on **all sites** from the Maigret DB.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret machine42 -a
|
maigret machine42 -a
|
||||||
|
|
||||||
Start a search [...] and generate HTML and PDF reports.
|
.. note::
|
||||||
|
Maigret will search for accounts on a huge number of sites,
|
||||||
|
and some of them may return false positive results. At the moment, we are working on autorepair mode to deliver
|
||||||
|
the most accurate results.
|
||||||
|
|
||||||
|
If you experience many false positives, you can do the following:
|
||||||
|
|
||||||
|
- Install the latest development version of Maigret from GitHub
|
||||||
|
- Run Maigret with the ``--self-check`` flag and agree to disable problematic sites
|
||||||
|
|
||||||
|
3. Search for accounts with username ``machine42`` and generate HTML and PDF reports.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret machine42 -a -HP
|
maigret machine42 -HP
|
||||||
|
|
||||||
Start a search for accounts with username ``machine42`` only on Facebook.
|
or
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
maigret machine42 -a --html --pdf
|
||||||
|
|
||||||
|
|
||||||
|
4. Search for accounts with username ``machine42`` on Facebook only.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret machine42 --site Facebook
|
maigret machine42 --site Facebook
|
||||||
|
|
||||||
Extract information from the Steam page by URL and start a search for accounts with found username ``machine42``.
|
5. Extract information from the Steam page by URL and start a search for accounts with found username ``machine42``.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret --parse https://steamcommunity.com/profiles/76561199113454789
|
maigret --parse https://steamcommunity.com/profiles/76561199113454789
|
||||||
|
|
||||||
Start a search for accounts with username ``machine42`` only on US and Japanese sites.
|
6. Search for accounts with username ``machine42`` only on US and Japanese sites.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret michael --tags en,jp
|
maigret machine42 --tags us,jp
|
||||||
|
|
||||||
Start a search for accounts with username ``machine42`` only on sites related to software development.
|
7. Search for accounts with username ``machine42`` only on sites related to software development.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret michael --tags coding
|
maigret machine42 --tags coding
|
||||||
|
|
||||||
Start a search for accounts with username ``machine42`` on uCoz sites only (mostly CIS countries).
|
8. Search for accounts with username ``machine42`` on uCoz sites only (mostly CIS countries).
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret michael --tags ucoz
|
maigret machine42 --tags ucoz
|
||||||
|
|
||||||
|
|||||||
+40
-65
@@ -1,68 +1,43 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"nbformat": 4,
|
||||||
{
|
"nbformat_minor": 0,
|
||||||
"cell_type": "code",
|
"metadata": {
|
||||||
"execution_count": null,
|
"colab": {
|
||||||
"metadata": {
|
"provenance": []
|
||||||
"id": "8v6PEfyXb0Gx"
|
},
|
||||||
},
|
"kernelspec": {
|
||||||
"outputs": [],
|
"name": "python3",
|
||||||
"source": [
|
"display_name": "Python 3"
|
||||||
"# clone the repo\n",
|
},
|
||||||
"!git clone https://github.com/soxoj/maigret\n",
|
"language_info": {
|
||||||
"!pip3 install -r maigret/requirements.txt"
|
"name": "python"
|
||||||
]
|
}
|
||||||
},
|
},
|
||||||
{
|
"cells": [
|
||||||
"cell_type": "code",
|
{
|
||||||
"execution_count": null,
|
"cell_type": "code",
|
||||||
"metadata": {
|
"execution_count": null,
|
||||||
"id": "cXOQUAhDchkl"
|
"metadata": {
|
||||||
},
|
"id": "acxNWJOUmLc4"
|
||||||
"outputs": [],
|
},
|
||||||
"source": [
|
"outputs": [],
|
||||||
"# help\n",
|
"source": [
|
||||||
"!python3 maigret/maigret.py --help"
|
"!git clone https://github.com/soxoj/maigret\n",
|
||||||
]
|
"!pip3 install ./maigret/\n",
|
||||||
},
|
"from IPython.display import clear_output\n",
|
||||||
{
|
"clear_output()\n",
|
||||||
"cell_type": "code",
|
"username = str(input(\"Username >> \"))\n",
|
||||||
"execution_count": null,
|
"!maigret {username} -a -n 10"
|
||||||
"metadata": {
|
]
|
||||||
"id": "SjDmpN4QGnJu"
|
},
|
||||||
},
|
{
|
||||||
"outputs": [],
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [],
|
||||||
"# search\n",
|
"metadata": {
|
||||||
"!python3 maigret/maigret.py user"
|
"id": "S3SmapMHmOoD"
|
||||||
]
|
},
|
||||||
}
|
"execution_count": null,
|
||||||
],
|
"outputs": []
|
||||||
"metadata": {
|
}
|
||||||
"colab": {
|
]
|
||||||
"collapsed_sections": [],
|
|
||||||
"include_colab_link": true,
|
|
||||||
"name": "maigret.ipynb",
|
|
||||||
"provenance": []
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.7.10"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 1
|
|
||||||
}
|
}
|
||||||
|
|||||||
-18
@@ -1,18 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import asyncio
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from maigret.maigret import main
|
|
||||||
|
|
||||||
|
|
||||||
def run():
|
|
||||||
try:
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.run_until_complete(main())
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print('Maigret is interrupted.')
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
run()
|
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
"""Maigret version file"""
|
"""Maigret version file"""
|
||||||
|
|
||||||
__version__ = '0.4.2'
|
__version__ = '0.4.4'
|
||||||
|
|||||||
+167
-120
@@ -1,38 +1,39 @@
|
|||||||
|
# Standard library imports
|
||||||
|
import ast
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import ssl
|
||||||
|
import sys
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
# Third party imports
|
||||||
|
import aiodns
|
||||||
|
from alive_progress import alive_bar
|
||||||
|
from aiohttp import ClientSession, TCPConnector, http_exceptions
|
||||||
|
from aiohttp.client_exceptions import ClientConnectorError, ServerDisconnectedError
|
||||||
|
from python_socks import _errors as proxy_errors
|
||||||
|
from socid_extractor import extract
|
||||||
try:
|
try:
|
||||||
from mock import Mock
|
from mock import Mock
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
import re
|
# Local imports
|
||||||
import ssl
|
|
||||||
import sys
|
|
||||||
import tqdm
|
|
||||||
from typing import Tuple, Optional, Dict, List
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
import aiodns
|
|
||||||
import tqdm.asyncio
|
|
||||||
from python_socks import _errors as proxy_errors
|
|
||||||
from socid_extractor import extract
|
|
||||||
from aiohttp import TCPConnector, ClientSession, http_exceptions
|
|
||||||
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
|
|
||||||
|
|
||||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
|
||||||
from . import errors
|
from . import errors
|
||||||
|
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||||
from .errors import CheckError
|
from .errors import CheckError
|
||||||
from .executors import (
|
from .executors import (
|
||||||
AsyncExecutor,
|
AsyncExecutor,
|
||||||
AsyncioSimpleExecutor,
|
AsyncioSimpleExecutor,
|
||||||
AsyncioProgressbarQueueExecutor,
|
AsyncioProgressbarQueueExecutor,
|
||||||
)
|
)
|
||||||
|
|
||||||
from .result import QueryResult, QueryStatus
|
from .result import QueryResult, QueryStatus
|
||||||
from .sites import MaigretDatabase, MaigretSite
|
from .sites import MaigretDatabase, MaigretSite
|
||||||
from .types import QueryOptions, QueryResultWrapper
|
from .types import QueryOptions, QueryResultWrapper
|
||||||
from .utils import get_random_user_agent, ascii_data_display
|
from .utils import ascii_data_display, get_random_user_agent
|
||||||
|
|
||||||
|
|
||||||
SUPPORTED_IDS = (
|
SUPPORTED_IDS = (
|
||||||
@@ -56,119 +57,120 @@ class CheckerBase:
|
|||||||
|
|
||||||
class SimpleAiohttpChecker(CheckerBase):
|
class SimpleAiohttpChecker(CheckerBase):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
proxy = kwargs.get('proxy')
|
self.proxy = kwargs.get('proxy')
|
||||||
cookie_jar = kwargs.get('cookie_jar')
|
self.cookie_jar = kwargs.get('cookie_jar')
|
||||||
self.logger = kwargs.get('logger', Mock())
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
self.url = None
|
||||||
# moved here to speed up the launch of Maigret
|
self.headers = None
|
||||||
from aiohttp_socks import ProxyConnector
|
self.allow_redirects = True
|
||||||
|
self.timeout = 0
|
||||||
# make http client session
|
self.method = 'get'
|
||||||
connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
|
|
||||||
connector.verify_ssl = False
|
|
||||||
self.session = ClientSession(
|
|
||||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
|
||||||
)
|
|
||||||
|
|
||||||
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||||
if method == 'get':
|
self.url = url
|
||||||
request_method = self.session.get
|
self.headers = headers
|
||||||
else:
|
self.allow_redirects = allow_redirects
|
||||||
request_method = self.session.head
|
self.timeout = timeout
|
||||||
|
self.method = method
|
||||||
future = request_method(
|
return None
|
||||||
url=url,
|
|
||||||
headers=headers,
|
|
||||||
allow_redirects=allow_redirects,
|
|
||||||
timeout=timeout,
|
|
||||||
)
|
|
||||||
|
|
||||||
return future
|
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self.session.close()
|
pass
|
||||||
|
|
||||||
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
|
||||||
html_text = None
|
|
||||||
status_code = 0
|
|
||||||
error: Optional[CheckError] = CheckError("Unknown")
|
|
||||||
|
|
||||||
|
async def _make_request(self, session, url, headers, allow_redirects, timeout, method, logger) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
try:
|
try:
|
||||||
response = await future
|
request_method = session.get if method == 'get' else session.head
|
||||||
|
async with request_method(
|
||||||
|
url=url,
|
||||||
|
headers=headers,
|
||||||
|
allow_redirects=allow_redirects,
|
||||||
|
timeout=timeout,
|
||||||
|
) as response:
|
||||||
|
status_code = response.status
|
||||||
|
response_content = await response.content.read()
|
||||||
|
charset = response.charset or "utf-8"
|
||||||
|
decoded_content = response_content.decode(charset, "ignore")
|
||||||
|
|
||||||
status_code = response.status
|
error = CheckError("Connection lost") if status_code == 0 else None
|
||||||
response_content = await response.content.read()
|
logger.debug(decoded_content)
|
||||||
charset = response.charset or "utf-8"
|
|
||||||
decoded_content = response_content.decode(charset, "ignore")
|
|
||||||
html_text = decoded_content
|
|
||||||
|
|
||||||
error = None
|
return decoded_content, status_code, error
|
||||||
if status_code == 0:
|
|
||||||
error = CheckError("Connection lost")
|
|
||||||
|
|
||||||
self.logger.debug(html_text)
|
|
||||||
|
|
||||||
except asyncio.TimeoutError as e:
|
except asyncio.TimeoutError as e:
|
||||||
error = CheckError("Request timeout", str(e))
|
return None, 0, CheckError("Request timeout", str(e))
|
||||||
except ClientConnectorError as e:
|
except ClientConnectorError as e:
|
||||||
error = CheckError("Connecting failure", str(e))
|
return None, 0, CheckError("Connecting failure", str(e))
|
||||||
except ServerDisconnectedError as e:
|
except ServerDisconnectedError as e:
|
||||||
error = CheckError("Server disconnected", str(e))
|
return None, 0, CheckError("Server disconnected", str(e))
|
||||||
except http_exceptions.BadHttpMessage as e:
|
except http_exceptions.BadHttpMessage as e:
|
||||||
error = CheckError("HTTP", str(e))
|
return None, 0, CheckError("HTTP", str(e))
|
||||||
except proxy_errors.ProxyError as e:
|
except proxy_errors.ProxyError as e:
|
||||||
error = CheckError("Proxy", str(e))
|
return None, 0, CheckError("Proxy", str(e))
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
error = CheckError("Interrupted")
|
return None, 0, CheckError("Interrupted")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# python-specific exceptions
|
|
||||||
if sys.version_info.minor > 6 and (
|
if sys.version_info.minor > 6 and (
|
||||||
isinstance(e, ssl.SSLCertVerificationError)
|
isinstance(e, ssl.SSLCertVerificationError)
|
||||||
or isinstance(e, ssl.SSLError)
|
or isinstance(e, ssl.SSLError)
|
||||||
):
|
):
|
||||||
error = CheckError("SSL", str(e))
|
return None, 0, CheckError("SSL", str(e))
|
||||||
else:
|
else:
|
||||||
self.logger.debug(e, exc_info=True)
|
logger.debug(e, exc_info=True)
|
||||||
error = CheckError("Unexpected", str(e))
|
return None, 0, CheckError("Unexpected", str(e))
|
||||||
|
|
||||||
if error == "Invalid proxy response":
|
async def check(self) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
self.logger.debug(error, exc_info=True)
|
from aiohttp_socks import ProxyConnector
|
||||||
|
connector = ProxyConnector.from_url(self.proxy) if self.proxy else TCPConnector(ssl=False)
|
||||||
|
connector.verify_ssl = False
|
||||||
|
|
||||||
return str(html_text), status_code, error
|
async with ClientSession(
|
||||||
|
connector=connector,
|
||||||
|
trust_env=True,
|
||||||
|
cookie_jar=self.cookie_jar.copy() if self.cookie_jar else None,
|
||||||
|
) as session:
|
||||||
|
html_text, status_code, error = await self._make_request(
|
||||||
|
session,
|
||||||
|
self.url,
|
||||||
|
self.headers,
|
||||||
|
self.allow_redirects,
|
||||||
|
self.timeout,
|
||||||
|
self.method,
|
||||||
|
self.logger
|
||||||
|
)
|
||||||
|
|
||||||
|
if error and str(error) == "Invalid proxy response":
|
||||||
|
self.logger.debug(error, exc_info=True)
|
||||||
|
|
||||||
|
return str(html_text) if html_text else '', status_code, error
|
||||||
|
|
||||||
|
|
||||||
class ProxiedAiohttpChecker(SimpleAiohttpChecker):
|
class ProxiedAiohttpChecker(SimpleAiohttpChecker):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
proxy = kwargs.get('proxy')
|
self.proxy = kwargs.get('proxy')
|
||||||
cookie_jar = kwargs.get('cookie_jar')
|
self.cookie_jar = kwargs.get('cookie_jar')
|
||||||
self.logger = kwargs.get('logger', Mock())
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
|
||||||
# moved here to speed up the launch of Maigret
|
|
||||||
from aiohttp_socks import ProxyConnector
|
|
||||||
|
|
||||||
connector = ProxyConnector.from_url(proxy)
|
|
||||||
connector.verify_ssl = False
|
|
||||||
self.session = ClientSession(
|
|
||||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class AiodnsDomainResolver(CheckerBase):
|
class AiodnsDomainResolver(CheckerBase):
|
||||||
|
if sys.platform == 'win32': # Temporary workaround for Windows
|
||||||
|
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
self.logger = kwargs.get('logger', Mock())
|
self.logger = kwargs.get('logger', Mock())
|
||||||
self.resolver = aiodns.DNSResolver(loop=loop)
|
self.resolver = aiodns.DNSResolver(loop=loop)
|
||||||
|
|
||||||
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||||
return self.resolver.query(url, 'A')
|
self.url = url
|
||||||
|
return None
|
||||||
|
|
||||||
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
async def check(self) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
status = 404
|
status = 404
|
||||||
error = None
|
error = None
|
||||||
text = ''
|
text = ''
|
||||||
|
|
||||||
try:
|
try:
|
||||||
res = await future
|
res = await self.resolver.query(self.url, 'A')
|
||||||
text = str(res[0].host)
|
text = str(res[0].host)
|
||||||
status = 200
|
status = 200
|
||||||
except aiodns.error.DNSError:
|
except aiodns.error.DNSError:
|
||||||
@@ -187,7 +189,7 @@ class CheckerMock:
|
|||||||
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
async def check(self) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
return '', 0, None
|
return '', 0, None
|
||||||
|
|
||||||
@@ -373,8 +375,16 @@ def process_site_result(
|
|||||||
if extracted_ids_data:
|
if extracted_ids_data:
|
||||||
new_usernames = {}
|
new_usernames = {}
|
||||||
for k, v in extracted_ids_data.items():
|
for k, v in extracted_ids_data.items():
|
||||||
if "username" in k:
|
if "username" in k and not "usernames" in k:
|
||||||
new_usernames[v] = "username"
|
new_usernames[v] = "username"
|
||||||
|
elif "usernames" in k:
|
||||||
|
try:
|
||||||
|
tree = ast.literal_eval(v)
|
||||||
|
if type(tree) == list:
|
||||||
|
for n in tree:
|
||||||
|
new_usernames[n] = "username"
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(e)
|
||||||
if k in SUPPORTED_IDS:
|
if k in SUPPORTED_IDS:
|
||||||
new_usernames[v] = k
|
new_usernames[v] = k
|
||||||
|
|
||||||
@@ -397,7 +407,7 @@ def process_site_result(
|
|||||||
|
|
||||||
|
|
||||||
def make_site_result(
|
def make_site_result(
|
||||||
site: MaigretSite, username: str, options: QueryOptions, logger
|
site: MaigretSite, username: str, options: QueryOptions, logger, *args, **kwargs
|
||||||
) -> QueryResultWrapper:
|
) -> QueryResultWrapper:
|
||||||
results_site: QueryResultWrapper = {}
|
results_site: QueryResultWrapper = {}
|
||||||
|
|
||||||
@@ -414,6 +424,8 @@ def make_site_result(
|
|||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": get_random_user_agent(),
|
"User-Agent": get_random_user_agent(),
|
||||||
|
# tell server that we want to close connection after request
|
||||||
|
"Connection": "close",
|
||||||
}
|
}
|
||||||
|
|
||||||
headers.update(site.headers)
|
headers.update(site.headers)
|
||||||
@@ -421,6 +433,10 @@ def make_site_result(
|
|||||||
if "url" not in site.__dict__:
|
if "url" not in site.__dict__:
|
||||||
logger.error("No URL for site %s", site.name)
|
logger.error("No URL for site %s", site.name)
|
||||||
|
|
||||||
|
if kwargs.get('retry') and hasattr(site, "mirrors"):
|
||||||
|
site.url_main = random.choice(site.mirrors)
|
||||||
|
logger.info(f"Use {site.url_main} as a main url of site {site}")
|
||||||
|
|
||||||
# URL of user on site (if it exists)
|
# URL of user on site (if it exists)
|
||||||
url = site.url.format(
|
url = site.url.format(
|
||||||
urlMain=site.url_main, urlSubpath=site.url_subpath, username=quote(username)
|
urlMain=site.url_main, urlSubpath=site.url_subpath, username=quote(username)
|
||||||
@@ -516,7 +532,8 @@ def make_site_result(
|
|||||||
|
|
||||||
# Store future request object in the results object
|
# Store future request object in the results object
|
||||||
results_site["future"] = future
|
results_site["future"] = future
|
||||||
results_site["checker"] = checker
|
|
||||||
|
results_site["checker"] = checker
|
||||||
|
|
||||||
return results_site
|
return results_site
|
||||||
|
|
||||||
@@ -524,14 +541,19 @@ def make_site_result(
|
|||||||
async def check_site_for_username(
|
async def check_site_for_username(
|
||||||
site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
|
site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
|
||||||
) -> Tuple[str, QueryResultWrapper]:
|
) -> Tuple[str, QueryResultWrapper]:
|
||||||
default_result = make_site_result(site, username, options, logger)
|
default_result = make_site_result(
|
||||||
future = default_result.get("future")
|
site, username, options, logger, retry=kwargs.get('retry')
|
||||||
if not future:
|
)
|
||||||
|
# future = default_result.get("future")
|
||||||
|
# if not future:
|
||||||
|
# return site.name, default_result
|
||||||
|
|
||||||
|
checker = default_result.get("checker")
|
||||||
|
if not checker:
|
||||||
|
print(f"error, no checker for {site.name}")
|
||||||
return site.name, default_result
|
return site.name, default_result
|
||||||
|
|
||||||
checker = default_result["checker"]
|
response = await checker.check()
|
||||||
|
|
||||||
response = await checker.check(future=future)
|
|
||||||
|
|
||||||
response_result = process_site_result(
|
response_result = process_site_result(
|
||||||
response, query_notify, logger, default_result, site
|
response, query_notify, logger, default_result, site
|
||||||
@@ -543,8 +565,8 @@ async def check_site_for_username(
|
|||||||
|
|
||||||
|
|
||||||
async def debug_ip_request(checker, logger):
|
async def debug_ip_request(checker, logger):
|
||||||
future = checker.prepare(url="https://icanhazip.com")
|
checker.prepare(url="https://icanhazip.com")
|
||||||
ip, status, check_error = await checker.check(future)
|
ip, status, check_error = await checker.check()
|
||||||
if ip:
|
if ip:
|
||||||
logger.debug(f"My IP is: {ip.strip()}")
|
logger.debug(f"My IP is: {ip.strip()}")
|
||||||
else:
|
else:
|
||||||
@@ -580,6 +602,8 @@ async def maigret(
|
|||||||
cookies=None,
|
cookies=None,
|
||||||
retries=0,
|
retries=0,
|
||||||
check_domains=False,
|
check_domains=False,
|
||||||
|
*args,
|
||||||
|
**kwargs,
|
||||||
) -> QueryResultWrapper:
|
) -> QueryResultWrapper:
|
||||||
"""Main search func
|
"""Main search func
|
||||||
|
|
||||||
@@ -597,7 +621,7 @@ async def maigret(
|
|||||||
is_parsing_enabled -- Extract additional info from account pages.
|
is_parsing_enabled -- Extract additional info from account pages.
|
||||||
id_type -- Type of username to search.
|
id_type -- Type of username to search.
|
||||||
Default is 'username', see all supported here:
|
Default is 'username', see all supported here:
|
||||||
https://github.com/soxoj/maigret/wiki/Supported-identifier-types
|
https://maigret.readthedocs.io/en/latest/supported-identifier-types.html
|
||||||
max_connections -- Maximum number of concurrent connections allowed.
|
max_connections -- Maximum number of concurrent connections allowed.
|
||||||
Default is 100.
|
Default is 100.
|
||||||
no_progressbar -- Displaying of ASCII progressbar during scanner.
|
no_progressbar -- Displaying of ASCII progressbar during scanner.
|
||||||
@@ -660,7 +684,11 @@ async def maigret(
|
|||||||
executor = AsyncioSimpleExecutor(logger=logger)
|
executor = AsyncioSimpleExecutor(logger=logger)
|
||||||
else:
|
else:
|
||||||
executor = AsyncioProgressbarQueueExecutor(
|
executor = AsyncioProgressbarQueueExecutor(
|
||||||
logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
|
logger=logger,
|
||||||
|
in_parallel=max_connections,
|
||||||
|
timeout=timeout + 0.5,
|
||||||
|
*args,
|
||||||
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
# make options objects for all the requests
|
# make options objects for all the requests
|
||||||
@@ -702,7 +730,10 @@ async def maigret(
|
|||||||
tasks_dict[sitename] = (
|
tasks_dict[sitename] = (
|
||||||
check_site_for_username,
|
check_site_for_username,
|
||||||
[site, username, options, logger, query_notify],
|
[site, username, options, logger, query_notify],
|
||||||
{'default': (sitename, default_result)},
|
{
|
||||||
|
'default': (sitename, default_result),
|
||||||
|
'retry': retries - attempts + 1,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
cur_results = await executor.run(tasks_dict.values())
|
cur_results = await executor.run(tasks_dict.values())
|
||||||
@@ -725,10 +756,8 @@ async def maigret(
|
|||||||
|
|
||||||
# closing http client session
|
# closing http client session
|
||||||
await clearweb_checker.close()
|
await clearweb_checker.close()
|
||||||
if tor_proxy:
|
await tor_checker.close()
|
||||||
await tor_checker.close()
|
await i2p_checker.close()
|
||||||
if i2p_proxy:
|
|
||||||
await i2p_checker.close()
|
|
||||||
|
|
||||||
# notify caller that all queries are finished
|
# notify caller that all queries are finished
|
||||||
query_notify.finish()
|
query_notify.finish()
|
||||||
@@ -763,7 +792,7 @@ def timeout_check(value):
|
|||||||
|
|
||||||
async def site_self_check(
|
async def site_self_check(
|
||||||
site: MaigretSite,
|
site: MaigretSite,
|
||||||
logger,
|
logger: logging.Logger,
|
||||||
semaphore,
|
semaphore,
|
||||||
db: MaigretDatabase,
|
db: MaigretDatabase,
|
||||||
silent=False,
|
silent=False,
|
||||||
@@ -809,6 +838,9 @@ async def site_self_check(
|
|||||||
|
|
||||||
result = results_dict[site.name]["status"]
|
result = results_dict[site.name]["status"]
|
||||||
|
|
||||||
|
if result.error and 'Cannot connect to host' in result.error.desc:
|
||||||
|
changes["disabled"] = True
|
||||||
|
|
||||||
site_status = result.status
|
site_status = result.status
|
||||||
|
|
||||||
if site_status != status:
|
if site_status != status:
|
||||||
@@ -836,18 +868,24 @@ async def site_self_check(
|
|||||||
|
|
||||||
if changes["disabled"] != site.disabled:
|
if changes["disabled"] != site.disabled:
|
||||||
site.disabled = changes["disabled"]
|
site.disabled = changes["disabled"]
|
||||||
|
logger.info(f"Switching disabled status of {site.name} to {site.disabled}")
|
||||||
db.update_site(site)
|
db.update_site(site)
|
||||||
if not silent:
|
if not silent:
|
||||||
action = "Disabled" if site.disabled else "Enabled"
|
action = "Disabled" if site.disabled else "Enabled"
|
||||||
print(f"{action} site {site.name}...")
|
print(f"{action} site {site.name}...")
|
||||||
|
|
||||||
|
# remove service tag "unchecked"
|
||||||
|
if "unchecked" in site.tags:
|
||||||
|
site.tags.remove("unchecked")
|
||||||
|
db.update_site(site)
|
||||||
|
|
||||||
return changes
|
return changes
|
||||||
|
|
||||||
|
|
||||||
async def self_check(
|
async def self_check(
|
||||||
db: MaigretDatabase,
|
db: MaigretDatabase,
|
||||||
site_data: dict,
|
site_data: dict,
|
||||||
logger,
|
logger: logging.Logger,
|
||||||
silent=False,
|
silent=False,
|
||||||
max_connections=10,
|
max_connections=10,
|
||||||
proxy=None,
|
proxy=None,
|
||||||
@@ -861,6 +899,7 @@ async def self_check(
|
|||||||
def disabled_count(lst):
|
def disabled_count(lst):
|
||||||
return len(list(filter(lambda x: x.disabled, lst)))
|
return len(list(filter(lambda x: x.disabled, lst)))
|
||||||
|
|
||||||
|
unchecked_old_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
|
||||||
disabled_old_count = disabled_count(all_sites.values())
|
disabled_old_count = disabled_count(all_sites.values())
|
||||||
|
|
||||||
for _, site in all_sites.items():
|
for _, site in all_sites.items():
|
||||||
@@ -870,22 +909,30 @@ async def self_check(
|
|||||||
future = asyncio.ensure_future(check_coro)
|
future = asyncio.ensure_future(check_coro)
|
||||||
tasks.append(future)
|
tasks.append(future)
|
||||||
|
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
if tasks:
|
||||||
await f
|
with alive_bar(len(tasks), title='Self-checking', force_tty=True) as progress:
|
||||||
|
for f in asyncio.as_completed(tasks):
|
||||||
|
await f
|
||||||
|
progress() # Update the progress bar
|
||||||
|
|
||||||
|
unchecked_new_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
|
||||||
disabled_new_count = disabled_count(all_sites.values())
|
disabled_new_count = disabled_count(all_sites.values())
|
||||||
total_disabled = disabled_new_count - disabled_old_count
|
total_disabled = disabled_new_count - disabled_old_count
|
||||||
|
|
||||||
if total_disabled >= 0:
|
if total_disabled:
|
||||||
message = "Disabled"
|
if total_disabled >= 0:
|
||||||
else:
|
message = "Disabled"
|
||||||
message = "Enabled"
|
else:
|
||||||
total_disabled *= -1
|
message = "Enabled"
|
||||||
|
total_disabled *= -1
|
||||||
|
|
||||||
if not silent:
|
if not silent:
|
||||||
print(
|
print(
|
||||||
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
||||||
"Run with `--info` flag to get more information"
|
"Run with `--info` flag to get more information"
|
||||||
)
|
)
|
||||||
|
|
||||||
return total_disabled != 0
|
if unchecked_new_count != unchecked_old_count:
|
||||||
|
print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}")
|
||||||
|
|
||||||
|
return total_disabled != 0 or unchecked_new_count != unchecked_old_count
|
||||||
|
|||||||
+8
-1
@@ -58,13 +58,20 @@ COMMON_ERRORS = {
|
|||||||
'Сайт заблокирован хостинг-провайдером': CheckError(
|
'Сайт заблокирован хостинг-провайдером': CheckError(
|
||||||
'Site-specific', 'Site is disabled (Beget)'
|
'Site-specific', 'Site is disabled (Beget)'
|
||||||
),
|
),
|
||||||
|
'Generated by cloudfront (CloudFront)': CheckError(
|
||||||
|
'Request blocked', 'Cloudflare'
|
||||||
|
),
|
||||||
|
'/cdn-cgi/challenge-platform/h/b/orchestrate/chl_page': CheckError(
|
||||||
|
'Just a moment: bot redirect challenge', 'Cloudflare'
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
ERRORS_TYPES = {
|
ERRORS_TYPES = {
|
||||||
'Captcha': 'Try to switch to another IP address or to use service cookies',
|
'Captcha': 'Try to switch to another IP address or to use service cookies',
|
||||||
'Bot protection': 'Try to switch to another IP address',
|
'Bot protection': 'Try to switch to another IP address',
|
||||||
'Censorship': 'switch to another internet service provider',
|
'Censorship': 'Switch to another internet service provider',
|
||||||
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
||||||
|
'Connecting failure': 'Try to decrease number of parallel connections (e.g. -n 10)',
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: checking for reason
|
# TODO: checking for reason
|
||||||
|
|||||||
+77
-22
@@ -1,12 +1,13 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import time
|
|
||||||
import tqdm
|
|
||||||
import sys
|
import sys
|
||||||
from typing import Iterable, Any, List
|
import time
|
||||||
|
from typing import Any, Iterable, List
|
||||||
|
|
||||||
|
import alive_progress
|
||||||
|
from alive_progress import alive_bar
|
||||||
|
|
||||||
from .types import QueryDraft
|
from .types import QueryDraft
|
||||||
|
|
||||||
|
|
||||||
def create_task_func():
|
def create_task_func():
|
||||||
if sys.version_info.minor > 6:
|
if sys.version_info.minor > 6:
|
||||||
create_asyncio_task = asyncio.create_task
|
create_asyncio_task = asyncio.create_task
|
||||||
@@ -34,9 +35,14 @@ class AsyncExecutor:
|
|||||||
class AsyncioSimpleExecutor(AsyncExecutor):
|
class AsyncioSimpleExecutor(AsyncExecutor):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 100))
|
||||||
|
|
||||||
async def _run(self, tasks: Iterable[QueryDraft]):
|
async def _run(self, tasks: Iterable[QueryDraft]):
|
||||||
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
async def sem_task(f, args, kwargs):
|
||||||
|
async with self.semaphore:
|
||||||
|
return await f(*args, **kwargs)
|
||||||
|
|
||||||
|
futures = [sem_task(f, args, kwargs) for f, args, kwargs in tasks]
|
||||||
return await asyncio.gather(*futures)
|
return await asyncio.gather(*futures)
|
||||||
|
|
||||||
|
|
||||||
@@ -46,9 +52,20 @@ class AsyncioProgressbarExecutor(AsyncExecutor):
|
|||||||
|
|
||||||
async def _run(self, tasks: Iterable[QueryDraft]):
|
async def _run(self, tasks: Iterable[QueryDraft]):
|
||||||
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
||||||
|
total_tasks = len(futures)
|
||||||
results = []
|
results = []
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(futures):
|
|
||||||
results.append(await f)
|
# Use alive_bar for progress tracking
|
||||||
|
with alive_bar(total_tasks, title='Searching', force_tty=True) as progress:
|
||||||
|
# Chunk progress updates for efficiency
|
||||||
|
async def track_task(task):
|
||||||
|
result = await task
|
||||||
|
progress() # Update progress bar once task completes
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Use gather to run tasks concurrently and track progress
|
||||||
|
results = await asyncio.gather(*(track_task(f) for f in futures))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
@@ -66,8 +83,12 @@ class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor):
|
|||||||
async def semaphore_gather(tasks: Iterable[QueryDraft]):
|
async def semaphore_gather(tasks: Iterable[QueryDraft]):
|
||||||
coros = [_wrap_query(q) for q in tasks]
|
coros = [_wrap_query(q) for q in tasks]
|
||||||
results = []
|
results = []
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(coros):
|
|
||||||
results.append(await f)
|
# Use alive_bar correctly as a context manager
|
||||||
|
with alive_bar(len(coros), title='Searching', force_tty=True) as progress:
|
||||||
|
for f in asyncio.as_completed(coros):
|
||||||
|
results.append(await f)
|
||||||
|
progress() # Update the progress bar
|
||||||
return results
|
return results
|
||||||
|
|
||||||
return await semaphore_gather(tasks)
|
return await semaphore_gather(tasks)
|
||||||
@@ -77,11 +98,35 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
|||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.workers_count = kwargs.get('in_parallel', 10)
|
self.workers_count = kwargs.get('in_parallel', 10)
|
||||||
self.progress_func = kwargs.get('progress_func', tqdm.tqdm)
|
|
||||||
self.queue = asyncio.Queue(self.workers_count)
|
self.queue = asyncio.Queue(self.workers_count)
|
||||||
self.timeout = kwargs.get('timeout')
|
self.timeout = kwargs.get('timeout')
|
||||||
|
# Pass a progress function; alive_bar by default
|
||||||
|
self.progress_func = kwargs.get('progress_func', alive_bar)
|
||||||
|
self.progress = None
|
||||||
|
|
||||||
|
# TODO: tests
|
||||||
|
async def increment_progress(self, count):
|
||||||
|
"""Update progress by calling the provided progress function."""
|
||||||
|
if self.progress:
|
||||||
|
if asyncio.iscoroutinefunction(self.progress):
|
||||||
|
await self.progress(count)
|
||||||
|
else:
|
||||||
|
self.progress(count)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
# TODO: tests
|
||||||
|
async def stop_progress(self):
|
||||||
|
"""Stop the progress tracking."""
|
||||||
|
if hasattr(self.progress, "close") and self.progress:
|
||||||
|
close_func = self.progress.close
|
||||||
|
if asyncio.iscoroutinefunction(close_func):
|
||||||
|
await close_func()
|
||||||
|
else:
|
||||||
|
close_func()
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
async def worker(self):
|
async def worker(self):
|
||||||
|
"""Consume tasks from the queue and process them."""
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
f, args, kwargs = self.queue.get_nowait()
|
f, args, kwargs = self.queue.get_nowait()
|
||||||
@@ -96,23 +141,33 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
|||||||
result = kwargs.get('default')
|
result = kwargs.get('default')
|
||||||
|
|
||||||
self.results.append(result)
|
self.results.append(result)
|
||||||
self.progress.update(1)
|
|
||||||
|
if self.progress:
|
||||||
|
await self.increment_progress(1)
|
||||||
|
|
||||||
self.queue.task_done()
|
self.queue.task_done()
|
||||||
|
|
||||||
async def _run(self, queries: Iterable[QueryDraft]):
|
async def _run(self, queries: Iterable[QueryDraft]):
|
||||||
|
"""Main runner function to execute tasks with progress tracking."""
|
||||||
self.results: List[Any] = []
|
self.results: List[Any] = []
|
||||||
|
|
||||||
queries_list = list(queries)
|
queries_list = list(queries)
|
||||||
|
|
||||||
min_workers = min(len(queries_list), self.workers_count)
|
min_workers = min(len(queries_list), self.workers_count)
|
||||||
|
|
||||||
workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
|
workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
|
||||||
|
|
||||||
self.progress = self.progress_func(total=len(queries_list))
|
# Initialize the progress bar
|
||||||
for t in queries_list:
|
if self.progress_func:
|
||||||
await self.queue.put(t)
|
with self.progress_func(len(queries_list), title="Searching", force_tty=True) as bar:
|
||||||
await self.queue.join()
|
self.progress = bar # Assign alive_bar's callable to self.progress
|
||||||
for w in workers:
|
|
||||||
w.cancel()
|
# Add tasks to the queue
|
||||||
self.progress.close()
|
for t in queries_list:
|
||||||
return self.results
|
await self.queue.put(t)
|
||||||
|
|
||||||
|
# Wait for tasks to complete
|
||||||
|
await self.queue.join()
|
||||||
|
|
||||||
|
# Cancel any remaining workers
|
||||||
|
for w in workers:
|
||||||
|
w.cancel()
|
||||||
|
|
||||||
|
return self.results
|
||||||
+51
-10
@@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Maigret main module
|
Maigret main module
|
||||||
"""
|
"""
|
||||||
|
import ast
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -40,9 +41,10 @@ from .submit import Submitter
|
|||||||
from .types import QueryResultWrapper
|
from .types import QueryResultWrapper
|
||||||
from .utils import get_dict_ascii_tree
|
from .utils import get_dict_ascii_tree
|
||||||
from .settings import Settings
|
from .settings import Settings
|
||||||
|
from .permutator import Permute
|
||||||
|
|
||||||
|
|
||||||
def notify_about_errors(search_results: QueryResultWrapper, query_notify):
|
def notify_about_errors(search_results: QueryResultWrapper, query_notify, show_statistics=False):
|
||||||
errs = errors.extract_and_group(search_results)
|
errs = errors.extract_and_group(search_results)
|
||||||
was_errs_displayed = False
|
was_errs_displayed = False
|
||||||
for e in errs:
|
for e in errs:
|
||||||
@@ -56,12 +58,17 @@ def notify_about_errors(search_results: QueryResultWrapper, query_notify):
|
|||||||
query_notify.warning(text, '!')
|
query_notify.warning(text, '!')
|
||||||
was_errs_displayed = True
|
was_errs_displayed = True
|
||||||
|
|
||||||
|
if show_statistics:
|
||||||
|
query_notify.warning(f'Verbose error statistics:')
|
||||||
|
for e in errs:
|
||||||
|
text = f'{e["err"]}: {round(e["perc"],2)}%'
|
||||||
|
query_notify.warning(text, '!')
|
||||||
|
|
||||||
if was_errs_displayed:
|
if was_errs_displayed:
|
||||||
query_notify.warning(
|
query_notify.warning(
|
||||||
'You can see detailed site check errors with a flag `--print-errors`'
|
'You can see detailed site check errors with a flag `--print-errors`'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract_ids_from_page(url, logger, timeout=5) -> dict:
|
def extract_ids_from_page(url, logger, timeout=5) -> dict:
|
||||||
results = {}
|
results = {}
|
||||||
# url, headers
|
# url, headers
|
||||||
@@ -85,8 +92,17 @@ def extract_ids_from_page(url, logger, timeout=5) -> dict:
|
|||||||
else:
|
else:
|
||||||
print(get_dict_ascii_tree(info.items(), new_line=False), ' ')
|
print(get_dict_ascii_tree(info.items(), new_line=False), ' ')
|
||||||
for k, v in info.items():
|
for k, v in info.items():
|
||||||
if 'username' in k:
|
# TODO: merge with the same functionality in checking module
|
||||||
|
if 'username' in k and not 'usernames' in k:
|
||||||
results[v] = 'username'
|
results[v] = 'username'
|
||||||
|
elif 'usernames' in k:
|
||||||
|
try:
|
||||||
|
tree = ast.literal_eval(v)
|
||||||
|
if type(tree) == list:
|
||||||
|
for n in tree:
|
||||||
|
results[n] = 'username'
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(e)
|
||||||
if k in SUPPORTED_IDS:
|
if k in SUPPORTED_IDS:
|
||||||
results[v] = k
|
results[v] = k
|
||||||
|
|
||||||
@@ -172,7 +188,7 @@ def setup_arguments_parser(settings: Settings):
|
|||||||
type=int,
|
type=int,
|
||||||
dest="connections",
|
dest="connections",
|
||||||
default=settings.max_connections,
|
default=settings.max_connections,
|
||||||
help="Allowed number of concurrent connections.",
|
help=f"Allowed number of concurrent connections (default {settings.max_connections}).",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-recursion",
|
"--no-recursion",
|
||||||
@@ -195,6 +211,12 @@ def setup_arguments_parser(settings: Settings):
|
|||||||
choices=SUPPORTED_IDS,
|
choices=SUPPORTED_IDS,
|
||||||
help="Specify identifier(s) type (default: username).",
|
help="Specify identifier(s) type (default: username).",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--permute",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Permute at least 2 usernames to generate more possible usernames.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--db",
|
"--db",
|
||||||
metavar="DB_FILE",
|
metavar="DB_FILE",
|
||||||
@@ -477,7 +499,7 @@ async def main():
|
|||||||
arg_parser = setup_arguments_parser(settings)
|
arg_parser = setup_arguments_parser(settings)
|
||||||
args = arg_parser.parse_args()
|
args = arg_parser.parse_args()
|
||||||
|
|
||||||
# Re-set loggging level based on args
|
# Re-set logging level based on args
|
||||||
if args.debug:
|
if args.debug:
|
||||||
log_level = logging.DEBUG
|
log_level = logging.DEBUG
|
||||||
elif args.info:
|
elif args.info:
|
||||||
@@ -492,6 +514,10 @@ async def main():
|
|||||||
for u in args.username
|
for u in args.username
|
||||||
if u and u not in ['-'] and u not in args.ignore_ids_list
|
if u and u not in ['-'] and u not in args.ignore_ids_list
|
||||||
}
|
}
|
||||||
|
original_usernames = ""
|
||||||
|
if args.permute and len(usernames) > 1 and args.id_type == 'username':
|
||||||
|
original_usernames = " ".join(usernames.keys())
|
||||||
|
usernames = Permute(usernames).gather(method='strict')
|
||||||
|
|
||||||
parsing_enabled = not args.disable_extracting
|
parsing_enabled = not args.disable_extracting
|
||||||
recursive_search_enabled = not args.disable_recursive_search
|
recursive_search_enabled = not args.disable_recursive_search
|
||||||
@@ -543,7 +569,11 @@ async def main():
|
|||||||
|
|
||||||
# Database self-checking
|
# Database self-checking
|
||||||
if args.self_check:
|
if args.self_check:
|
||||||
print('Maigret sites database self-checking...')
|
if len(site_data) == 0:
|
||||||
|
query_notify.warning('No sites to self-check with the current filters! Exiting...')
|
||||||
|
return
|
||||||
|
|
||||||
|
query_notify.success(f'Maigret sites database self-check started for {len(site_data)} sites...')
|
||||||
is_need_update = await self_check(
|
is_need_update = await self_check(
|
||||||
db,
|
db,
|
||||||
site_data,
|
site_data,
|
||||||
@@ -562,7 +592,9 @@ async def main():
|
|||||||
print('Database was successfully updated.')
|
print('Database was successfully updated.')
|
||||||
else:
|
else:
|
||||||
print('Updates will be applied only for current search session.')
|
print('Updates will be applied only for current search session.')
|
||||||
print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
|
|
||||||
|
if args.verbose or args.debug:
|
||||||
|
query_notify.info('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
|
||||||
|
|
||||||
# Database statistics
|
# Database statistics
|
||||||
if args.stats:
|
if args.stats:
|
||||||
@@ -581,6 +613,12 @@ async def main():
|
|||||||
query_notify.warning('No usernames to check, exiting.')
|
query_notify.warning('No usernames to check, exiting.')
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
if len(usernames) > 1 and args.permute and args.id_type == 'username':
|
||||||
|
query_notify.warning(
|
||||||
|
f"{len(usernames)} permutations from {original_usernames} to check..." +
|
||||||
|
get_dict_ascii_tree(usernames, prepend="\t")
|
||||||
|
)
|
||||||
|
|
||||||
if not site_data:
|
if not site_data:
|
||||||
query_notify.warning('No sites to check, exiting!')
|
query_notify.warning('No sites to check, exiting!')
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
@@ -644,7 +682,7 @@ async def main():
|
|||||||
check_domains=args.with_domains,
|
check_domains=args.with_domains,
|
||||||
)
|
)
|
||||||
|
|
||||||
notify_about_errors(results, query_notify)
|
notify_about_errors(results, query_notify, show_statistics=args.verbose)
|
||||||
|
|
||||||
if args.reports_sorting == "data":
|
if args.reports_sorting == "data":
|
||||||
results = sort_report_by_data_points(results)
|
results = sort_report_by_data_points(results)
|
||||||
@@ -719,8 +757,11 @@ async def main():
|
|||||||
|
|
||||||
def run():
|
def run():
|
||||||
try:
|
try:
|
||||||
loop = asyncio.get_event_loop()
|
if sys.version_info.minor >= 10:
|
||||||
loop.run_until_complete(main())
|
asyncio.run(main())
|
||||||
|
else:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
loop.run_until_complete(main())
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print('Maigret is interrupted.')
|
print('Maigret is interrupted.')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
@@ -211,6 +211,10 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
else:
|
else:
|
||||||
print(msg)
|
print(msg)
|
||||||
|
|
||||||
|
def success(self, message, symbol="+"):
|
||||||
|
msg = f"[{symbol}] {message}"
|
||||||
|
self._colored_print(Fore.GREEN, msg)
|
||||||
|
|
||||||
def warning(self, message, symbol="-"):
|
def warning(self, message, symbol="-"):
|
||||||
msg = f"[{symbol}] {message}"
|
msg = f"[{symbol}] {message}"
|
||||||
self._colored_print(Fore.YELLOW, msg)
|
self._colored_print(Fore.YELLOW, msg)
|
||||||
|
|||||||
@@ -0,0 +1,26 @@
|
|||||||
|
# License MIT. by balestek https://github.com/balestek
|
||||||
|
from itertools import permutations
|
||||||
|
|
||||||
|
|
||||||
|
class Permute:
|
||||||
|
def __init__(self, elements: dict):
|
||||||
|
self.separators = ["", "_", "-", "."]
|
||||||
|
self.elements = elements
|
||||||
|
|
||||||
|
def gather(self, method: str = "strict" or "all") -> dict:
|
||||||
|
permutations_dict = {}
|
||||||
|
for i in range(1, len(self.elements) + 1):
|
||||||
|
for subset in permutations(self.elements, i):
|
||||||
|
if i == 1:
|
||||||
|
if method == "all":
|
||||||
|
permutations_dict[subset[0]] = self.elements[subset[0]]
|
||||||
|
permutations_dict["_" + subset[0]] = self.elements[subset[0]]
|
||||||
|
permutations_dict[subset[0] + "_"] = self.elements[subset[0]]
|
||||||
|
else:
|
||||||
|
for separator in self.separators:
|
||||||
|
perm = separator.join(subset)
|
||||||
|
permutations_dict[perm] = self.elements[subset[0]]
|
||||||
|
if separator == "":
|
||||||
|
permutations_dict["_" + perm] = self.elements[subset[0]]
|
||||||
|
permutations_dict[perm + "_"] = self.elements[subset[0]]
|
||||||
|
return permutations_dict
|
||||||
+7
-3
@@ -8,6 +8,7 @@ from datetime import datetime
|
|||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
import xmind
|
import xmind
|
||||||
|
from dateutil.tz import gettz
|
||||||
from dateutil.parser import parse as parse_datetime_str
|
from dateutil.parser import parse as parse_datetime_str
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
|
|
||||||
@@ -16,6 +17,8 @@ from .result import QueryStatus
|
|||||||
from .sites import MaigretDatabase
|
from .sites import MaigretDatabase
|
||||||
from .utils import is_country_tag, CaseConverter, enrich_link_str
|
from .utils import is_country_tag, CaseConverter, enrich_link_str
|
||||||
|
|
||||||
|
|
||||||
|
ADDITIONAL_TZINFO = {"CDT": gettz("America/Chicago")}
|
||||||
SUPPORTED_JSON_REPORT_FORMATS = [
|
SUPPORTED_JSON_REPORT_FORMATS = [
|
||||||
"simple",
|
"simple",
|
||||||
"ndjson",
|
"ndjson",
|
||||||
@@ -67,7 +70,7 @@ def save_txt_report(filename: str, username: str, results: dict):
|
|||||||
def save_html_report(filename: str, context: dict):
|
def save_html_report(filename: str, context: dict):
|
||||||
template, _ = generate_report_template(is_pdf=False)
|
template, _ = generate_report_template(is_pdf=False)
|
||||||
filled_template = template.render(**context)
|
filled_template = template.render(**context)
|
||||||
with open(filename, "w") as f:
|
with open(filename, "w", encoding="utf-8") as f:
|
||||||
f.write(filled_template)
|
f.write(filled_template)
|
||||||
|
|
||||||
|
|
||||||
@@ -292,8 +295,8 @@ def generate_report_context(username_results: list):
|
|||||||
first_seen = created_at
|
first_seen = created_at
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
known_time = parse_datetime_str(first_seen)
|
known_time = parse_datetime_str(first_seen, tzinfos=ADDITIONAL_TZINFO)
|
||||||
new_time = parse_datetime_str(created_at)
|
new_time = parse_datetime_str(created_at, tzinfos=ADDITIONAL_TZINFO)
|
||||||
if new_time < known_time:
|
if new_time < known_time:
|
||||||
first_seen = created_at
|
first_seen = created_at
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -302,6 +305,7 @@ def generate_report_context(username_results: list):
|
|||||||
first_seen,
|
first_seen,
|
||||||
created_at,
|
created_at,
|
||||||
str(e),
|
str(e),
|
||||||
|
exc_info=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
for k, v in status.ids_data.items():
|
for k, v in status.ids_data.items():
|
||||||
|
|||||||
+6723
-822
File diff suppressed because it is too large
Load Diff
@@ -1,21 +1,30 @@
|
|||||||
{
|
{
|
||||||
"presence_strings": [
|
"presence_strings": [
|
||||||
|
"user not found",
|
||||||
|
"404",
|
||||||
|
"Page not found",
|
||||||
|
"error 404",
|
||||||
"username",
|
"username",
|
||||||
"not found",
|
"not found",
|
||||||
"пользователь",
|
"пользователь",
|
||||||
"profile",
|
"profile",
|
||||||
"lastname",
|
"lastname",
|
||||||
"firstname",
|
"firstname",
|
||||||
|
"DisplayName",
|
||||||
"biography",
|
"biography",
|
||||||
|
"title",
|
||||||
"birthday",
|
"birthday",
|
||||||
"репутация",
|
"репутация",
|
||||||
"информация",
|
"информация",
|
||||||
"e-mail"
|
"e-mail",
|
||||||
|
"body",
|
||||||
|
"html",
|
||||||
|
"style"
|
||||||
],
|
],
|
||||||
"supposed_usernames": [
|
"supposed_usernames": [
|
||||||
"alex", "god", "admin", "red", "blue", "john"
|
"alex", "god", "admin", "red", "blue", "john"
|
||||||
],
|
],
|
||||||
"retries_count": 1,
|
"retries_count": 0,
|
||||||
"sites_db_path": "resources/data.json",
|
"sites_db_path": "resources/data.json",
|
||||||
"timeout": 30,
|
"timeout": 30,
|
||||||
"max_connections": 100,
|
"max_connections": 100,
|
||||||
|
|||||||
@@ -68,7 +68,6 @@
|
|||||||
<div class="row-mb">
|
<div class="row-mb">
|
||||||
<div class="col-md">
|
<div class="col-md">
|
||||||
<div class="card flex-md-row mb-4 box-shadow h-md-250">
|
<div class="card flex-md-row mb-4 box-shadow h-md-250">
|
||||||
<span style="position: absolute; right: 10px;"><a href="https://github.com/soxoj/maigret/issues/new?assignees=soxoj&labels=bug&template=report-false-result.md&title=Invalid%20result%20{{ v.url_user }}">Invalid?</a></span>
|
|
||||||
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status and v.status.ids_data and v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
|
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status and v.status.ids_data and v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
|
||||||
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
|
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
|
||||||
<h3 class="mb-0" style="padding-top: 1rem;">
|
<h3 class="mb-0" style="padding-top: 1rem;">
|
||||||
|
|||||||
@@ -64,7 +64,6 @@
|
|||||||
<div class="sitebox" style="margin-top: 20px;" >
|
<div class="sitebox" style="margin-top: 20px;" >
|
||||||
<div>
|
<div>
|
||||||
<div>
|
<div>
|
||||||
<span class="invalid-button"><a href="https://github.com/soxoj/maigret/issues/new?assignees=soxoj&labels=bug&template=report-false-result.md&title=Invalid%20result%20{{ v.url_user }}">Invalid?</a></span>
|
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
<td valign="top">
|
<td valign="top">
|
||||||
|
|||||||
+122
-29
@@ -21,6 +21,7 @@ class MaigretEngine:
|
|||||||
|
|
||||||
|
|
||||||
class MaigretSite:
|
class MaigretSite:
|
||||||
|
# Fields that should not be serialized when converting site to JSON
|
||||||
NOT_SERIALIZABLE_FIELDS = [
|
NOT_SERIALIZABLE_FIELDS = [
|
||||||
"name",
|
"name",
|
||||||
"engineData",
|
"engineData",
|
||||||
@@ -31,37 +32,65 @@ class MaigretSite:
|
|||||||
"urlRegexp",
|
"urlRegexp",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Username known to exist on the site
|
||||||
username_claimed = ""
|
username_claimed = ""
|
||||||
|
# Username known to not exist on the site
|
||||||
username_unclaimed = ""
|
username_unclaimed = ""
|
||||||
|
# Additional URL path component, e.g. /forum in https://example.com/forum/users/{username}
|
||||||
url_subpath = ""
|
url_subpath = ""
|
||||||
|
# Main site URL (the main page)
|
||||||
url_main = ""
|
url_main = ""
|
||||||
|
# Full URL pattern for username page, e.g. https://example.com/forum/users/{username}
|
||||||
url = ""
|
url = ""
|
||||||
|
# Whether site is disabled. Not used by Maigret without --use-disabled argument
|
||||||
disabled = False
|
disabled = False
|
||||||
|
# Whether a positive result indicates accounts with similar usernames rather than exact matches
|
||||||
similar_search = False
|
similar_search = False
|
||||||
|
# Whether to ignore 403 status codes
|
||||||
ignore403 = False
|
ignore403 = False
|
||||||
|
# Site category tags
|
||||||
tags: List[str] = []
|
tags: List[str] = []
|
||||||
|
|
||||||
|
# Type of identifier (username, gaia_id etc); see SUPPORTED_IDS in checking.py
|
||||||
type = "username"
|
type = "username"
|
||||||
|
# Custom HTTP headers
|
||||||
headers: Dict[str, str] = {}
|
headers: Dict[str, str] = {}
|
||||||
|
# Error message substrings
|
||||||
errors: Dict[str, str] = {}
|
errors: Dict[str, str] = {}
|
||||||
|
# Site activation requirements
|
||||||
activation: Dict[str, Any] = {}
|
activation: Dict[str, Any] = {}
|
||||||
|
# Regular expression for username validation
|
||||||
regex_check = None
|
regex_check = None
|
||||||
|
# URL to probe site status
|
||||||
url_probe = None
|
url_probe = None
|
||||||
|
# Type of check to perform
|
||||||
check_type = ""
|
check_type = ""
|
||||||
|
# Whether to only send HEAD requests (GET by default)
|
||||||
request_head_only = ""
|
request_head_only = ""
|
||||||
|
# GET parameters to include in requests
|
||||||
get_params: Dict[str, Any] = {}
|
get_params: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
# Substrings in HTML response that indicate profile exists
|
||||||
presense_strs: List[str] = []
|
presense_strs: List[str] = []
|
||||||
|
# Substrings in HTML response that indicate profile doesn't exist
|
||||||
absence_strs: List[str] = []
|
absence_strs: List[str] = []
|
||||||
|
# Site statistics
|
||||||
stats: Dict[str, Any] = {}
|
stats: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
# Site engine name
|
||||||
engine = None
|
engine = None
|
||||||
|
# Engine-specific configuration
|
||||||
engine_data: Dict[str, Any] = {}
|
engine_data: Dict[str, Any] = {}
|
||||||
|
# Engine instance
|
||||||
engine_obj: Optional["MaigretEngine"] = None
|
engine_obj: Optional["MaigretEngine"] = None
|
||||||
|
# Future for async requests
|
||||||
request_future = None
|
request_future = None
|
||||||
|
# Alexa traffic rank
|
||||||
alexa_rank = None
|
alexa_rank = None
|
||||||
|
# Source (in case a site is a mirror of another site)
|
||||||
source = None
|
source = None
|
||||||
|
|
||||||
|
# URL protocol (http/https)
|
||||||
protocol = ''
|
protocol = ''
|
||||||
|
|
||||||
def __init__(self, name, information):
|
def __init__(self, name, information):
|
||||||
@@ -80,6 +109,37 @@ class MaigretSite:
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f"{self.name} ({self.url_main})"
|
return f"{self.name} ({self.url_main})"
|
||||||
|
|
||||||
|
def __is_equal_by_url_or_name(self, url_or_name_str: str):
|
||||||
|
lower_url_or_name_str = url_or_name_str.lower()
|
||||||
|
lower_url = self.url.lower()
|
||||||
|
lower_name = self.name.lower()
|
||||||
|
lower_url_main = self.url_main.lower()
|
||||||
|
|
||||||
|
return \
|
||||||
|
lower_name == lower_url_or_name_str or \
|
||||||
|
(lower_url_main and lower_url_main == lower_url_or_name_str) or \
|
||||||
|
(lower_url_main and lower_url_main in lower_url_or_name_str) or \
|
||||||
|
(lower_url_main and lower_url_or_name_str in lower_url_main) or \
|
||||||
|
(lower_url and lower_url_or_name_str in lower_url)
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
if isinstance(other, MaigretSite):
|
||||||
|
# Compare only relevant attributes, not internal state like request_future
|
||||||
|
attrs_to_compare = [
|
||||||
|
'name', 'url_main', 'url_subpath', 'type', 'headers',
|
||||||
|
'errors', 'activation', 'regex_check', 'url_probe',
|
||||||
|
'check_type', 'request_head_only', 'get_params',
|
||||||
|
'presense_strs', 'absence_strs', 'stats', 'engine',
|
||||||
|
'engine_data', 'alexa_rank', 'source', 'protocol'
|
||||||
|
]
|
||||||
|
|
||||||
|
return all(getattr(self, attr) == getattr(other, attr)
|
||||||
|
for attr in attrs_to_compare)
|
||||||
|
elif isinstance(other, str):
|
||||||
|
# Compare only by name (exactly) or url_main (partial similarity)
|
||||||
|
return self.__is_equal_by_url_or_name(other)
|
||||||
|
return False
|
||||||
|
|
||||||
def update_detectors(self):
|
def update_detectors(self):
|
||||||
if "url" in self.__dict__:
|
if "url" in self.__dict__:
|
||||||
url = self.url
|
url = self.url
|
||||||
@@ -101,6 +161,10 @@ class MaigretSite:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def extract_id_from_url(self, url: str) -> Optional[Tuple[str, str]]:
|
def extract_id_from_url(self, url: str) -> Optional[Tuple[str, str]]:
|
||||||
|
"""
|
||||||
|
Extracts username from url.
|
||||||
|
It's outdated, detects only a format of https://example.com/{username}
|
||||||
|
"""
|
||||||
if not self.url_regexp:
|
if not self.url_regexp:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -223,6 +287,15 @@ class MaigretDatabase:
|
|||||||
def sites_dict(self):
|
def sites_dict(self):
|
||||||
return {site.name: site for site in self._sites}
|
return {site.name: site for site in self._sites}
|
||||||
|
|
||||||
|
def has_site(self, site: MaigretSite):
|
||||||
|
for s in self._sites:
|
||||||
|
if site == s:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def __contains__(self, site):
|
||||||
|
return self.has_site(site)
|
||||||
|
|
||||||
def ranked_sites_dict(
|
def ranked_sites_dict(
|
||||||
self,
|
self,
|
||||||
reverse=False,
|
reverse=False,
|
||||||
@@ -234,6 +307,17 @@ class MaigretDatabase:
|
|||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Ranking and filtering of the sites list
|
Ranking and filtering of the sites list
|
||||||
|
|
||||||
|
Args:
|
||||||
|
reverse (bool, optional): Reverse the sorting order. Defaults to False.
|
||||||
|
top (int, optional): Maximum number of sites to return. Defaults to sys.maxsize.
|
||||||
|
tags (list, optional): List of tags to filter sites by. Defaults to empty list.
|
||||||
|
names (list, optional): List of site names (or urls, see MaigretSite.__eq__) to filter by. Defaults to empty list.
|
||||||
|
disabled (bool, optional): Whether to include disabled sites. Defaults to True.
|
||||||
|
id_type (str, optional): Type of identifier to filter by. Defaults to "username".
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Dictionary of filtered and ranked sites, with site names as keys and MaigretSite objects as values
|
||||||
"""
|
"""
|
||||||
normalized_names = list(map(str.lower, names))
|
normalized_names = list(map(str.lower, names))
|
||||||
normalized_tags = list(map(str.lower, tags))
|
normalized_tags = list(map(str.lower, tags))
|
||||||
@@ -420,55 +504,64 @@ class MaigretDatabase:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
def get_db_stats(self, is_markdown=False):
|
def get_db_stats(self, is_markdown=False):
|
||||||
|
# Initialize counters
|
||||||
sites_dict = self.sites_dict
|
sites_dict = self.sites_dict
|
||||||
|
|
||||||
urls = {}
|
urls = {}
|
||||||
tags = {}
|
tags = {}
|
||||||
output = ""
|
|
||||||
disabled_count = 0
|
disabled_count = 0
|
||||||
total_count = len(sites_dict)
|
|
||||||
|
|
||||||
message_checks = 0
|
|
||||||
message_checks_one_factor = 0
|
message_checks_one_factor = 0
|
||||||
|
status_checks = 0
|
||||||
|
|
||||||
for _, site in sites_dict.items():
|
# Collect statistics
|
||||||
|
for site in sites_dict.values():
|
||||||
|
# Count disabled sites
|
||||||
if site.disabled:
|
if site.disabled:
|
||||||
disabled_count += 1
|
disabled_count += 1
|
||||||
|
|
||||||
|
# Count URL types
|
||||||
url_type = site.get_url_template()
|
url_type = site.get_url_template()
|
||||||
urls[url_type] = urls.get(url_type, 0) + 1
|
urls[url_type] = urls.get(url_type, 0) + 1
|
||||||
|
|
||||||
if site.check_type == 'message' and not site.disabled:
|
# Count check types for enabled sites
|
||||||
message_checks += 1
|
if not site.disabled:
|
||||||
if site.absence_strs and site.presense_strs:
|
if site.check_type == 'message':
|
||||||
continue
|
if not (site.absence_strs and site.presense_strs):
|
||||||
message_checks_one_factor += 1
|
message_checks_one_factor += 1
|
||||||
|
elif site.check_type == 'status_code':
|
||||||
|
status_checks += 1
|
||||||
|
|
||||||
|
# Count tags
|
||||||
if not site.tags:
|
if not site.tags:
|
||||||
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
||||||
|
|
||||||
for tag in filter(lambda x: not is_country_tag(x), site.tags):
|
for tag in filter(lambda x: not is_country_tag(x), site.tags):
|
||||||
tags[tag] = tags.get(tag, 0) + 1
|
tags[tag] = tags.get(tag, 0) + 1
|
||||||
|
|
||||||
enabled_perc = round(100*(total_count-disabled_count)/total_count, 2)
|
# Calculate percentages
|
||||||
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count} = {enabled_perc}%\n\n"
|
total_count = len(sites_dict)
|
||||||
|
enabled_count = total_count - disabled_count
|
||||||
|
enabled_perc = round(100 * enabled_count / total_count, 2)
|
||||||
|
checks_perc = round(100 * message_checks_one_factor / enabled_count, 2)
|
||||||
|
status_checks_perc = round(100 * status_checks / enabled_count, 2)
|
||||||
|
|
||||||
checks_perc = round(100*message_checks_one_factor/message_checks, 2)
|
# Format output
|
||||||
output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} = {checks_perc}% (false positive risks)\n\n"
|
separator = "\n\n"
|
||||||
|
output = [
|
||||||
|
f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%",
|
||||||
|
f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)",
|
||||||
|
f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)",
|
||||||
|
f"False positive risk (total): {checks_perc + status_checks_perc:.2f}%",
|
||||||
|
self._format_top_items("profile URLs", urls, 20, is_markdown),
|
||||||
|
self._format_top_items("tags", tags, 20, is_markdown, self._tags),
|
||||||
|
]
|
||||||
|
|
||||||
top_urls_count = 20
|
return separator.join(output)
|
||||||
output += f"Top {top_urls_count} profile URLs:\n"
|
|
||||||
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:top_urls_count]:
|
def _format_top_items(self, title, items_dict, limit, is_markdown, valid_items=None):
|
||||||
|
"""Helper method to format top items lists"""
|
||||||
|
output = f"Top {limit} {title}:\n"
|
||||||
|
for item, count in sorted(items_dict.items(), key=lambda x: x[1], reverse=True)[:limit]:
|
||||||
if count == 1:
|
if count == 1:
|
||||||
break
|
break
|
||||||
output += f"- ({count})\t`{url}`\n" if is_markdown else f"{count}\t{url}\n"
|
mark = " (non-standard)" if valid_items is not None and item not in valid_items else ""
|
||||||
|
output += f"- ({count})\t`{item}`{mark}\n" if is_markdown else f"{count}\t{item}{mark}\n"
|
||||||
top_tags_count = 20
|
|
||||||
output += f"\nTop {top_tags_count} tags:\n"
|
|
||||||
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:top_tags_count]:
|
|
||||||
mark = ""
|
|
||||||
if tag not in self._tags:
|
|
||||||
mark = " (non-standard)"
|
|
||||||
output += f"- ({count})\t`{tag}`{mark}\n" if is_markdown else f"{count}\t{tag}{mark}\n"
|
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|||||||
+155
-23
@@ -2,9 +2,11 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import List
|
from typing import List
|
||||||
import xml.etree.ElementTree as ET
|
from xml.etree import ElementTree
|
||||||
from aiohttp import TCPConnector, ClientSession
|
from aiohttp import TCPConnector, ClientSession
|
||||||
import requests
|
import requests
|
||||||
|
import cloudscraper
|
||||||
|
from colorama import Fore, Style
|
||||||
|
|
||||||
from .activation import import_aiohttp_cookies
|
from .activation import import_aiohttp_cookies
|
||||||
from .checking import maigret
|
from .checking import maigret
|
||||||
@@ -14,12 +16,34 @@ from .sites import MaigretDatabase, MaigretSite, MaigretEngine
|
|||||||
from .utils import get_random_user_agent, get_match_ratio
|
from .utils import get_random_user_agent, get_match_ratio
|
||||||
|
|
||||||
|
|
||||||
|
class CloudflareSession:
    """Session-like wrapper around a ``cloudscraper`` scraper.

    Exposes awaitable ``get``/``text``/``close`` methods plus the ``status``
    attribute of the most recent response. The session object itself is
    returned from ``get`` and doubles as the response.
    """

    # Defaults so that status_code()/text() are safe to call before the first
    # request instead of raising AttributeError.
    status = None
    last_text = ""

    def __init__(self):
        self.scraper = cloudscraper.create_scraper()

    async def get(self, *args, **kwargs):
        """Perform a GET through the scraper and remember status and body.

        NOTE: ``self.scraper.get`` is a synchronous call, so the event loop
        is blocked for the duration of the request; the ``sleep(0)`` only
        yields control once beforehand.
        """
        await asyncio.sleep(0)
        res = self.scraper.get(*args, **kwargs)
        self.last_text = res.text
        self.status = res.status_code
        return self

    def status_code(self):
        """Return the status code of the last response (``None`` if none yet)."""
        return self.status

    async def text(self):
        """Return the body text of the last response (``""`` if none yet)."""
        await asyncio.sleep(0)
        return self.last_text

    async def close(self):
        """No-op; present so the object can be closed like other sessions."""
        pass
|
||||||
|
|
||||||
|
|
||||||
class Submitter:
|
class Submitter:
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
"User-Agent": get_random_user_agent(),
|
"User-Agent": get_random_user_agent(),
|
||||||
}
|
}
|
||||||
|
|
||||||
SEPARATORS = "\"'"
|
SEPARATORS = "\"'\n"
|
||||||
|
|
||||||
RATIO = 0.6
|
RATIO = 0.6
|
||||||
TOP_FEATURES = 5
|
TOP_FEATURES = 5
|
||||||
@@ -32,6 +56,7 @@ class Submitter:
|
|||||||
self.logger = logger
|
self.logger = logger
|
||||||
|
|
||||||
from aiohttp_socks import ProxyConnector
|
from aiohttp_socks import ProxyConnector
|
||||||
|
|
||||||
proxy = self.args.proxy
|
proxy = self.args.proxy
|
||||||
cookie_jar = None
|
cookie_jar = None
|
||||||
if args.cookie_file:
|
if args.cookie_file:
|
||||||
@@ -47,7 +72,7 @@ class Submitter:
|
|||||||
def get_alexa_rank(site_url_main):
|
def get_alexa_rank(site_url_main):
|
||||||
url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
|
url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
|
||||||
xml_data = requests.get(url).text
|
xml_data = requests.get(url).text
|
||||||
root = ET.fromstring(xml_data)
|
root = ElementTree.fromstring(xml_data)
|
||||||
alexa_rank = 0
|
alexa_rank = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -113,20 +138,27 @@ class Submitter:
|
|||||||
if status == QueryStatus.CLAIMED:
|
if status == QueryStatus.CLAIMED:
|
||||||
changes["disabled"] = True
|
changes["disabled"] = True
|
||||||
elif status == QueryStatus.CLAIMED:
|
elif status == QueryStatus.CLAIMED:
|
||||||
self.logger.warning(
|
print(
|
||||||
f"Not found `{username}` in {site.name}, must be claimed"
|
f"{Fore.YELLOW}[!] Not found `{username}` in {site.name}, must be claimed{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
self.logger.info(results_dict[site.name])
|
self.logger.warning(site.json)
|
||||||
changes["disabled"] = True
|
changes["disabled"] = True
|
||||||
else:
|
else:
|
||||||
self.logger.warning(
|
print(
|
||||||
f"Found `{username}` in {site.name}, must be available"
|
f"{Fore.YELLOW}[!] Found `{username}` in {site.name}, must be available{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
self.logger.info(results_dict[site.name])
|
self.logger.warning(site.json)
|
||||||
changes["disabled"] = True
|
changes["disabled"] = True
|
||||||
|
else:
|
||||||
|
print(f"{Fore.GREEN}[+] {username} is successfully checked: {status} in {site.name}{Style.RESET_ALL}")
|
||||||
|
|
||||||
self.logger.info(f"Site {site.name} checking is finished")
|
self.logger.info(f"Site {site.name} checking is finished")
|
||||||
|
|
||||||
|
# remove service tag "unchecked"
|
||||||
|
if "unchecked" in site.tags:
|
||||||
|
site.tags.remove("unchecked")
|
||||||
|
changes["tags"] = site.tags
|
||||||
|
|
||||||
return changes
|
return changes
|
||||||
|
|
||||||
def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog):
|
def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog):
|
||||||
@@ -141,16 +173,20 @@ class Submitter:
|
|||||||
fields['urlSubpath'] = f'/{subpath}'
|
fields['urlSubpath'] = f'/{subpath}'
|
||||||
return fields
|
return fields
|
||||||
|
|
||||||
async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
|
async def detect_known_engine(
|
||||||
|
self, url_exists, url_mainpage
|
||||||
|
) -> [List[MaigretSite], str]:
|
||||||
resp_text = ''
|
resp_text = ''
|
||||||
try:
|
try:
|
||||||
r = await self.session.get(url_mainpage)
|
r = await self.session.get(url_mainpage)
|
||||||
resp_text = await r.text()
|
content = await r.content.read()
|
||||||
|
charset = r.charset or "utf-8"
|
||||||
|
resp_text = content.decode(charset, "ignore")
|
||||||
self.logger.debug(resp_text)
|
self.logger.debug(resp_text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(e)
|
self.logger.warning(e)
|
||||||
print("Some error while checking main page")
|
print("Some error while checking main page")
|
||||||
return []
|
return [], resp_text
|
||||||
|
|
||||||
for engine in self.db.engines:
|
for engine in self.db.engines:
|
||||||
strs_to_check = engine.__dict__.get("presenseStrs")
|
strs_to_check = engine.__dict__.get("presenseStrs")
|
||||||
@@ -193,11 +229,12 @@ class Submitter:
|
|||||||
)
|
)
|
||||||
sites.append(maigret_site)
|
sites.append(maigret_site)
|
||||||
|
|
||||||
return sites
|
return sites, resp_text
|
||||||
|
|
||||||
return []
|
return [], resp_text
|
||||||
|
|
||||||
def extract_username_dialog(self, url):
|
@staticmethod
|
||||||
|
def extract_username_dialog(url):
|
||||||
url_parts = url.rstrip("/").split("/")
|
url_parts = url.rstrip("/").split("/")
|
||||||
supposed_username = url_parts[-1].strip('@')
|
supposed_username = url_parts[-1].strip('@')
|
||||||
entered_username = input(
|
entered_username = input(
|
||||||
@@ -256,6 +293,10 @@ class Submitter:
|
|||||||
a_minus_b = tokens_a.difference(tokens_b)
|
a_minus_b = tokens_a.difference(tokens_b)
|
||||||
b_minus_a = tokens_b.difference(tokens_a)
|
b_minus_a = tokens_b.difference(tokens_a)
|
||||||
|
|
||||||
|
# additional filtering by html response
|
||||||
|
a_minus_b = [t for t in a_minus_b if not t in non_exists_resp_text]
|
||||||
|
b_minus_a = [t for t in b_minus_a if not t in exists_resp_text]
|
||||||
|
|
||||||
if len(a_minus_b) == len(b_minus_a) == 0:
|
if len(a_minus_b) == len(b_minus_a) == 0:
|
||||||
print("The pages for existing and non-existing account are the same!")
|
print("The pages for existing and non-existing account are the same!")
|
||||||
|
|
||||||
@@ -272,6 +313,8 @@ class Submitter:
|
|||||||
:top_features_count
|
:top_features_count
|
||||||
]
|
]
|
||||||
|
|
||||||
|
self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list])
|
||||||
|
|
||||||
print("Detected text features of existing account: " + ", ".join(presence_list))
|
print("Detected text features of existing account: " + ", ".join(presence_list))
|
||||||
features = input("If features was not detected correctly, write it manually: ")
|
features = input("If features was not detected correctly, write it manually: ")
|
||||||
|
|
||||||
@@ -281,6 +324,8 @@ class Submitter:
|
|||||||
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
|
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
|
||||||
:top_features_count
|
:top_features_count
|
||||||
]
|
]
|
||||||
|
self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list])
|
||||||
|
|
||||||
print(
|
print(
|
||||||
"Detected text features of non-existing account: " + ", ".join(absence_list)
|
"Detected text features of non-existing account: " + ", ".join(absence_list)
|
||||||
)
|
)
|
||||||
@@ -305,6 +350,76 @@ class Submitter:
|
|||||||
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
||||||
return site
|
return site
|
||||||
|
|
||||||
|
async def add_site(self, site):
    """Self-check ``site`` and interactively edit it before saving to the DB.

    The site is first run through ``site_self_check``. If that reports the
    site as disabled, nothing is saved. Otherwise the user may edit a fixed
    set of fields in a loop, finish (``0``), or reject the site (``10`` /
    ``11``). On finish the site is stored with ``self.db.update_site``.

    Returns:
        ``{"valid": True}`` when the site was saved, otherwise
        ``{"valid": False, "reason": ...}`` with reason ``"checks_failed"``,
        ``"manual block"`` or ``"remove"``.
    """
    # Menu number -> attribute name; constant, so built once outside the loop.
    editable_fields = {
        '1': 'name',
        '2': 'tags',
        '3': 'url',
        '4': 'url_main',
        '5': 'username_claimed',
        '6': 'username_unclaimed',
        '7': 'presense_strs',
        '8': 'absence_strs',
    }
    # Fields entered as comma-separated text and stored as lists.
    list_fields = ('tags', 'presense_strs', 'absence_strs')

    sem = asyncio.Semaphore(1)
    print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}")

    result = await self.site_self_check(site, sem)
    if result["disabled"]:
        print(
            f"Checks failed for {site.name}, please, verify them manually."
        )
        return {
            "valid": False,
            "reason": "checks_failed",
        }

    while True:
        print("\nAvailable fields to edit:")
        for num, field in editable_fields.items():
            current_value = getattr(site, field)
            print(f"{num}. {field} (current: {current_value})")

        print("0. finish editing")
        print("10. reject and block domain")
        print("11. invalid params, remove")

        choice = input(
            "\nSelect field number to edit (1-8) or an action (0, 10, 11): "
        ).strip()

        if choice == '0':
            break

        if choice == '10':
            return {
                "valid": False,
                "reason": "manual block",
            }

        if choice == '11':
            return {
                "valid": False,
                "reason": "remove",
            }

        if choice not in editable_fields:
            print(f"Unknown option: {choice}")
            continue

        field = editable_fields[choice]
        current_value = getattr(site, field)
        new_value = input(f"Enter new value for {field} (current: {current_value}): ").strip()

        if field in list_fields:
            # Drop blank items so that empty input yields an empty list
            # (treated as "keep current value") rather than [''].
            new_value = [
                item for item in map(str.strip, new_value.split(',')) if item
            ]

        if new_value:
            setattr(site, field, new_value)
            print(f"Updated {field} to: {new_value}")

    self.logger.info(site.json)
    self.db.update_site(site)
    return {
        "valid": True,
    }
|
||||||
|
|
||||||
async def dialog(self, url_exists, cookie_file):
|
async def dialog(self, url_exists, cookie_file):
|
||||||
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
|
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
|
||||||
domain_raw = domain_raw.split("/")[0]
|
domain_raw = domain_raw.split("/")[0]
|
||||||
@@ -337,21 +452,33 @@ class Submitter:
|
|||||||
|
|
||||||
print('Detecting site engine, please wait...')
|
print('Detecting site engine, please wait...')
|
||||||
sites = []
|
sites = []
|
||||||
|
text = None
|
||||||
try:
|
try:
|
||||||
sites = await self.detect_known_engine(url_exists, url_mainpage)
|
sites, text = await self.detect_known_engine(url_exists, url_exists)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print('Engine detect process is interrupted.')
|
print('Engine detect process is interrupted.')
|
||||||
|
|
||||||
|
if 'cloudflare' in text.lower():
|
||||||
|
print(
|
||||||
|
'Cloudflare protection detected. I will use cloudscraper for futher work'
|
||||||
|
)
|
||||||
|
# self.session = CloudflareSession()
|
||||||
|
|
||||||
if not sites:
|
if not sites:
|
||||||
print("Unable to detect site engine, lets generate checking features")
|
print("Unable to detect site engine, lets generate checking features")
|
||||||
|
|
||||||
redirects = False
|
redirects = False
|
||||||
if self.args.verbose:
|
if self.args.verbose:
|
||||||
redirects = 'y' in input('Should we do redirects automatically? [yN] ').lower()
|
redirects = (
|
||||||
|
'y' in input('Should we do redirects automatically? [yN] ').lower()
|
||||||
|
)
|
||||||
|
|
||||||
sites = [
|
sites = [
|
||||||
await self.check_features_manually(
|
await self.check_features_manually(
|
||||||
url_exists, url_mainpage, cookie_file, redirects,
|
url_exists,
|
||||||
|
url_mainpage,
|
||||||
|
cookie_file,
|
||||||
|
redirects,
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -371,7 +498,7 @@ class Submitter:
|
|||||||
|
|
||||||
if not found:
|
if not found:
|
||||||
print(
|
print(
|
||||||
f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}."
|
f"{Fore.RED}[!] The check for site '{chosen_site.name}' failed!{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
print(
|
print(
|
||||||
"Try to run this mode again and increase features count or choose others."
|
"Try to run this mode again and increase features count or choose others."
|
||||||
@@ -395,13 +522,18 @@ class Submitter:
|
|||||||
|
|
||||||
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
||||||
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
|
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
|
||||||
rank = Submitter.get_alexa_rank(chosen_site.url_main)
|
# rank = Submitter.get_alexa_rank(chosen_site.url_main)
|
||||||
if rank:
|
# if rank:
|
||||||
print(f'New alexa rank: {rank}')
|
# print(f'New alexa rank: {rank}')
|
||||||
chosen_site.alexa_rank = rank
|
# chosen_site.alexa_rank = rank
|
||||||
|
|
||||||
self.logger.debug(chosen_site.json)
|
self.logger.debug(chosen_site.json)
|
||||||
site_data = chosen_site.strip_engine_data()
|
site_data = chosen_site.strip_engine_data()
|
||||||
self.logger.debug(site_data.json)
|
self.logger.debug(site_data.json)
|
||||||
self.db.update_site(site_data)
|
self.db.update_site(site_data)
|
||||||
|
|
||||||
|
if self.args.db:
|
||||||
|
print(f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}")
|
||||||
|
self.db.save_to_file(self.args.db)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -0,0 +1,47 @@
|
|||||||
|
# Download this first to avoid compatibility issues:
|
||||||
|
#
|
||||||
|
# sudo zypper in python3-devel
|
||||||
|
# sudo zypper in python3-dev
|
||||||
|
#
|
||||||
|
# Then run 'pip3 install -r opensuse.txt' as usual.
|
||||||
|
#
|
||||||
|
aiodns>=3.0.0
|
||||||
|
aiohttp>=3.8.6
|
||||||
|
aiohttp-socks>=0.7.1
|
||||||
|
arabic-reshaper~=3.0.0
|
||||||
|
async-timeout
|
||||||
|
attrs>=22.2.0
|
||||||
|
certifi>=2023.7.22
|
||||||
|
chardet>=5.0.0
|
||||||
|
colorama
|
||||||
|
future>=0.18.3
|
||||||
|
future-annotations>=1.0.0
|
||||||
|
html5lib>=1.1
|
||||||
|
idna>=3.4
|
||||||
|
Jinja2
|
||||||
|
lxml>=4.9.2
|
||||||
|
MarkupSafe
|
||||||
|
mock>=4.0.3
|
||||||
|
multidict
|
||||||
|
pycountry>=22.3.5
|
||||||
|
PyPDF2>=3.0.1
|
||||||
|
PySocks>=1.7.1
|
||||||
|
python-bidi>=0.4.2
|
||||||
|
requests
|
||||||
|
requests-futures>=1.0.0
|
||||||
|
six>=1.16.0
|
||||||
|
socid-extractor>=0.0.24
|
||||||
|
soupsieve>=2.3.2.post1
|
||||||
|
stem>=1.8.1
|
||||||
|
torrequest>=0.1.0
|
||||||
|
tqdm
|
||||||
|
typing-extensions
|
||||||
|
webencodings>=0.5.1
|
||||||
|
svglib
|
||||||
|
xhtml2pdf~=0.2.11
|
||||||
|
XMind>=1.2.0
|
||||||
|
yarl
|
||||||
|
networkx
|
||||||
|
pyvis>=0.2.1
|
||||||
|
reportlab
|
||||||
|
cloudscraper>=1.2.71
|
||||||
Generated
+2869
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
|||||||
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
|
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
|
||||||
pefile==2021.9.3
|
pefile==2023.2.7 # do not bump while pyinstaller is 6.11.1, there is a conflict
|
||||||
psutil==5.9.0
|
psutil==6.1.0
|
||||||
pyinstaller @ https://github.com/pyinstaller/pyinstaller/archive/develop.zip
|
pyinstaller==6.11.1
|
||||||
pywin32-ctypes==0.2.0
|
pywin32-ctypes==0.2.3
|
||||||
|
|||||||
@@ -0,0 +1,90 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
|
[tool.poetry]
|
||||||
|
name = "maigret"
|
||||||
|
version = "0.4.4"
|
||||||
|
description = "🕵️♂️ Collect a dossier on a person by username from thousands of sites."
|
||||||
|
authors = ["Soxoj <soxoj@protonmail.com>"]
|
||||||
|
readme = "README.md"
|
||||||
|
license = "MIT License"
|
||||||
|
homepage = "https://pypi.org/project/maigret"
|
||||||
|
documentation = "https://maigret.readthedocs.io"
|
||||||
|
repository = "https://github.com/soxoj/maigret"
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 5 - Production/Stable",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Intended Audience :: Information Technology",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Natural Language :: English"
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.poetry.urls]
|
||||||
|
"Bug Tracker" = "https://github.com/soxoj/maigret/issues"
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
# poetry install
|
||||||
|
# Install only production dependencies:
|
||||||
|
# poetry install --without dev
|
||||||
|
# Install with dev dependencies:
|
||||||
|
# poetry install --with dev
|
||||||
|
python = "^3.10"
|
||||||
|
aiodns = "^3.0.0"
|
||||||
|
aiohttp = "^3.11.9"
|
||||||
|
aiohttp-socks = "^0.9.1"
|
||||||
|
arabic-reshaper = "^3.0.0"
|
||||||
|
async-timeout = "^5.0.1"
|
||||||
|
attrs = "^24.2.0"
|
||||||
|
certifi = "^2024.8.30"
|
||||||
|
chardet = "^5.0.0"
|
||||||
|
colorama = "^0.4.6"
|
||||||
|
future = "^1.0.0"
|
||||||
|
future-annotations= "^1.0.0"
|
||||||
|
html5lib = "^1.1"
|
||||||
|
idna = "^3.4"
|
||||||
|
Jinja2 = "^3.1.3"
|
||||||
|
lxml = "^5.3.0"
|
||||||
|
MarkupSafe = "^3.0.2"
|
||||||
|
mock = "^5.1.0"
|
||||||
|
multidict = "^6.0.4"
|
||||||
|
pycountry = "^24.6.1"
|
||||||
|
PyPDF2 = "^3.0.1"
|
||||||
|
PySocks = "^1.7.1"
|
||||||
|
python-bidi = "^0.6.3"
|
||||||
|
requests = "^2.31.0"
|
||||||
|
requests-futures = "^1.0.2"
|
||||||
|
six = "^1.16.0"
|
||||||
|
socid-extractor = "^0.0.26"
|
||||||
|
soupsieve = "^2.6"
|
||||||
|
stem = "^1.8.1"
|
||||||
|
torrequest = "^0.1.0"
|
||||||
|
alive_progress = "^3.2.0"
|
||||||
|
typing-extensions = "^4.8.0"
|
||||||
|
webencodings = "^0.5.1"
|
||||||
|
xhtml2pdf = "^0.2.11"
|
||||||
|
XMind = "^1.2.0"
|
||||||
|
yarl = "^1.18.3"
|
||||||
|
networkx = "^2.6.3"
|
||||||
|
pyvis = "^0.3.2"
|
||||||
|
reportlab = "^4.2.0"
|
||||||
|
cloudscraper = "^1.2.71"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
# How to add a new dev dependency: poetry add black --group dev
|
||||||
|
# Install dev dependencies with: poetry install --with dev
|
||||||
|
flake8 = "^7.1.1"
|
||||||
|
pytest = "^8.3.4"
|
||||||
|
pytest-asyncio = "^0.24.0"
|
||||||
|
pytest-cov = "^6.0.0"
|
||||||
|
pytest-httpserver = "^1.0.0"
|
||||||
|
pytest-rerunfailures = "^15.0"
|
||||||
|
reportlab = "^4.2.0"
|
||||||
|
mypy = "^1.13.0"
|
||||||
|
tuna = "^0.5.11"
|
||||||
|
|
||||||
|
[tool.poetry.scripts]
|
||||||
|
# Run with: poetry run maigret <username>
|
||||||
|
maigret = "maigret.maigret:run"
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
aiodns==3.0.0
|
|
||||||
aiohttp==3.8.1
|
|
||||||
aiohttp-socks==0.7.1
|
|
||||||
arabic-reshaper==2.1.3
|
|
||||||
async-timeout==4.0.2
|
|
||||||
attrs==21.4.0
|
|
||||||
certifi==2021.10.8
|
|
||||||
chardet==4.0.0
|
|
||||||
colorama==0.4.4
|
|
||||||
future==0.18.2
|
|
||||||
future-annotations==1.0.0
|
|
||||||
html5lib==1.1
|
|
||||||
idna==3.3
|
|
||||||
Jinja2==3.0.3
|
|
||||||
lxml==4.8.0
|
|
||||||
MarkupSafe==2.0.1
|
|
||||||
mock==4.0.3
|
|
||||||
multidict==5.2.0;python_version<"3.7"
|
|
||||||
multidict==6.0.2;python_version>="3.7"
|
|
||||||
pycountry==22.1.10
|
|
||||||
PyPDF2==1.26.0
|
|
||||||
PySocks==1.7.1
|
|
||||||
python-bidi==0.4.2
|
|
||||||
requests==2.27.1
|
|
||||||
requests-futures==1.0.0
|
|
||||||
six==1.16.0
|
|
||||||
socid-extractor>=0.0.21
|
|
||||||
soupsieve==2.3.1
|
|
||||||
stem==1.8.0
|
|
||||||
torrequest==0.1.0
|
|
||||||
tqdm==4.63.0
|
|
||||||
typing-extensions==4.1.1
|
|
||||||
webencodings==0.5.1
|
|
||||||
xhtml2pdf==0.2.5
|
|
||||||
XMind==1.2.0
|
|
||||||
yarl==1.7.2
|
|
||||||
networkx==2.5.1
|
|
||||||
pyvis==0.1.9
|
|
||||||
reportlab==3.6.6
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
[egg_info]
|
|
||||||
tag_build =
|
|
||||||
tag_date = 0
|
|
||||||
|
|
||||||
[flake8]
|
|
||||||
per-file-ignores = __init__.py:F401
|
|
||||||
|
|
||||||
[mypy]
|
|
||||||
ignore_missing_imports = True
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
from setuptools import (
|
|
||||||
setup,
|
|
||||||
find_packages,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
with open('README.md') as fh:
|
|
||||||
long_description = fh.read()
|
|
||||||
|
|
||||||
with open('requirements.txt') as rf:
|
|
||||||
requires = rf.read().splitlines()
|
|
||||||
|
|
||||||
setup(name='maigret',
|
|
||||||
version='0.4.2',
|
|
||||||
description='Collect a dossier on a person by username from a huge number of sites',
|
|
||||||
long_description=long_description,
|
|
||||||
long_description_content_type="text/markdown",
|
|
||||||
url='https://github.com/soxoj/maigret',
|
|
||||||
install_requires=requires,
|
|
||||||
entry_points={'console_scripts': ['maigret = maigret.maigret:run']},
|
|
||||||
packages=find_packages(),
|
|
||||||
include_package_data=True,
|
|
||||||
author='Soxoj',
|
|
||||||
author_email='soxoj@protonmail.com',
|
|
||||||
license='MIT',
|
|
||||||
zip_safe=False)
|
|
||||||
+22
-20
@@ -1,30 +1,32 @@
|
|||||||
name: maigret2
|
title: Maigret
|
||||||
version: git
|
icon: static/maigret.png
|
||||||
summary: SOCMINT / Instagram
|
name: maigret
|
||||||
|
summary: 🕵️♂️ Collect a dossier on a person by username from thousands of sites.
|
||||||
description: |
|
description: |
|
||||||
Test Test Test
|
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of Sherlock.
|
||||||
base: core18
|
|
||||||
|
Currently supported more than 3000 sites, search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
|
||||||
|
|
||||||
|
version: 0.4.4
|
||||||
|
license: MIT
|
||||||
|
base: core22
|
||||||
confinement: strict
|
confinement: strict
|
||||||
|
|
||||||
|
source-code: https://github.com/soxoj/maigret
|
||||||
|
issues:
|
||||||
|
- https://github.com/soxoj/maigret/issues
|
||||||
|
donation:
|
||||||
|
- https://patreon.com/soxoj
|
||||||
|
contact:
|
||||||
|
- mailto:soxoj@protonmail.com
|
||||||
|
|
||||||
parts:
|
parts:
|
||||||
maigret2:
|
maigret:
|
||||||
plugin: python
|
plugin: python
|
||||||
python-version: python3
|
|
||||||
source: .
|
source: .
|
||||||
stage-packages:
|
|
||||||
- python-six
|
|
||||||
|
|
||||||
|
|
||||||
|
type: app
|
||||||
apps:
|
apps:
|
||||||
maigret2:
|
maigret:
|
||||||
command: bin/maigret
|
command: bin/maigret
|
||||||
|
plugs: [ network, network-bind, home ]
|
||||||
|
|
||||||
architectures:
|
|
||||||
- build-on: amd64
|
|
||||||
- build-on: i386
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 9.0 KiB After Width: | Height: | Size: 45 KiB |
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 44 KiB After Width: | Height: | Size: 1.6 MiB |
@@ -1,8 +0,0 @@
|
|||||||
reportlab==3.6.6
|
|
||||||
flake8==4.0.1
|
|
||||||
pytest==7.0.1
|
|
||||||
pytest-asyncio==0.16.0;python_version<"3.7"
|
|
||||||
pytest-asyncio==0.18.2;python_version>="3.7"
|
|
||||||
pytest-cov==3.0.0
|
|
||||||
pytest-httpserver==1.0.4
|
|
||||||
pytest-rerunfailures==10.2
|
|
||||||
+1
-1
@@ -19,7 +19,7 @@ empty_mark = Mark('', (), {})
|
|||||||
|
|
||||||
|
|
||||||
def by_slow_marker(item):
|
def by_slow_marker(item):
|
||||||
return item.get_closest_marker('slow', default=empty_mark)
|
return item.get_closest_marker('slow', default=empty_mark).name
|
||||||
|
|
||||||
|
|
||||||
def pytest_collection_modifyitems(items):
|
def pytest_collection_modifyitems(items):
|
||||||
|
|||||||
+28
-9
@@ -1,25 +1,44 @@
|
|||||||
{
|
{
|
||||||
"engines": {},
|
"engines": {},
|
||||||
"sites": {
|
"sites": {
|
||||||
"GooglePlayStore": {
|
"ValidActive": {
|
||||||
"tags": ["global", "us"],
|
"tags": ["global", "us"],
|
||||||
"disabled": false,
|
"disabled": false,
|
||||||
"checkType": "status_code",
|
"checkType": "status_code",
|
||||||
"alexaRank": 1,
|
"alexaRank": 1,
|
||||||
"url": "https://play.google.com/store/apps/developer?id={username}",
|
"url": "https://play.google.com/store/apps/developer?id={username}",
|
||||||
"urlMain": "https://play.google.com/store",
|
"urlMain": "https://play.google.com/store",
|
||||||
"usernameClaimed": "Facebook_nosuchname",
|
"usernameClaimed": "OpenAI",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"Reddit": {
|
"InvalidActive": {
|
||||||
"tags": ["news", "social", "us"],
|
"tags": ["global", "us"],
|
||||||
|
"disabled": false,
|
||||||
"checkType": "status_code",
|
"checkType": "status_code",
|
||||||
"presenseStrs": ["totalKarma"],
|
"alexaRank": 1,
|
||||||
|
"url": "https://play.google.com/store/apps/dev?id={username}",
|
||||||
|
"urlMain": "https://play.google.com/store",
|
||||||
|
"usernameClaimed": "OpenAI",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"ValidInactive": {
|
||||||
|
"tags": ["global", "us"],
|
||||||
"disabled": true,
|
"disabled": true,
|
||||||
"alexaRank": 17,
|
"checkType": "status_code",
|
||||||
"url": "https://www.reddit.com/user/{username}",
|
"alexaRank": 1,
|
||||||
"urlMain": "https://www.reddit.com/",
|
"url": "https://play.google.com/store/apps/developer?id={username}",
|
||||||
"usernameClaimed": "blue",
|
"urlMain": "https://play.google.com/store",
|
||||||
|
"usernameClaimed": "OpenAI",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"InvalidInactive": {
|
||||||
|
"tags": ["global", "us"],
|
||||||
|
"disabled": true,
|
||||||
|
"checkType": "status_code",
|
||||||
|
"alexaRank": 1,
|
||||||
|
"url": "https://play.google.com/store/apps/dev?id={username}",
|
||||||
|
"urlMain": "https://play.google.com/store",
|
||||||
|
"usernameClaimed": "OpenAI",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,7 +41,8 @@ async def test_import_aiohttp_cookies():
|
|||||||
f.write(COOKIES_TXT)
|
f.write(COOKIES_TXT)
|
||||||
|
|
||||||
cookie_jar = import_aiohttp_cookies(cookies_filename)
|
cookie_jar = import_aiohttp_cookies(cookies_filename)
|
||||||
assert list(cookie_jar._cookies.keys()) == ['xss.is', 'httpbin.org']
|
# new aiohttp support
|
||||||
|
assert list(cookie_jar._cookies.keys()) in (['xss.is', 'httpbin.org'], [('xss.is', '/'), ('httpbin.org', '/')], [('xss.is', ''), ('httpbin.org', '')])
|
||||||
|
|
||||||
url = 'https://httpbin.org/cookies'
|
url = 'https://httpbin.org/cookies'
|
||||||
connector = aiohttp.TCPConnector(ssl=False)
|
connector = aiohttp.TCPConnector(ssl=False)
|
||||||
|
|||||||
+2
-1
@@ -23,11 +23,12 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
|||||||
'no_progressbar': False,
|
'no_progressbar': False,
|
||||||
'parse_url': '',
|
'parse_url': '',
|
||||||
'pdf': False,
|
'pdf': False,
|
||||||
|
'permute': False,
|
||||||
'print_check_errors': False,
|
'print_check_errors': False,
|
||||||
'print_not_found': False,
|
'print_not_found': False,
|
||||||
'proxy': None,
|
'proxy': None,
|
||||||
'reports_sorting': 'default',
|
'reports_sorting': 'default',
|
||||||
'retries': 1,
|
'retries': 0,
|
||||||
'self_check': False,
|
'self_check': False,
|
||||||
'site_list': [],
|
'site_list': [],
|
||||||
'stats': False,
|
'stats': False,
|
||||||
|
|||||||
@@ -13,4 +13,7 @@ def test_tags_validity(default_db):
|
|||||||
if tag not in tags:
|
if tag not in tags:
|
||||||
unknown_tags.add(tag)
|
unknown_tags.add(tag)
|
||||||
|
|
||||||
|
# make sure all tags are known
|
||||||
|
# if you see "unchecked" tag error, please, do
|
||||||
|
# maigret --db `pwd`/maigret/resources/data.json --self-check --tag unchecked --use-disabled-sites
|
||||||
assert unknown_tags == set()
|
assert unknown_tags == set()
|
||||||
|
|||||||
@@ -55,12 +55,12 @@ async def test_asyncio_progressbar_queue_executor():
|
|||||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=2)
|
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=2)
|
||||||
assert await executor.run(tasks) == [0, 1, 3, 2, 4, 6, 7, 5, 9, 8]
|
assert await executor.run(tasks) == [0, 1, 3, 2, 4, 6, 7, 5, 9, 8]
|
||||||
assert executor.execution_time > 0.5
|
assert executor.execution_time > 0.5
|
||||||
assert executor.execution_time < 0.6
|
assert executor.execution_time < 0.7
|
||||||
|
|
||||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=3)
|
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=3)
|
||||||
assert await executor.run(tasks) == [0, 3, 1, 4, 6, 2, 7, 9, 5, 8]
|
assert await executor.run(tasks) == [0, 3, 1, 4, 6, 2, 7, 9, 5, 8]
|
||||||
assert executor.execution_time > 0.4
|
assert executor.execution_time > 0.4
|
||||||
assert executor.execution_time < 0.5
|
assert executor.execution_time < 0.6
|
||||||
|
|
||||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=5)
|
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=5)
|
||||||
assert await executor.run(tasks) in (
|
assert await executor.run(tasks) in (
|
||||||
@@ -68,9 +68,9 @@ async def test_asyncio_progressbar_queue_executor():
|
|||||||
[0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
|
[0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
|
||||||
)
|
)
|
||||||
assert executor.execution_time > 0.3
|
assert executor.execution_time > 0.3
|
||||||
assert executor.execution_time < 0.4
|
assert executor.execution_time < 0.5
|
||||||
|
|
||||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=10)
|
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=10)
|
||||||
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
|
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
|
||||||
assert executor.execution_time > 0.2
|
assert executor.execution_time > 0.2
|
||||||
assert executor.execution_time < 0.3
|
assert executor.execution_time < 0.4
|
||||||
|
|||||||
+14
-54
@@ -35,66 +35,26 @@ RESULTS_EXAMPLE = {
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_self_check_db_positive_disable(test_db):
|
@pytest.mark.asyncio
|
||||||
|
async def test_self_check_db(test_db):
|
||||||
|
# initalize logger to debug
|
||||||
logger = Mock()
|
logger = Mock()
|
||||||
assert test_db.sites[0].disabled is False
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
assert test_db.sites_dict['InvalidActive'].disabled is False
|
||||||
loop.run_until_complete(
|
assert test_db.sites_dict['ValidInactive'].disabled is True
|
||||||
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
assert test_db.sites_dict['ValidActive'].disabled is False
|
||||||
)
|
assert test_db.sites_dict['InvalidInactive'].disabled is True
|
||||||
|
|
||||||
assert test_db.sites[0].disabled is True
|
await self_check(test_db, test_db.sites_dict, logger, silent=False)
|
||||||
|
|
||||||
|
assert test_db.sites_dict['InvalidActive'].disabled is True
|
||||||
@pytest.mark.slow
|
assert test_db.sites_dict['ValidInactive'].disabled is False
|
||||||
def test_self_check_db_positive_enable(test_db):
|
assert test_db.sites_dict['ValidActive'].disabled is False
|
||||||
logger = Mock()
|
assert test_db.sites_dict['InvalidInactive'].disabled is True
|
||||||
|
|
||||||
test_db.sites[0].disabled = True
|
|
||||||
test_db.sites[0].username_claimed = 'Skyeng'
|
|
||||||
assert test_db.sites[0].disabled is True
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.run_until_complete(
|
|
||||||
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
|
||||||
)
|
|
||||||
|
|
||||||
assert test_db.sites[0].disabled is False
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
|
||||||
def test_self_check_db_negative_disabled(test_db):
|
|
||||||
logger = Mock()
|
|
||||||
|
|
||||||
test_db.sites[0].disabled = True
|
|
||||||
assert test_db.sites[0].disabled is True
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.run_until_complete(
|
|
||||||
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
|
||||||
)
|
|
||||||
|
|
||||||
assert test_db.sites[0].disabled is True
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
|
||||||
def test_self_check_db_negative_enabled(test_db):
|
|
||||||
logger = Mock()
|
|
||||||
|
|
||||||
test_db.sites[0].disabled = False
|
|
||||||
test_db.sites[0].username_claimed = 'Skyeng'
|
|
||||||
assert test_db.sites[0].disabled is False
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.run_until_complete(
|
|
||||||
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
|
||||||
)
|
|
||||||
|
|
||||||
assert test_db.sites[0].disabled is False
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
|
@pytest.mark.skip(reason="broken, fixme")
|
||||||
def test_maigret_results(test_db):
|
def test_maigret_results(test_db):
|
||||||
logger = Mock()
|
logger = Mock()
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import pytest
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
import xmind
|
import xmind
|
||||||
@@ -424,6 +425,7 @@ def test_html_report_broken():
|
|||||||
assert SUPPOSED_BROKEN_INTERESTS in report_text
|
assert SUPPOSED_BROKEN_INTERESTS in report_text
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason='connection reset, fixme')
|
||||||
def test_pdf_report():
|
def test_pdf_report():
|
||||||
report_name = 'report_test.pdf'
|
report_name = 'report_test.pdf'
|
||||||
context = generate_report_context(TEST)
|
context = generate_report_context(TEST)
|
||||||
|
|||||||
@@ -202,3 +202,20 @@ def test_get_url_template():
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
assert site.get_url_template() == "SUBDOMAIN"
|
assert site.get_url_template() == "SUBDOMAIN"
|
||||||
|
|
||||||
|
|
||||||
|
def test_has_site_url_or_name(default_db):
|
||||||
|
# by the same url or partial match
|
||||||
|
assert default_db.has_site("https://aback.com.ua/user/") == True
|
||||||
|
assert default_db.has_site("https://aback.com.ua") == True
|
||||||
|
|
||||||
|
# acceptable partial match
|
||||||
|
assert default_db.has_site("https://aback.com.ua/use") == True
|
||||||
|
assert default_db.has_site("https://aback.com") == True
|
||||||
|
|
||||||
|
# by name
|
||||||
|
assert default_db.has_site("Aback") == True
|
||||||
|
|
||||||
|
# false
|
||||||
|
assert default_db.has_site("https://aeifgoai3h4g8a3u4g5") == False
|
||||||
|
assert default_db.has_site("aeifgoai3h4g8a3u4g5") == False
|
||||||
|
|||||||
+11
-5
@@ -3,7 +3,7 @@ import random
|
|||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
|
|
||||||
from maigret.maigret import MaigretDatabase
|
from maigret.maigret import MaigretDatabase
|
||||||
from maigret.submit import get_alexa_rank
|
from maigret.submit import Submitter
|
||||||
|
|
||||||
|
|
||||||
def update_tags(site):
|
def update_tags(site):
|
||||||
@@ -22,7 +22,7 @@ def update_tags(site):
|
|||||||
site.disabled = True
|
site.disabled = True
|
||||||
|
|
||||||
print(f'Old alexa rank: {site.alexa_rank}')
|
print(f'Old alexa rank: {site.alexa_rank}')
|
||||||
rank = get_alexa_rank(site.url_main)
|
rank = Submitter.get_alexa_rank(site.url_main)
|
||||||
if rank:
|
if rank:
|
||||||
print(f'New alexa rank: {rank}')
|
print(f'New alexa rank: {rank}')
|
||||||
site.alexa_rank = rank
|
site.alexa_rank = rank
|
||||||
@@ -36,6 +36,7 @@ if __name__ == '__main__':
|
|||||||
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
||||||
dest="base_file", default="maigret/resources/data.json",
|
dest="base_file", default="maigret/resources/data.json",
|
||||||
help="JSON file with sites data to update.")
|
help="JSON file with sites data to update.")
|
||||||
|
parser.add_argument("--name", help="Name of site to check")
|
||||||
|
|
||||||
pool = list()
|
pool = list()
|
||||||
|
|
||||||
@@ -45,12 +46,17 @@ if __name__ == '__main__':
|
|||||||
db.load_from_file(args.base_file).sites
|
db.load_from_file(args.base_file).sites
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
site = random.choice(db.sites)
|
if args.name:
|
||||||
|
sites = list(db.ranked_sites_dict(names=[args.name]).values())
|
||||||
|
site = random.choice(sites)
|
||||||
|
else:
|
||||||
|
site = random.choice(db.sites)
|
||||||
|
|
||||||
if site.engine == 'uCoz':
|
if site.engine == 'uCoz':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not 'in' in site.tags:
|
# if not 'in' in site.tags:
|
||||||
continue
|
# continue
|
||||||
|
|
||||||
update_tags(site)
|
update_tags(site)
|
||||||
|
|
||||||
|
|||||||
+6
-14
@@ -3,23 +3,13 @@
|
|||||||
This module generates the listing of supported sites in file `SITES.md`
|
This module generates the listing of supported sites in file `SITES.md`
|
||||||
and pretty prints file with sites data.
|
and pretty prints file with sites data.
|
||||||
"""
|
"""
|
||||||
import aiohttp
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import sys
|
|
||||||
import requests
|
|
||||||
import logging
|
import logging
|
||||||
import threading
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
from datetime import datetime
|
|
||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
|
|
||||||
import tqdm.asyncio
|
from maigret.maigret import get_response
|
||||||
|
from maigret.sites import MaigretDatabase, MaigretEngine
|
||||||
from maigret.maigret import get_response, site_self_check
|
|
||||||
from maigret.sites import MaigretSite, MaigretDatabase, MaigretEngine
|
|
||||||
from maigret.utils import CaseConverter
|
|
||||||
|
|
||||||
|
|
||||||
async def check_engine_of_site(site_name, sites_with_engines, future, engine_name, semaphore, logger):
|
async def check_engine_of_site(site_name, sites_with_engines, future, engine_name, semaphore, logger):
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
@@ -98,8 +88,10 @@ if __name__ == '__main__':
|
|||||||
tasks.append(future)
|
tasks.append(future)
|
||||||
|
|
||||||
# progress bar
|
# progress bar
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
with alive_progress(len(tasks), title='Checking sites') as progress:
|
||||||
loop.run_until_complete(f)
|
for f in asyncio.as_completed(tasks):
|
||||||
|
loop.run_until_complete(f)
|
||||||
|
progress()
|
||||||
|
|
||||||
print(f'Total detected {len(new_engine_sites)} sites on engine {engine_name}')
|
print(f'Total detected {len(new_engine_sites)} sites on engine {engine_name}')
|
||||||
# dict with new found engine sites
|
# dict with new found engine sites
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import json
|
|||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import tqdm.asyncio
|
import alive_progress
|
||||||
from mock import Mock
|
from mock import Mock
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -181,7 +181,7 @@ if __name__ == '__main__':
|
|||||||
raw_maigret_data = json.dumps({site.name: site.json for site in sites_subset})
|
raw_maigret_data = json.dumps({site.name: site.json for site in sites_subset})
|
||||||
|
|
||||||
new_sites = []
|
new_sites = []
|
||||||
for site in tqdm.asyncio.tqdm(urls):
|
for site in alive_progress.alive_it(urls):
|
||||||
site_lowercase = site.lower()
|
site_lowercase = site.lower()
|
||||||
|
|
||||||
domain_raw = URL_RE.sub('', site_lowercase).strip().strip('/')
|
domain_raw = URL_RE.sub('', site_lowercase).strip().strip('/')
|
||||||
@@ -271,7 +271,9 @@ if __name__ == '__main__':
|
|||||||
future = asyncio.ensure_future(check_coro)
|
future = asyncio.ensure_future(check_coro)
|
||||||
tasks.append(future)
|
tasks.append(future)
|
||||||
|
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(tasks, timeout=TIMEOUT):
|
with alive_progress(len(tasks), title='Checking sites') as progress:
|
||||||
|
for f in asyncio.as_completed(tasks):
|
||||||
|
progress()
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(f)
|
loop.run_until_complete(f)
|
||||||
except asyncio.exceptions.TimeoutError:
|
except asyncio.exceptions.TimeoutError:
|
||||||
|
|||||||
@@ -3,13 +3,12 @@
|
|||||||
This module generates the listing of supported sites in file `SITES.md`
|
This module generates the listing of supported sites in file `SITES.md`
|
||||||
and pretty prints file with sites data.
|
and pretty prints file with sites data.
|
||||||
"""
|
"""
|
||||||
import json
|
|
||||||
import sys
|
import sys
|
||||||
import requests
|
import requests
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
|
|
||||||
from maigret.maigret import MaigretDatabase
|
from maigret.maigret import MaigretDatabase
|
||||||
@@ -27,9 +26,10 @@ RANKS.update({
|
|||||||
|
|
||||||
SEMAPHORE = threading.Semaphore(20)
|
SEMAPHORE = threading.Semaphore(20)
|
||||||
|
|
||||||
|
|
||||||
def get_rank(domain_to_query, site, print_errors=True):
|
def get_rank(domain_to_query, site, print_errors=True):
|
||||||
with SEMAPHORE:
|
with SEMAPHORE:
|
||||||
#Retrieve ranking data via alexa API
|
# Retrieve ranking data via alexa API
|
||||||
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
|
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
|
||||||
xml_data = requests.get(url).text
|
xml_data = requests.get(url).text
|
||||||
root = ET.fromstring(xml_data)
|
root = ET.fromstring(xml_data)
|
||||||
@@ -137,7 +137,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
|
site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
|
||||||
db.update_site(site)
|
db.update_site(site)
|
||||||
|
|
||||||
site_file.write(f'\nThe list was updated at ({datetime.utcnow()} UTC)\n')
|
site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc).date()})\n')
|
||||||
db.save_to_file(args.base_file)
|
db.save_to_file(args.base_file)
|
||||||
|
|
||||||
statistics_text = db.get_db_stats(is_markdown=True)
|
statistics_text = db.get_db_stats(is_markdown=True)
|
||||||
|
|||||||
@@ -1,56 +1,38 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import maigret
|
import maigret
|
||||||
|
|
||||||
|
|
||||||
# top popular sites from the Maigret database
|
|
||||||
TOP_SITES_COUNT = 300
|
TOP_SITES_COUNT = 300
|
||||||
# Maigret HTTP requests timeout
|
|
||||||
TIMEOUT = 10
|
TIMEOUT = 10
|
||||||
# max parallel requests
|
|
||||||
MAX_CONNECTIONS = 50
|
MAX_CONNECTIONS = 50
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def main():
|
||||||
# setup logging and asyncio
|
|
||||||
logger = logging.getLogger('maigret')
|
logger = logging.getLogger('maigret')
|
||||||
logger.setLevel(logging.WARNING)
|
logger.setLevel(logging.WARNING)
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
|
|
||||||
# setup Maigret
|
|
||||||
db = maigret.MaigretDatabase().load_from_file('./maigret/resources/data.json')
|
db = maigret.MaigretDatabase().load_from_file('./maigret/resources/data.json')
|
||||||
# also can be downloaded from web
|
|
||||||
# db = MaigretDatabase().load_from_url(MAIGRET_DB_URL)
|
|
||||||
|
|
||||||
# user input
|
|
||||||
username = input('Enter username to search: ')
|
username = input('Enter username to search: ')
|
||||||
|
sites_count = int(input(
|
||||||
sites_count_raw = input(
|
|
||||||
f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): '
|
f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): '
|
||||||
)
|
)) or TOP_SITES_COUNT
|
||||||
sites_count = int(sites_count_raw) or TOP_SITES_COUNT
|
|
||||||
|
|
||||||
sites = db.ranked_sites_dict(top=sites_count)
|
sites = db.ranked_sites_dict(top=sites_count)
|
||||||
|
|
||||||
show_progressbar_raw = input('Do you want to show a progressbar? [Yn] ')
|
show_progressbar = input('Do you want to show a progressbar? [Yn] ').lower() != 'n'
|
||||||
show_progressbar = show_progressbar_raw.lower() != 'n'
|
extract_info = input(
|
||||||
|
|
||||||
extract_info_raw = input(
|
|
||||||
'Do you want to extract additional info from accounts\' pages? [Yn] '
|
'Do you want to extract additional info from accounts\' pages? [Yn] '
|
||||||
)
|
).lower() != 'n'
|
||||||
extract_info = extract_info_raw.lower() != 'n'
|
use_notifier = input(
|
||||||
|
|
||||||
use_notifier_raw = input(
|
|
||||||
'Do you want to use notifier for displaying results while searching? [Yn] '
|
'Do you want to use notifier for displaying results while searching? [Yn] '
|
||||||
)
|
).lower() != 'n'
|
||||||
use_notifier = use_notifier_raw.lower() != 'n'
|
|
||||||
|
|
||||||
notifier = None
|
notifier = None
|
||||||
if use_notifier:
|
if use_notifier:
|
||||||
notifier = maigret.Notifier(print_found_only=True, skip_check_errors=True)
|
notifier = maigret.Notifier(print_found_only=True, skip_check_errors=True)
|
||||||
|
|
||||||
# search!
|
|
||||||
search_func = maigret.search(
|
search_func = maigret.search(
|
||||||
username=username,
|
username=username,
|
||||||
site_dict=sites,
|
site_dict=sites,
|
||||||
@@ -58,7 +40,7 @@ if __name__ == '__main__':
|
|||||||
logger=logger,
|
logger=logger,
|
||||||
max_connections=MAX_CONNECTIONS,
|
max_connections=MAX_CONNECTIONS,
|
||||||
query_notify=notifier,
|
query_notify=notifier,
|
||||||
no_progressbar=(not show_progressbar),
|
no_progressbar=not show_progressbar,
|
||||||
is_parsing_enabled=extract_info,
|
is_parsing_enabled=extract_info,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -69,3 +51,7 @@ if __name__ == '__main__':
|
|||||||
for sitename, data in results.items():
|
for sitename, data in results.items():
|
||||||
is_found = data['status'].is_found()
|
is_found = data['status'].is_found()
|
||||||
print(f'{sitename} - {"Found!" if is_found else "Not found"}')
|
print(f'{sitename} - {"Found!" if is_found else "Not found"}')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user