mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-09 16:14:32 +00:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4da640609c | |||
| d20aad7dc2 |
@@ -2,7 +2,7 @@ name: Build docker image and push to DockerHub
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, dev ]
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
@@ -10,62 +10,24 @@ jobs:
|
||||
steps:
|
||||
-
|
||||
name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
uses: docker/setup-qemu-action@v1
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@v1
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
-
|
||||
name: Extract metadata (CLI)
|
||||
id: meta_cli
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret
|
||||
tags: |
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
type=ref,event=branch
|
||||
type=sha,prefix=
|
||||
-
|
||||
name: Extract metadata (Web UI)
|
||||
id: meta_web
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret
|
||||
tags: |
|
||||
type=raw,value=web,enable={{is_default_branch}}
|
||||
type=ref,event=branch,suffix=-web
|
||||
type=sha,prefix=web-
|
||||
-
|
||||
name: Build and push (CLI, default)
|
||||
id: docker_build_cli
|
||||
uses: docker/build-push-action@v6
|
||||
name: Build and push
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
push: true
|
||||
target: cli
|
||||
tags: ${{ steps.meta_cli.outputs.tags }}
|
||||
labels: ${{ steps.meta_cli.outputs.labels }}
|
||||
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
|
||||
platforms: linux/amd64,linux/arm64
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
-
|
||||
name: Build and push (Web UI)
|
||||
id: docker_build_web
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
push: true
|
||||
target: web
|
||||
tags: ${{ steps.meta_web.outputs.tags }}
|
||||
labels: ${{ steps.meta_web.outputs.labels }}
|
||||
platforms: linux/amd64,linux/arm64
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
-
|
||||
name: Image digests
|
||||
run: |
|
||||
echo "cli: ${{ steps.docker_build_cli.outputs.digest }}"
|
||||
echo "web: ${{ steps.docker_build_web.outputs.digest }}"
|
||||
name: Image digest
|
||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||
|
||||
@@ -13,7 +13,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
@@ -1,30 +1,21 @@
|
||||
name: Upload Python Package to PyPI when a Release is Published
|
||||
|
||||
name: Upload Python Package to PyPI when a Release is Created
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
types: [created]
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
jobs:
|
||||
pypi-publish:
|
||||
name: Publish release to PyPI
|
||||
build-and-publish:
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/maigret
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.x"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install build
|
||||
- name: Build package
|
||||
run: |
|
||||
python -m build
|
||||
- name: Publish package distributions to PyPI
|
||||
- uses: astral-sh/setup-uv@v3
|
||||
- run: uv build
|
||||
- name: Publish to PyPI (Trusted Publishing)
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: dist
|
||||
@@ -44,4 +44,3 @@ settings.json
|
||||
*.egg-info
|
||||
build
|
||||
LLM
|
||||
lib
|
||||
|
||||
-191
@@ -1,196 +1,5 @@
|
||||
# Changelog
|
||||
|
||||
## [0.6.0] - 2025-04-10
|
||||
|
||||
## What's Changed
|
||||
* Updated workflows: added 3.13 to test, updated pypi upload by @soxoj in https://github.com/soxoj/maigret/pull/2111
|
||||
* Bump pypdf from 5.1.0 to 6.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2122
|
||||
* Bump coverage from 7.9.2 to 7.10.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2117
|
||||
* Bump soupsieve from 2.6 to 2.7 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2118
|
||||
* Bump mock from 5.1.0 to 5.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2116
|
||||
* Bump pytest-asyncio from 1.0.0 to 1.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2114
|
||||
* Bump pytest-cov from 6.0.0 to 6.2.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2115
|
||||
* Bump xhtml2pdf from 0.2.16 to 0.2.17 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2149
|
||||
* Bump requests from 2.32.4 to 2.32.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2165
|
||||
* Bump lxml from 5.3.0 to 6.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2146
|
||||
* Bump aiodns from 3.2.0 to 3.5.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2148
|
||||
* Bump alive-progress from 3.2.0 to 3.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2145
|
||||
* Bump certifi from 2025.6.15 to 2025.8.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2147
|
||||
* Disabled some sites giving false positive results by @soxoj in https://github.com/soxoj/maigret/pull/2170
|
||||
* Bump flask from 3.1.1 to 3.1.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2175
|
||||
* Bump pyinstaller from 6.11.1 to 6.15.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2174
|
||||
* Bump mypy from 1.14.1 to 1.17.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2173
|
||||
* Bump pytest from 8.3.4 to 8.4.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2172
|
||||
* Bump flake8 from 7.1.1 to 7.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2171
|
||||
* Bump aiohttp from 3.12.14 to 3.12.15 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2181
|
||||
* Bump coverage from 7.10.3 to 7.10.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2180
|
||||
* Bump psutil from 6.1.1 to 7.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2179
|
||||
* Bump lxml from 6.0.0 to 6.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2178
|
||||
* Bump multidict from 6.6.3 to 6.6.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2177
|
||||
* Bump soupsieve from 2.7 to 2.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2185
|
||||
* Bump typing-extensions from 4.14.1 to 4.15.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2182
|
||||
* Bump python-bidi from 0.6.3 to 0.6.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2183
|
||||
* Bump platformdirs from 4.3.8 to 4.4.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2184
|
||||
* Make web interface accessible for Docker deployment by default by @soxoj in https://github.com/soxoj/maigret/pull/2189
|
||||
* Bump coverage from 7.10.5 to 7.10.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2192
|
||||
* Bump pytest-rerunfailures from 15.1 to 16.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2191
|
||||
* Bump pytest-rerunfailures from 15.1 to 16.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2193
|
||||
* Bump pytest from 8.4.1 to 8.4.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2194
|
||||
* Bump pytest-cov from 6.2.1 to 6.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2195
|
||||
* Bump pytest-cov from 6.3.0 to 7.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2196
|
||||
* Bump mypy from 1.17.1 to 1.18.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2197
|
||||
* Bump black from 25.1.0 to 25.9.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2203
|
||||
* Bump mypy from 1.18.1 to 1.18.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2202
|
||||
* Bump pytest-asyncio from 1.1.0 to 1.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2200
|
||||
* Bump pyinstaller from 6.15.0 to 6.16.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2199
|
||||
* Bump reportlab from 4.4.3 to 4.4.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2206
|
||||
* Bump coverage from 7.10.6 to 7.10.7 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2207
|
||||
* Bump psutil from 7.0.0 to 7.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2201
|
||||
* Bump asgiref from 3.9.1 to 3.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2204
|
||||
* Bump lxml from 6.0.1 to 6.0.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2208
|
||||
* Bump platformdirs from 4.4.0 to 4.5.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2223
|
||||
* Bump asgiref from 3.9.2 to 3.10.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2220
|
||||
* Bump yarl from 1.20.1 to 1.22.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2221
|
||||
* Bump markupsafe from 3.0.2 to 3.0.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2209
|
||||
* Bump multidict from 6.6.4 to 6.7.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2224
|
||||
* Bump idna from 3.10 to 3.11 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2227
|
||||
* Bump aiohttp from 3.12.15 to 3.13.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2225
|
||||
* Bump coverage from 7.10.7 to 7.11.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2230
|
||||
* Bump certifi from 2025.8.3 to 2025.10.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2228
|
||||
* Bump pytest-rerunfailures from 16.0.1 to 16.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2229
|
||||
* Bump attrs from 25.3.0 to 25.4.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2226
|
||||
* Bump aiohttp from 3.13.0 to 3.13.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2237
|
||||
* Bump pypdf from 6.0.0 to 6.1.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2233
|
||||
* Bump black from 25.9.0 to 25.11.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2239
|
||||
* Bump python-bidi from 0.6.6 to 0.6.7 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2234
|
||||
* Bump psutil from 7.1.0 to 7.1.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2240
|
||||
* Bump coverage from 7.11.0 to 7.12.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2241
|
||||
* Bump werkzeug from 3.1.3 to 3.1.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2248
|
||||
* Bump pypdf from 6.1.3 to 6.4.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2245
|
||||
* Bump asgiref from 3.10.0 to 3.11.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2243
|
||||
* Bump pytest-asyncio from 1.2.0 to 1.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2242
|
||||
* Bump aiohttp from 3.13.2 to 3.13.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2261
|
||||
* Bump pytest from 8.4.2 to 9.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2244
|
||||
* Bump mypy from 1.18.2 to 1.19.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2250
|
||||
* ♻️ Refactor: Hardcoded relative path for database file by @tang-vu in https://github.com/soxoj/maigret/pull/2285
|
||||
* ✨ Quality: Missing tests for settings cascade and override logic by @tang-vu in https://github.com/soxoj/maigret/pull/2287
|
||||
* ✨ Quality: Unexpanded tilde in file path by @tang-vu in https://github.com/soxoj/maigret/pull/2283
|
||||
* Bump urllib3 from 2.5.0 to 2.6.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2262
|
||||
* Bump pillow from 11.0.0 to 12.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2271
|
||||
* Bump black from 25.11.0 to 26.3.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2280
|
||||
* Bump cryptography from 44.0.1 to 46.0.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2270
|
||||
* Bump pypdf from 6.4.0 to 6.9.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2281
|
||||
* Dockerfile fix by @soxoj in https://github.com/soxoj/maigret/pull/2290
|
||||
* Fixed false positives in top-500 by @soxoj in https://github.com/soxoj/maigret/pull/2292
|
||||
* Update Telegram bot link in README by @soxoj in https://github.com/soxoj/maigret/pull/2293
|
||||
* Pyinstaller GitHub workflow fix by @soxoj in https://github.com/soxoj/maigret/pull/2298
|
||||
* Twitter fixed, mirrors mechanism improvement by @soxoj in https://github.com/soxoj/maigret/pull/2299
|
||||
* build(deps): bump flask from 3.1.2 to 3.1.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2289
|
||||
* Bump reportlab from 4.4.4 to 4.4.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2251
|
||||
* build(deps): bump werkzeug from 3.1.4 to 3.1.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2288
|
||||
* Bump certifi from 2025.10.5 to 2025.11.12 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2249
|
||||
* Update Telegram bot link in README by @soxoj in https://github.com/soxoj/maigret/pull/2300
|
||||
* Improve site-check quality by @soxoj in https://github.com/soxoj/maigret/pull/2301
|
||||
* feat(sites): fix false positives: disable 74 broken sites, fix 8 with… by @soxoj in https://github.com/soxoj/maigret/pull/2302
|
||||
* Update sites list workflow by @soxoj in https://github.com/soxoj/maigret/pull/2303
|
||||
* Bump svglib from 1.5.1 to 1.6.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2205
|
||||
* feat(workflow): fix update site data workflow dependency by @soxoj in https://github.com/soxoj/maigret/pull/2306
|
||||
* Re-enable taplink.cc with browser User-Agent to bypass Cloudflare by @Copilot in https://github.com/soxoj/maigret/pull/2308
|
||||
* feat(workflow): fix update site data workflow err by @soxoj in https://github.com/soxoj/maigret/pull/2312
|
||||
* Update site data workflow fix: remove ambiguous main tag by @soxoj in https://github.com/soxoj/maigret/pull/2313
|
||||
* Automated Sites List Update by @github-actions[bot] in https://github.com/soxoj/maigret/pull/2314
|
||||
* Fix Love.Mail.ru: update to numeric-only identifiers and new profile URL by @Copilot in https://github.com/soxoj/maigret/pull/2307
|
||||
* Remove dead site xxxforum.org by @Copilot in https://github.com/soxoj/maigret/pull/2310
|
||||
* Disable forums.developer.nvidia.com (auth-gated user profiles) by @Copilot in https://github.com/soxoj/maigret/pull/2305
|
||||
* Pin requests-toolbelt>=1.0.0 to fix urllib3 v2 incompatibility by @Copilot in https://github.com/soxoj/maigret/pull/2316
|
||||
* build(deps): bump reportlab from 4.4.5 to 4.4.10 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2323
|
||||
* build(deps-dev): bump coverage from 7.12.0 to 7.13.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2321
|
||||
* build(deps-dev): bump pytest-cov from 7.0.0 to 7.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2320
|
||||
* build(deps): bump aiohttp-socks from 0.10.1 to 0.11.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2319
|
||||
* Disable false-positive site probe: amateurvoyeurforum.com by @Copilot in https://github.com/soxoj/maigret/pull/2332
|
||||
* Disable forums.stevehoffman.tv due to false positives by @Copilot in https://github.com/soxoj/maigret/pull/2331
|
||||
* [WIP] Fix false-positive probe for vegalab site by @Copilot in https://github.com/soxoj/maigret/pull/2336
|
||||
* Fix RoyalCams site check using BongaCams white-label pattern by @Copilot in https://github.com/soxoj/maigret/pull/2334
|
||||
* Fix Setlist site check: switch to message checkType with proper markers by @Copilot in https://github.com/soxoj/maigret/pull/2333
|
||||
* [WIP] Fix invalid link on forums.imore.com by @Copilot in https://github.com/soxoj/maigret/pull/2337
|
||||
* Automated Sites List Update by @github-actions[bot] in https://github.com/soxoj/maigret/pull/2315
|
||||
* Automated Sites List Update by @github-actions[bot] in https://github.com/soxoj/maigret/pull/2339
|
||||
* Fix false-positive site probe: Re-enable Taplink with message checkType by @Copilot in https://github.com/soxoj/maigret/pull/2326
|
||||
* build(deps): bump aiodns from 3.5.0 to 4.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2345
|
||||
* build(deps-dev): bump mypy from 1.19.0 to 1.19.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2347
|
||||
* Disable Librusec site check (false positive) by @Copilot in https://github.com/soxoj/maigret/pull/2349
|
||||
* Disable MirTesen site check (false positive) by @Copilot in https://github.com/soxoj/maigret/pull/2350
|
||||
* build(deps): bump attrs from 25.4.0 to 26.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2344
|
||||
* Automated Sites List Update by @github-actions[bot] in https://github.com/soxoj/maigret/pull/2341
|
||||
* feat: add cybersecurity platforms + re-enable Root-Me by @juliosuas in https://github.com/soxoj/maigret/pull/2318
|
||||
* Fix club.cnews.ru false positive: switch from status_code to message checkType by @Copilot in https://github.com/soxoj/maigret/pull/2342
|
||||
* Fix SoundCloud false-positive: switch to message-based check by @Copilot in https://github.com/soxoj/maigret/pull/2355
|
||||
* build(deps): bump certifi from 2025.11.12 to 2026.2.25 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2346
|
||||
* feat: add tag blacklisting via `--exclude-tags` by @Copilot in https://github.com/soxoj/maigret/pull/2352
|
||||
* Fix domain substring matching and NoneType crash in submit dialog by @Copilot in https://github.com/soxoj/maigret/pull/2367
|
||||
* feat(core): add POST request support, new sites, migrate to Majestic Million ranking by @soxoj in https://github.com/soxoj/maigret/pull/2317
|
||||
* Fix update-site-data workflow race condition on branch push by @Copilot in https://github.com/soxoj/maigret/pull/2366
|
||||
* Fix false-positive site checks reported by Maigret Bot by @soxoj in https://github.com/soxoj/maigret/pull/2376
|
||||
* build(deps): bump pycountry from 24.6.1 to 26.2.16 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2382
|
||||
* Added Max.ru check; --no-progressbar flag fixed by @soxoj in https://github.com/soxoj/maigret/pull/2386
|
||||
* build(deps): bump asgiref from 3.11.0 to 3.11.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2384
|
||||
* build(deps): bump yarl from 1.22.0 to 1.23.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2383
|
||||
* build(deps): bump pypdf from 6.9.1 to 6.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2392
|
||||
* build(deps-dev): bump pytest-httpserver from 1.1.0 to 1.1.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2397
|
||||
* Automated Sites List Update by @github-actions[bot] in https://github.com/soxoj/maigret/pull/2399
|
||||
* build(deps): bump requests from 2.32.5 to 2.33.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2394
|
||||
* Readme update: commercial use by @soxoj in https://github.com/soxoj/maigret/pull/2403
|
||||
* build(deps): bump pyinstaller from 6.16.0 to 6.19.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2405
|
||||
* build(deps): bump psutil from 7.1.3 to 7.2.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2406
|
||||
* build(deps-dev): bump pytest from 9.0.1 to 9.0.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2381
|
||||
* build(deps): bump soupsieve from 2.8 to 2.8.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2404
|
||||
* Sites re-check by @soxoj in https://github.com/soxoj/maigret/pull/2423
|
||||
* Add urlProbes by @soxoj in https://github.com/soxoj/maigret/pull/2425
|
||||
* build(deps): bump cryptography from 46.0.5 to 46.0.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2422
|
||||
* Tags and site names improvements by @soxoj in https://github.com/soxoj/maigret/pull/2427
|
||||
* Overhaul site tags and naming: add social tag to 33 networks, fill mi… by @soxoj in https://github.com/soxoj/maigret/pull/2430
|
||||
* build(deps): bump multidict from 6.7.0 to 6.7.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2396
|
||||
* build(deps): bump chardet from 5.2.0 to 7.4.0.post2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2436
|
||||
* build(deps): bump platformdirs from 4.5.0 to 4.9.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2434
|
||||
* build(deps): bump aiohttp from 3.13.3 to 3.13.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2435
|
||||
* build(deps): bump pygments from 2.18.0 to 2.20.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2440
|
||||
* build(deps): bump requests from 2.33.0 to 2.33.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2444
|
||||
* build(deps-dev): bump mypy from 1.19.1 to 1.20.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2447
|
||||
* build(deps): bump aiohttp from 3.13.4 to 3.13.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2448
|
||||
* Add site protection tracking system, fix broken site checks (Instagra… by @soxoj in https://github.com/soxoj/maigret/pull/2452
|
||||
* Multiple lint and types fixes by @soxoj in https://github.com/soxoj/maigret/pull/2454
|
||||
* fix(data): update InterPals absence string to match current site response by @juliosuas in https://github.com/soxoj/maigret/pull/2442
|
||||
* Update of MIT License by @soxoj in https://github.com/soxoj/maigret/pull/2455
|
||||
* Added Crypto/Web3 site checks by @soxoj in https://github.com/soxoj/maigret/pull/2457
|
||||
* DB update mechanism by @soxoj in https://github.com/soxoj/maigret/pull/2458
|
||||
* Fix false positives by @soxoj in https://github.com/soxoj/maigret/pull/2459
|
||||
* False positive fixes by @soxoj in https://github.com/soxoj/maigret/pull/2460
|
||||
* build(deps): bump curl-cffi from 0.14.0 to 0.15.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2462
|
||||
* Add Markdown reports for LLM analysis by @soxoj in https://github.com/soxoj/maigret/pull/2463
|
||||
* Sites fixes by @soxoj in https://github.com/soxoj/maigret/pull/2464
|
||||
* Add installation troubleshooting for missing system dependencies by @Copilot in https://github.com/soxoj/maigret/pull/2465
|
||||
* Fix Spotify, add Spotify Community forum by @soxoj in https://github.com/soxoj/maigret/pull/2467
|
||||
* Fix crash on `-a --self-check` by adding exception handling to site check coroutines by @Copilot in https://github.com/soxoj/maigret/pull/2466
|
||||
* Fix failing test for custom DB path resolution by @soxoj in https://github.com/soxoj/maigret/pull/2468
|
||||
* Bump lxml minimum to 6.0.2 for Python 3.14 compatibility by @ocervell in https://github.com/soxoj/maigret/pull/2279
|
||||
* build(deps-dev): bump pytest from 9.0.2 to 9.0.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2473
|
||||
* Update HackTheBox and Wikipedia to use new API endpoints by @Copilot in https://github.com/soxoj/maigret/pull/2470
|
||||
* Automated Sites List Update by @github-actions[bot] in https://github.com/soxoj/maigret/pull/2474
|
||||
* build(deps): bump chardet from 7.4.0.post2 to 7.4.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2472
|
||||
* build(deps): bump cryptography from 46.0.6 to 46.0.7 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2475
|
||||
* vBulletin cleanup, Flarum sites, engine stats, UA bump by @soxoj in https://github.com/soxoj/maigret/pull/2476
|
||||
* build(deps): bump platformdirs from 4.9.4 to 4.9.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2477
|
||||
* Re-enable 69 stale-disabled sites validated via self-check by @soxoj in https://github.com/soxoj/maigret/pull/2478
|
||||
* Fix false positives by @soxoj in https://github.com/soxoj/maigret/pull/2499
|
||||
* build(deps): bump socid-extractor from 0.0.27 to 0.0.28 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2502
|
||||
* build(deps): bump lxml from 6.0.2 to 6.0.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2501
|
||||
* Disable Kinja.com site check by @Copilot in https://github.com/soxoj/maigret/pull/2503
|
||||
* Added 3 sites, fixed 6, disabled 8 by @soxoj in https://github.com/soxoj/maigret/pull/2505
|
||||
* Bump to 0.6.0 by @soxoj in https://github.com/soxoj/maigret/pull/2506
|
||||
* Update workflow to trigger on published releases by @soxoj in https://github.com/soxoj/maigret/pull/2508
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.5.0...v0.6.0
|
||||
|
||||
## [0.5.0] - 2025-08-10
|
||||
* Site Supression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/627
|
||||
* Bump yarl from 1.7.2 to 1.8.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/626
|
||||
|
||||
+1
-10
@@ -1,4 +1,4 @@
|
||||
FROM python:3.11-slim AS base
|
||||
FROM python:3.11-slim
|
||||
LABEL maintainer="Soxoj <soxoj@protonmail.com>"
|
||||
WORKDIR /app
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
@@ -15,13 +15,4 @@ COPY . .
|
||||
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install --no-cache-dir .
|
||||
# For production use, set FLASK_HOST to a specific IP address for security
|
||||
ENV FLASK_HOST=0.0.0.0
|
||||
|
||||
# Web UI variant: auto-launches the web interface on $PORT
|
||||
FROM base AS web
|
||||
ENV PORT=5000
|
||||
EXPOSE 5000
|
||||
ENTRYPOINT ["sh", "-c", "exec maigret --web \"$PORT\""]
|
||||
|
||||
# Default variant (last stage = `docker build .` target): CLI, backwards-compatible
|
||||
FROM base AS cli
|
||||
ENTRYPOINT ["maigret"]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Maigret
|
||||
|
||||
<div align="center">
|
||||
<div>
|
||||
<p align="center">
|
||||
<p align="center">
|
||||
<a href="https://pypi.org/project/maigret/">
|
||||
<img alt="PyPI version badge for Maigret" src="https://img.shields.io/pypi/v/maigret?style=flat-square" />
|
||||
</a>
|
||||
@@ -17,68 +17,158 @@
|
||||
<a href="https://github.com/soxoj/maigret">
|
||||
<img alt="View count for Maigret project" src="https://komarev.com/ghpvc/?username=maigret&color=brightgreen&label=views&style=flat-square" />
|
||||
</a>
|
||||
</div>
|
||||
<br>
|
||||
<div>
|
||||
<img src="https://raw.githubusercontent.com/soxoj/maigret/main/static/maigret.png" height="300" alt="Maigret logo"/>
|
||||
</div>
|
||||
<br>
|
||||
</div>
|
||||
</p>
|
||||
<p align="center">
|
||||
<img src="https://raw.githubusercontent.com/soxoj/maigret/main/static/maigret.png" height="300"/>
|
||||
</p>
|
||||
</p>
|
||||
|
||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required.
|
||||
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
||||
|
||||
## Contents
|
||||
<b>👉👉👉 [Online Telegram bot](https://t.me/maigret_search_bot) | 🏢 [Commercial use & API](#commercial-use)</b>
|
||||
|
||||
- [In one minute](#in-one-minute)
|
||||
- [Main features](#main-features)
|
||||
- [Demo](#demo)
|
||||
- [Installation](#installation)
|
||||
- [Usage](#usage)
|
||||
- [Contributing](#contributing)
|
||||
- [Commercial Use](#commercial-use)
|
||||
- [About](#about)
|
||||
## About
|
||||
|
||||
<a id="one-minute"></a>
|
||||
## In one minute
|
||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys are required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||
|
||||
Ensure you have Python 3.10 or higher.
|
||||
Currently supports more than 3000 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking Tor sites, I2P sites, and domains (via DNS resolving).
|
||||
|
||||
```bash
|
||||
pip install maigret
|
||||
maigret YOUR_USERNAME
|
||||
```
|
||||
## Powered By Maigret
|
||||
|
||||
No install? Try the [Telegram bot](https://t.me/maigret_search_bot) or a [Cloud Shell](#cloud-shells).
|
||||
|
||||
Want a web UI? See [how to launch it](#web-interface).
|
||||
|
||||
See also: [Quick start](https://maigret.readthedocs.io/en/latest/quick-start.html).
|
||||
|
||||
## Main features
|
||||
|
||||
- Supports 3,000+ sites ([see full list](https://github.com/soxoj/maigret/blob/main/sites.md)). A default run checks the 500 highest-ranked sites by traffic; pass `-a` to scan everything, or `--tags` to narrow by category/country.
|
||||
- Embeddable in Python projects — import `maigret` and run searches programmatically (see [library usage](https://maigret.readthedocs.io/en/latest/library-usage.html)).
|
||||
- [Extracts](https://github.com/soxoj/socid_extractor) all available information about the account owner from profile pages and site APIs, including links to other accounts.
|
||||
- Performs recursive search using discovered usernames and other IDs.
|
||||
- Allows filtering by tags (site categories, countries).
|
||||
- Detects and partially bypasses blocks, censorship, and CAPTCHA.
|
||||
- Fetches an [auto-updated site database](https://maigret.readthedocs.io/en/latest/settings.html#database-auto-update) from GitHub each run (once per 24 hours), and falls back to the built-in database if offline.
|
||||
- Works with Tor and I2P websites; able to check domains.
|
||||
- Ships with a [web interface](#web-interface) for browsing results as a graph and downloading reports in every format from a single page.
|
||||
|
||||
For the complete feature list, see the [features documentation](https://maigret.readthedocs.io/en/latest/features.html).
|
||||
|
||||
### Used by
|
||||
|
||||
Professional OSINT and social-media analysis tools built on Maigret:
|
||||
These are professional tools for social media content analysis and OSINT investigations that use Maigret (banners are clickable).
|
||||
|
||||
<a href="https://github.com/SocialLinks-IO/sociallinks-api"><img height="60" alt="Social Links API" src="https://github.com/user-attachments/assets/789747b2-d7a0-4d4e-8868-ffc4427df660"></a>
|
||||
<a href="https://sociallinks.io/products/sl-crimewall"><img height="60" alt="Social Links Crimewall" src="https://github.com/user-attachments/assets/0b18f06c-2f38-477b-b946-1be1a632a9d1"></a>
|
||||
<a href="https://usersearch.ai/"><img height="60" alt="UserSearch" src="https://github.com/user-attachments/assets/66daa213-cf7d-40cf-9267-42f97cf77580"></a>
|
||||
|
||||
## Demo
|
||||
## Main features
|
||||
|
||||
### Video
|
||||
* Profile page parsing, [extraction](https://github.com/soxoj/socid_extractor) of personal info, links to other profiles, etc.
|
||||
* Recursive search by new usernames and other IDs found
|
||||
* Search by tags (site categories, countries)
|
||||
* Censorship and captcha detection
|
||||
* Requests retries
|
||||
|
||||
See the full description of Maigret features [in the documentation](https://maigret.readthedocs.io/en/latest/features.html).
|
||||
|
||||
## Installation
|
||||
|
||||
‼️ Maigret is available online via [official Telegram bot](https://t.me/maigret_search_bot). Consider using it if you don't want to install anything.
|
||||
|
||||
### Windows
|
||||
|
||||
Standalone EXE-binaries for Windows are located in [Releases section](https://github.com/soxoj/maigret/releases) of GitHub repository.
|
||||
|
||||
Video guide on how to run it: https://youtu.be/qIgwTZOmMmM.
|
||||
|
||||
### Installation in Cloud Shells
|
||||
|
||||
You can launch Maigret using cloud shells and Jupyter notebooks. Press one of the buttons below and follow the instructions to launch it in your browser.
|
||||
|
||||
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
|
||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
||||
|
||||
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
||||
<a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
|
||||
|
||||
### Local installation
|
||||
|
||||
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
|
||||
|
||||
|
||||
**NOTE**: Python 3.10 or higher and pip is required, **Python 3.11 is recommended.**
|
||||
|
||||
```bash
|
||||
# install from pypi
|
||||
pip3 install maigret
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
```
|
||||
|
||||
### Cloning a repository
|
||||
|
||||
```bash
|
||||
# or clone and install manually
|
||||
git clone https://github.com/soxoj/maigret && cd maigret
|
||||
|
||||
# build and install
|
||||
pip3 install .
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
# official image
|
||||
docker pull soxoj/maigret
|
||||
|
||||
# usage
|
||||
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
|
||||
|
||||
# manual build
|
||||
docker build -t maigret .
|
||||
```
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
If you encounter build errors during installation, check the [troubleshooting guide](https://maigret.readthedocs.io/en/latest/installation.html#troubleshooting).
|
||||
|
||||
## Usage examples
|
||||
|
||||
```bash
|
||||
# make HTML, PDF, and Xmind8 reports
|
||||
maigret user --html
|
||||
maigret user --pdf
|
||||
maigret user --xmind #Output not compatible with xmind 2022+
|
||||
|
||||
# search on sites marked with tags photo & dating
|
||||
maigret user --tags photo,dating
|
||||
|
||||
# search on sites marked with tag us
|
||||
maigret user --tags us
|
||||
|
||||
# search for three usernames on all available sites
|
||||
maigret user1 user2 user3 -a
|
||||
```
|
||||
|
||||
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
||||
|
||||
### Web interface
|
||||
|
||||
You can run Maigret with a web interface, where you can view the graph with results and download reports of all formats on a single page.
|
||||
|
||||
<details>
|
||||
<summary>Web Interface Screenshots</summary>
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
</details>
|
||||
|
||||
Instructions:
|
||||
|
||||
1. Run Maigret with the ``--web`` flag and specify the port number.
|
||||
|
||||
```console
|
||||
maigret --web 5000
|
||||
```
|
||||
2. Open http://127.0.0.1:5000 in your browser and enter one or more usernames to make a search.
|
||||
|
||||
3. Wait a bit for the search to complete and view the graph with results, the table with all accounts found, and download reports of all formats.
|
||||
|
||||
## Contributing
|
||||
|
||||
Maigret has open-source code, so you may contribute your own sites by adding them to `data.json` file, or bring changes to it's code!
|
||||
|
||||
For more information about development and contribution, please read the [development documentation](https://maigret.readthedocs.io/en/latest/development.html).
|
||||
|
||||
## Demo with page parsing and recursive username search
|
||||
|
||||
### Video (asciinema)
|
||||
|
||||
<a href="https://asciinema.org/a/Ao0y7N0TTxpS0pisoprQJdylZ">
|
||||
<img src="https://asciinema.org/a/Ao0y7N0TTxpS0pisoprQJdylZ.svg" alt="asciicast" width="600">
|
||||
@@ -94,186 +184,37 @@ Professional OSINT and social-media analysis tools built on Maigret:
|
||||
|
||||
[Full console output](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.md)
|
||||
|
||||
## Installation
|
||||
## Disclaimer
|
||||
|
||||
Already ran the [In one minute](#one-minute) steps? You're set. Below are alternative methods.
|
||||
**This tool is intended for educational and lawful purposes only.** The developers do not endorse or encourage any illegal activities or misuse of this tool. Regulations regarding the collection and use of personal data vary by country and region, including but not limited to GDPR in the EU, CCPA in the USA, and similar laws worldwide.
|
||||
|
||||
Don't want to install anything? Use the [Telegram bot](https://t.me/maigret_search_bot).
|
||||
It is your sole responsibility to ensure that your use of this tool complies with all applicable laws and regulations in your jurisdiction. Any illegal use of this tool is strictly prohibited, and you are fully accountable for your actions.
|
||||
|
||||
### Windows
|
||||
The authors and developers of this tool bear no responsibility for any misuse or unlawful activities conducted by its users.
|
||||
|
||||
Download a standalone EXE from [Releases](https://github.com/soxoj/maigret/releases). Video guide: https://youtu.be/qIgwTZOmMmM.
|
||||
## Feedback
|
||||
|
||||
<a id="cloud-shells"></a>
|
||||
### Cloud Shells
|
||||
|
||||
Run Maigret in the browser via cloud shells or Jupyter notebooks:
|
||||
|
||||
<a href="https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=cloudshell-tutorial.md"><img src="https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png" alt="Open in Cloud Shell" height="50"></a>
|
||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
||||
|
||||
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
||||
<a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
|
||||
|
||||
### Local installation (pip)
|
||||
|
||||
```bash
|
||||
# install from pypi
|
||||
pip3 install maigret
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
```
|
||||
|
||||
### From source
|
||||
|
||||
```bash
|
||||
# or clone and install manually
|
||||
git clone https://github.com/soxoj/maigret && cd maigret
|
||||
|
||||
# build and install
|
||||
pip3 install .
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
Two image variants are published:
|
||||
|
||||
- `soxoj/maigret:latest` — CLI mode (default)
|
||||
- `soxoj/maigret:web` — auto-launches the [web interface](#web-interface)
|
||||
|
||||
```bash
|
||||
# official image (CLI)
|
||||
docker pull soxoj/maigret
|
||||
|
||||
# CLI usage
|
||||
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
|
||||
|
||||
# Web UI (open http://localhost:5000)
|
||||
docker run -p 5000:5000 soxoj/maigret:web
|
||||
|
||||
# Web UI on a custom port
|
||||
docker run -e PORT=8080 -p 8080:8080 soxoj/maigret:web
|
||||
|
||||
# manual build
|
||||
docker build -t maigret . # CLI image (default target)
|
||||
docker build --target web -t maigret-web . # Web UI image
|
||||
```
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
Build errors? See the [troubleshooting guide](https://maigret.readthedocs.io/en/latest/installation.html#troubleshooting).
|
||||
|
||||
## Usage
|
||||
|
||||
### Examples
|
||||
|
||||
```bash
|
||||
# make HTML, PDF, and Xmind8 reports
|
||||
maigret user --html
|
||||
maigret user --pdf
|
||||
maigret user --xmind #Output not compatible with xmind 2022+
|
||||
|
||||
# machine-readable exports
|
||||
maigret user --json ndjson # newline-delimited JSON (also: --json simple)
|
||||
maigret user --csv
|
||||
maigret user --txt
|
||||
maigret user --graph # interactive D3 graph (HTML)
|
||||
|
||||
# search on sites marked with tags photo & dating
|
||||
maigret user --tags photo,dating
|
||||
|
||||
# search on sites marked with tag us
|
||||
maigret user --tags us
|
||||
|
||||
# search for three usernames on all available sites
|
||||
maigret user1 user2 user3 -a
|
||||
```
|
||||
|
||||
Run `maigret --help` for all options. Docs: [CLI options](https://maigret.readthedocs.io/en/latest/command-line-options.html), [more examples](https://maigret.readthedocs.io/en/latest/usage-examples.html). Running into 403s or timeouts? See [TROUBLESHOOTING.md](TROUBLESHOOTING.md).
|
||||
|
||||
<a id="web-interface"></a>
|
||||
### Web interface
|
||||
|
||||
Maigret has a built-in web UI with a results graph and downloadable reports.
|
||||
|
||||
<details>
|
||||
<summary>Web Interface Screenshots</summary>
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
</details>
|
||||
|
||||
```console
|
||||
maigret --web 5000
|
||||
```
|
||||
|
||||
Open http://127.0.0.1:5000, enter a username, and view results.
|
||||
|
||||
### Python library
|
||||
|
||||
**Maigret can be embedded in your own Python projects.** The CLI is a thin wrapper around an async function you can call directly — build custom pipelines, feed results into your own tooling, or run it inside a larger OSINT workflow.
|
||||
|
||||
See the full [library usage guide](https://maigret.readthedocs.io/en/latest/library-usage.html) for a working example, async patterns, and how to filter sites by tag.
|
||||
|
||||
### Useful CLI flags
|
||||
|
||||
- `--parse URL` — parse a profile page, extract IDs/usernames, and use them to kick off a recursive search.
|
||||
- `--permute` — generate likely username variants from two or more inputs (e.g. `john doe` → `johndoe`, `j.doe`, …) and search for all of them.
|
||||
- `--self-check [--auto-disable]` — verify `usernameClaimed` / `usernameUnclaimed` pairs against live sites for maintainers auditing the database.
|
||||
|
||||
### Tor / I2P / proxies
|
||||
|
||||
Maigret can route checks through a proxy, Tor, or I2P — useful for `.onion` / `.i2p` sites and for bypassing WAFs that block datacenter IPs.
|
||||
|
||||
```bash
|
||||
# any HTTP/SOCKS proxy
|
||||
maigret user --proxy socks5://127.0.0.1:1080
|
||||
|
||||
# Tor (default gateway socks5://127.0.0.1:9050)
|
||||
maigret user --tor-proxy socks5://127.0.0.1:9050
|
||||
|
||||
# I2P (default gateway http://127.0.0.1:4444)
|
||||
maigret user --i2p-proxy http://127.0.0.1:4444
|
||||
```
|
||||
|
||||
Start your Tor / I2P daemon before running the command — Maigret does not manage these gateways.
|
||||
|
||||
## Contributing
|
||||
|
||||
Add or fix new sites surgically in `data.json` (no `json.load`/`json.dump`), then run `./utils/update_site_data.py` to regenerate `sites.md` and the database metadata, and open a pull request. For more details, see the [CONTRIBUTING guide](https://github.com/soxoj/maigret/blob/main/CONTRIBUTING.md) and [development docs](https://maigret.readthedocs.io/en/latest/development.html). Release history: [CHANGELOG.md](CHANGELOG.md).
|
||||
If you have any questions, suggestions, or feedback, please feel free to [open an issue](https://github.com/soxoj/maigret/issues), create a [GitHub discussion](https://github.com/soxoj/maigret/discussions), or contact the author directly via [Telegram](https://t.me/soxoj).
|
||||
|
||||
## Commercial Use
|
||||
|
||||
The open-source Maigret is MIT-licensed and free for commercial use without restriction — but site checks break over time and need active maintenance.
|
||||
If you need a **daily updated database** of supported sites or an **API for username checks**, feel free to reach out:
|
||||
|
||||
For serious commercial use — with a **daily-updated site database** or a **username-check API** — reach out: 📧 [maigret@soxoj.com](mailto:maigret@soxoj.com)
|
||||
📧 [maigret@soxoj.com](mailto:maigret@soxoj.com)
|
||||
|
||||
- Private site database — 5 000+ sites, updated daily (separate from the public open-source database)
|
||||
- Username check API — integrate Maigret into your product
|
||||
Available options:
|
||||
- Up-to-date site database - regularly maintained and updated list of 5K+ sites, delivered daily
|
||||
- Username check API - programmatic access to Maigret's search capabilities for integration into your products
|
||||
|
||||
## About
|
||||
## SOWEL classification
|
||||
|
||||
### Disclaimer
|
||||
|
||||
**For educational and lawful purposes only.** You are responsible for complying with all applicable laws (GDPR, CCPA, etc.) in your jurisdiction. The authors bear no responsibility for misuse.
|
||||
|
||||
### Feedback
|
||||
|
||||
[Open an issue](https://github.com/soxoj/maigret/issues) · [GitHub Discussions](https://github.com/soxoj/maigret/discussions) · [Telegram](https://t.me/soxoj)
|
||||
|
||||
### SOWEL classification
|
||||
|
||||
OSINT techniques used:
|
||||
This tool uses the following OSINT techniques:
|
||||
- [SOTL-2.2. Search For Accounts On Other Platforms](https://sowel.soxoj.com/other-platform-accounts)
|
||||
- [SOTL-6.1. Check Logins Reuse To Find Another Account](https://sowel.soxoj.com/logins-reuse)
|
||||
- [SOTL-6.2. Check Nicknames Reuse To Find Another Account](https://sowel.soxoj.com/nicknames-reuse)
|
||||
|
||||
### License
|
||||
## License
|
||||
|
||||
MIT © [Maigret](https://github.com/soxoj/maigret)
|
||||
MIT © [Maigret](https://github.com/soxoj/maigret)<br/>
|
||||
MIT © [Sherlock Project](https://github.com/sherlock-project/)<br/>
|
||||
Original Creator of Sherlock Project - [Siddharth Dushantha](https://github.com/sdushantha)
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
# Troubleshooting
|
||||
|
||||
Common issues when running Maigret and how to fix them. If none of this helps, [open an issue](https://github.com/soxoj/maigret/issues) with the output of `maigret --version` and the exact command you ran.
|
||||
|
||||
## "Lots of sites fail / timeout / return 403"
|
||||
|
||||
This is by far the most common report. It almost always comes from anti-bot protection (Cloudflare, DDoS-Guard, Akamai, etc.) or a slow network — not from a bug in Maigret.
|
||||
|
||||
**Results vary a lot depending on where you run from.** The same command on the same username can produce very different output on:
|
||||
|
||||
- **Mobile internet** (4G/5G) — usually the best results. Carrier NAT shares your IP with thousands of real users, so WAFs rarely block it.
|
||||
- **Home broadband** — generally good, though some ISPs are reputation-flagged.
|
||||
- **Hosting / cloud / VPS infrastructure** (AWS, GCP, DigitalOcean, Hetzner, etc.) — the worst case. Datacenter IP ranges are blanket-blocked or challenged by most WAFs, so you will see many false negatives and 403s.
|
||||
|
||||
If a run looks suspiciously empty, **try a different network before assuming Maigret is broken**: tether from your phone, switch between Wi-Fi and mobile, or move the run off a VPS onto a residential machine. Comparing results across two networks is also the fastest way to tell whether a missing account is genuinely missing or just blocked on the current IP.
|
||||
|
||||
Once you have a sense of the baseline, try these tweaks in order:
|
||||
|
||||
1. **Raise the timeout.** The default is 30 seconds. On mobile networks or for slow sites, bump it:
|
||||
```bash
|
||||
maigret user --timeout 60
|
||||
```
|
||||
2. **Retry failed checks.** Transient 5xx / timeouts often clear on a second try:
|
||||
```bash
|
||||
maigret user --retries 2
|
||||
```
|
||||
3. **Lower parallelism.** Some WAFs rate-limit aggressively. Maigret defaults to 100 concurrent connections (`-n` / `--max-connections`) — dropping this makes you look less like a scanner:
|
||||
```bash
|
||||
maigret user -n 20
|
||||
```
|
||||
4. **Route through a residential proxy.** Datacenter IPs (AWS, GCP, DigitalOcean) are blanket-blocked by many WAFs. A residential / mobile proxy usually fixes this:
|
||||
```bash
|
||||
maigret user --proxy http://user:pass@residential-proxy:port
|
||||
```
|
||||
Note: Tor (`--tor-proxy`) rarely helps here — most WAFs block Tor exit nodes just as aggressively as datacenter IPs. Use Tor only when you actually need to reach `.onion` sites (see below).
|
||||
|
||||
If specific sites *always* fail regardless of the above, they are likely broken in the database (stale markers, new WAF, site redesign). Report them with `--print-errors` output so a maintainer can look at the check config.
|
||||
|
||||
## "No results at all" / "maigret: command not found"
|
||||
|
||||
- **`command not found`** — `pip install maigret` put the binary under `~/.local/bin` (Linux/macOS) or `%APPDATA%\Python\Scripts` (Windows). Add that directory to `PATH`, or run `python3 -m maigret user` instead.
|
||||
- **Empty output** — check that you actually passed a username; `maigret` alone prints help. Also confirm Python 3.10+ with `python3 --version`.
|
||||
|
||||
## "SSL / certificate errors"
|
||||
|
||||
Usually caused by a corporate MITM proxy or an outdated `certifi` bundle.
|
||||
|
||||
```bash
|
||||
pip install --upgrade certifi
|
||||
```
|
||||
|
||||
If you are behind a corporate proxy, set `HTTPS_PROXY` / `HTTP_PROXY` environment variables and pass `--proxy "$HTTPS_PROXY"` so Maigret uses the same route.
|
||||
|
||||
## ".onion / .i2p sites are skipped"
|
||||
|
||||
These sites only load through the matching gateway. Start your Tor or I2P daemon first, then:
|
||||
|
||||
```bash
|
||||
# Tor
|
||||
maigret user --tor-proxy socks5://127.0.0.1:9050
|
||||
|
||||
# I2P
|
||||
maigret user --i2p-proxy http://127.0.0.1:4444
|
||||
```
|
||||
|
||||
Maigret does not launch or manage these daemons — they must already be running.
|
||||
|
||||
## "The PDF / XMind / HTML report looks wrong"
|
||||
|
||||
- **PDF** — requires `weasyprint` and its system dependencies (Pango, Cairo, GDK-PixBuf). On Debian/Ubuntu: `apt install libpango-1.0-0 libpangoft2-1.0-0`. macOS: `brew install pango`.
|
||||
- **XMind** — the `--xmind` flag generates **XMind 8** files. XMind 2022+ (Zen / XMind 2023) uses a different format and will not open them. Use XMind 8 or convert via `--html`.
|
||||
- **HTML** looks unstyled — open it through a local file path (`file:///...`), not via a preview pane that strips CSS.
|
||||
|
||||
## "The site database is out of date"
|
||||
|
||||
Maigret auto-fetches a fresh `data.json` from GitHub once every 24 hours. To force-refresh now:
|
||||
|
||||
```bash
|
||||
maigret user --force-update
|
||||
```
|
||||
|
||||
To run entirely against the local built-in copy (e.g. offline):
|
||||
|
||||
```bash
|
||||
maigret user --no-autoupdate
|
||||
```
|
||||
|
||||
## Still stuck?
|
||||
|
||||
- [Open an issue](https://github.com/soxoj/maigret/issues) — include your OS, Python version, Maigret version, and the full command.
|
||||
- Ask in [GitHub Discussions](https://github.com/soxoj/maigret/discussions) or the [Telegram](https://t.me/soxoj) channel.
|
||||
@@ -1,69 +0,0 @@
|
||||
# Maigret
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/soxoj/maigret/main/static/maigret.png" height="220" alt="Maigret logo"/>
|
||||
</div>
|
||||
|
||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required.
|
||||
|
||||
## Installation
|
||||
|
||||
Google Cloud Shell does not ship with all the system libraries Maigret needs (`libcairo2-dev`, `pkg-config`). The helper script below installs them and then builds Maigret from the cloned source.
|
||||
|
||||
Copy the command and run it in the Cloud Shell terminal:
|
||||
|
||||
```bash
|
||||
./utils/cloudshell_install.sh
|
||||
```
|
||||
|
||||
When the script finishes, verify the install:
|
||||
|
||||
```bash
|
||||
maigret --version
|
||||
```
|
||||
|
||||
## Usage examples
|
||||
|
||||
Run a basic search for a username. By default Maigret checks the **500 highest-ranked sites by traffic** — pass `-a` to scan the full 3,000+ database.
|
||||
|
||||
```bash
|
||||
maigret soxoj
|
||||
```
|
||||
|
||||
Search several usernames at once:
|
||||
|
||||
```bash
|
||||
maigret user1 user2 user3
|
||||
```
|
||||
|
||||
Narrow the run to sites related to cryptocurrency via the `crypto` tag (you can also use country tags):
|
||||
|
||||
```bash
|
||||
maigret vitalik.eth --tags crypto
|
||||
```
|
||||
|
||||
Generate reports in HTML, PDF, and XMind 8 formats:
|
||||
|
||||
```bash
|
||||
maigret soxoj --html
|
||||
maigret soxoj --pdf
|
||||
maigret soxoj --xmind
|
||||
```
|
||||
|
||||
Download a generated report from Cloud Shell to your local machine:
|
||||
|
||||
```bash
|
||||
cloudshell download reports/report_soxoj.pdf
|
||||
```
|
||||
|
||||
Tune reliability on flaky networks — raise the timeout and retry failed checks:
|
||||
|
||||
```bash
|
||||
maigret soxoj --timeout 60 --retries 2
|
||||
```
|
||||
|
||||
For the full list of options see `maigret --help` or the [CLI documentation](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
||||
|
||||
## Further reading
|
||||
|
||||
Full project documentation: [maigret.readthedocs.io](https://maigret.readthedocs.io/)
|
||||
@@ -96,7 +96,7 @@ You should make your git commits from your maigret git repo folder, or else the
|
||||
If you already know which site has a false-positive and want to fix it specifically, go to the next step.
|
||||
|
||||
Otherwise, simply run a search with a random username (e.g. `laiuhi3h4gi3u4hgt`) and check the results.
|
||||
Alternatively, you can use `the Telegram bot <https://t.me/maigret_search_bot>`_.
|
||||
Alternatively, you can use `the Telegram bot <https://t.me/osint_maigret_bot>`_.
|
||||
|
||||
2. Open the account link in your browser and check:
|
||||
|
||||
@@ -142,30 +142,13 @@ There are few options for sites data.json helpful in various cases:
|
||||
``protection`` (site protection tracking)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``protection`` field records what kind of anti-bot protection a site uses. Maigret reads this field and automatically applies the appropriate bypass mechanism where one exists.
|
||||
|
||||
Two categories of tag:
|
||||
|
||||
- **Load-bearing.** Maigret changes its HTTP client or headers based on the tag. Currently only ``tls_fingerprint`` (switches to ``curl_cffi`` with Chrome-class TLS).
|
||||
- **Documentation-only.** Maigret does **not** change behavior based on the tag; it records *why* the site is hard so a future solver can target the right set of sites without re-auditing.
|
||||
|
||||
Within the documentation-only tags, there is a further split that dictates whether the site is ``disabled: true``:
|
||||
|
||||
- ``ip_reputation`` is the **only** doc-tag that **keeps the site enabled**. It means "works for most users, fails from datacenter/cloud IPs." Disabling would silently hide a working site from anyone with a clean IP. The fix is **external** to Maigret (residential IP or ``--proxy``).
|
||||
- ``cf_js_challenge``, ``cf_firewall``, ``aws_waf_js_challenge``, ``ddos_guard_challenge``, ``custom_bot_protection``, ``js_challenge`` all pair with ``disabled: true``. They mean "does not work for anyone right now"; the tag identifies the provider so that when a bypass ships, every site with that tag can be re-enabled in one pass.
|
||||
The ``protection`` field records what kind of anti-bot protection a site uses. Maigret reads this field and automatically applies the appropriate bypass mechanism.
|
||||
|
||||
Supported values:
|
||||
|
||||
- ``tls_fingerprint`` *(load-bearing; site stays enabled)* — the site fingerprints the TLS handshake (JA3/JA4) and blocks non-browser clients. Maigret automatically uses ``curl_cffi`` with Chrome browser emulation to bypass this. Requires the ``curl_cffi`` package (included as a dependency). Examples: Instagram, NPM, Codepen, Kickstarter, Letterboxd.
|
||||
- ``ip_reputation`` *(documentation-only; site stays enabled)* — the site blocks requests from datacenter/cloud IPs regardless of headers or TLS. Cannot be bypassed automatically; run Maigret from a regular internet connection (not a datacenter) or use a proxy (``--proxy``). The site is **not** marked ``disabled`` because it continues to work for users on residential IPs. Examples: Reddit, Patreon, Figma, OnlyFans.
|
||||
- ``cf_js_challenge`` *(documentation-only; pair with ``disabled: true``)* — Cloudflare Managed Challenge / Turnstile JS challenge. Symptom: HTTP 403 with ``cf-mitigated: challenge`` header; body contains ``challenges.cloudflare.com``, ``_cf_chl_opt``, ``window._cf_chl``, or "Just a moment". Not bypassable via ``curl_cffi`` TLS impersonation (verified across Chrome 123/124/131, Safari 17/18, Firefox 133/135, Edge 101 — all return the same 403 challenge page); a real browser executing the challenge JS is required to obtain the clearance cookie. Sites stay ``disabled: true`` until a CF-challenge solver is integrated. Examples: DMOJ, Elakiri, Fanlore, Bdoutdoors, TheStudentRoom, forum.hr.
|
||||
- ``cf_firewall`` *(documentation-only; pair with ``disabled: true``)* — Cloudflare firewall rule / bot score block (WAF action=block, **not** action=challenge). Symptom: HTTP 403 served by Cloudflare (``server: cloudflare``, ``cf-ray`` header) **without** JS-challenge markers — body typically shows "Access denied", "Attention Required", or just a bare 1015/1016/1020 error page. Unlike ``ip_reputation``, residential IPs are **not** sufficient to bypass — Cloudflare decides based on a composite of bot score, TLS fingerprint, UA, ASN, and custom site-owner rules, so ``curl_cffi`` Chrome impersonation from a residential line still returns 403. Sites stay ``disabled: true`` until a per-site bypass (cookies, real browser, or residential+clean session) is found. Examples: Fark, Fodors, Huntingnet, Hunttalk.
|
||||
- ``aws_waf_js_challenge`` *(documentation-only; pair with ``disabled: true``)* — the site is protected by AWS WAF with a JavaScript challenge. Symptom: HTTP 202 with empty body and ``x-amzn-waf-action: challenge`` header (a token-granting challenge that requires executing the CAPTCHA/challenge JS bundle). Neither ``curl_cffi`` TLS impersonation nor User-Agent changes bypass this — a real browser or the official AWS WAF challenge-solver SDK is required. Sites stay ``disabled: true`` until a solver is integrated. Example: Dreamwidth.
|
||||
- ``ddos_guard_challenge`` *(documentation-only; pair with ``disabled: true``)* — DDoS-Guard (ddos-guard.net) anti-bot page. Symptom: HTTP 403 with ``server: ddos-guard`` header; body contains "DDoS-Guard". DDoS-Guard fingerprints different UAs per source IP, so a single User-Agent override does not work across environments; a JS-capable bypass or DDoS-Guard-aware solver is required. Sites stay ``disabled: true`` until a solver is integrated. Example: ForumHouse.
|
||||
- ``js_challenge`` *(documentation-only; pair with ``disabled: true``)* — **fallback** for JavaScript-challenge systems whose provider cannot be identified (custom in-house challenge pages that are not Cloudflare, AWS WAF, or any other recognized vendor). Prefer a provider-specific tag whenever the provider can be pinned down from response headers or body signatures.
|
||||
- ``custom_bot_protection`` *(documentation-only; pair with ``disabled: true``)* — **fallback** for non-JS-challenge bot protection served by a custom/in-house system (not Cloudflare, not AWS WAF, not DDoS-Guard). Typical symptom: HTTP 403 from the site's own origin server (``server: nginx``, AWS ELB, etc.) with a branded block page, returned regardless of TLS fingerprint or residential IP. Not generically bypassable; investigate per site (cookies, session, proxy geography). Examples: Hackerearth ("HackerEarth Guardian"), FreelanceJob (nginx-level block).
|
||||
|
||||
**Rule: prefer provider-specific protection tags.** When a site is blocked by an identifiable anti-bot vendor, always record the vendor in the tag (``cf_js_challenge``, ``cf_firewall``, ``aws_waf_js_challenge``, ``ddos_guard_challenge``, and future additions such as ``sucuri_challenge``, ``incapsula_challenge``). The generic ``js_challenge`` and ``custom_bot_protection`` tags are reserved for custom/unknown systems. Rationale: bypass solvers are inherently provider-specific (a Cloudflare Turnstile solver does not help with AWS WAF); recording the provider in advance lets us fan out fixes the moment a per-provider solver is added, without re-auditing every disabled site. The same principle applies to other protection categories when the provider is identifiable.
|
||||
- ``tls_fingerprint`` — the site fingerprints the TLS handshake (JA3/JA4) and blocks non-browser clients. Maigret automatically uses ``curl_cffi`` with Chrome browser emulation to bypass this. Requires the ``curl_cffi`` package (included as a dependency). Examples: Instagram, NPM, Codepen, Kickstarter, Letterboxd.
|
||||
- ``ip_reputation`` — the site blocks requests from datacenter/cloud IPs regardless of headers or TLS. Cannot be bypassed automatically; run Maigret from a regular internet connection (not a datacenter) or use a proxy (``--proxy``). Examples: Reddit, Patreon, Figma.
|
||||
- ``js_challenge`` — the site serves a JavaScript challenge page (e.g. "Just a moment...") that cannot be solved without a browser. Maigret detects challenge signatures and returns UNKNOWN instead of a false positive.
|
||||
|
||||
Example:
|
||||
|
||||
|
||||
@@ -29,7 +29,6 @@ You may be interested in:
|
||||
- :doc:`Usage examples <usage-examples>`
|
||||
- :doc:`Command line options <command-line-options>`
|
||||
- :doc:`Features list <features>`
|
||||
- :doc:`Library usage <library-usage>`
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
@@ -40,15 +39,8 @@ You may be interested in:
|
||||
usage-examples
|
||||
command-line-options
|
||||
features
|
||||
library-usage
|
||||
philosophy
|
||||
supported-identifier-types
|
||||
tags
|
||||
settings
|
||||
development
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:caption: Use cases
|
||||
|
||||
use-cases/crypto
|
||||
|
||||
@@ -4,7 +4,7 @@ Installation
|
||||
============
|
||||
|
||||
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
|
||||
Also, it is available online via `official Telegram bot <https://t.me/maigret_search_bot>`_,
|
||||
Also, it is available online via `official Telegram bot <https://t.me/osint_maigret_bot>`_,
|
||||
source code of a bot is `available on GitHub <https://github.com/soxoj/maigret-tg-bot>`_.
|
||||
|
||||
Windows Standalone EXE-binaries
|
||||
@@ -45,7 +45,8 @@ Press one of the buttons below and follow the instructions to launch it in your
|
||||
Local installation from PyPi
|
||||
----------------------------
|
||||
|
||||
Maigret ships with a bundled site database. After installation from PyPI (or any other method), it can **automatically fetch a newer compatible database from GitHub** when you run it—see :ref:`database-auto-update` in :doc:`settings`.
|
||||
Please note that the sites database in the PyPI package may be outdated.
|
||||
If you encounter frequent false positive results, we recommend installing the latest development version from GitHub instead.
|
||||
|
||||
.. note::
|
||||
Python 3.10 or higher and pip is required, **Python 3.11 is recommended.**
|
||||
@@ -55,6 +56,9 @@ Maigret ships with a bundled site database. After installation from PyPI (or any
|
||||
# install from pypi
|
||||
pip3 install maigret
|
||||
|
||||
# install with PDF report support
|
||||
pip3 install maigret[pdf]
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
.. _library-usage:
|
||||
|
||||
Library usage
|
||||
=============
|
||||
|
||||
Maigret's CLI is a thin wrapper around an async Python API. You can embed Maigret in your own tools, pipelines, and OSINT workflows — no need to shell out.
|
||||
|
||||
This page covers the common patterns. For the full argument list of the underlying function, see ``maigret.checking.maigret`` in the source.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install maigret
|
||||
|
||||
Minimal example
|
||||
---------------
|
||||
|
||||
A working end-to-end search against the top 500 sites:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from maigret import search as maigret_search
|
||||
from maigret.sites import MaigretDatabase
|
||||
|
||||
# Load the bundled site database
|
||||
db = MaigretDatabase().load_from_path(
|
||||
"maigret/resources/data.json"
|
||||
)
|
||||
|
||||
# Pick which sites to scan (same filtering the CLI uses)
|
||||
sites = db.ranked_sites_dict(top=500)
|
||||
|
||||
results = asyncio.run(
|
||||
maigret_search(
|
||||
username="soxoj",
|
||||
site_dict=sites,
|
||||
logger=logging.getLogger("maigret"),
|
||||
timeout=30,
|
||||
is_parsing_enabled=True,
|
||||
)
|
||||
)
|
||||
|
||||
for site_name, result in results.items():
|
||||
if result["status"].is_found():
|
||||
print(site_name, result["url_user"])
|
||||
|
||||
Key points:
|
||||
|
||||
- ``maigret_search`` is an ``async`` function — wrap it with ``asyncio.run(...)`` or ``await`` it from inside your own event loop.
|
||||
- ``is_parsing_enabled=True`` turns on ``socid_extractor`` so ``result["ids_data"]`` is populated with profile fields (bio, linked accounts, uids, etc.).
|
||||
- Each entry in the returned dict has a ``"status"`` object with ``is_found()``, plus ``url_user``, ``http_status``, ``rank``, ``ids_data``, and more.
|
||||
|
||||
Filtering sites
|
||||
---------------
|
||||
|
||||
``ranked_sites_dict`` accepts the same filters as the CLI:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# All sites tagged as coding, top 200 by rank
|
||||
sites = db.ranked_sites_dict(top=200, tags=["coding"])
|
||||
|
||||
# Exclude NSFW and dating sites
|
||||
sites = db.ranked_sites_dict(excluded_tags=["nsfw", "dating"])
|
||||
|
||||
# Only specific sites by name
|
||||
sites = db.ranked_sites_dict(names=["GitHub", "Reddit", "VK"])
|
||||
|
||||
# Include disabled sites (useful for maintenance / self-check)
|
||||
sites = db.ranked_sites_dict(disabled=True)
|
||||
|
||||
Running inside an existing event loop
|
||||
-------------------------------------
|
||||
|
||||
If your application already runs an asyncio loop (FastAPI, aiohttp server, a Discord bot, etc.), ``await`` ``maigret_search`` directly instead of calling ``asyncio.run``:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
async def check_username(username: str) -> dict:
|
||||
results = await maigret_search(
|
||||
username=username,
|
||||
site_dict=sites,
|
||||
logger=logger,
|
||||
timeout=30,
|
||||
)
|
||||
return {
|
||||
name: r["url_user"]
|
||||
for name, r in results.items()
|
||||
if r["status"].is_found()
|
||||
}
|
||||
|
||||
Routing through a proxy
|
||||
-----------------------
|
||||
|
||||
The same proxy / Tor / I2P flags the CLI exposes are plain keyword arguments:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
results = await maigret_search(
|
||||
username="soxoj",
|
||||
site_dict=sites,
|
||||
logger=logger,
|
||||
proxy="socks5://127.0.0.1:1080",
|
||||
tor_proxy="socks5://127.0.0.1:9050", # used for .onion sites
|
||||
i2p_proxy="http://127.0.0.1:4444", # used for .i2p sites
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
Full function signature
|
||||
-----------------------
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
async def maigret(
|
||||
username: str,
|
||||
site_dict: Dict[str, MaigretSite],
|
||||
logger,
|
||||
query_notify=None,
|
||||
proxy=None,
|
||||
tor_proxy=None,
|
||||
i2p_proxy=None,
|
||||
timeout=30,
|
||||
is_parsing_enabled=False,
|
||||
id_type="username",
|
||||
debug=False,
|
||||
forced=False,
|
||||
max_connections=100,
|
||||
no_progressbar=False,
|
||||
cookies=None,
|
||||
retries=0,
|
||||
check_domains=False,
|
||||
) -> QueryResultWrapper
|
||||
|
||||
See :doc:`command-line-options` for a description of each option — the semantics match the CLI flags one-to-one.
|
||||
@@ -3,10 +3,6 @@
|
||||
Philosophy
|
||||
==========
|
||||
|
||||
*The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon.
|
||||
His investigation method is based on understanding the personality of different people and their
|
||||
interactions.*
|
||||
|
||||
TL;DR: Username => Dossier
|
||||
|
||||
Maigret is designed to gather all the available information about person by his username.
|
||||
@@ -19,23 +15,3 @@ All this information forms some dossier, but it also useful for other tools and
|
||||
Each collected piece of data has a label of a certain format (for example, ``follower_count`` for the number
|
||||
of subscribers or ``created_at`` for account creation time) so that it can be parsed and analyzed by various
|
||||
systems and stored in databases.
|
||||
|
||||
Origins
|
||||
-------
|
||||
|
||||
Maigret started from studying what OSINT investigators actually use in practice — and from
|
||||
the realization that many popular tools do not deliver real investigative value. The original
|
||||
research behind this observation is summarized in the article
|
||||
`What's wrong with namecheckers <https://soxoj.medium.com/whats-wrong-with-namecheckers-981e5cba600e>`_.
|
||||
For a broader landscape of username-checking tools, see the curated
|
||||
`OSINT namecheckers list <https://github.com/soxoj/osint-namecheckers-list>`_.
|
||||
|
||||
Two ideas grew out of that research:
|
||||
|
||||
- `socid-extractor <https://github.com/soxoj/socid-extractor>`_ — a library focused on pulling
|
||||
structured identity data (user IDs, full names, linked accounts, bios, timestamps, etc.) out of
|
||||
account pages and public API responses, so that finding an account is not the end of the pipeline.
|
||||
- **Maigret** itself — which started as a fork of
|
||||
`Sherlock <https://github.com/sherlock-project/sherlock>`_ but has long since outgrown the
|
||||
original project in coverage, extraction depth, and check reliability. Today Maigret is used
|
||||
as a component by major OSINT vendors in their commercial products.
|
||||
|
||||
@@ -1,147 +0,0 @@
|
||||
.. _use-case-crypto:
|
||||
|
||||
Cryptocurrency & Web3 Investigations
|
||||
=====================================
|
||||
|
||||
Blockchain transactions are public, but the people behind wallets are not. Maigret helps bridge this gap by finding Web3 accounts tied to a username, revealing the person behind a pseudonymous crypto persona.
|
||||
|
||||
Why it matters
|
||||
--------------
|
||||
|
||||
Crypto investigations often start with a wallet address or an ENS name but hit a wall — the blockchain tells you *what* happened, not *who* did it. A username, however, is reused across platforms. If someone trades on OpenSea as ``zachxbt`` and posts on Warpcast as ``zachxbt``, Maigret connects the dots and builds a full profile.
|
||||
|
||||
Common scenarios:
|
||||
|
||||
- **Scam attribution.** A rug-pull promoter uses the same alias on Fragment (Telegram username marketplace), OpenSea, and a personal blog.
|
||||
- **Sanctions compliance.** Verifying whether a counterparty's online footprint matches known sanctioned individuals.
|
||||
- **Due diligence.** Before an OTC deal or DAO vote, checking whether the other party has a consistent online presence or is a freshly created sockpuppet.
|
||||
- **Stolen funds tracing.** A stolen NFT appears on OpenSea under a new account — but the username matches a Warpcast profile with real-world links.
|
||||
|
||||
Supported sites
|
||||
---------------
|
||||
|
||||
Maigret currently checks the following crypto and Web3 platforms:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
:widths: 20 40 40
|
||||
|
||||
* - Site
|
||||
- What it reveals
|
||||
- Notes
|
||||
* - **OpenSea**
|
||||
- NFT collections, trading history, profile bio, linked website
|
||||
-
|
||||
* - **Rarible**
|
||||
- NFT marketplace profile, collections, listing history
|
||||
- Complements OpenSea for NFT attribution across marketplaces
|
||||
* - **Zora**
|
||||
- Zora Network profile, minted NFTs, creator activity
|
||||
- Ethereum L2 creator platform; useful for on-chain art attribution
|
||||
* - **Polymarket**
|
||||
- Prediction-market profile, positions, public portfolio P&L
|
||||
- Useful for political/financial prediction attribution
|
||||
* - **Warpcast** (Farcaster)
|
||||
- Decentralized social profile, posts, follower graph, Farcaster ID
|
||||
- Every Farcaster ID maps to an Ethereum address via the on-chain ID registry
|
||||
* - **Fragment**
|
||||
- Telegram username ownership, TON wallet address, purchase date and price
|
||||
- Valuable for linking Telegram identities to TON wallets
|
||||
* - **Paragraph**
|
||||
- Web3 blog/newsletter, ETH wallet address, linked Twitter handle
|
||||
- Richest cross-platform data among crypto sites
|
||||
* - **Tonometerbot**
|
||||
- TON wallet balance, subscriber count, NFT collection, rankings
|
||||
- TON blockchain analytics
|
||||
* - **Spatial**
|
||||
- Metaverse profile, linked social accounts (Discord, Twitter, Instagram, LinkedIn, TikTok)
|
||||
- Rich cross-platform links
|
||||
* - **Revolut.me**
|
||||
- Payment handle: first/last name, country code, base currency, supported payment methods
|
||||
- Not strictly Web3, but widely used by crypto OTC traders for fiat off-ramps; the public API returns structured KYC-adjacent data
|
||||
|
||||
Real-world example: zachxbt
|
||||
---------------------------
|
||||
|
||||
`ZachXBT <https://twitter.com/zachxbt>`_ is a well-known on-chain investigator. Let's see what Maigret can find from just the username ``zachxbt``:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret zachxbt --tags crypto
|
||||
|
||||
Maigret finds 5 accounts and automatically extracts structured data from each:
|
||||
|
||||
**Fragment** — confirms the Telegram username ``@zachxbt`` is claimed, reveals the TON wallet address (``EQBisZrk...``), purchase price (10 TON), and date (January 2023).
|
||||
|
||||
**Paragraph** — the richest result. Returns the real name used on the platform (``ZachXBT``), bio (``Scam survivor turned 2D investigator``), an Ethereum wallet address (``0x23dBf066...``), and a linked Twitter handle (``zachxbt``). The ``wallet_address`` field is especially valuable — it directly links the pseudonym to an on-chain identity.
|
||||
|
||||
**Warpcast** — Farcaster profile with a Farcaster ID (``fid: 20931``), profile image, and social graph (33K followers). Every Farcaster ID is tied to an Ethereum address via the on-chain ID registry, so this is another on-chain anchor.
|
||||
|
||||
**OpenSea** — NFT marketplace profile with bio (``On-chain sleuth | 10x rug pull survivor``), avatar (hosted on ``seadn.io`` with an Ethereum address in the URL path), and a link to an external investigations page.
|
||||
|
||||
**Hive Blog** — blockchain-based blog account created in March 2025. Low activity (1 post), but confirms the username is claimed across blockchain ecosystems.
|
||||
|
||||
From a single username, Maigret produces:
|
||||
|
||||
- **2 wallet addresses** — one TON (from Fragment), one Ethereum (from Paragraph)
|
||||
- **1 confirmed Twitter handle** — ``zachxbt`` (from Paragraph)
|
||||
- **1 Telegram username** — ``@zachxbt`` (from Fragment)
|
||||
- **1 external link** — ``investigations.notion.site`` (from OpenSea)
|
||||
- **Social graph data** — 33K Farcaster followers, blog activity timestamps
|
||||
|
||||
This is enough to pivot into blockchain analysis tools (Etherscan, Arkham, Nansen) using the wallet addresses, or into social media analysis using the Twitter handle.
|
||||
|
||||
Workflow: from username to wallet
|
||||
---------------------------------
|
||||
|
||||
**Step 1: Search crypto platforms**
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret <username> --tags crypto -v
|
||||
|
||||
Review the results. Pay attention to:
|
||||
|
||||
- **Fragment** — if the username is claimed, you get a TON wallet address directly.
|
||||
- **Paragraph** — blog profiles often contain an ETH address and a Twitter handle.
|
||||
- **Warpcast** — Farcaster IDs map to Ethereum addresses via the on-chain registry.
|
||||
- **OpenSea** — avatar URLs sometimes contain wallet addresses in the path.
|
||||
|
||||
**Step 2: Expand with extracted identifiers**
|
||||
|
||||
Maigret automatically extracts additional identifiers from found profiles (real names, linked accounts, profile URLs) and recursively searches for them. This is enabled by default. If Maigret finds a linked Twitter handle on a Paragraph profile, it will automatically search for that handle across all sites.
|
||||
|
||||
**Step 3: Cross-reference with non-crypto platforms**
|
||||
|
||||
The real power is connecting crypto personas to mainstream accounts. Drop the tag filter:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret <username> -a
|
||||
|
||||
This checks all 3000+ sites. A match on GitHub, Reddit, or a forum can reveal the person behind the wallet.
|
||||
|
||||
Workflow: from wallet to identity
|
||||
---------------------------------
|
||||
|
||||
If you start with a wallet address rather than a username, you can use complementary tools to get a username first:
|
||||
|
||||
1. **ENS / Unstoppable Domains** — resolve the wallet address to a human-readable name (``vitalik.eth``). Then search that name in Maigret.
|
||||
2. **Etherscan labels** — check if the address has a public label (exchange, known entity).
|
||||
3. **Fragment** — search the TON wallet address to find which Telegram usernames it purchased.
|
||||
4. **Arkham Intelligence / Nansen** — blockchain attribution platforms that may tag the address with a known identity.
|
||||
|
||||
Once you have a username candidate, feed it to Maigret.
|
||||
|
||||
Tips
|
||||
----
|
||||
|
||||
- **Username reuse is the #1 signal.** Crypto-native users often reuse their ENS name (``alice.eth``) or a variation (``alice_eth``, ``aliceeth``) across platforms. Try all variations.
|
||||
- **Fragment is uniquely valuable** because it directly links Telegram usernames to TON wallet addresses — a rare on-chain / off-chain bridge.
|
||||
- **Warpcast profiles are Ethereum-native.** Every Farcaster account is tied to an Ethereum address via the ID registry contract. If you find a Warpcast profile, you implicitly have a wallet address.
|
||||
- **Paragraph often has the richest data** — wallet address, Twitter handle, bio, and activity timestamps in a single API response.
|
||||
- **Use** ``--exclude-tags`` **to skip irrelevant sites** when you're focused on crypto:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret alice_eth --exclude-tags porn,dating,forum
|
||||
@@ -1,3 +1,3 @@
|
||||
"""Maigret version file"""
|
||||
|
||||
__version__ = '0.6.0'
|
||||
__version__ = '0.5.0'
|
||||
|
||||
+3
-54
@@ -7,7 +7,7 @@ from aiohttp import CookieJar
|
||||
|
||||
class ParsingActivator:
|
||||
@staticmethod
|
||||
def twitter(site, logger, cookies={}, **kwargs):
|
||||
def twitter(site, logger, cookies={}):
|
||||
headers = dict(site.headers)
|
||||
del headers["x-guest-token"]
|
||||
import requests
|
||||
@@ -19,7 +19,7 @@ class ParsingActivator:
|
||||
site.headers["x-guest-token"] = guest_token
|
||||
|
||||
@staticmethod
|
||||
def vimeo(site, logger, cookies={}, **kwargs):
|
||||
def vimeo(site, logger, cookies={}):
|
||||
headers = dict(site.headers)
|
||||
if "Authorization" in headers:
|
||||
del headers["Authorization"]
|
||||
@@ -31,58 +31,7 @@ class ParsingActivator:
|
||||
site.headers["Authorization"] = "jwt " + jwt_token
|
||||
|
||||
@staticmethod
|
||||
def onlyfans(site, logger, url=None, **kwargs):
|
||||
# Signing rules (static_param / checksum_indexes / checksum_constant / format / app_token)
|
||||
# live in data.json under OnlyFans.activation and rotate upstream every ~1–3 weeks.
|
||||
# If "Please refresh the page" keeps firing after activation, refresh them from:
|
||||
# https://raw.githubusercontent.com/DATAHOARDERS/dynamic-rules/main/onlyfans.json
|
||||
import hashlib
|
||||
import secrets
|
||||
import time as _time
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
act = site.activation
|
||||
static_param = act["static_param"]
|
||||
indexes = act["checksum_indexes"]
|
||||
constant = act["checksum_constant"]
|
||||
fmt = act["format"]
|
||||
init_url = act["url"]
|
||||
|
||||
user_id = site.headers.get("user-id", "0") or "0"
|
||||
|
||||
def _sign(path):
|
||||
t = str(int(_time.time() * 1000))
|
||||
msg = "\n".join([static_param, t, path, user_id]).encode()
|
||||
sha = hashlib.sha1(msg).hexdigest()
|
||||
cs = sum(ord(sha[i]) for i in indexes) + constant
|
||||
return t, fmt.format(sha, abs(cs))
|
||||
|
||||
if site.headers.get("x-bc", "").strip("0") == "":
|
||||
site.headers["x-bc"] = secrets.token_hex(20)
|
||||
|
||||
if not site.headers.get("cookie"):
|
||||
init_path = urlparse(init_url).path
|
||||
t, sg = _sign(init_path)
|
||||
hdrs = dict(site.headers)
|
||||
hdrs["time"] = t
|
||||
hdrs["sign"] = sg
|
||||
hdrs.pop("cookie", None)
|
||||
r = requests.get(init_url, headers=hdrs, timeout=15)
|
||||
jar = "; ".join(f"{k}={v}" for k, v in r.cookies.items())
|
||||
if jar:
|
||||
site.headers["cookie"] = jar
|
||||
logger.debug(f"OnlyFans init: got cookies {list(r.cookies.keys())}")
|
||||
|
||||
target_path = urlparse(url).path if url else urlparse(init_url).path
|
||||
t, sg = _sign(target_path)
|
||||
site.headers["time"] = t
|
||||
site.headers["sign"] = sg
|
||||
logger.debug(f"OnlyFans signed {target_path} time={t}")
|
||||
|
||||
@staticmethod
|
||||
def weibo(site, logger, **kwargs):
|
||||
def weibo(site, logger):
|
||||
headers = dict(site.headers)
|
||||
import requests
|
||||
|
||||
|
||||
-158
@@ -1,158 +0,0 @@
|
||||
"""Maigret AI Analysis Module
|
||||
|
||||
Provides AI-powered analysis of search results using OpenAI-compatible APIs.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import aiohttp
|
||||
|
||||
|
||||
def load_ai_prompt() -> str:
|
||||
"""Load the AI system prompt from the resources directory."""
|
||||
maigret_path = os.path.dirname(os.path.realpath(__file__))
|
||||
prompt_path = os.path.join(maigret_path, "resources", "ai_prompt.txt")
|
||||
with open(prompt_path, "r", encoding="utf-8") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
def resolve_api_key(settings) -> str | None:
|
||||
"""Resolve OpenAI API key from settings or environment variable.
|
||||
|
||||
Priority: settings.openai_api_key > OPENAI_API_KEY env var.
|
||||
"""
|
||||
key = getattr(settings, "openai_api_key", None)
|
||||
if key:
|
||||
return key
|
||||
return os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
class _Spinner:
|
||||
"""Simple animated spinner for terminal output."""
|
||||
|
||||
FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
|
||||
|
||||
def __init__(self, text=""):
|
||||
self.text = text
|
||||
self._stop = threading.Event()
|
||||
self._thread = None
|
||||
|
||||
def start(self):
|
||||
self._thread = threading.Thread(target=self._spin, daemon=True)
|
||||
self._thread.start()
|
||||
|
||||
def _spin(self):
|
||||
i = 0
|
||||
while not self._stop.is_set():
|
||||
frame = self.FRAMES[i % len(self.FRAMES)]
|
||||
sys.stderr.write(f"\r{frame} {self.text}")
|
||||
sys.stderr.flush()
|
||||
i += 1
|
||||
self._stop.wait(0.08)
|
||||
|
||||
def stop(self):
|
||||
self._stop.set()
|
||||
if self._thread:
|
||||
self._thread.join()
|
||||
sys.stderr.write("\r\033[2K")
|
||||
sys.stderr.flush()
|
||||
|
||||
|
||||
async def print_streaming(text: str, delay: float = 0.04):
|
||||
"""Print text word by word with a delay, simulating streaming LLM output."""
|
||||
words = text.split(" ")
|
||||
for i, word in enumerate(words):
|
||||
if i > 0:
|
||||
sys.stdout.write(" ")
|
||||
sys.stdout.write(word)
|
||||
sys.stdout.flush()
|
||||
await asyncio.sleep(delay)
|
||||
sys.stdout.write("\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
async def get_ai_analysis(
|
||||
api_key: str,
|
||||
markdown_report: str,
|
||||
model: str = "gpt-4o",
|
||||
api_base_url: str = "https://api.openai.com/v1",
|
||||
) -> str:
|
||||
"""Send the markdown report to an OpenAI-compatible API and return the analysis.
|
||||
|
||||
Uses streaming to display tokens as they arrive.
|
||||
Raises on HTTP errors with descriptive messages.
|
||||
"""
|
||||
system_prompt = load_ai_prompt()
|
||||
|
||||
url = f"{api_base_url.rstrip('/')}/chat/completions"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
payload = {
|
||||
"model": model,
|
||||
"stream": True,
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": markdown_report},
|
||||
],
|
||||
}
|
||||
|
||||
spinner = _Spinner("Analysing the data with AI...")
|
||||
spinner.start()
|
||||
first_token = True
|
||||
full_response = []
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(url, json=payload, headers=headers) as resp:
|
||||
if resp.status == 401:
|
||||
raise RuntimeError("Invalid OpenAI API key (HTTP 401)")
|
||||
if resp.status == 429:
|
||||
raise RuntimeError("OpenAI API rate limit exceeded (HTTP 429)")
|
||||
if resp.status != 200:
|
||||
body = await resp.text()
|
||||
raise RuntimeError(
|
||||
f"OpenAI API error (HTTP {resp.status}): {body[:500]}"
|
||||
)
|
||||
|
||||
async for line in resp.content:
|
||||
decoded = line.decode("utf-8").strip()
|
||||
if not decoded or not decoded.startswith("data: "):
|
||||
continue
|
||||
|
||||
data_str = decoded[len("data: "):]
|
||||
if data_str == "[DONE]":
|
||||
break
|
||||
|
||||
try:
|
||||
chunk = json.loads(data_str)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
||||
content = delta.get("content", "")
|
||||
if not content:
|
||||
continue
|
||||
|
||||
if first_token:
|
||||
spinner.stop()
|
||||
print()
|
||||
first_token = False
|
||||
|
||||
sys.stdout.write(content)
|
||||
sys.stdout.flush()
|
||||
except Exception:
|
||||
spinner.stop()
|
||||
raise
|
||||
|
||||
if first_token:
|
||||
# No tokens received — stop spinner anyway
|
||||
spinner.stop()
|
||||
|
||||
print()
|
||||
return "".join(full_response)
|
||||
+4
-8
@@ -61,6 +61,8 @@ class SimpleAiohttpChecker(CheckerBase):
|
||||
self.headers = None
|
||||
self.allow_redirects = True
|
||||
self.timeout = 0
|
||||
self.allow_redirects = True
|
||||
self.timeout = 0
|
||||
self.method = 'get'
|
||||
self.payload = None
|
||||
|
||||
@@ -345,11 +347,7 @@ def process_site_result(
|
||||
username = results_info["username"]
|
||||
is_parsing_enabled = results_info["parsing_enabled"]
|
||||
url = results_info.get("url_user")
|
||||
url_probe = results_info.get("url_probe") or url
|
||||
if url_probe != url:
|
||||
logger.info(f"{url_probe} (display: {url})")
|
||||
else:
|
||||
logger.info(url)
|
||||
logger.info(url)
|
||||
|
||||
status = results_info.get("status")
|
||||
if status is not None:
|
||||
@@ -607,8 +605,6 @@ def make_site_result(
|
||||
for k, v in site.get_params.items():
|
||||
url_probe += f"&{k}={v}"
|
||||
|
||||
results_site["url_probe"] = url_probe
|
||||
|
||||
if site.request_method:
|
||||
request_method = site.request_method.lower()
|
||||
elif site.check_type == "status_code" and site.request_head_only:
|
||||
@@ -684,7 +680,7 @@ async def check_site_for_username(
|
||||
method = act["method"]
|
||||
try:
|
||||
activate_fun = getattr(ParsingActivator(), method)
|
||||
activate_fun(site, logger, url=checker.url)
|
||||
activate_fun(site, logger)
|
||||
except AttributeError as e:
|
||||
logger.warning(
|
||||
f"Activation method {method} for site {site.name} not found!",
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import inspect
|
||||
import sys
|
||||
import time
|
||||
from typing import Any, Iterable, List, Callable
|
||||
@@ -114,7 +113,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
async def increment_progress(self, count):
|
||||
"""Update progress by calling the provided progress function."""
|
||||
if self.progress:
|
||||
if inspect.iscoroutinefunction(self.progress):
|
||||
if asyncio.iscoroutinefunction(self.progress):
|
||||
await self.progress(count)
|
||||
else:
|
||||
self.progress(count)
|
||||
@@ -125,7 +124,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
"""Stop the progress tracking."""
|
||||
if hasattr(self.progress, "close") and self.progress:
|
||||
close_func = self.progress.close
|
||||
if inspect.iscoroutinefunction(close_func):
|
||||
if asyncio.iscoroutinefunction(close_func):
|
||||
await close_func()
|
||||
else:
|
||||
close_func()
|
||||
|
||||
+14
-80
@@ -494,21 +494,6 @@ def setup_arguments_parser(settings: Settings):
|
||||
" (one report per username).",
|
||||
)
|
||||
|
||||
report_group.add_argument(
|
||||
"--ai",
|
||||
action="store_true",
|
||||
dest="ai",
|
||||
default=False,
|
||||
help="Generate an AI-powered analysis of the search results using OpenAI API. "
|
||||
"Requires OPENAI_API_KEY env var or openai_api_key in settings.",
|
||||
)
|
||||
report_group.add_argument(
|
||||
"--ai-model",
|
||||
dest="ai_model",
|
||||
default=settings.openai_model,
|
||||
help="OpenAI model to use for AI analysis (default: gpt-4o).",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--reports-sorting",
|
||||
default=settings.report_sorting,
|
||||
@@ -611,7 +596,6 @@ async def main():
|
||||
print_found_only=not args.print_not_found,
|
||||
skip_check_errors=not args.print_check_errors,
|
||||
color=not args.no_color,
|
||||
silent=args.ai,
|
||||
)
|
||||
|
||||
# Create object with all information about sites we are aware of.
|
||||
@@ -727,33 +711,17 @@ async def main():
|
||||
+ get_dict_ascii_tree(usernames, prepend="\t")
|
||||
)
|
||||
|
||||
if args.ai:
|
||||
from .ai import resolve_api_key
|
||||
|
||||
if not resolve_api_key(settings):
|
||||
query_notify.warning(
|
||||
'AI analysis requires an OpenAI API key. '
|
||||
'Set OPENAI_API_KEY environment variable or add '
|
||||
'openai_api_key to settings.json.'
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if not site_data:
|
||||
query_notify.warning('No sites to check, exiting!')
|
||||
sys.exit(2)
|
||||
|
||||
if args.ai:
|
||||
query_notify.warning(
|
||||
f'Starting a search on top {len(site_data)} sites from the Maigret database...'
|
||||
)
|
||||
if not args.all_sites:
|
||||
query_notify.warning(
|
||||
f'Starting AI-assisted search on top {len(site_data)} sites from the Maigret database...'
|
||||
'You can run search by full list of sites with flag `-a`', '!'
|
||||
)
|
||||
else:
|
||||
query_notify.warning(
|
||||
f'Starting a search on top {len(site_data)} sites from the Maigret database...'
|
||||
)
|
||||
if not args.all_sites:
|
||||
query_notify.warning(
|
||||
'You can run search by full list of sites with flag `-a`', '!'
|
||||
)
|
||||
|
||||
already_checked = set()
|
||||
general_results = []
|
||||
@@ -806,12 +774,11 @@ async def main():
|
||||
check_domains=args.with_domains,
|
||||
)
|
||||
|
||||
if not args.ai:
|
||||
errs = errors.notify_about_errors(
|
||||
results, query_notify, show_statistics=args.verbose
|
||||
)
|
||||
for e in errs:
|
||||
query_notify.warning(*e)
|
||||
errs = errors.notify_about_errors(
|
||||
results, query_notify, show_statistics=args.verbose
|
||||
)
|
||||
for e in errs:
|
||||
query_notify.warning(*e)
|
||||
|
||||
if args.reports_sorting == "data":
|
||||
results = sort_report_by_data_points(results)
|
||||
@@ -900,43 +867,10 @@ async def main():
|
||||
save_graph_report(filename, general_results, db)
|
||||
query_notify.warning(f'Graph report on all usernames saved in {filename}')
|
||||
|
||||
if not args.ai:
|
||||
text_report = get_plaintext_report(report_context)
|
||||
if text_report:
|
||||
query_notify.info('Short text report:')
|
||||
print(text_report)
|
||||
|
||||
if args.ai:
|
||||
from .ai import get_ai_analysis, resolve_api_key
|
||||
from .report import generate_markdown_report
|
||||
|
||||
api_key = resolve_api_key(settings)
|
||||
|
||||
run_flags = []
|
||||
if args.tags:
|
||||
run_flags.append(f"--tags {args.tags}")
|
||||
if args.site_list:
|
||||
run_flags.append(f"--site {','.join(args.site_list)}")
|
||||
if args.all_sites:
|
||||
run_flags.append("--all-sites")
|
||||
run_info = {
|
||||
"sites_count": sum(len(d) for _, _, d in general_results),
|
||||
"flags": " ".join(run_flags) if run_flags else None,
|
||||
}
|
||||
|
||||
md_report = generate_markdown_report(report_context, run_info=run_info)
|
||||
|
||||
try:
|
||||
await get_ai_analysis(
|
||||
api_key=api_key,
|
||||
markdown_report=md_report,
|
||||
model=args.ai_model,
|
||||
api_base_url=getattr(
|
||||
settings, 'openai_api_base_url', 'https://api.openai.com/v1'
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
query_notify.warning(f'AI analysis failed: {e}')
|
||||
text_report = get_plaintext_report(report_context)
|
||||
if text_report:
|
||||
query_notify.info('Short text report:')
|
||||
print(text_report)
|
||||
|
||||
# update database
|
||||
db.save_to_file(db_file)
|
||||
|
||||
+3
-10
@@ -1,6 +1,7 @@
|
||||
"""Console and query notification helpers.
|
||||
"""Sherlock Notify Module
|
||||
|
||||
This module defines objects for notifying the caller about the results of queries.
|
||||
This module defines the objects for notifying the caller about the
|
||||
results of queries.
|
||||
"""
|
||||
|
||||
import sys
|
||||
@@ -123,7 +124,6 @@ class QueryNotifyPrint(QueryNotify):
|
||||
print_found_only=False,
|
||||
skip_check_errors=False,
|
||||
color=True,
|
||||
silent=False,
|
||||
):
|
||||
"""Create Query Notify Print Object.
|
||||
|
||||
@@ -150,7 +150,6 @@ class QueryNotifyPrint(QueryNotify):
|
||||
self.print_found_only = print_found_only
|
||||
self.skip_check_errors = skip_check_errors
|
||||
self.color = color
|
||||
self.silent = silent
|
||||
|
||||
return
|
||||
|
||||
@@ -189,9 +188,6 @@ class QueryNotifyPrint(QueryNotify):
|
||||
Nothing.
|
||||
"""
|
||||
|
||||
if self.silent:
|
||||
return
|
||||
|
||||
title = f"Checking {id_type}"
|
||||
if self.color:
|
||||
print(
|
||||
@@ -241,9 +237,6 @@ class QueryNotifyPrint(QueryNotify):
|
||||
Return Value:
|
||||
Nothing.
|
||||
"""
|
||||
if self.silent:
|
||||
return
|
||||
|
||||
notify = None
|
||||
self.result = result
|
||||
|
||||
|
||||
+13
-16
@@ -30,18 +30,14 @@ UTILS
|
||||
|
||||
|
||||
def filter_supposed_data(data):
|
||||
# interesting fields
|
||||
allowed_fields = ["fullname", "gender", "location", "age"]
|
||||
|
||||
def _first(v):
|
||||
if isinstance(v, (list, tuple)):
|
||||
return v[0] if v else ""
|
||||
return v
|
||||
|
||||
return {
|
||||
CaseConverter.snake_to_title(k): _first(v)
|
||||
filtered_supposed_data = {
|
||||
CaseConverter.snake_to_title(k): v[0]
|
||||
for k, v in data.items()
|
||||
if k in allowed_fields
|
||||
}
|
||||
return filtered_supposed_data
|
||||
|
||||
|
||||
def sort_report_by_data_points(results):
|
||||
@@ -83,7 +79,13 @@ def save_pdf_report(filename: str, context: dict):
|
||||
filled_template = template.render(**context)
|
||||
|
||||
# moved here to speed up the launch of Maigret
|
||||
from xhtml2pdf import pisa # type: ignore[import-untyped]
|
||||
try:
|
||||
from xhtml2pdf import pisa # type: ignore[import-untyped]
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"PDF report generation requires the 'xhtml2pdf' package. "
|
||||
"Install it with: pip install maigret[pdf]"
|
||||
)
|
||||
|
||||
with open(filename, "w+b") as f:
|
||||
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
|
||||
@@ -271,7 +273,7 @@ def _md_format_value(value) -> str:
|
||||
return s
|
||||
|
||||
|
||||
def generate_markdown_report(context: dict, run_info: dict = None) -> str:
|
||||
def save_markdown_report(filename: str, context: dict, run_info: dict = None):
|
||||
username = context.get("username", "unknown")
|
||||
generated_at = context.get("generated_at", "")
|
||||
brief = context.get("brief", "")
|
||||
@@ -395,13 +397,8 @@ def generate_markdown_report(context: dict, run_info: dict = None) -> str:
|
||||
"CCPA, and similar).\n"
|
||||
)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def save_markdown_report(filename: str, context: dict, run_info: dict = None):
|
||||
content = generate_markdown_report(context, run_info)
|
||||
with open(filename, "w", encoding="utf-8") as f:
|
||||
f.write(content)
|
||||
f.write("\n".join(lines))
|
||||
|
||||
|
||||
"""
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
You are an OSINT analyst that converts raw username-investigation reports into a short, clean human-readable summary.
|
||||
|
||||
Your task:
|
||||
Read the attached account-discovery report and produce a concise report in exactly this style:
|
||||
|
||||
# Investigation Summary
|
||||
|
||||
Name: <most likely real full name>
|
||||
Location: <most likely current location>
|
||||
Occupation: <short combined description based only on strong signals>
|
||||
Interests: <3–6 broad interests inferred from platform types, bios, and activity>
|
||||
Languages: <languages supported by strong evidence only>
|
||||
Website: <main personal website if clearly present>
|
||||
Username: <main username> (variant: <variant usernames if any>)
|
||||
Platforms: <number> profiles, active from <first year> to <last year>
|
||||
Confidence: <High / Medium / Low> — <one short explanation why>
|
||||
|
||||
# Other leads
|
||||
|
||||
- <lead 1>
|
||||
- <lead 2>
|
||||
- <lead 3 if needed>
|
||||
|
||||
Rules:
|
||||
1. Use only information supported by the report.
|
||||
2. Resolve identity using consistency of username, full name, bio, links, company, and location.
|
||||
3. Prefer strong repeated signals over one-off weak signals.
|
||||
4. If one profile clearly conflicts with the rest, mention it in "Other leads" as a likely false positive instead of mixing it into the main identity.
|
||||
5. Keep the tone analytical and neutral.
|
||||
6. Do not mention every platform individually.
|
||||
7. Do not include raw URLs except for the main website.
|
||||
8. Do not mention NSFW/adult platforms in the main summary unless they are the only source for a critical lead; if such a profile looks inconsistent, mention it only as a likely false positive.
|
||||
9. "Occupation" should be a compact merged description, for example: "Chief Product Officer (CPO) at ..., entrepreneur, OSINT community founder".
|
||||
10. "Interests" should be broad categories, not noisy tags. Convert raw platform/tag evidence into natural categories like OSINT, software development, blogging, gaming, streaming, etc.
|
||||
11. "Languages" should only include languages clearly supported by bios, texts, country tags, or profile content.
|
||||
12. For "Platforms", count the profiles reported as found by the report summary, not manually deduplicated.
|
||||
13. For active years, use the earliest and latest reliable dates from the consistent identity cluster. Ignore obvious outlier dates if they belong to likely false positives or weak profiles.
|
||||
14. For confidence:
|
||||
- High = strong consistency across username, name, bio, links, location, and/or company
|
||||
- Medium = partial consistency with some gaps
|
||||
- Low = mostly username-only matches
|
||||
15. If some field is not reliably known, omit speculation and use the best cautious wording possible.
|
||||
16. For "Name", output only the most likely real personal name in clean canonical form.
|
||||
- Remove nicknames, handles, aliases, or bracketed parts such as "(Soxoj)".
|
||||
- Example: "Dmitriy (Soxoj) Danilov" -> "Dmitriy Danilov".
|
||||
17. For "Website", output only the plain domain or URL as text, not a markdown hyperlink.
|
||||
18. In "Other leads", do not label conflicting profiles as "false positive", "likely unrelated", or "potentially a false positive".
|
||||
- Instead, use neutral intelligence wording such as:
|
||||
"Accounts were found that are most likely unrelated to the main identity, but may indicate possible cross-border activity and should be verified."
|
||||
19. When describing anomalies in "Other leads", prefer cautious investigative phrasing:
|
||||
- "may be unrelated"
|
||||
- "requires verification"
|
||||
- "could indicate separate activity"
|
||||
- "should be checked manually"
|
||||
20. Do not include nicknames or aliases inside the Name field unless they are clearly part of the legal or real-world name.
|
||||
|
||||
Output requirements:
|
||||
- Return only the final formatted text.
|
||||
- Keep it short.
|
||||
- No preamble, no explanations.
|
||||
|
||||
Now analyze the following report
|
||||
+1840
-2094
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-26T09:18:14Z",
|
||||
"sites_count": 3139,
|
||||
"min_maigret_version": "0.6.0",
|
||||
"data_sha256": "c51ecaa6c0736c5e1e7ca91aaf111445b3ac9ce9541a472d97db2dcc3ff8aa17",
|
||||
"updated_at": "2026-04-07T22:38:58Z",
|
||||
"sites_count": 3154,
|
||||
"min_maigret_version": "0.5.0",
|
||||
"data_sha256": "1f1abd85bad2a358e4af1919f2d89d38bf374640e78f6f33b362ac620c55d47f",
|
||||
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
||||
}
|
||||
@@ -55,9 +55,6 @@
|
||||
"pdf_report": false,
|
||||
"html_report": false,
|
||||
"md_report": false,
|
||||
"openai_api_key": "",
|
||||
"openai_model": "gpt-4o",
|
||||
"openai_api_base_url": "https://api.openai.com/v1",
|
||||
"web_interface_port": 5000,
|
||||
"no_autoupdate": false,
|
||||
"db_update_meta_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/db_meta.json",
|
||||
|
||||
@@ -589,8 +589,6 @@ class MaigretDatabase:
|
||||
sites_dict = self.sites_dict
|
||||
urls: Dict[str, int] = {}
|
||||
tags: Dict[str, int] = {}
|
||||
engine_total: Dict[str, int] = {}
|
||||
engine_enabled: Dict[str, int] = {}
|
||||
disabled_count = 0
|
||||
message_checks_one_factor = 0
|
||||
status_checks = 0
|
||||
@@ -613,14 +611,6 @@ class MaigretDatabase:
|
||||
elif site.check_type == 'status_code':
|
||||
status_checks += 1
|
||||
|
||||
# Count engines
|
||||
if site.engine:
|
||||
engine_total[site.engine] = engine_total.get(site.engine, 0) + 1
|
||||
if not site.disabled:
|
||||
engine_enabled[site.engine] = (
|
||||
engine_enabled.get(site.engine, 0) + 1
|
||||
)
|
||||
|
||||
# Count tags
|
||||
if not site.tags:
|
||||
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
||||
@@ -657,26 +647,11 @@ class MaigretDatabase:
|
||||
f"Sites with probing: {', '.join(sorted(site_with_probing))}",
|
||||
f"Sites with activation: {', '.join(sorted(site_with_activation))}",
|
||||
self._format_top_items("profile URLs", urls, 20, is_markdown),
|
||||
self._format_engine_stats(engine_total, engine_enabled, is_markdown),
|
||||
self._format_top_items("tags", tags, 20, is_markdown, self._tags),
|
||||
]
|
||||
|
||||
return separator.join(output)
|
||||
|
||||
def _format_engine_stats(self, engine_total, engine_enabled, is_markdown):
|
||||
"""Format per-engine enabled/total counts, sorted by total descending."""
|
||||
output = "Sites by engine:\n"
|
||||
for engine, total in sorted(
|
||||
engine_total.items(), key=lambda x: x[1], reverse=True
|
||||
):
|
||||
enabled = engine_enabled.get(engine, 0)
|
||||
perc = round(100 * enabled / total, 1) if total else 0.0
|
||||
if is_markdown:
|
||||
output += f"- `{engine}`: {enabled}/{total} ({perc}%)\n"
|
||||
else:
|
||||
output += f"{enabled}/{total} ({perc}%)\t{engine}\n"
|
||||
return output
|
||||
|
||||
def _format_top_items(
|
||||
self, title, items_dict, limit, is_markdown, valid_items=None
|
||||
):
|
||||
|
||||
+1
-1
@@ -8,7 +8,7 @@ from typing import Any
|
||||
|
||||
|
||||
DEFAULT_USER_AGENTS = [
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
|
||||
]
|
||||
|
||||
|
||||
|
||||
+4
-6
@@ -8,7 +8,6 @@
|
||||
aiodns>=3.0.0
|
||||
aiohttp>=3.8.6
|
||||
aiohttp-socks>=0.7.1
|
||||
arabic-reshaper~=3.0.0
|
||||
async-timeout
|
||||
attrs>=22.2.0
|
||||
certifi>=2023.7.22
|
||||
@@ -24,9 +23,7 @@ MarkupSafe
|
||||
mock>=4.0.3
|
||||
multidict
|
||||
pycountry>=22.3.5
|
||||
PyPDF2>=3.0.1
|
||||
PySocks>=1.7.1
|
||||
python-bidi>=0.4.2
|
||||
requests
|
||||
requests-futures>=1.0.0
|
||||
six>=1.16.0
|
||||
@@ -37,11 +34,12 @@ torrequest>=0.1.0
|
||||
tqdm
|
||||
typing-extensions
|
||||
webencodings>=0.5.1
|
||||
svglib
|
||||
xhtml2pdf~=0.2.11
|
||||
XMind>=1.2.0
|
||||
yarl
|
||||
networkx
|
||||
pyvis>=0.2.1
|
||||
reportlab
|
||||
cloudscraper>=1.2.71
|
||||
# Optional PDF dependencies (install separately for PDF report support):
|
||||
# xhtml2pdf~=0.2.11
|
||||
# arabic-reshaper~=3.0.0
|
||||
# reportlab
|
||||
|
||||
Generated
+441
-437
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
|
||||
pefile==2023.2.7 # do not bump while pyinstaller is 6.11.1, there is a conflict
|
||||
psutil==7.2.2
|
||||
pyinstaller==6.20.0
|
||||
pyinstaller==6.19.0
|
||||
pywin32-ctypes==0.2.3
|
||||
|
||||
+6
-12
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "maigret"
|
||||
version = "0.6.0"
|
||||
version = "0.5.0"
|
||||
description = "🕵️♂️ Collect a dossier on a person by username from thousands of sites."
|
||||
authors = ["Soxoj <soxoj@protonmail.com>"]
|
||||
readme = "README.md"
|
||||
@@ -15,11 +15,6 @@ repository = "https://github.com/soxoj/maigret"
|
||||
classifiers = [
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
"Intended Audience :: Information Technology",
|
||||
"Operating System :: OS Independent",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
@@ -39,7 +34,6 @@ python = "^3.10"
|
||||
aiodns = ">=3,<5"
|
||||
aiohttp = "^3.12.14"
|
||||
aiohttp-socks = ">=0.10.1,<0.12.0"
|
||||
arabic-reshaper = "^3.0.0"
|
||||
async-timeout = "^5.0.1"
|
||||
attrs = ">=25.3,<27.0"
|
||||
certifi = ">=2025.6.15,<2027.0.0"
|
||||
@@ -55,31 +49,31 @@ MarkupSafe = "^3.0.2"
|
||||
mock = "^5.1.0"
|
||||
multidict = "^6.6.3"
|
||||
pycountry = ">=24.6.1,<27.0.0"
|
||||
PyPDF2 = "^3.0.1"
|
||||
PySocks = "^1.7.1"
|
||||
python-bidi = "^0.6.3"
|
||||
requests = "^2.32.4"
|
||||
requests-futures = "^1.0.2"
|
||||
requests-toolbelt = "^1.0.0"
|
||||
six = "^1.17.0"
|
||||
socid-extractor = ">=0.0.27,<0.0.29"
|
||||
socid-extractor = "^0.0.27"
|
||||
soupsieve = "^2.6"
|
||||
stem = "^1.8.1"
|
||||
torrequest = "^0.1.0"
|
||||
alive_progress = "^3.2.0"
|
||||
typing-extensions = "^4.14.1"
|
||||
webencodings = "^0.5.1"
|
||||
xhtml2pdf = "^0.2.11"
|
||||
XMind = "^1.2.0"
|
||||
yarl = "^1.20.1"
|
||||
networkx = "^2.6.3"
|
||||
pyvis = "^0.3.2"
|
||||
reportlab = "^4.4.3"
|
||||
cloudscraper = "^1.2.71"
|
||||
flask = {extras = ["async"], version = "^3.1.1"}
|
||||
asgiref = "^3.9.1"
|
||||
platformdirs = "^4.3.8"
|
||||
curl-cffi = ">=0.14,<1.0"
|
||||
xhtml2pdf = {version = "^0.2.11", optional = true}
|
||||
|
||||
[tool.poetry.extras]
|
||||
pdf = ["xhtml2pdf"]
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
||||
@@ -3,5 +3,4 @@
|
||||
filterwarnings =
|
||||
error
|
||||
ignore::UserWarning
|
||||
ignore:codecs.open\(\) is deprecated:DeprecationWarning:xmind.core.saver
|
||||
asyncio_mode=auto
|
||||
+1
-1
@@ -3,7 +3,7 @@ icon: static/maigret.png
|
||||
name: maigret
|
||||
summary: 🕵️♂️ Collect a dossier on a person by username from thousands of sites.
|
||||
description: |
|
||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required.
|
||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of Sherlock.
|
||||
|
||||
Currently supported more than 3000 sites, search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
|
||||
|
||||
|
||||
@@ -56,110 +56,3 @@ async def test_import_aiohttp_cookies(cookie_test_server):
|
||||
print(f"Server response: {result}")
|
||||
|
||||
assert result == {'cookies': {'a': 'b'}}
|
||||
|
||||
|
||||
# ---- OnlyFans signing tests (pure-compute, no network) ----
|
||||
|
||||
class _FakeSite:
|
||||
"""Minimal stand-in for MaigretSite with the attributes onlyfans() touches."""
|
||||
|
||||
def __init__(self, headers=None, activation=None):
|
||||
self.headers = headers or {}
|
||||
self.activation = activation or {
|
||||
"static_param": "jLM8LXHU1CGcuCzPMNwWX9osCScVuP4D",
|
||||
"checksum_indexes": [28, 3, 16, 32, 25, 24, 23, 0, 26],
|
||||
"checksum_constant": -180,
|
||||
"format": "57203:{}:{:x}:69cfa6d8",
|
||||
"url": "https://onlyfans.com/api2/v2/init",
|
||||
}
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
def __init__(self, cookies=None):
|
||||
self.cookies = cookies or {}
|
||||
|
||||
|
||||
def test_onlyfans_sets_xbc_when_zero(monkeypatch):
|
||||
site = _FakeSite(headers={"x-bc": "0", "cookie": "existing=1"})
|
||||
|
||||
# Prevent any real network. If _sign path still fires requests.get, fail loudly.
|
||||
import maigret.activation as act_mod
|
||||
|
||||
def boom(*a, **kw): # pragma: no cover - sanity
|
||||
raise AssertionError("requests.get should not run when cookie is present")
|
||||
|
||||
monkeypatch.setattr(act_mod.__dict__.get("requests", None) or __import__("requests"), "get", boom, raising=False)
|
||||
|
||||
logger = Mock()
|
||||
ParsingActivator.onlyfans(site, logger, url="https://onlyfans.com/api2/v2/users/adam")
|
||||
|
||||
# x-bc must be rewritten to a non-zero hex token
|
||||
assert site.headers["x-bc"] != "0"
|
||||
assert len(site.headers["x-bc"]) == 40 # 20 bytes → 40 hex chars
|
||||
# time / sign headers set for target URL
|
||||
assert "time" in site.headers and site.headers["time"].isdigit()
|
||||
assert site.headers["sign"].startswith("57203:")
|
||||
|
||||
|
||||
def test_onlyfans_fetches_init_cookie_when_missing(monkeypatch):
|
||||
"""When cookie header is absent, init endpoint is called and its cookies stored."""
|
||||
site = _FakeSite(headers={"x-bc": "already_set_token", "user-id": "0"})
|
||||
|
||||
import requests
|
||||
|
||||
captured = {}
|
||||
|
||||
def fake_get(url, headers=None, timeout=15):
|
||||
captured["url"] = url
|
||||
captured["headers"] = dict(headers or {})
|
||||
return _FakeResponse(cookies={"sess": "abc123", "csrf": "xyz"})
|
||||
|
||||
monkeypatch.setattr(requests, "get", fake_get)
|
||||
|
||||
logger = Mock()
|
||||
ParsingActivator.onlyfans(site, logger, url="https://onlyfans.com/api2/v2/users/adam")
|
||||
|
||||
# init request made
|
||||
assert captured["url"] == site.activation["url"]
|
||||
# headers passed to init include freshly generated time/sign
|
||||
assert "time" in captured["headers"]
|
||||
assert captured["headers"]["sign"].startswith("57203:")
|
||||
# cookie header populated from response
|
||||
assert site.headers["cookie"] == "sess=abc123; csrf=xyz"
|
||||
|
||||
|
||||
def test_onlyfans_signature_is_deterministic_for_same_time(monkeypatch):
|
||||
"""Two calls with patched time produce identical signatures."""
|
||||
site1 = _FakeSite(headers={"x-bc": "token", "cookie": "c=1"})
|
||||
site2 = _FakeSite(headers={"x-bc": "token", "cookie": "c=1"})
|
||||
|
||||
import maigret.activation
|
||||
monkeypatch.setattr(maigret.activation, "_time", __import__("time"), raising=False)
|
||||
|
||||
fixed = 1_700_000_000.123
|
||||
import time as time_mod
|
||||
monkeypatch.setattr(time_mod, "time", lambda: fixed)
|
||||
|
||||
logger = Mock()
|
||||
ParsingActivator.onlyfans(site1, logger, url="https://onlyfans.com/api2/v2/users/adam")
|
||||
ParsingActivator.onlyfans(site2, logger, url="https://onlyfans.com/api2/v2/users/adam")
|
||||
|
||||
assert site1.headers["time"] == site2.headers["time"]
|
||||
assert site1.headers["sign"] == site2.headers["sign"]
|
||||
|
||||
|
||||
def test_onlyfans_sign_differs_per_path(monkeypatch):
|
||||
"""Different target URLs must yield different signatures."""
|
||||
site = _FakeSite(headers={"x-bc": "token", "cookie": "c=1"})
|
||||
|
||||
import time as time_mod
|
||||
monkeypatch.setattr(time_mod, "time", lambda: 1_700_000_000.0)
|
||||
|
||||
logger = Mock()
|
||||
ParsingActivator.onlyfans(site, logger, url="https://onlyfans.com/api2/v2/users/adam")
|
||||
sig_adam = site.headers["sign"]
|
||||
|
||||
ParsingActivator.onlyfans(site, logger, url="https://onlyfans.com/api2/v2/users/bob")
|
||||
sig_bob = site.headers["sign"]
|
||||
|
||||
assert sig_adam != sig_bob
|
||||
|
||||
@@ -1,22 +1,7 @@
|
||||
from argparse import ArgumentTypeError
|
||||
|
||||
from mock import Mock
|
||||
import pytest
|
||||
|
||||
from maigret import search
|
||||
from maigret.checking import (
|
||||
detect_error_page,
|
||||
extract_ids_data,
|
||||
parse_usernames,
|
||||
update_results_info,
|
||||
get_failed_sites,
|
||||
timeout_check,
|
||||
debug_response_logging,
|
||||
process_site_result,
|
||||
)
|
||||
from maigret.errors import CheckError
|
||||
from maigret.result import MaigretCheckResult, MaigretCheckStatus
|
||||
from maigret.sites import MaigretSite
|
||||
|
||||
|
||||
def site_result_except(server, username, **kwargs):
|
||||
@@ -82,228 +67,3 @@ async def test_checking_by_message_negative(httpserver, local_test_db):
|
||||
|
||||
result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
|
||||
assert result['Message']['status'].is_found() is True
|
||||
|
||||
|
||||
# ---- Pure-function unit tests (no network) ----
|
||||
|
||||
|
||||
def test_detect_error_page_site_specific():
|
||||
err = detect_error_page(
|
||||
"Please enable JavaScript to proceed",
|
||||
200,
|
||||
{"Please enable JavaScript to proceed": "Scraping protection"},
|
||||
ignore_403=False,
|
||||
)
|
||||
assert err is not None
|
||||
assert err.type == "Site-specific"
|
||||
assert err.desc == "Scraping protection"
|
||||
|
||||
|
||||
def test_detect_error_page_403():
|
||||
err = detect_error_page("some body", 403, {}, ignore_403=False)
|
||||
assert err is not None
|
||||
assert err.type == "Access denied"
|
||||
|
||||
|
||||
def test_detect_error_page_403_ignored():
|
||||
# XenForo engine uses ignore403 because member-not-found also returns 403
|
||||
assert detect_error_page("not found body", 403, {}, ignore_403=True) is None
|
||||
|
||||
|
||||
def test_detect_error_page_999_linkedin():
|
||||
# LinkedIn returns 999 on bot suspicion — must NOT be reported as Server error
|
||||
assert detect_error_page("", 999, {}, ignore_403=False) is None
|
||||
|
||||
|
||||
def test_detect_error_page_500():
|
||||
err = detect_error_page("", 503, {}, ignore_403=False)
|
||||
assert err is not None
|
||||
assert err.type == "Server"
|
||||
assert "503" in err.desc
|
||||
|
||||
|
||||
def test_detect_error_page_ok():
|
||||
assert detect_error_page("hello world", 200, {}, ignore_403=False) is None
|
||||
|
||||
|
||||
def test_parse_usernames_single_username():
|
||||
logger = Mock()
|
||||
result = parse_usernames({"profile_username": "alice"}, logger)
|
||||
assert result == {"alice": "username"}
|
||||
|
||||
|
||||
def test_parse_usernames_list_of_usernames():
|
||||
logger = Mock()
|
||||
result = parse_usernames({"other_usernames": "['alice', 'bob']"}, logger)
|
||||
assert result == {"alice": "username", "bob": "username"}
|
||||
|
||||
|
||||
def test_parse_usernames_malformed_list():
|
||||
logger = Mock()
|
||||
result = parse_usernames({"other_usernames": "not-a-list"}, logger)
|
||||
# should swallow the error and just return empty
|
||||
assert result == {}
|
||||
assert logger.warning.called
|
||||
|
||||
|
||||
def test_parse_usernames_supported_id():
|
||||
logger = Mock()
|
||||
# "telegram" is in SUPPORTED_IDS per socid_extractor
|
||||
from maigret.checking import SUPPORTED_IDS
|
||||
if SUPPORTED_IDS:
|
||||
key = next(iter(SUPPORTED_IDS))
|
||||
result = parse_usernames({key: "some_value"}, logger)
|
||||
assert result.get("some_value") == key
|
||||
|
||||
|
||||
def test_update_results_info_links():
|
||||
info = {"username": "test"}
|
||||
result = update_results_info(
|
||||
info,
|
||||
{"links": "['https://example.com/a', 'https://example.com/b']", "website": "https://example.com/w"},
|
||||
{"alice": "username"},
|
||||
)
|
||||
assert result["ids_usernames"] == {"alice": "username"}
|
||||
assert "https://example.com/w" in result["ids_links"]
|
||||
assert "https://example.com/a" in result["ids_links"]
|
||||
|
||||
|
||||
def test_update_results_info_no_website():
|
||||
info = {}
|
||||
result = update_results_info(info, {"links": "[]"}, {})
|
||||
assert result["ids_links"] == []
|
||||
|
||||
|
||||
def test_extract_ids_data_bad_html_returns_empty():
|
||||
logger = Mock()
|
||||
# Random HTML should not raise — returns {} if nothing matches
|
||||
out = extract_ids_data("<html><body>nothing special</body></html>", logger, Mock(name="Site"))
|
||||
assert isinstance(out, dict)
|
||||
|
||||
|
||||
def test_get_failed_sites_filters_permanent_errors():
|
||||
# Temporary errors (Request timeout, Connecting failure, etc.) are retryable → returned.
|
||||
# Permanent ones (Captcha, Access denied, etc.) and results without error → filtered out.
|
||||
good_status = MaigretCheckResult("u", "S1", "https://s1", MaigretCheckStatus.CLAIMED)
|
||||
timeout_err = MaigretCheckResult(
|
||||
"u", "S2", "https://s2", MaigretCheckStatus.UNKNOWN,
|
||||
error=CheckError("Request timeout", "slow server"),
|
||||
)
|
||||
captcha_err = MaigretCheckResult(
|
||||
"u", "S3", "https://s3", MaigretCheckStatus.UNKNOWN,
|
||||
error=CheckError("Captcha", "Cloudflare"),
|
||||
)
|
||||
results = {
|
||||
"S1": {"status": good_status},
|
||||
"S2": {"status": timeout_err},
|
||||
"S3": {"status": captcha_err},
|
||||
"S4": {}, # no status at all
|
||||
}
|
||||
failed = get_failed_sites(results)
|
||||
# Only the temporary-error site is retry-worthy
|
||||
assert failed == ["S2"]
|
||||
|
||||
|
||||
def test_timeout_check_valid():
|
||||
assert timeout_check("2.5") == 2.5
|
||||
assert timeout_check("30") == 30.0
|
||||
|
||||
|
||||
def test_timeout_check_invalid():
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
timeout_check("abc")
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
timeout_check("0")
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
timeout_check("-1")
|
||||
|
||||
|
||||
def test_debug_response_logging_writes(tmp_path, monkeypatch):
|
||||
monkeypatch.chdir(tmp_path)
|
||||
debug_response_logging("https://example.com", "<html>hi</html>", 200, None)
|
||||
out = (tmp_path / "debug.log").read_text()
|
||||
assert "https://example.com" in out
|
||||
assert "200" in out
|
||||
|
||||
|
||||
def test_debug_response_logging_no_response(tmp_path, monkeypatch):
|
||||
monkeypatch.chdir(tmp_path)
|
||||
debug_response_logging("https://example.com", None, None, CheckError("Timeout"))
|
||||
out = (tmp_path / "debug.log").read_text()
|
||||
assert "No response" in out
|
||||
|
||||
|
||||
def _make_site(data_overrides=None):
|
||||
base = {
|
||||
"url": "https://x/{username}",
|
||||
"urlMain": "https://x",
|
||||
"checkType": "status_code",
|
||||
"usernameClaimed": "a",
|
||||
"usernameUnclaimed": "b",
|
||||
}
|
||||
if data_overrides:
|
||||
base.update(data_overrides)
|
||||
return MaigretSite("TestSite", base)
|
||||
|
||||
|
||||
def test_process_site_result_no_response_returns_info():
|
||||
site = _make_site()
|
||||
info = {"username": "a", "parsing_enabled": False, "url_user": "https://x/a"}
|
||||
out = process_site_result(None, Mock(), Mock(), info, site)
|
||||
assert out is info
|
||||
|
||||
|
||||
def test_process_site_result_status_already_set():
|
||||
site = _make_site()
|
||||
pre = MaigretCheckResult("a", "S", "u", MaigretCheckStatus.ILLEGAL)
|
||||
info = {"username": "a", "parsing_enabled": False, "status": pre, "url_user": "u"}
|
||||
# Since status is already set, function returns without changes
|
||||
out = process_site_result(("<html/>", 200, None), Mock(), Mock(), info, site)
|
||||
assert out["status"] is pre
|
||||
|
||||
|
||||
def test_process_site_result_status_code_claimed():
|
||||
site = _make_site({"checkType": "status_code"})
|
||||
info = {"username": "a", "parsing_enabled": False, "url_user": "https://x/a"}
|
||||
out = process_site_result(("<html/>", 200, None), Mock(), Mock(), info, site)
|
||||
assert out["status"].status == MaigretCheckStatus.CLAIMED
|
||||
assert out["http_status"] == 200
|
||||
|
||||
|
||||
def test_process_site_result_status_code_available():
|
||||
site = _make_site({"checkType": "status_code"})
|
||||
info = {"username": "a", "parsing_enabled": False, "url_user": "https://x/a"}
|
||||
out = process_site_result(("<html/>", 404, None), Mock(), Mock(), info, site)
|
||||
assert out["status"].status == MaigretCheckStatus.AVAILABLE
|
||||
|
||||
|
||||
def test_process_site_result_message_claimed():
|
||||
site = _make_site({
|
||||
"checkType": "message",
|
||||
"presenseStrs": ["profile-name"],
|
||||
"absenceStrs": ["not found"],
|
||||
})
|
||||
info = {"username": "a", "parsing_enabled": False, "url_user": "https://x/a"}
|
||||
out = process_site_result(("<div class='profile-name'>Alice</div>", 200, None), Mock(), Mock(), info, site)
|
||||
assert out["status"].status == MaigretCheckStatus.CLAIMED
|
||||
|
||||
|
||||
def test_process_site_result_message_available_by_absence():
|
||||
site = _make_site({
|
||||
"checkType": "message",
|
||||
"presenseStrs": ["profile-name"],
|
||||
"absenceStrs": ["not found"],
|
||||
})
|
||||
info = {"username": "a", "parsing_enabled": False, "url_user": "https://x/a"}
|
||||
out = process_site_result(("<h1>not found</h1> profile-name too", 200, None), Mock(), Mock(), info, site)
|
||||
# absence marker wins even if presence marker also appears
|
||||
assert out["status"].status == MaigretCheckStatus.AVAILABLE
|
||||
|
||||
|
||||
def test_process_site_result_with_error_is_unknown():
|
||||
site = _make_site({"checkType": "status_code"})
|
||||
info = {"username": "a", "parsing_enabled": False, "url_user": "https://x/a"}
|
||||
resp = ("body", 403, CheckError("Captcha", "Cloudflare"))
|
||||
out = process_site_result(resp, Mock(), Mock(), info, site)
|
||||
assert out["status"].status == MaigretCheckStatus.UNKNOWN
|
||||
assert out["status"].error is not None
|
||||
|
||||
@@ -49,8 +49,6 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
||||
'with_domains': False,
|
||||
'xmind': False,
|
||||
'md': False,
|
||||
'ai': False,
|
||||
'ai_model': 'gpt-4o',
|
||||
'no_autoupdate': False,
|
||||
'force_update': False,
|
||||
}
|
||||
|
||||
+11
-11
@@ -26,7 +26,7 @@ async def test_simple_asyncio_executor():
|
||||
executor = AsyncioSimpleExecutor(logger=logger)
|
||||
assert await executor.run(tasks) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
||||
assert executor.execution_time > 0.2
|
||||
assert executor.execution_time < 1.0
|
||||
assert executor.execution_time < 0.3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -37,7 +37,7 @@ async def test_asyncio_progressbar_executor():
|
||||
# no guarantees for the results order
|
||||
assert sorted(await executor.run(tasks)) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
||||
assert executor.execution_time > 0.2
|
||||
assert executor.execution_time < 1.0
|
||||
assert executor.execution_time < 0.3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -48,7 +48,7 @@ async def test_asyncio_progressbar_semaphore_executor():
|
||||
# no guarantees for the results order
|
||||
assert sorted(await executor.run(tasks)) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
||||
assert executor.execution_time > 0.2
|
||||
assert executor.execution_time < 1.1
|
||||
assert executor.execution_time < 0.4
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@@ -59,12 +59,12 @@ async def test_asyncio_progressbar_queue_executor():
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=2)
|
||||
assert await executor.run(tasks) == [0, 1, 3, 2, 4, 6, 7, 5, 9, 8]
|
||||
assert executor.execution_time > 0.5
|
||||
assert executor.execution_time < 1.4
|
||||
assert executor.execution_time < 0.7
|
||||
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=3)
|
||||
assert await executor.run(tasks) == [0, 3, 1, 4, 6, 2, 7, 9, 5, 8]
|
||||
assert executor.execution_time > 0.4
|
||||
assert executor.execution_time < 1.3
|
||||
assert executor.execution_time < 0.6
|
||||
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=5)
|
||||
assert await executor.run(tasks) in (
|
||||
@@ -72,12 +72,12 @@ async def test_asyncio_progressbar_queue_executor():
|
||||
[0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
|
||||
)
|
||||
assert executor.execution_time > 0.3
|
||||
assert executor.execution_time < 1.2
|
||||
assert executor.execution_time < 0.5
|
||||
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=10)
|
||||
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
|
||||
assert executor.execution_time > 0.2
|
||||
assert executor.execution_time < 1.1
|
||||
assert executor.execution_time < 0.4
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -88,13 +88,13 @@ async def test_asyncio_queue_generator_executor():
|
||||
results = [result async for result in executor.run(tasks)] # type: ignore[arg-type]
|
||||
assert results == [0, 1, 3, 2, 4, 6, 7, 5, 9, 8]
|
||||
assert executor.execution_time > 0.5
|
||||
assert executor.execution_time < 1.3
|
||||
assert executor.execution_time < 0.6
|
||||
|
||||
executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=3)
|
||||
results = [result async for result in executor.run(tasks)] # type: ignore[arg-type]
|
||||
assert results == [0, 3, 1, 4, 6, 2, 7, 9, 5, 8]
|
||||
assert executor.execution_time > 0.4
|
||||
assert executor.execution_time < 1.2
|
||||
assert executor.execution_time < 0.5
|
||||
|
||||
executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=5)
|
||||
results = [result async for result in executor.run(tasks)] # type: ignore[arg-type]
|
||||
@@ -103,10 +103,10 @@ async def test_asyncio_queue_generator_executor():
|
||||
[0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
|
||||
)
|
||||
assert executor.execution_time > 0.3
|
||||
assert executor.execution_time < 1.1
|
||||
assert executor.execution_time < 0.4
|
||||
|
||||
executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=10)
|
||||
results = [result async for result in executor.run(tasks)] # type: ignore[arg-type]
|
||||
assert results == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
|
||||
assert executor.execution_time > 0.2
|
||||
assert executor.execution_time < 1.0
|
||||
assert executor.execution_time < 0.3
|
||||
|
||||
@@ -10,15 +10,8 @@ import xmind # type: ignore[import-untyped]
|
||||
from jinja2 import Template
|
||||
|
||||
from maigret.report import (
|
||||
filter_supposed_data,
|
||||
sort_report_by_data_points,
|
||||
_md_format_value,
|
||||
generate_csv_report,
|
||||
generate_txt_report,
|
||||
save_csv_report,
|
||||
save_txt_report,
|
||||
save_json_report,
|
||||
save_markdown_report,
|
||||
save_xmind_report,
|
||||
save_html_report,
|
||||
save_pdf_report,
|
||||
@@ -463,223 +456,3 @@ def test_text_report_broken():
|
||||
assert brief_part in report_text
|
||||
assert 'us' in report_text
|
||||
assert 'photo' in report_text
|
||||
|
||||
|
||||
def test_filter_supposed_data():
|
||||
data = {
|
||||
'fullname': ['Alice'],
|
||||
'gender': ['female'],
|
||||
'location': ['Berlin'],
|
||||
'age': ['30'],
|
||||
'email': ['x@y.z'], # not allowed, must be dropped
|
||||
'bio': ['hi'], # not allowed
|
||||
}
|
||||
result = filter_supposed_data(data)
|
||||
assert result == {
|
||||
'Fullname': 'Alice',
|
||||
'Gender': 'female',
|
||||
'Location': 'Berlin',
|
||||
'Age': '30',
|
||||
}
|
||||
|
||||
|
||||
def test_filter_supposed_data_empty():
|
||||
assert filter_supposed_data({}) == {}
|
||||
assert filter_supposed_data({'nope': ['v']}) == {}
|
||||
|
||||
|
||||
def test_filter_supposed_data_scalar_values():
|
||||
# Strings and scalars must be kept whole — previously v[0] on "Alice"
|
||||
# silently returned "A" instead of "Alice".
|
||||
data = {
|
||||
'fullname': 'Alice',
|
||||
'gender': 'female',
|
||||
'location': 'Berlin',
|
||||
'age': 30,
|
||||
}
|
||||
assert filter_supposed_data(data) == {
|
||||
'Fullname': 'Alice',
|
||||
'Gender': 'female',
|
||||
'Location': 'Berlin',
|
||||
'Age': 30,
|
||||
}
|
||||
|
||||
|
||||
def test_filter_supposed_data_empty_list_yields_empty_string():
|
||||
# Edge case: list value present but empty should not crash with IndexError.
|
||||
assert filter_supposed_data({'fullname': []}) == {'Fullname': ''}
|
||||
|
||||
|
||||
def test_filter_supposed_data_mixed_values():
|
||||
# List and scalar mixed in the same payload.
|
||||
data = {'fullname': ['Alice', 'Alicia'], 'gender': 'female'}
|
||||
assert filter_supposed_data(data) == {
|
||||
'Fullname': 'Alice',
|
||||
'Gender': 'female',
|
||||
}
|
||||
|
||||
|
||||
def test_sort_report_by_data_points():
|
||||
status_many = MaigretCheckResult('', '', '', MaigretCheckStatus.CLAIMED)
|
||||
status_many.ids_data = {'a': 1, 'b': 2, 'c': 3}
|
||||
status_one = MaigretCheckResult('', '', '', MaigretCheckStatus.CLAIMED)
|
||||
status_one.ids_data = {'a': 1}
|
||||
status_none = MaigretCheckResult('', '', '', MaigretCheckStatus.CLAIMED)
|
||||
|
||||
results = {
|
||||
'few': {'status': status_one},
|
||||
'many': {'status': status_many},
|
||||
'zero': {'status': status_none},
|
||||
'nostatus': {},
|
||||
}
|
||||
sorted_out = sort_report_by_data_points(results)
|
||||
keys = list(sorted_out.keys())
|
||||
# site with 3 ids_data fields must come first
|
||||
assert keys[0] == 'many'
|
||||
# site with 1 field next
|
||||
assert keys[1] == 'few'
|
||||
|
||||
|
||||
def test_md_format_value_list():
|
||||
assert _md_format_value(['a', 'b', 'c']) == 'a, b, c'
|
||||
|
||||
|
||||
def test_md_format_value_url():
|
||||
assert _md_format_value('https://example.com') == '[https://example.com](https://example.com)'
|
||||
assert _md_format_value('http://x.y') == '[http://x.y](http://x.y)'
|
||||
|
||||
|
||||
def test_md_format_value_plain():
|
||||
assert _md_format_value('hello') == 'hello'
|
||||
assert _md_format_value(42) == '42'
|
||||
|
||||
|
||||
def test_save_csv_report():
|
||||
filename = 'report_test.csv'
|
||||
save_csv_report(filename, 'test', EXAMPLE_RESULTS)
|
||||
with open(filename) as f:
|
||||
content = f.read()
|
||||
assert 'username,name,url_main' in content
|
||||
assert 'test,GitHub' in content
|
||||
|
||||
|
||||
def test_save_txt_report():
|
||||
filename = 'report_test.txt'
|
||||
save_txt_report(filename, 'test', EXAMPLE_RESULTS)
|
||||
with open(filename) as f:
|
||||
content = f.read()
|
||||
assert 'https://www.github.com/test' in content
|
||||
assert 'Total Websites Username Detected On : 1' in content
|
||||
|
||||
|
||||
def test_save_json_report_simple():
|
||||
filename = 'report_test.json'
|
||||
save_json_report(filename, 'test', EXAMPLE_RESULTS, 'simple')
|
||||
with open(filename) as f:
|
||||
data = json.load(f)
|
||||
assert 'GitHub' in data
|
||||
|
||||
|
||||
def test_save_json_report_ndjson():
|
||||
filename = 'report_test_ndjson.json'
|
||||
save_json_report(filename, 'test', EXAMPLE_RESULTS, 'ndjson')
|
||||
with open(filename) as f:
|
||||
lines = f.readlines()
|
||||
assert len(lines) == 1
|
||||
assert json.loads(lines[0])['sitename'] == 'GitHub'
|
||||
|
||||
|
||||
def _markdown_context_with_rich_ids():
|
||||
"""Build a context with found accounts, ids_data (incl. image, url, list) to exercise all branches."""
|
||||
found_result = copy.deepcopy(GOOD_RESULT)
|
||||
found_result.tags = ['photo', 'us']
|
||||
found_result.ids_data = {
|
||||
"fullname": "Alice",
|
||||
"name": "Alice A.",
|
||||
"location": "Berlin",
|
||||
"bio": "Photographer",
|
||||
"external_url": "https://example.com/profile",
|
||||
"image": "https://example.com/avatar.png", # must be skipped
|
||||
"aliases": ["alice", "alicea"], # list value
|
||||
"last_online": "2024-01-02 10:00:00",
|
||||
}
|
||||
data = {
|
||||
'Github': {
|
||||
'username': 'alice',
|
||||
'parsing_enabled': True,
|
||||
'url_main': 'https://github.com/',
|
||||
'url_user': 'https://github.com/alice',
|
||||
'status': found_result,
|
||||
'http_status': 200,
|
||||
'is_similar': False,
|
||||
'rank': 1,
|
||||
'site': MaigretSite('Github', {}),
|
||||
'found': True,
|
||||
'ids_data': found_result.ids_data,
|
||||
},
|
||||
'Similar': {
|
||||
'username': 'alice',
|
||||
'url_user': 'https://other.com/alice',
|
||||
'is_similar': True,
|
||||
'found': True,
|
||||
'status': copy.deepcopy(GOOD_RESULT),
|
||||
},
|
||||
}
|
||||
return {
|
||||
'username': 'alice',
|
||||
'generated_at': '2024-01-02 10:00',
|
||||
'brief': 'Search returned 1 account',
|
||||
'countries_tuple_list': [('us', 1)],
|
||||
'interests_tuple_list': [('photo', 1)],
|
||||
'first_seen': '2023-01-01',
|
||||
'results': [('alice', 'username', data)],
|
||||
}
|
||||
|
||||
|
||||
def test_save_markdown_report():
|
||||
filename = 'report_test.md'
|
||||
context = _markdown_context_with_rich_ids()
|
||||
save_markdown_report(filename, context, run_info={'sites_count': 100, 'flags': '--top-sites 100'})
|
||||
with open(filename) as f:
|
||||
content = f.read()
|
||||
assert '# Report by searching on username "alice"' in content
|
||||
assert '## Summary' in content
|
||||
assert '## Accounts found' in content
|
||||
assert '### Github' in content
|
||||
assert '[https://github.com/alice](https://github.com/alice)' in content
|
||||
assert 'Ethical use' in content
|
||||
assert '100 sites checked' in content
|
||||
# image field must NOT appear in per-site listing
|
||||
assert 'avatar.png' not in content
|
||||
# list field rendered with join
|
||||
assert 'alice, alicea' in content
|
||||
# external url formatted as markdown link
|
||||
assert '[https://example.com/profile](https://example.com/profile)' in content
|
||||
|
||||
|
||||
def test_save_markdown_report_minimal_context():
|
||||
"""No run_info, no first_seen — exercise the fallback branches."""
|
||||
filename = 'report_test_min.md'
|
||||
context = {
|
||||
'username': 'bob',
|
||||
'brief': 'nothing found',
|
||||
'results': [],
|
||||
}
|
||||
save_markdown_report(filename, context)
|
||||
with open(filename) as f:
|
||||
content = f.read()
|
||||
assert '# Report by searching on username "bob"' in content
|
||||
assert '## Summary' in content
|
||||
|
||||
|
||||
def test_get_plaintext_report_minimal():
|
||||
"""Minimal context without countries/interests."""
|
||||
context = {
|
||||
'brief': 'Nothing to report.',
|
||||
'interests_tuple_list': [],
|
||||
'countries_tuple_list': [],
|
||||
}
|
||||
out = get_plaintext_report(context)
|
||||
assert 'Nothing to report.' in out
|
||||
assert 'Countries:' not in out
|
||||
assert 'Interests' not in out
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
sudo apt-get update && sudo apt-get install -y libcairo2-dev pkg-config
|
||||
pip install .
|
||||
Reference in New Issue
Block a user