Compare commits

...

9 Commits

Author SHA1 Message Date
copilot-swe-agent[bot] 5f5ca84f72 fix: use 100vh instead of fixed 750px for network graph height
Agent-Logs-Url: https://github.com/soxoj/maigret/sessions/943e32b6-d0c4-44a8-a87e-0b77410edbcc

Co-authored-by: soxoj <31013580+soxoj@users.noreply.github.com>
2026-04-30 20:47:20 +00:00
copilot-swe-agent[bot] 98b3dbe7c5 Initial plan 2026-04-30 20:46:36 +00:00
dependabot[bot] 533884bad5 build(deps): bump reportlab from 4.4.10 to 4.5.0 (#2578)
Bumps [reportlab](https://www.reportlab.com/) from 4.4.10 to 4.5.0.

---
updated-dependencies:
- dependency-name: reportlab
  dependency-version: 4.5.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-30 22:44:05 +02:00
github-actions[bot] 12c8721a16 Updated site list and statistics (#2576)
Co-authored-by: soxoj <31013580+soxoj@users.noreply.github.com>
2026-04-29 17:20:42 +02:00
Soxoj b79f8aca28 Add site checks: 18 new sites (#2575) 2026-04-29 16:55:47 +02:00
dependabot[bot] 1a9fe77d6e build(deps): bump arabic-reshaper from 3.0.0 to 3.0.1 (#2573)
Bumps [arabic-reshaper](https://github.com/mpcabd/python-arabic-reshaper) from 3.0.0 to 3.0.1.
- [Release notes](https://github.com/mpcabd/python-arabic-reshaper/releases)
- [Commits](https://github.com/mpcabd/python-arabic-reshaper/compare/v3.0.0...v3.0.1)

---
updated-dependencies:
- dependency-name: arabic-reshaper
  dependency-version: 3.0.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-29 12:37:52 +02:00
Soxoj 1352bd35c6 Fix site checks: 5 fixed, 4 disabled; fix UA leak bug (#2569) 2026-04-26 14:51:44 +02:00
Soxoj 3960510b63 Fix site checks: 7 fixed, 1 disabled (#2565)
False-positive site probe issues #2531, #2542, #2556, #2559, #2560, #2561, #2563, #2496.
2026-04-26 12:34:52 +02:00
Soxoj a7bda700b4 Add Docker web image with multi-stage building (#2564) 2026-04-26 11:45:08 +02:00
10 changed files with 581 additions and 58 deletions
+48 -10
View File
@@ -2,7 +2,7 @@ name: Build docker image and push to DockerHub
on:
push:
branches: [ main ]
branches: [ main, dev ]
jobs:
docker:
@@ -10,24 +10,62 @@ jobs:
steps:
-
name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v3
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v3
-
name: Login to DockerHub
uses: docker/login-action@v1
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
-
name: Build and push
id: docker_build
uses: docker/build-push-action@v2
name: Extract metadata (CLI)
id: meta_cli
uses: docker/metadata-action@v5
with:
images: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret
tags: |
type=raw,value=latest,enable={{is_default_branch}}
type=ref,event=branch
type=sha,prefix=
-
name: Extract metadata (Web UI)
id: meta_web
uses: docker/metadata-action@v5
with:
images: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret
tags: |
type=raw,value=web,enable={{is_default_branch}}
type=ref,event=branch,suffix=-web
type=sha,prefix=web-
-
name: Build and push (CLI, default)
id: docker_build_cli
uses: docker/build-push-action@v6
with:
push: true
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
target: cli
tags: ${{ steps.meta_cli.outputs.tags }}
labels: ${{ steps.meta_cli.outputs.labels }}
platforms: linux/amd64,linux/arm64
cache-from: type=gha
cache-to: type=gha,mode=max
-
name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}
name: Build and push (Web UI)
id: docker_build_web
uses: docker/build-push-action@v6
with:
push: true
target: web
tags: ${{ steps.meta_web.outputs.tags }}
labels: ${{ steps.meta_web.outputs.labels }}
platforms: linux/amd64,linux/arm64
cache-from: type=gha
cache-to: type=gha,mode=max
-
name: Image digests
run: |
echo "cli: ${{ steps.docker_build_cli.outputs.digest }}"
echo "web: ${{ steps.docker_build_web.outputs.digest }}"
+10 -1
View File
@@ -1,4 +1,4 @@
FROM python:3.11-slim
FROM python:3.11-slim AS base
LABEL maintainer="Soxoj <soxoj@protonmail.com>"
WORKDIR /app
RUN pip install --no-cache-dir --upgrade pip
@@ -15,4 +15,13 @@ COPY . .
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install --no-cache-dir .
# For production use, set FLASK_HOST to a specific IP address for security
ENV FLASK_HOST=0.0.0.0
# Web UI variant: auto-launches the web interface on $PORT
FROM base AS web
ENV PORT=5000
EXPOSE 5000
ENTRYPOINT ["sh", "-c", "exec maigret --web \"$PORT\""]
# Default variant (last stage = `docker build .` target): CLI, backwards-compatible
FROM base AS cli
ENTRYPOINT ["maigret"]
+15 -3
View File
@@ -140,15 +140,27 @@ maigret username
### Docker
Two image variants are published:
- `soxoj/maigret:latest` — CLI mode (default)
- `soxoj/maigret:web` — auto-launches the [web interface](#web-interface)
```bash
# official image
# official image (CLI)
docker pull soxoj/maigret
# usage
# CLI usage
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
# Web UI (open http://localhost:5000)
docker run -p 5000:5000 soxoj/maigret:web
# Web UI on a custom port
docker run -e PORT=8080 -p 8080:8080 soxoj/maigret:web
# manual build
docker build -t maigret .
docker build -t maigret . # CLI image (default target)
docker build --target web -t maigret-web . # Web UI image
```
### Troubleshooting
+7 -1
View File
@@ -247,9 +247,15 @@ class CurlCffiChecker(CheckerBase):
async def check(self) -> Tuple[Optional[str], int, Optional[CheckError]]:
try:
async with CurlCffiAsyncSession() as session:
# Strip the User-Agent so curl_cffi can use the impersonated browser's
# matching UA. Mixing a random UA with a Chrome TLS fingerprint trips
# composite bot scoring (e.g. Cloudflare returns a JS challenge for
# "Chrome 91 UA + Chrome 131 TLS"). Keep any site-specific custom headers.
headers = {k: v for k, v in (self.headers or {}).items()
if k.lower() not in ('user-agent', 'connection')}
kwargs = {
'url': self.url,
'headers': self.headers,
'headers': headers or None,
'allow_redirects': self.allow_redirects,
'timeout': self.timeout if self.timeout else 10,
'impersonate': self.browser_emulate,
+1 -1
View File
@@ -245,7 +245,7 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
# Generate interactive visualization
from pyvis.network import Network # type: ignore[import-untyped]
nt = Network(notebook=True, height="750px", width="100%")
nt = Network(notebook=True, height="100vh", width="100%")
nt.from_nx(G)
nt.show(filename)
+290 -8
View File
@@ -1335,12 +1335,17 @@
"usernameClaimed": "Blue",
"usernameUnclaimed": "noonewouldeverusethis7",
"alexaRank": 242,
"presenseStrs": [
"class=\"gs_a\""
],
"absenceStrs": [
"did not match any articles",
"not match"
],
"errors": {
"Our systems have detected unusual traffic": "Google rate-limit / captcha"
"Our systems have detected unusual traffic": "Google rate-limit / captcha",
"/sorry/index": "Google rate-limit / captcha",
"unusual traffic from your computer network": "Google rate-limit / captcha"
},
"tags": [
"education",
@@ -5462,7 +5467,13 @@
"tags": [
"gaming"
],
"checkType": "status_code",
"checkType": "message",
"presenseStrs": [
"class=\"profile-container\""
],
"absenceStrs": [
"request-error"
],
"alexaRank": 2067,
"urlMain": "https://www.roblox.com/",
"url": "https://www.roblox.com/user.aspx?username={username}",
@@ -5963,6 +5974,10 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Muckrack": {
"disabled": true,
"protection": [
"cf_js_challenge"
],
"absenceStrs": [
"(404) Page Not Found"
],
@@ -6077,6 +6092,9 @@
"tags": [
"freelance"
],
"protection": [
"tls_fingerprint"
],
"checkType": "message",
"absenceStrs": [
"\"users\":{}"
@@ -6701,6 +6719,10 @@
"usernameUnclaimed": "noonewouldeverusethis777"
},
"MyFitnessPal": {
"disabled": true,
"protection": [
"custom_bot_protection"
],
"tags": [
"sport"
],
@@ -6900,6 +6922,9 @@
"tags": [
"music"
],
"protection": [
"tls_fingerprint"
],
"checkType": "message",
"presenseStrs": [
"Points:"
@@ -7404,6 +7429,10 @@
"tags": [
"gaming"
],
"protection": [
"tls_fingerprint"
],
"ignore403": true,
"checkType": "status_code",
"alexaRank": 5699,
"urlMain": "https://www.moddb.com/",
@@ -7746,6 +7775,10 @@
"usernameUnclaimed": "noonewouldeverusethis77777"
},
"Morguefile": {
"disabled": true,
"protection": [
"cf_js_challenge"
],
"absenceStrs": [
"free photographs for commercial use"
],
@@ -8360,6 +8393,9 @@
"Muse Score": {
"url": "https://musescore.com/{username}",
"urlMain": "https://musescore.com/",
"protection": [
"tls_fingerprint"
],
"checkType": "status_code",
"usernameClaimed": "arrangeme",
"usernameUnclaimed": "noonewouldeverusethis7",
@@ -11389,7 +11425,17 @@
"tags": [
"us"
],
"checkType": "response_url",
"checkType": "message",
"presenseStrs": [
"'s profile - Garden.org</title>"
],
"absenceStrs": [
"<title>Member List - Garden.org</title>"
],
"errors": {
"Just a moment": "Cloudflare challenge",
"challenges.cloudflare.com": "Cloudflare challenge"
},
"alexaRank": 17338,
"urlMain": "https://garden.org",
"url": "https://garden.org/users/profile/{username}/",
@@ -13171,8 +13217,12 @@
"url": "https://hive.blog/@{username}",
"urlMain": "https://hive.blog/",
"checkType": "message",
"presenseStrs": [
"class=\"UserProfile\""
],
"absenceStrs": [
"<title>User Not Found - Hive</title>"
"<title>User Not Found - Hive</title>",
"class=\"NotFound"
],
"usernameClaimed": "mango-juice",
"usernameUnclaimed": "noonewouldeverusethis7",
@@ -14017,10 +14067,13 @@
"gb",
"hk"
],
"protection": [
"tls_fingerprint"
],
"checkType": "status_code",
"alexaRank": 49143,
"urlMain": "https://www.mybuilder.com",
"url": "https://www.mybuilder.com/profile/view/{username}",
"url": "https://www.mybuilder.com/profile/{username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
},
@@ -14771,7 +14824,14 @@
"tags": [
"gaming"
],
"checkType": "response_url",
"checkType": "message",
"presenseStrs": [
"class=\"profile-avatar\""
],
"errors": {
"Just a moment": "Cloudflare challenge",
"challenges.cloudflare.com": "Cloudflare challenge"
},
"alexaRank": 65342,
"urlMain": "https://www.thesimsresource.com/",
"url": "https://www.thesimsresource.com/members/{username}/",
@@ -15976,6 +16036,10 @@
"usernameClaimed": "admin"
},
"Movieforums": {
"disabled": true,
"protection": [
"cf_js_challenge"
],
"tags": [
"forum",
"la"
@@ -16774,8 +16838,8 @@
"ru"
],
"checkType": "message",
"absenceStrs": [
"Пользователь с таким именем не найден"
"presenseStrs": [
"class=\"userprofile\""
],
"alexaRank": 160156,
"urlMain": "https://www.rusfootball.info/",
@@ -18596,6 +18660,9 @@
"us"
],
"checkType": "message",
"presenseStrs": [
"class=\"data_head\""
],
"absenceStrs": [
"The user you requested does not exist, no matter how much you wish this might be the case."
],
@@ -28554,6 +28621,7 @@
]
},
"TikTok Online Viewer": {
"disabled": true,
"errors": {
"Website unavailable": "Site error",
"is currently offline": "Site error"
@@ -35390,6 +35458,220 @@
"url": "https://op.gg/lol/summoners/search?q={username}&region=th",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"hiveon.com forum": {
"tags": [
"coding",
"ru"
],
"urlMain": "https://hiveon.com/forum",
"engine": "Discourse",
"usernameClaimed": "Rony",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"forum.manticoresearch.com": {
"tags": [
"coding"
],
"urlMain": "https://forum.manticoresearch.com",
"engine": "Discourse",
"usernameClaimed": "gloria",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"forum.jscourse.com": {
"tags": [
"coding",
"ru"
],
"urlMain": "https://forum.jscourse.com",
"engine": "Discourse",
"usernameClaimed": "kharkovhipster",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"forums.grandstream.com": {
"tags": [
"coding"
],
"checkType": "status_code",
"urlMain": "https://forums.grandstream.com",
"url": "https://forums.grandstream.com/u/{username}/summary",
"urlProbe": "https://forums.grandstream.com/u/{username}.json",
"usernameClaimed": "EricPitz",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"support.wirenboard.com": {
"tags": [
"coding",
"ru"
],
"urlMain": "https://support.wirenboard.com",
"engine": "Discourse",
"usernameClaimed": "enginPetr",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"forum.cs-cart.ru": {
"tags": [
"coding",
"ru"
],
"urlMain": "https://forum.cs-cart.ru",
"engine": "Discourse",
"usernameClaimed": "a.shishkin",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"instantcms.ru": {
"tags": [
"coding",
"ru"
],
"checkType": "status_code",
"urlMain": "https://instantcms.ru",
"url": "https://instantcms.ru/users/{username}",
"usernameClaimed": "fuze",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"wewin.ru": {
"tags": [
"ru"
],
"checkType": "status_code",
"urlMain": "https://wewin.ru",
"url": "https://wewin.ru/user/{username}",
"usernameClaimed": "dimok",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"myslo.ru": {
"tags": [
"news",
"ru"
],
"checkType": "status_code",
"urlMain": "https://myslo.ru",
"url": "https://myslo.ru/user/profile/{username}",
"usernameClaimed": "admin",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"add-groups.com": {
"tags": [
"messaging",
"ru"
],
"checkType": "status_code",
"urlMain": "https://add-groups.com",
"url": "https://add-groups.com/user/{username}",
"usernameClaimed": "admin",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Profi.ru": {
"tags": [
"freelance",
"ru"
],
"checkType": "status_code",
"urlMain": "https://profi.ru",
"url": "https://profi.ru/profile/{username}/",
"usernameClaimed": "petrov",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"mover.uz": {
"tags": [
"video"
],
"checkType": "status_code",
"urlMain": "https://mover.uz",
"url": "https://mover.uz/channel/{username}",
"usernameClaimed": "AlterEgo",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"BitPapa": {
"tags": [
"crypto",
"ru"
],
"checkType": "message",
"presenseStrs": [
"avgEscrowReleaseTime"
],
"urlMain": "https://bitpapa.com",
"url": "https://bitpapa.com/ru/user/{username}",
"usernameClaimed": "admin",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"minfin.com.ua": {
"tags": [
"finance",
"ua"
],
"checkType": "message",
"presenseStrs": [
"Был на сайте"
],
"absenceStrs": [
"\"isRequestFailed\":true"
],
"urlMain": "https://minfin.com.ua",
"url": "https://minfin.com.ua/users/{username}/",
"usernameClaimed": "Maksim",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Pexels": {
"tags": [
"photo"
],
"checkType": "status_code",
"protection": [
"tls_fingerprint"
],
"urlMain": "https://www.pexels.com",
"url": "https://www.pexels.com/ru-ru/@{username}",
"usernameClaimed": "jess",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"BestGore": {
"tags": [
"video"
],
"checkType": "status_code",
"protection": [
"tls_fingerprint"
],
"urlMain": "https://bestgore.fun",
"url": "https://bestgore.fun/c/{username}/videos",
"usernameClaimed": "user",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"AirNFTs": {
"tags": [
"crypto",
"nft"
],
"checkType": "message",
"protection": [
"tls_fingerprint"
],
"presenseStrs": [
"accountCreatedAt"
],
"urlMain": "https://app.airnfts.com",
"url": "https://app.airnfts.com/creators/{username}",
"usernameClaimed": "demo",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"GreasyFork": {
"tags": [
"coding"
],
"checkType": "message",
"presenseStrs": [
"class=\"user-list\""
],
"absenceStrs": [
"<p>No users!</p>"
],
"urlMain": "https://greasyfork.org",
"url": "https://greasyfork.org/en/users?q={username}",
"usernameClaimed": "jcunews",
"usernameUnclaimed": "noonewouldeverusethis7"
}
},
"engines": {
+3 -3
View File
@@ -1,8 +1,8 @@
{
"version": 1,
"updated_at": "2026-04-25T16:11:27Z",
"sites_count": 3139,
"updated_at": "2026-04-29T14:56:55Z",
"sites_count": 3157,
"min_maigret_version": "0.6.0",
"data_sha256": "c51ecaa6c0736c5e1e7ca91aaf111445b3ac9ce9541a472d97db2dcc3ff8aa17",
"data_sha256": "5dac8f1c045ea650d5872cf9dfd7f224410eaadba0f2b7eb60514cc51ba0097a",
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
}
Generated
+7 -7
View File
@@ -232,14 +232,14 @@ graphemeu = "0.7.2"
[[package]]
name = "arabic-reshaper"
version = "3.0.0"
version = "3.0.1"
description = "Reconstruct Arabic sentences to be used in applications that do not support Arabic"
optional = false
python-versions = "*"
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "arabic_reshaper-3.0.0-py3-none-any.whl", hash = "sha256:3f71d5034bb694204a239a6f1ebcf323ac3c5b059de02259235e2016a1a5e2dc"},
{file = "arabic_reshaper-3.0.0.tar.gz", hash = "sha256:ffcd13ba5ec007db71c072f5b23f420da92ac7f268512065d49e790e62237099"},
{file = "arabic_reshaper-3.0.1-py3-none-any.whl", hash = "sha256:41c5adc2420f85758eada7e880251c4b6a2adbd83377bd27e5d4eba71f648bc7"},
{file = "arabic_reshaper-3.0.1.tar.gz", hash = "sha256:a0d9b2a9fa29b5f2c1d705f407adf6ca4242405b9cac0e5cc09e6c4f3f8fb68c"},
]
[package.extras]
@@ -3168,14 +3168,14 @@ png = ["pypng"]
[[package]]
name = "reportlab"
version = "4.4.10"
version = "4.5.0"
description = "The Reportlab Toolkit"
optional = false
python-versions = "<4,>=3.9"
groups = ["main", "dev"]
files = [
{file = "reportlab-4.4.10-py3-none-any.whl", hash = "sha256:5abc815746ae2bc44e7ff25db96814f921349ca814c992c7eac3c26029bf7c24"},
{file = "reportlab-4.4.10.tar.gz", hash = "sha256:5cbbb34ac3546039d0086deb2938cdec06b12da3cdb836e813258eb33cd28487"},
{file = "reportlab-4.5.0-py3-none-any.whl", hash = "sha256:b8cc8996947d84e805368b47b2376070966f091d029351a0d8a1f238984c2c7f"},
{file = "reportlab-4.5.0.tar.gz", hash = "sha256:e595932789ab7a107ba253e83f7815622708a9fd49920d0d6a909880eb66ac75"},
]
[package.dependencies]
+42 -24
View File
@@ -1,5 +1,5 @@
## List of supported sites (search methods): total 3139
## List of supported sites (search methods): total 3157
Rank data fetched from Majestic Million by domains.
@@ -229,7 +229,7 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://beacons.ai) [beacons.ai (https://beacons.ai)](https://beacons.ai)*: top 5K, links*
1. ![](https://www.google.com/s2/favicons?domain=https://www.artsy.net) [Artsy (https://www.artsy.net)](https://www.artsy.net)*: top 5K, art*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.ifttt.com/) [IFTTT (https://www.ifttt.com/)](https://www.ifttt.com/)*: top 5K, tech*
1. ![](https://www.google.com/s2/favicons?domain=https://muckrack.com) [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, news*
1. ![](https://www.google.com/s2/favicons?domain=https://muckrack.com) [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, news*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.crunchyroll.com/) [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 5K, forum, movies*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://odysee.com/) [Odysee (https://odysee.com/)](https://odysee.com/)*: top 5K, video*
1. ![](https://www.google.com/s2/favicons?domain=https://replit.com/) [Replit (https://replit.com/)](https://replit.com/)*: top 5K, coding*
@@ -258,7 +258,7 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://ultimate-guitar.com/) [Ultimate-Guitar (https://ultimate-guitar.com/)](https://ultimate-guitar.com/)*: top 5K, music*
1. ![](https://www.google.com/s2/favicons?domain=https://chaturbate.com) [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 5K, porn, webcam*
1. ![](https://www.google.com/s2/favicons?domain=https://hackerone.com/) [HackerOne (https://hackerone.com/)](https://hackerone.com/)*: top 5K, coding, hacking*
1. ![](https://www.google.com/s2/favicons?domain=https://www.myfitnesspal.com/) [MyFitnessPal (https://www.myfitnesspal.com/)](https://www.myfitnesspal.com/)*: top 5K, sport*
1. ![](https://www.google.com/s2/favicons?domain=https://www.myfitnesspal.com/) [MyFitnessPal (https://www.myfitnesspal.com/)](https://www.myfitnesspal.com/)*: top 5K, sport*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://gab.com/) [Plurk (https://gab.com/)](https://gab.com/)*: top 5K, social, tw, us*
1. ![](https://www.google.com/s2/favicons?domain=https://contently.com/) [Contently (https://contently.com/)](https://contently.com/)*: top 5K, freelance*
1. ![](https://www.google.com/s2/favicons?domain=https://www.myminifactory.com/) [MyMiniFactory (https://www.myminifactory.com/)](https://www.myminifactory.com/)*: top 5K, 3d, shopping*
@@ -310,7 +310,7 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.mercadolivre.com.br) [MercadoLivre (https://www.mercadolivre.com.br)](https://www.mercadolivre.com.br)*: top 10K, br*
1. ![](https://www.google.com/s2/favicons?domain=https://tinder.com/) [Tinder (https://tinder.com/)](https://tinder.com/)*: top 10K, dating*
1. ![](https://www.google.com/s2/favicons?domain=https://www.anobii.com) [Anobii (https://www.anobii.com)](https://www.anobii.com)*: top 10K, books*
1. ![](https://www.google.com/s2/favicons?domain=https://morguefile.com) [Morguefile (https://morguefile.com)](https://morguefile.com)*: top 10K, photo*
1. ![](https://www.google.com/s2/favicons?domain=https://morguefile.com) [Morguefile (https://morguefile.com)](https://morguefile.com)*: top 10K, photo*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://velog.io/) [Velog (https://velog.io/)](https://velog.io/)*: top 10K, blog, coding, kr*
1. ![](https://www.google.com/s2/favicons?domain=https://kick.com/) [Kick (https://kick.com/)](https://kick.com/)*: top 10K, streaming*
1. ![](https://www.google.com/s2/favicons?domain=https://www.domestika.org) [domestika.org (https://www.domestika.org)](https://www.domestika.org)*: top 10K, education*
@@ -710,7 +710,7 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://fotostrana.ru) [fotostrana.ru (https://fotostrana.ru)](https://fotostrana.ru)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.bigfooty.com/forum/) [bigfooty.com (https://www.bigfooty.com/forum/)](https://www.bigfooty.com/forum/)*: top 100K, au, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://tl.net) [Tl (https://tl.net)](https://tl.net)*: top 10M, de, dk*
1. ![](https://www.google.com/s2/favicons?domain=https://www.movieforums.com) [Movieforums (https://www.movieforums.com)](https://www.movieforums.com)*: top 10M, forum, la*
1. ![](https://www.google.com/s2/favicons?domain=https://www.movieforums.com) [Movieforums (https://www.movieforums.com)](https://www.movieforums.com)*: top 10M, forum, la*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://crevado.com/) [Crevado (https://crevado.com/)](https://crevado.com/)*: top 10M, design*
1. ![](https://www.google.com/s2/favicons?domain=https://monkeytype.com/) [Monkeytype (https://monkeytype.com/)](https://monkeytype.com/)*: top 10M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://www.mylot.com/) [Mylot (https://www.mylot.com/)](https://www.mylot.com/)*: top 10M, pl*
@@ -2402,7 +2402,7 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=http://terminator-scc.net.ru) [Terminator (http://terminator-scc.net.ru)](http://terminator-scc.net.ru)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.thedaftclub.com) [Thedaftclub (https://www.thedaftclub.com)](https://www.thedaftclub.com)*: top 100M*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.thephysicsforum.com) [Thephysicsforum (https://www.thephysicsforum.com)](https://www.thephysicsforum.com)*: top 100M, forum*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://ttonlineviewer.com) [TikTok Online Viewer (https://ttonlineviewer.com)](https://ttonlineviewer.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://ttonlineviewer.com) [TikTok Online Viewer (https://ttonlineviewer.com)](https://ttonlineviewer.com)*: top 100M*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=http://tkgr.ru/) [Tkgr (http://tkgr.ru/)](http://tkgr.ru/)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://torrent-soft.net) [Torrent-soft (https://torrent-soft.net)](https://torrent-soft.net)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://totalstavki.ru) [TotalStavki (https://totalstavki.ru)](https://totalstavki.ru)*: top 100M, ru*, search is disabled
@@ -3142,19 +3142,37 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://discuss.flarum.org.cn) [discuss.flarum.org.cn (https://discuss.flarum.org.cn)](https://discuss.flarum.org.cn)*: top 100M, cn, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://flarum.es) [flarum.es (https://flarum.es)](https://flarum.es)*: top 100M, es, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.fibra.click) [forum.fibra.click (https://forum.fibra.click)](https://forum.fibra.click)*: top 100M, forum, it*
1. ![](https://www.google.com/s2/favicons?domain=https://hiveon.com/forum) [hiveon.com forum (https://hiveon.com/forum)](https://hiveon.com/forum)*: top 100M, coding, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.manticoresearch.com) [forum.manticoresearch.com (https://forum.manticoresearch.com)](https://forum.manticoresearch.com)*: top 100M, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.jscourse.com) [forum.jscourse.com (https://forum.jscourse.com)](https://forum.jscourse.com)*: top 100M, coding, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forums.grandstream.com) [forums.grandstream.com (https://forums.grandstream.com)](https://forums.grandstream.com)*: top 100M, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://support.wirenboard.com) [support.wirenboard.com (https://support.wirenboard.com)](https://support.wirenboard.com)*: top 100M, coding, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.cs-cart.ru) [forum.cs-cart.ru (https://forum.cs-cart.ru)](https://forum.cs-cart.ru)*: top 100M, coding, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://instantcms.ru) [instantcms.ru (https://instantcms.ru)](https://instantcms.ru)*: top 100M, coding, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://wewin.ru) [wewin.ru (https://wewin.ru)](https://wewin.ru)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://myslo.ru) [myslo.ru (https://myslo.ru)](https://myslo.ru)*: top 100M, news, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://add-groups.com) [add-groups.com (https://add-groups.com)](https://add-groups.com)*: top 100M, messaging, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://profi.ru) [Profi.ru (https://profi.ru)](https://profi.ru)*: top 100M, freelance, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://mover.uz) [mover.uz (https://mover.uz)](https://mover.uz)*: top 100M, video*
1. ![](https://www.google.com/s2/favicons?domain=https://bitpapa.com) [BitPapa (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://minfin.com.ua) [minfin.com.ua (https://minfin.com.ua)](https://minfin.com.ua)*: top 100M, finance, ua*
1. ![](https://www.google.com/s2/favicons?domain=https://www.pexels.com) [Pexels (https://www.pexels.com)](https://www.pexels.com)*: top 100M, photo*
1. ![](https://www.google.com/s2/favicons?domain=https://bestgore.fun) [BestGore (https://bestgore.fun)](https://bestgore.fun)*: top 100M, video*
1. ![](https://www.google.com/s2/favicons?domain=https://app.airnfts.com) [AirNFTs (https://app.airnfts.com)](https://app.airnfts.com)*: top 100M, crypto, nft*
1. ![](https://www.google.com/s2/favicons?domain=https://greasyfork.org) [GreasyFork (https://greasyfork.org)](https://greasyfork.org)*: top 100M, coding*
The list was updated at (2026-04-25)
The list was updated at (2026-04-29)
## Statistics
Enabled/total sites: 2510/3139 = 79.96%
Enabled/total sites: 2523/3157 = 79.92%
Incomplete message checks: 317/2510 = 12.63% (false positive risks)
Incomplete message checks: 316/2523 = 12.52% (false positive risks)
Status code checks: 625/2510 = 24.9% (false positive risks)
Status code checks: 633/2523 = 25.09% (false positive risks)
False positive risk (total): 37.53%
False positive risk (total): 37.61%
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit (disabled), Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit (disabled), Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, forums.grandstream.com, nightbot, notabug.org, qiwi.me (disabled)
Sites with activation: OnlyFans, Twitter, Vimeo, Weibo
@@ -3162,13 +3180,13 @@ Top 20 profile URLs:
- (709) `{urlMain}/index/8-0-{username} (uCoz)`
- (312) `/{username}`
- (223) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (170) `/user/{username}`
- (138) `/profile/{username}`
- (172) `/user/{username}`
- (140) `/profile/{username}`
- (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)`
- (120) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
- (116) `/u/{username}`
- (93) `/users/{username}`
- (87) `{urlMain}/u/{username}/summary (Discourse)`
- (95) `/users/{username}`
- (92) `{urlMain}/u/{username}/summary (Discourse)`
- (68) `/@{username}`
- (55) `/wiki/User:{username}`
- (45) `SUBDOMAIN`
@@ -3186,7 +3204,7 @@ Sites by engine:
- `XenForo`: 181/223 (81.2%)
- `phpBB/Search`: 120/127 (94.5%)
- `vBulletin`: 31/120 (25.8%)
- `Discourse`: 81/87 (93.1%)
- `Discourse`: 86/92 (93.5%)
- `phpBB`: 21/27 (77.8%)
- `engine404`: 19/23 (82.6%)
- `op.gg`: 17/17 (100.0%)
@@ -3201,20 +3219,20 @@ Top 20 tags:
- (1057) `NO_TAGS` (non-standard)
- (750) `forum`
- (128) `gaming`
- (80) `coding`
- (57) `photo`
- (88) `coding`
- (58) `photo`
- (46) `tech`
- (45) `social`
- (41) `news`
- (42) `news`
- (39) `blog`
- (33) `music`
- (31) `shopping`
- (27) `crypto`
- (26) `finance`
- (29) `crypto`
- (27) `finance`
- (25) `video`
- (25) `sharing`
- (23) `video`
- (23) `education`
- (22) `freelance`
- (21) `art`
- (21) `freelance`
- (18) `hobby`
- (17) `sport`
+158
View File
@@ -307,3 +307,161 @@ def test_process_site_result_with_error_is_unknown():
out = process_site_result(resp, Mock(), Mock(), info, site)
assert out["status"].status == MaigretCheckStatus.UNKNOWN
assert out["status"].error is not None
# ---- CurlCffiChecker: TLS impersonation header sanitisation ----
class _FakeCurlResponse:
def __init__(self, text="ok", status_code=200):
self.text = text
self.status_code = status_code
class _FakeCurlSession:
"""Captures the kwargs of the last .get/.post/.head call for assertions."""
last_method = None
last_kwargs = None
async def __aenter__(self):
return self
async def __aexit__(self, exc_type, exc, tb):
return False
async def get(self, **kwargs):
type(self).last_method = 'get'
type(self).last_kwargs = kwargs
return _FakeCurlResponse()
async def post(self, **kwargs):
type(self).last_method = 'post'
type(self).last_kwargs = kwargs
return _FakeCurlResponse()
async def head(self, **kwargs):
type(self).last_method = 'head'
type(self).last_kwargs = kwargs
return _FakeCurlResponse()
@pytest.fixture
def fake_curl_cffi(monkeypatch):
    """Swap ``checking.CurlCffiAsyncSession`` for the recording fake.

    The class-level capture is cleared first so each test starts from a clean
    state; the recorder class is returned for assertions.
    """
    from maigret import checking

    recorder = _FakeCurlSession
    recorder.last_method = recorder.last_kwargs = None
    monkeypatch.setattr(checking, 'CurlCffiAsyncSession', recorder)
    return recorder
@pytest.mark.asyncio
async def test_curl_cffi_strips_random_user_agent_to_let_impersonation_drive_ua(fake_curl_cffi):
    """Regression: maigret used to forward `get_random_user_agent()` (often Chrome 91)
    to curl_cffi alongside `impersonate="chrome"` (Chrome 131 TLS). Cloudflare composite
    bot scoring rejects the resulting "Chrome 91 UA + Chrome 131 TLS" combo with a JS
    challenge. The fix strips User-Agent and Connection from the headers passed to
    curl_cffi so the impersonation default UA wins.
    """
    from maigret.checking import CurlCffiChecker

    checker = CurlCffiChecker(logger=Mock(), browser_emulate='chrome')
    checker.prepare(
        url='https://example.com/u/test',
        headers={
            "User-Agent": "Mozilla/5.0 ... Chrome/91.0.4472.124 ...",  # maigret default
            "Connection": "close",  # maigret default
        },
        allow_redirects=True,
        timeout=10,
        method='get',
    )
    await checker.check()

    sent = fake_curl_cffi.last_kwargs
    assert fake_curl_cffi.last_method == 'get'
    assert sent['impersonate'] == 'chrome'

    # HTTP header names are case-insensitive, so compare lower-cased names.
    # A `None` headers kwarg counts as "nothing sent".
    sent_names = {name.lower() for name in (sent['headers'] or ())}
    # The whole point of the fix: random UA must not leak through.
    assert 'user-agent' not in sent_names
    # Connection: close also stripped (interferes with impersonation defaults).
    assert 'connection' not in sent_names
@pytest.mark.asyncio
async def test_curl_cffi_preserves_site_specific_headers(fake_curl_cffi):
    """Site-specific headers (e.g. Content-Type for POST APIs, auth tokens, cookies)
    must survive the User-Agent strip: only UA and Connection are removed.
    """
    from maigret.checking import CurlCffiChecker

    checker = CurlCffiChecker(logger=Mock(), browser_emulate='chrome')
    checker.prepare(
        url='https://example.com/api',
        headers={
            "User-Agent": "Mozilla/5.0 random",
            "Connection": "close",
            "Content-Type": "application/json",
            "X-Csrf-Token": "abc123",
        },
        allow_redirects=True,
        timeout=10,
        method='get',
    )
    await checker.check()

    sent_headers = fake_curl_cffi.last_kwargs['headers']
    assert sent_headers is not None
    assert sent_headers.get("Content-Type") == "application/json"
    assert sent_headers.get("X-Csrf-Token") == "abc123"

    # Sanity: stripped pair is gone, whatever capitalisation was used.
    sent_names = {name.lower() for name in sent_headers}
    assert "user-agent" not in sent_names
    assert "connection" not in sent_names
@pytest.mark.asyncio
async def test_curl_cffi_handles_empty_headers(fake_curl_cffi):
    """With no headers supplied, the headers kwarg sent to the session is None
    (not an empty dict that could confuse curl_cffi's impersonation header
    injection)."""
    from maigret.checking import CurlCffiChecker

    request = dict(
        url='https://example.com/u/test',
        headers=None,
        allow_redirects=True,
        timeout=10,
        method='get',
    )
    checker = CurlCffiChecker(logger=Mock(), browser_emulate='chrome')
    checker.prepare(**request)
    await checker.check()

    captured = fake_curl_cffi.last_kwargs
    assert captured['headers'] is None
    assert captured['impersonate'] == 'chrome'
@pytest.mark.asyncio
async def test_curl_cffi_strips_ua_for_post_too(fake_curl_cffi):
    """POST requests get the same UA strip as GET (e.g. Discord-style POST
    username probes with `tls_fingerprint`)."""
    from maigret.checking import CurlCffiChecker

    body = {"username": "test"}
    request_headers = {
        "User-Agent": "Mozilla/5.0 random",
        "Content-Type": "application/json",
    }
    checker = CurlCffiChecker(logger=Mock(), browser_emulate='chrome')
    checker.prepare(
        url='https://example.com/api/check',
        headers=request_headers,
        allow_redirects=True,
        timeout=10,
        method='post',
        payload=body,
    )
    await checker.check()

    captured = fake_curl_cffi.last_kwargs
    assert fake_curl_cffi.last_method == 'post'
    assert captured['json'] == {"username": "test"}
    forwarded = captured['headers']
    assert "User-Agent" not in forwarded
    assert forwarded.get("Content-Type") == "application/json"