From 3fd34afb77b424eebb0c1cba0211f45c92a4bd95 Mon Sep 17 00:00:00 2001 From: Soxoj <31013580+soxoj@users.noreply.github.com> Date: Mon, 6 Apr 2026 21:41:16 +0200 Subject: [PATCH] Sites fixes (#2464) --- maigret/errors.py | 1 + maigret/resources/data.json | 58 +++++++++------------------------- maigret/resources/db_meta.json | 6 ++-- sites.md | 17 +++++----- 4 files changed, 27 insertions(+), 55 deletions(-) diff --git a/maigret/errors.py b/maigret/errors.py index d8930c5..986a59e 100644 --- a/maigret/errors.py +++ b/maigret/errors.py @@ -58,6 +58,7 @@ COMMON_ERRORS = { 'Censorship', 'MGTS' ), 'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'), + 'Client Challenge': CheckError('Bot protection', 'Anti-bot challenge'), 'DDoS-Guard': CheckError('Bot protection', 'DDoS-Guard'), 'Сайт заблокирован хостинг-провайдером': CheckError( 'Site-specific', 'Site is disabled (Beget)' diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 34a6fd9..a4dad42 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -80,8 +80,7 @@ "\"routePath\":null" ], "errors": { - "Login • Instagram": "Login required", - "Just a moment": "Cloudflare challenge" + "Login • Instagram": "Login required" }, "alexaRank": 4, "urlMain": "https://www.instagram.com/", @@ -101,7 +100,7 @@ "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", - "x-guest-token": "2041186137171976270" + "x-guest-token": "2041232709842808872" }, "errors": { "Bad guest token": "x-guest-token update required" @@ -294,7 +293,7 @@ "method": "vimeo" }, "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzU0OTI1ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZjkwOGY0MmYtMTE2Zi00MDRkLWExOTgtOGUyOTE2MTFmZTQzIn0.Wt_z9qrjHofYPtUIDkbxrPX2S-glzmEowkR8m89O_Zg" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzU1MDM2ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiMWVlMjg4ZTQtZGRkMC00ZWYyLTgyOWYtMDRmMjg3NjI1MTA5In0.FkO1cjuIS9jpn5nxkRWWp-jr0Meh_WUvRP1L46qVhcw" }, "urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1", "checkType": "status_code", @@ -1062,9 +1061,6 @@ "robots", "noindex,nofollow" ], - "errors": { - "Client Challenge": "Anti-bot challenge" - }, "tags": [ "documents", "sharing" @@ -1549,17 +1545,11 @@ "tags": [ "reading" ], - "checkType": "message", - "absenceStrs": [ - "Page not found" - ], - "errors": { - "This page is unavailable": "Site censorship" - }, + "checkType": "status_code", "alexaRank": 280, "urlMain": "https://www.scribd.com/", "url": "https://www.scribd.com/{username}", - "usernameClaimed": "blue", + "usernameClaimed": "scribd", "usernameUnclaimed": "noonewouldeverusethis7" }, "Freepik": { @@ -10307,9 +10297,6 @@ "usernameUnclaimed": "noonewouldeverusethis7", "headers": { "User-Agent": "" - }, - "errors": { - "DDoS-Guard": "DDoS protection detected, use proxy/vpn" } }, "PCPartPicker": { @@ -12192,10 +12179,7 @@ "protection": [ "ip_reputation" ], - "disabled": true, - "errors": { - "Just a moment": "Cloudflare challenge" - } + "disabled": true }, "discuss.inventables.com": { "urlMain": "https://discuss.inventables.com", @@ -19377,7 +19361,10 @@ "urlMain": "https://poembook.ru", "url": "https://poembook.ru/any?query={username}", "usernameClaimed": "DIKANNA", - "usernameUnclaimed": "noonewouldeverusethis7" + "usernameUnclaimed": "noonewouldeverusethis7", + "presenseStrs": [ + "poembook.ru/profile/" + ] }, "forum.exkavator.ru": { "disabled": true, @@ -19580,7 +19567,8 @@ "urlMain": "https://forum.mau.ru", "url": "https://forum.mau.ru/profile.php?mode=viewprofile&u={username}", "usernameClaimed": "curl", - "usernameUnclaimed": "noonewouldeverusethis7" + "usernameUnclaimed": "noonewouldeverusethis7", + "disabled": true }, "Magix": { "checkType": "message", @@ -23400,20 +23388,6 @@ "usernameClaimed": "ubi-pingu", "usernameUnclaimed": "noonewouldeverusethis7" }, - "1001mem.ru": { - "tags": [ - "ru" - ], - "regexCheck": "^[^.]{1,}$", - "checkType": "message", - "absenceStrs": [ - "Этот пользователь не существует, или заблокирован." - ], - "urlMain": "http://1001mem.ru", - "url": "http://1001mem.ru/{username}", - "usernameClaimed": "adam", - "usernameUnclaimed": "noonewouldeverusethis7" - }, "11x2": { "checkType": "status_code", "urlMain": "https://11x2.com", @@ -26869,10 +26843,7 @@ "disabled": true, "protection": [ "js_challenge" - ], - "errors": { - "Just a moment": "Cloudflare challenge" - } + ] }, "No-jus": { "tags": [ @@ -33722,7 +33693,8 @@ "checkType": "status_code", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7", - "url": "https://wiki.mozilla.org/wiki/User:{username}" + "url": "https://wiki.mozilla.org/wiki/User:{username}", + "disabled": true }, "wiki.mtasa.com": { "checkType": "status_code", diff --git a/maigret/resources/db_meta.json b/maigret/resources/db_meta.json index 330fd1a..5009c3d 100644 --- a/maigret/resources/db_meta.json +++ b/maigret/resources/db_meta.json @@ -1,8 +1,8 @@ { "version": 1, - "updated_at": "2026-04-06T16:20:33Z", - "sites_count": 3155, + "updated_at": "2026-04-06T19:19:38Z", + "sites_count": 3154, "min_maigret_version": "0.5.0", - "data_sha256": "da87fd6f32bd60efc25e35aa6aa7d329e490d4aa544ddb68539d490cd2157b56", + "data_sha256": "e44eee4a22651061460abae4535b935b2021a19377ead76b87a5f24de625df3a", "data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json" } \ No newline at end of file diff --git a/sites.md b/sites.md index 7d359b8..0148e4d 100644 --- a/sites.md +++ b/sites.md @@ -1,5 +1,5 @@ -## List of supported sites (search methods): total 3155 +## List of supported sites (search methods): total 3154 Rank data fetched from Majestic Million by domains. @@ -1936,7 +1936,7 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://community.endlessos.com) [community.endlessos.com (https://community.endlessos.com)](https://community.endlessos.com)*: top 100M, forum* 1. ![](https://www.google.com/s2/favicons?domain=https://forum.golangbridge.org/) [Golangbridge (https://forum.golangbridge.org/)](https://forum.golangbridge.org/)*: top 100M, forum, sa, ua, vn* 1. ![](https://www.google.com/s2/favicons?domain=https://freelance.codeby.net) [freelance.codeby.net (https://freelance.codeby.net)](https://freelance.codeby.net)*: top 100M, ru*, search is disabled -1. ![](https://www.google.com/s2/favicons?domain=https://forum.mau.ru) [mau (https://forum.mau.ru)](https://forum.mau.ru)*: top 100M, forum, ru* +1. ![](https://www.google.com/s2/favicons?domain=https://forum.mau.ru) [mau (https://forum.mau.ru)](https://forum.mau.ru)*: top 100M, forum, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://forum.kvinneguiden.no) [Kvinneguiden (https://forum.kvinneguiden.no)](https://forum.kvinneguiden.no)*: top 100M, forum* 1. ![](https://www.google.com/s2/favicons?domain=https://forum.itvdn.com) [ITVDN Forum (https://forum.itvdn.com)](https://forum.itvdn.com)*: top 100M, forum, ru, ua* 1. ![](https://www.google.com/s2/favicons?domain=https://forums.golf-monthly.co.uk/) [GolfMonthly (https://forums.golf-monthly.co.uk/)](https://forums.golf-monthly.co.uk/)*: top 100M, forum, gb*, search is disabled @@ -1991,7 +1991,6 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=http://forum.trade-print.ru) [forum.trade-print.ru (http://forum.trade-print.ru)](http://forum.trade-print.ru)*: top 100M*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=http://0-3.ru) [0-3.RU (http://0-3.ru)](http://0-3.ru)*: top 100M, forum, ru* 1. ![](https://www.google.com/s2/favicons?domain=) [discussions.ubisoft.com ()]()*: top 100M, forum, gaming* -1. ![](https://www.google.com/s2/favicons?domain=http://1001mem.ru) [1001mem.ru (http://1001mem.ru)](http://1001mem.ru)*: top 100M, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://11x2.com) [11x2 (https://11x2.com)](https://11x2.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://1xforum.com) [1xforum (https://1xforum.com)](https://1xforum.com)*: top 100M, forum, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://2Dimensions.com/) [2Dimensions (https://2Dimensions.com/)](https://2Dimensions.com/)*: top 100M* @@ -3051,7 +3050,7 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=) [webflow.com ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [wiki.creativecommons.org ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [wiki.linuxquestions.org ()]()*: top 100M* -1. ![](https://www.google.com/s2/favicons?domain=) [wiki.mozilla.org ()]()*: top 100M* +1. ![](https://www.google.com/s2/favicons?domain=) [wiki.mozilla.org ()]()*: top 100M*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=) [wiki.mtasa.com ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [wiki.teamfortress.com ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [wiki.tfes.org ()]()*: top 100M* @@ -3162,13 +3161,13 @@ Rank data fetched from Majestic Million by domains. The list was updated at (2026-04-06) ## Statistics -Enabled/total sites: 2538/3155 = 80.44% +Enabled/total sites: 2535/3154 = 80.37% -Incomplete message checks: 338/2538 = 13.32% (false positive risks) +Incomplete message checks: 334/2535 = 13.18% (false positive risks) -Status code checks: 631/2538 = 24.86% (false positive risks) +Status code checks: 631/2535 = 24.89% (false positive risks) -False positive risk (total): 38.18% +False positive risk (total): 38.07% Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), MicrosoftLearn, MixCloud, Monkeytype, Niftygateway, Omg.lol, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit, Reddit Search (Pushshift) (disabled), RoyalCams, Scratch, Soop, SportsTracker, Spotify (disabled), StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Warframe Market, Warpcast, Weibo, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled) @@ -3176,7 +3175,7 @@ Sites with activation: Spotify (disabled), Twitter, Vimeo, Weibo Top 20 profile URLs: - (710) `{urlMain}/index/8-0-{username} (uCoz)` -- (318) `/{username}` +- (317) `/{username}` - (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)` - (173) `/user/{username}` - (138) `/profile/{username}`