From c1674529ad445ec68715f13567be0e90a8ee68b5 Mon Sep 17 00:00:00 2001 From: Soxoj <31013580+soxoj@users.noreply.github.com> Date: Fri, 15 May 2026 18:15:38 +0200 Subject: [PATCH] Fix site checks: 2 fixed, 3 disabled; add Faceit; fix utils import (#2660) --- maigret/resources/data.json | 38 +++++++++++++++++++++++++++++----- maigret/resources/db_meta.json | 6 +++--- sites.md | 27 ++++++++++++------------ utils/update_site_data.py | 4 ++++ 4 files changed, 54 insertions(+), 21 deletions(-) diff --git a/maigret/resources/data.json b/maigret/resources/data.json index d2d334c..ae3fa6e 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -874,6 +874,7 @@ "url": "https://discord.com", "urlMain": "https://discord.com/", "urlProbe": "https://discord.com/api/v9/unique-username/username-attempt-unauthed", + "regexCheck": "^.{2,32}$", "checkType": "message", "absenceStrs": [ "{\"taken\":false}" @@ -891,7 +892,9 @@ "errors": { "The resource is being rate limited": "Rate limited", "You are being rate limited": "Rate limited", - "rate_limited": "Rate limited" + "rate_limited": "Rate limited", + "BASE_TYPE_BAD_LENGTH": "Invalid username length", + "Must be between 2 and 32 in length": "Invalid username length" }, "tags": [ "gaming", @@ -1527,6 +1530,10 @@ ] }, "Scribd": { + "disabled": true, + "protection": [ + "js_challenge" + ], "tags": [ "reading" ], @@ -1535,7 +1542,10 @@ "urlMain": "https://www.scribd.com/", "url": "https://www.scribd.com/{username}", "usernameClaimed": "scribd", - "usernameUnclaimed": "noonewouldeverusethis7" + "usernameUnclaimed": "noonewouldeverusethis7", + "errors": { + "Client Challenge": "Anti-bot challenge" + } }, "Freepik": { "protection": [ @@ -2873,6 +2883,7 @@ ] }, "Gamespot": { + "disabled": true, "protection": [ "tls_fingerprint" ], @@ -11619,6 +11630,10 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "igromania": { + "disabled": true, + "protection": [ + "custom_bot_protection" + ], "tags": [ "forum", "gaming", @@ -13034,10 +13049,12 @@ ], "tags": [ "gb", - "movies", - "pk" + "movies" + ], + "checkType": "message", + "presenseStrs": [ + "Fan Page - British Comedy Guide" ], - "checkType": "status_code", "alexaRank": 29459, "urlMain": "https://www.comedy.co.uk", "url": "https://www.comedy.co.uk/profile/{username}/", @@ -35924,6 +35941,17 @@ "url": "https://greasyfork.org/en/users?q={username}", "usernameClaimed": "jcunews", "usernameUnclaimed": "noonewouldeverusethis7" + }, + "Faceit": { + "tags": [ + "gaming" + ], + "checkType": "status_code", + "urlMain": "https://faceit.com/", + "url": "https://www.faceit.com/en/players/{username}", + "urlProbe": "https://www.faceit.com/api/users/v1/nicknames/{username}", + "usernameClaimed": "Snotax", + "usernameUnclaimed": "noonewouldeverusethis7" } }, "engines": { diff --git a/maigret/resources/db_meta.json b/maigret/resources/db_meta.json index 510a8f4..c669c84 100644 --- a/maigret/resources/db_meta.json +++ b/maigret/resources/db_meta.json @@ -1,8 +1,8 @@ { "version": 1, - "updated_at": "2026-05-15T12:30:52Z", - "sites_count": 3154, + "updated_at": "2026-05-15T16:12:58Z", + "sites_count": 3155, "min_maigret_version": "0.6.0", - "data_sha256": "f86d77a18bcd1d353933b64d99953634ce5e2966860f25bacd5e3de5659fb8a7", + "data_sha256": "df2ab3dbc96bdcdc8aa4e9da485df75ce6c3274814080f00a35e89f7f43783e1", "data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json" } \ No newline at end of file diff --git a/sites.md b/sites.md index 26e7d2f..2a68a70 100644 --- a/sites.md +++ b/sites.md @@ -1,5 +1,5 @@ -## List of supported sites (search methods): total 3154 +## List of supported sites (search methods): total 3155 Rank data fetched from Majestic Million by domains. @@ -72,7 +72,7 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://yandex.ru/q/) [YandexZnatoki (https://yandex.ru/q/)](https://yandex.ru/q/)*: top 500, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://www.goodreads.com/) [GoodReads (https://www.goodreads.com/)](https://www.goodreads.com/)*: top 500, books* 1. ![](https://www.google.com/s2/favicons?domain=https://www.weforum.org) [Weforum (https://www.weforum.org)](https://www.weforum.org)*: top 500, forum* -1. ![](https://www.google.com/s2/favicons?domain=https://www.scribd.com/) [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading* +1. ![](https://www.google.com/s2/favicons?domain=https://www.scribd.com/) [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock* 1. ![](https://www.google.com/s2/favicons?domain=https://www.academia.edu/) [Academia.edu (https://www.academia.edu/)](https://www.academia.edu/)*: top 500, education, research* 1. ![](https://www.google.com/s2/favicons?domain=https://www.openstreetmap.org/) [OpenStreetMap (https://www.openstreetmap.org/)](https://www.openstreetmap.org/)*: top 500, maps* @@ -145,7 +145,7 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://www.istockphoto.com) [iStock (https://www.istockphoto.com)](https://www.istockphoto.com)*: top 1K, photo, stock* 1. ![](https://www.google.com/s2/favicons?domain=https://pastebin.com/) [Pastebin (https://pastebin.com/)](https://pastebin.com/)*: top 1K, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://500px.com/) [500px (https://500px.com/)](https://500px.com/)*: top 1K, photo* -1. ![](https://www.google.com/s2/favicons?domain=https://www.gamespot.com/) [Gamespot (https://www.gamespot.com/)](https://www.gamespot.com/)*: top 1K, gaming* +1. ![](https://www.google.com/s2/favicons?domain=https://www.gamespot.com/) [Gamespot (https://www.gamespot.com/)](https://www.gamespot.com/)*: top 1K, gaming*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://houzz.com/) [Houzz (https://houzz.com/)](https://houzz.com/)*: top 1K, design*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.tradingview.com/) [TradingView (https://www.tradingview.com/)](https://www.tradingview.com/)*: top 1K, trading* 1. ![](https://www.google.com/s2/favicons?domain=https://foursquare.com/) [Foursquare (https://foursquare.com/)](https://foursquare.com/)*: top 1K, geosocial, in, social* @@ -538,7 +538,7 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://osu.ppy.sh/) [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 100K* 1. ![](https://www.google.com/s2/favicons?domain=https://www.mydealz.de/) [Mydealz (https://www.mydealz.de/)](https://www.mydealz.de/)*: top 100K, de* 1. ![](https://www.google.com/s2/favicons?domain=https://www.b17.ru/) [B17 (https://www.b17.ru/)](https://www.b17.ru/)*: top 100K, ru* -1. ![](https://www.google.com/s2/favicons?domain=https://www.comedy.co.uk) [Comedy (https://www.comedy.co.uk)](https://www.comedy.co.uk)*: top 100K, gb, movies, pk* +1. ![](https://www.google.com/s2/favicons?domain=https://www.comedy.co.uk) [Comedy (https://www.comedy.co.uk)](https://www.comedy.co.uk)*: top 100K, gb, movies* 1. ![](https://www.google.com/s2/favicons?domain=https://www.fl.ru/) [FL.ru (https://www.fl.ru/)](https://www.fl.ru/)*: top 100K, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://youpic.com/) [YouPic (https://youpic.com/)](https://youpic.com/)*: top 100K, photo* 1. ![](https://www.google.com/s2/favicons?domain=https://filmow.com/) [Filmow (https://filmow.com/)](https://filmow.com/)*: top 100K, br, pt* @@ -1757,7 +1757,7 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://ru.pathofexile.com) [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 100M, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://boards.theforce.net) [boards.theforce.net (https://boards.theforce.net)](https://boards.theforce.net)*: top 100M*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://community.justlanded.com) [Justlanded (https://community.justlanded.com)](https://community.justlanded.com)*: top 100M* -1. ![](https://www.google.com/s2/favicons?domain=http://forum.igromania.ru/) [igromania (http://forum.igromania.ru/)](http://forum.igromania.ru/)*: top 100M, forum, gaming, ru* +1. ![](https://www.google.com/s2/favicons?domain=http://forum.igromania.ru/) [igromania (http://forum.igromania.ru/)](http://forum.igromania.ru/)*: top 100M, forum, gaming, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=http://forum.lingvolive.com) [Lingvolive (http://forum.lingvolive.com)](http://forum.lingvolive.com)*: top 100M, de, forum, it, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://forums.overclockers.co.uk) [forums.overclockers.co.uk (https://forums.overclockers.co.uk)](https://forums.overclockers.co.uk)*: top 100M, forum, gb, uk*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://community.sphero.com) [community.sphero.com (https://community.sphero.com)](https://community.sphero.com)*: top 100M, forum, tech* @@ -3157,19 +3157,20 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://bestgore.fun) [BestGore (https://bestgore.fun)](https://bestgore.fun)*: top 100M, video* 1. ![](https://www.google.com/s2/favicons?domain=https://app.airnfts.com) [AirNFTs (https://app.airnfts.com)](https://app.airnfts.com)*: top 100M, crypto, nft* 1. ![](https://www.google.com/s2/favicons?domain=https://greasyfork.org) [GreasyFork (https://greasyfork.org)](https://greasyfork.org)*: top 100M, coding* +1. ![](https://www.google.com/s2/favicons?domain=https://faceit.com/) [Faceit (https://faceit.com/)](https://faceit.com/)*: top 100M, gaming* -The list was updated at (2026-05-13) +The list was updated at (2026-05-15) ## Statistics -Enabled/total sites: 2524/3154 = 80.03% +Enabled/total sites: 2522/3155 = 79.94% -Incomplete message checks: 311/2524 = 12.32% (false positive risks) +Incomplete message checks: 311/2522 = 12.33% (false positive risks) -Status code checks: 637/2524 = 25.24% (false positive risks) +Status code checks: 635/2522 = 25.18% (false positive risks) -False positive risk (total): 37.56% +False positive risk (total): 37.51% -Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox (disabled), Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit, Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, forums.grandstream.com, nightbot, notabug.org, qiwi.me (disabled) +Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox (disabled), Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, Faceit, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit, Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, forums.grandstream.com, nightbot, notabug.org, qiwi.me (disabled) Sites with activation: OnlyFans, Twitter, Vimeo, Weibo @@ -3200,7 +3201,7 @@ Sites by engine: - `uCoz`: 634/709 (89.4%) - `XenForo`: 177/223 (79.4%) - `phpBB/Search`: 119/127 (93.7%) -- `vBulletin`: 31/120 (25.8%) +- `vBulletin`: 30/120 (25.0%) - `Discourse`: 84/92 (91.3%) - `phpBB`: 21/27 (77.8%) - `engine404`: 19/23 (82.6%) @@ -3215,7 +3216,7 @@ Sites by engine: Top 20 tags: - (1057) `NO_TAGS` (non-standard) - (749) `forum` -- (128) `gaming` +- (129) `gaming` - (88) `coding` - (57) `photo` - (46) `tech` diff --git a/utils/update_site_data.py b/utils/update_site_data.py index 5d7ecf8..e72b49c 100755 --- a/utils/update_site_data.py +++ b/utils/update_site_data.py @@ -15,6 +15,10 @@ import xml.etree.ElementTree as ET from datetime import datetime, timezone from argparse import ArgumentParser, RawDescriptionHelpFormatter +# Make `from utils.X import Y` work when invoked as `python3 ./utils/update_site_data.py` +# (direct script execution puts utils/ on sys.path, not the repo root). +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + from maigret.maigret import MaigretDatabase from utils.generate_db_meta import write_meta_if_changed