mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-15 19:05:43 +00:00
Fix site checks: 2 fixed, 3 disabled; add Faceit; fix utils import (#2660)
This commit is contained in:
@@ -874,6 +874,7 @@
|
||||
"url": "https://discord.com",
|
||||
"urlMain": "https://discord.com/",
|
||||
"urlProbe": "https://discord.com/api/v9/unique-username/username-attempt-unauthed",
|
||||
"regexCheck": "^.{2,32}$",
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"{\"taken\":false}"
|
||||
@@ -891,7 +892,9 @@
|
||||
"errors": {
|
||||
"The resource is being rate limited": "Rate limited",
|
||||
"You are being rate limited": "Rate limited",
|
||||
"rate_limited": "Rate limited"
|
||||
"rate_limited": "Rate limited",
|
||||
"BASE_TYPE_BAD_LENGTH": "Invalid username length",
|
||||
"Must be between 2 and 32 in length": "Invalid username length"
|
||||
},
|
||||
"tags": [
|
||||
"gaming",
|
||||
@@ -1527,6 +1530,10 @@
|
||||
]
|
||||
},
|
||||
"Scribd": {
|
||||
"disabled": true,
|
||||
"protection": [
|
||||
"js_challenge"
|
||||
],
|
||||
"tags": [
|
||||
"reading"
|
||||
],
|
||||
@@ -1535,7 +1542,10 @@
|
||||
"urlMain": "https://www.scribd.com/",
|
||||
"url": "https://www.scribd.com/{username}",
|
||||
"usernameClaimed": "scribd",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"errors": {
|
||||
"Client Challenge": "Anti-bot challenge"
|
||||
}
|
||||
},
|
||||
"Freepik": {
|
||||
"protection": [
|
||||
@@ -2873,6 +2883,7 @@
|
||||
]
|
||||
},
|
||||
"Gamespot": {
|
||||
"disabled": true,
|
||||
"protection": [
|
||||
"tls_fingerprint"
|
||||
],
|
||||
@@ -11619,6 +11630,10 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"igromania": {
|
||||
"disabled": true,
|
||||
"protection": [
|
||||
"custom_bot_protection"
|
||||
],
|
||||
"tags": [
|
||||
"forum",
|
||||
"gaming",
|
||||
@@ -13034,10 +13049,12 @@
|
||||
],
|
||||
"tags": [
|
||||
"gb",
|
||||
"movies",
|
||||
"pk"
|
||||
"movies"
|
||||
],
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"Fan Page - British Comedy Guide"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 29459,
|
||||
"urlMain": "https://www.comedy.co.uk",
|
||||
"url": "https://www.comedy.co.uk/profile/{username}/",
|
||||
@@ -35924,6 +35941,17 @@
|
||||
"url": "https://greasyfork.org/en/users?q={username}",
|
||||
"usernameClaimed": "jcunews",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Faceit": {
|
||||
"tags": [
|
||||
"gaming"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"urlMain": "https://faceit.com/",
|
||||
"url": "https://www.faceit.com/en/players/{username}",
|
||||
"urlProbe": "https://www.faceit.com/api/users/v1/nicknames/{username}",
|
||||
"usernameClaimed": "Snotax",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
}
|
||||
},
|
||||
"engines": {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-05-15T12:30:52Z",
|
||||
"sites_count": 3154,
|
||||
"updated_at": "2026-05-15T16:12:58Z",
|
||||
"sites_count": 3155,
|
||||
"min_maigret_version": "0.6.0",
|
||||
"data_sha256": "f86d77a18bcd1d353933b64d99953634ce5e2966860f25bacd5e3de5659fb8a7",
|
||||
"data_sha256": "df2ab3dbc96bdcdc8aa4e9da485df75ce6c3274814080f00a35e89f7f43783e1",
|
||||
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
## List of supported sites (search methods): total 3154
|
||||
## List of supported sites (search methods): total 3155
|
||||
|
||||
Rank data fetched from Majestic Million by domains.
|
||||
|
||||
@@ -72,7 +72,7 @@ Rank data fetched from Majestic Million by domains.
|
||||
1.  [YandexZnatoki (https://yandex.ru/q/)](https://yandex.ru/q/)*: top 500, ru*
|
||||
1.  [GoodReads (https://www.goodreads.com/)](https://www.goodreads.com/)*: top 500, books*
|
||||
1.  [Weforum (https://www.weforum.org)](https://www.weforum.org)*: top 500, forum*
|
||||
1.  [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading*
|
||||
1.  [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading*, search is disabled
|
||||
1.  [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock*
|
||||
1.  [Academia.edu (https://www.academia.edu/)](https://www.academia.edu/)*: top 500, education, research*
|
||||
1.  [OpenStreetMap (https://www.openstreetmap.org/)](https://www.openstreetmap.org/)*: top 500, maps*
|
||||
@@ -145,7 +145,7 @@ Rank data fetched from Majestic Million by domains.
|
||||
1.  [iStock (https://www.istockphoto.com)](https://www.istockphoto.com)*: top 1K, photo, stock*
|
||||
1.  [Pastebin (https://pastebin.com/)](https://pastebin.com/)*: top 1K, sharing*
|
||||
1.  [500px (https://500px.com/)](https://500px.com/)*: top 1K, photo*
|
||||
1.  [Gamespot (https://www.gamespot.com/)](https://www.gamespot.com/)*: top 1K, gaming*
|
||||
1.  [Gamespot (https://www.gamespot.com/)](https://www.gamespot.com/)*: top 1K, gaming*, search is disabled
|
||||
1.  [Houzz (https://houzz.com/)](https://houzz.com/)*: top 1K, design*, search is disabled
|
||||
1.  [TradingView (https://www.tradingview.com/)](https://www.tradingview.com/)*: top 1K, trading*
|
||||
1.  [Foursquare (https://foursquare.com/)](https://foursquare.com/)*: top 1K, geosocial, in, social*
|
||||
@@ -538,7 +538,7 @@ Rank data fetched from Majestic Million by domains.
|
||||
1.  [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 100K*
|
||||
1.  [Mydealz (https://www.mydealz.de/)](https://www.mydealz.de/)*: top 100K, de*
|
||||
1.  [B17 (https://www.b17.ru/)](https://www.b17.ru/)*: top 100K, ru*
|
||||
1.  [Comedy (https://www.comedy.co.uk)](https://www.comedy.co.uk)*: top 100K, gb, movies, pk*
|
||||
1.  [Comedy (https://www.comedy.co.uk)](https://www.comedy.co.uk)*: top 100K, gb, movies*
|
||||
1.  [FL.ru (https://www.fl.ru/)](https://www.fl.ru/)*: top 100K, ru*
|
||||
1.  [YouPic (https://youpic.com/)](https://youpic.com/)*: top 100K, photo*
|
||||
1.  [Filmow (https://filmow.com/)](https://filmow.com/)*: top 100K, br, pt*
|
||||
@@ -1757,7 +1757,7 @@ Rank data fetched from Majestic Million by domains.
|
||||
1.  [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 100M, ru*, search is disabled
|
||||
1.  [boards.theforce.net (https://boards.theforce.net)](https://boards.theforce.net)*: top 100M*, search is disabled
|
||||
1.  [Justlanded (https://community.justlanded.com)](https://community.justlanded.com)*: top 100M*
|
||||
1.  [igromania (http://forum.igromania.ru/)](http://forum.igromania.ru/)*: top 100M, forum, gaming, ru*
|
||||
1.  [igromania (http://forum.igromania.ru/)](http://forum.igromania.ru/)*: top 100M, forum, gaming, ru*, search is disabled
|
||||
1.  [Lingvolive (http://forum.lingvolive.com)](http://forum.lingvolive.com)*: top 100M, de, forum, it, ru*, search is disabled
|
||||
1.  [forums.overclockers.co.uk (https://forums.overclockers.co.uk)](https://forums.overclockers.co.uk)*: top 100M, forum, gb, uk*, search is disabled
|
||||
1.  [community.sphero.com (https://community.sphero.com)](https://community.sphero.com)*: top 100M, forum, tech*
|
||||
@@ -3157,19 +3157,20 @@ Rank data fetched from Majestic Million by domains.
|
||||
1.  [BestGore (https://bestgore.fun)](https://bestgore.fun)*: top 100M, video*
|
||||
1.  [AirNFTs (https://app.airnfts.com)](https://app.airnfts.com)*: top 100M, crypto, nft*
|
||||
1.  [GreasyFork (https://greasyfork.org)](https://greasyfork.org)*: top 100M, coding*
|
||||
1.  [Faceit (https://faceit.com/)](https://faceit.com/)*: top 100M, gaming*
|
||||
|
||||
The list was updated at (2026-05-13)
|
||||
The list was updated at (2026-05-15)
|
||||
## Statistics
|
||||
|
||||
Enabled/total sites: 2524/3154 = 80.03%
|
||||
Enabled/total sites: 2522/3155 = 79.94%
|
||||
|
||||
Incomplete message checks: 311/2524 = 12.32% (false positive risks)
|
||||
Incomplete message checks: 311/2522 = 12.33% (false positive risks)
|
||||
|
||||
Status code checks: 637/2524 = 25.24% (false positive risks)
|
||||
Status code checks: 635/2522 = 25.18% (false positive risks)
|
||||
|
||||
False positive risk (total): 37.56%
|
||||
False positive risk (total): 37.51%
|
||||
|
||||
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox (disabled), Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit, Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, forums.grandstream.com, nightbot, notabug.org, qiwi.me (disabled)
|
||||
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox (disabled), Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, Faceit, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit, Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, forums.grandstream.com, nightbot, notabug.org, qiwi.me (disabled)
|
||||
|
||||
Sites with activation: OnlyFans, Twitter, Vimeo, Weibo
|
||||
|
||||
@@ -3200,7 +3201,7 @@ Sites by engine:
|
||||
- `uCoz`: 634/709 (89.4%)
|
||||
- `XenForo`: 177/223 (79.4%)
|
||||
- `phpBB/Search`: 119/127 (93.7%)
|
||||
- `vBulletin`: 31/120 (25.8%)
|
||||
- `vBulletin`: 30/120 (25.0%)
|
||||
- `Discourse`: 84/92 (91.3%)
|
||||
- `phpBB`: 21/27 (77.8%)
|
||||
- `engine404`: 19/23 (82.6%)
|
||||
@@ -3215,7 +3216,7 @@ Sites by engine:
|
||||
Top 20 tags:
|
||||
- (1057) `NO_TAGS` (non-standard)
|
||||
- (749) `forum`
|
||||
- (128) `gaming`
|
||||
- (129) `gaming`
|
||||
- (88) `coding`
|
||||
- (57) `photo`
|
||||
- (46) `tech`
|
||||
|
||||
@@ -15,6 +15,10 @@ import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timezone
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
# Make `from utils.X import Y` work when invoked as `python3 ./utils/update_site_data.py`
|
||||
# (direct script execution puts utils/ on sys.path, not the repo root).
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from maigret.maigret import MaigretDatabase
|
||||
from utils.generate_db_meta import write_meta_if_changed
|
||||
|
||||
|
||||
Reference in New Issue
Block a user