Fix site checks: 5 fixed; readme fix (#2562)

* Fix site checks: 5 fixed; readme fix

* Logging improvements

* Improve YouTube data extraction
This commit is contained in:
Soxoj
2026-04-25 18:15:38 +02:00
committed by GitHub
parent c6cfef84ce
commit e962b8c693
5 changed files with 23 additions and 13 deletions
+1 -1
View File
@@ -109,7 +109,7 @@ Download a standalone EXE from [Releases](https://github.com/soxoj/maigret/relea
Run Maigret in the browser via cloud shells or Jupyter notebooks:
[![Open in Cloud Shell](https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=cloudshell-tutorial.md)
<a href="https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=cloudshell-tutorial.md"><img src="https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png" alt="Open in Cloud Shell" height="50"></a>
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
+7 -1
View File
@@ -345,7 +345,11 @@ def process_site_result(
username = results_info["username"]
is_parsing_enabled = results_info["parsing_enabled"]
url = results_info.get("url_user")
logger.info(url)
url_probe = results_info.get("url_probe") or url
if url_probe != url:
logger.info(f"{url_probe} (display: {url})")
else:
logger.info(url)
status = results_info.get("status")
if status is not None:
@@ -603,6 +607,8 @@ def make_site_result(
for k, v in site.get_params.items():
url_probe += f"&{k}={v}"
results_site["url_probe"] = url_probe
if site.request_method:
request_method = site.request_method.lower()
elif site.check_type == "status_code" and site.request_head_only:
+10 -6
View File
@@ -40,7 +40,7 @@
],
"alexaRank": 3,
"urlMain": "https://www.youtube.com/",
"url": "https://www.youtube.com/@{username}",
"url": "https://www.youtube.com/@{username}/about",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis777"
},
@@ -63,7 +63,7 @@
],
"alexaRank": 3,
"urlMain": "https://www.youtube.com/",
"url": "https://www.youtube.com/@{username}",
"url": "https://www.youtube.com/@{username}/about",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis777"
},
@@ -100,7 +100,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "2045154491230572773"
"x-guest-token": "2048070238281826593"
},
"errors": {
"Bad guest token": "x-guest-token update required"
@@ -296,7 +296,7 @@
"method": "vimeo"
},
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzY0Mzg3MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNjY0OWY3ZWItMThjZS00ODU1LWIzNmEtNWY3MzRkOGZhNjAyIn0.l1SRcr5UqvxqYLveW7MTECKSfkgsbh1y9QZqZmBX1EI"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzcxMzM4ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZjFiMGJjNWUtMjIyOC00Y2I1LWFlNmItODk0YjZhNGNmODJhIn0.YCPXekRrHnJy8iH1gX4iVuNURiw6sU_FlmsfHnM2oug"
},
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
"checkType": "status_code",
@@ -1339,6 +1339,9 @@
"did not match any articles",
"not match"
],
"errors": {
"Our systems have detected unusual traffic": "Google rate-limit / captcha"
},
"tags": [
"education",
"research"
@@ -5639,8 +5642,8 @@
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"user-id": "0",
"x-bc": "0a106d301866494c873ae3a05bc3c97cee59a749",
"time": "1776959404882",
"sign": "57203:46ddb95bceab303946739ba884f008f6a2118657:646:69cfa6d8",
"time": "1777132991121",
"sign": "57203:3723aa7d500e76eabca29df74e4e97c483f14204:66d:69cfa6d8",
"referer": "https://onlyfans.com/",
"cookie": "__cf_bm=YovfzPN0T_wg6F60L5eZKPOQvlGESws3UDGgEkmPb9A-1776790253-1.0.1.1-KRZgptNe5P9epBZSdITa12VfTEDlDdLckPY3I2FDAacvCPxOj0PqeK86J5mcC7UQ_TM8_O24bAh21ElYINovqk2386EoJYyLmknHJ5UsFts"
},
@@ -11185,6 +11188,7 @@
"alexaRank": 14969,
"urlMain": "https://www.vivino.com/",
"url": "https://www.vivino.com/users/{username}",
"urlProbe": "https://api.vivino.com/users/{username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
},
+2 -2
View File
@@ -1,8 +1,8 @@
{
"version": 1,
"updated_at": "2026-04-23T19:44:45Z",
"updated_at": "2026-04-25T16:11:27Z",
"sites_count": 3139,
"min_maigret_version": "0.6.0",
"data_sha256": "35bfbb1271a50890c78a03d8e9d9f8d07f78de0e140c8232626de2f6eb124bae",
"data_sha256": "c51ecaa6c0736c5e1e7ca91aaf111445b3ac9ce9541a472d97db2dcc3ff8aa17",
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
}
+3 -3
View File
@@ -3143,7 +3143,7 @@ Rank data fetched from Majestic Million by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://flarum.es) [flarum.es (https://flarum.es)](https://flarum.es)*: top 100M, es, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.fibra.click) [forum.fibra.click (https://forum.fibra.click)](https://forum.fibra.click)*: top 100M, forum, it*
The list was updated at (2026-04-23)
The list was updated at (2026-04-25)
## Statistics
Enabled/total sites: 2510/3139 = 79.96%
@@ -3154,7 +3154,7 @@ Status code checks: 625/2510 = 24.9% (false positive risks)
False positive risk (total): 37.53%
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit (disabled), Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit (disabled), Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
Sites with activation: OnlyFans, Twitter, Vimeo, Weibo
@@ -3169,7 +3169,7 @@ Top 20 profile URLs:
- (116) `/u/{username}`
- (93) `/users/{username}`
- (87) `{urlMain}/u/{username}/summary (Discourse)`
- (70) `/@{username}`
- (68) `/@{username}`
- (55) `/wiki/User:{username}`
- (45) `SUBDOMAIN`
- (38) `/members/?username={username}`