mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
Fix site checks: 5 fixed; readme fix (#2562)
* Fix site checks: 5 fixed; readme fix
* Logging improvements
* Improve YouTube data extraction
This commit is contained in:
@@ -109,7 +109,7 @@ Download a standalone EXE from [Releases](https://github.com/soxoj/maigret/relea
|
||||
|
||||
Run Maigret in the browser via cloud shells or Jupyter notebooks:
|
||||
|
||||
[Open in Cloud Shell](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=cloudshell-tutorial.md)
|
||||
<a href="https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=cloudshell-tutorial.md"><img src="https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png" alt="Open in Cloud Shell" height="50"></a>
|
||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
||||
|
||||
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
||||
|
||||
+7
-1
@@ -345,7 +345,11 @@ def process_site_result(
|
||||
username = results_info["username"]
|
||||
is_parsing_enabled = results_info["parsing_enabled"]
|
||||
url = results_info.get("url_user")
|
||||
logger.info(url)
|
||||
url_probe = results_info.get("url_probe") or url
|
||||
if url_probe != url:
|
||||
logger.info(f"{url_probe} (display: {url})")
|
||||
else:
|
||||
logger.info(url)
|
||||
|
||||
status = results_info.get("status")
|
||||
if status is not None:
|
||||
@@ -603,6 +607,8 @@ def make_site_result(
|
||||
for k, v in site.get_params.items():
|
||||
url_probe += f"&{k}={v}"
|
||||
|
||||
results_site["url_probe"] = url_probe
|
||||
|
||||
if site.request_method:
|
||||
request_method = site.request_method.lower()
|
||||
elif site.check_type == "status_code" and site.request_head_only:
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
],
|
||||
"alexaRank": 3,
|
||||
"urlMain": "https://www.youtube.com/",
|
||||
"url": "https://www.youtube.com/@{username}",
|
||||
"url": "https://www.youtube.com/@{username}/about",
|
||||
"usernameClaimed": "test",
|
||||
"usernameUnclaimed": "noonewouldeverusethis777"
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
],
|
||||
"alexaRank": 3,
|
||||
"urlMain": "https://www.youtube.com/",
|
||||
"url": "https://www.youtube.com/@{username}",
|
||||
"url": "https://www.youtube.com/@{username}/about",
|
||||
"usernameClaimed": "test",
|
||||
"usernameUnclaimed": "noonewouldeverusethis777"
|
||||
},
|
||||
@@ -100,7 +100,7 @@
|
||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||
"x-guest-token": "2045154491230572773"
|
||||
"x-guest-token": "2048070238281826593"
|
||||
},
|
||||
"errors": {
|
||||
"Bad guest token": "x-guest-token update required"
|
||||
@@ -296,7 +296,7 @@
|
||||
"method": "vimeo"
|
||||
},
|
||||
"headers": {
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzY0Mzg3MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNjY0OWY3ZWItMThjZS00ODU1LWIzNmEtNWY3MzRkOGZhNjAyIn0.l1SRcr5UqvxqYLveW7MTECKSfkgsbh1y9QZqZmBX1EI"
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzcxMzM4ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZjFiMGJjNWUtMjIyOC00Y2I1LWFlNmItODk0YjZhNGNmODJhIn0.YCPXekRrHnJy8iH1gX4iVuNURiw6sU_FlmsfHnM2oug"
|
||||
},
|
||||
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
|
||||
"checkType": "status_code",
|
||||
@@ -1339,6 +1339,9 @@
|
||||
"did not match any articles",
|
||||
"not match"
|
||||
],
|
||||
"errors": {
|
||||
"Our systems have detected unusual traffic": "Google rate-limit / captcha"
|
||||
},
|
||||
"tags": [
|
||||
"education",
|
||||
"research"
|
||||
@@ -5639,8 +5642,8 @@
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
||||
"user-id": "0",
|
||||
"x-bc": "0a106d301866494c873ae3a05bc3c97cee59a749",
|
||||
"time": "1776959404882",
|
||||
"sign": "57203:46ddb95bceab303946739ba884f008f6a2118657:646:69cfa6d8",
|
||||
"time": "1777132991121",
|
||||
"sign": "57203:3723aa7d500e76eabca29df74e4e97c483f14204:66d:69cfa6d8",
|
||||
"referer": "https://onlyfans.com/",
|
||||
"cookie": "__cf_bm=YovfzPN0T_wg6F60L5eZKPOQvlGESws3UDGgEkmPb9A-1776790253-1.0.1.1-KRZgptNe5P9epBZSdITa12VfTEDlDdLckPY3I2FDAacvCPxOj0PqeK86J5mcC7UQ_TM8_O24bAh21ElYINovqk2386EoJYyLmknHJ5UsFts"
|
||||
},
|
||||
@@ -11185,6 +11188,7 @@
|
||||
"alexaRank": 14969,
|
||||
"urlMain": "https://www.vivino.com/",
|
||||
"url": "https://www.vivino.com/users/{username}",
|
||||
"urlProbe": "https://api.vivino.com/users/{username}",
|
||||
"usernameClaimed": "adam",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-23T19:44:45Z",
|
||||
"updated_at": "2026-04-25T16:11:27Z",
|
||||
"sites_count": 3139,
|
||||
"min_maigret_version": "0.6.0",
|
||||
"data_sha256": "35bfbb1271a50890c78a03d8e9d9f8d07f78de0e140c8232626de2f6eb124bae",
|
||||
"data_sha256": "c51ecaa6c0736c5e1e7ca91aaf111445b3ac9ce9541a472d97db2dcc3ff8aa17",
|
||||
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
||||
}
|
||||
@@ -3143,7 +3143,7 @@ Rank data fetched from Majestic Million by domains.
|
||||
1.  [flarum.es (https://flarum.es)](https://flarum.es)*: top 100M, es, forum*
|
||||
1.  [forum.fibra.click (https://forum.fibra.click)](https://forum.fibra.click)*: top 100M, forum, it*
|
||||
|
||||
The list was updated at (2026-04-23)
|
||||
The list was updated at (2026-04-25)
|
||||
## Statistics
|
||||
|
||||
Enabled/total sites: 2510/3139 = 79.96%
|
||||
@@ -3154,7 +3154,7 @@ Status code checks: 625/2510 = 24.9% (false positive risks)
|
||||
|
||||
False positive risk (total): 37.53%
|
||||
|
||||
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit (disabled), Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
|
||||
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit (disabled), Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
|
||||
|
||||
Sites with activation: OnlyFans, Twitter, Vimeo, Weibo
|
||||
|
||||
@@ -3169,7 +3169,7 @@ Top 20 profile URLs:
|
||||
- (116) `/u/{username}`
|
||||
- (93) `/users/{username}`
|
||||
- (87) `{urlMain}/u/{username}/summary (Discourse)`
|
||||
- (70) `/@{username}`
|
||||
- (68) `/@{username}`
|
||||
- (55) `/wiki/User:{username}`
|
||||
- (45) `SUBDOMAIN`
|
||||
- (38) `/members/?username={username}`
|
||||
|
||||
Reference in New Issue
Block a user