mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Fix site checks: 5 fixed; readme fix (#2562)
* Fix site checks: 5 fixed; readme fix
* Logging improvements
* Improve YouTube data extraction
This commit is contained in:
@@ -109,7 +109,7 @@ Download a standalone EXE from [Releases](https://github.com/soxoj/maigret/relea
|
|||||||
|
|
||||||
Run Maigret in the browser via cloud shells or Jupyter notebooks:
|
Run Maigret in the browser via cloud shells or Jupyter notebooks:
|
||||||
|
|
||||||
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=cloudshell-tutorial.md)
|
<a href="https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=cloudshell-tutorial.md"><img src="https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png" alt="Open in Cloud Shell" height="50"></a>
|
||||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
||||||
|
|
||||||
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
||||||
|
|||||||
+7
-1
@@ -345,7 +345,11 @@ def process_site_result(
|
|||||||
username = results_info["username"]
|
username = results_info["username"]
|
||||||
is_parsing_enabled = results_info["parsing_enabled"]
|
is_parsing_enabled = results_info["parsing_enabled"]
|
||||||
url = results_info.get("url_user")
|
url = results_info.get("url_user")
|
||||||
logger.info(url)
|
url_probe = results_info.get("url_probe") or url
|
||||||
|
if url_probe != url:
|
||||||
|
logger.info(f"{url_probe} (display: {url})")
|
||||||
|
else:
|
||||||
|
logger.info(url)
|
||||||
|
|
||||||
status = results_info.get("status")
|
status = results_info.get("status")
|
||||||
if status is not None:
|
if status is not None:
|
||||||
@@ -603,6 +607,8 @@ def make_site_result(
|
|||||||
for k, v in site.get_params.items():
|
for k, v in site.get_params.items():
|
||||||
url_probe += f"&{k}={v}"
|
url_probe += f"&{k}={v}"
|
||||||
|
|
||||||
|
results_site["url_probe"] = url_probe
|
||||||
|
|
||||||
if site.request_method:
|
if site.request_method:
|
||||||
request_method = site.request_method.lower()
|
request_method = site.request_method.lower()
|
||||||
elif site.check_type == "status_code" and site.request_head_only:
|
elif site.check_type == "status_code" and site.request_head_only:
|
||||||
|
|||||||
@@ -40,7 +40,7 @@
|
|||||||
],
|
],
|
||||||
"alexaRank": 3,
|
"alexaRank": 3,
|
||||||
"urlMain": "https://www.youtube.com/",
|
"urlMain": "https://www.youtube.com/",
|
||||||
"url": "https://www.youtube.com/@{username}",
|
"url": "https://www.youtube.com/@{username}/about",
|
||||||
"usernameClaimed": "test",
|
"usernameClaimed": "test",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis777"
|
"usernameUnclaimed": "noonewouldeverusethis777"
|
||||||
},
|
},
|
||||||
@@ -63,7 +63,7 @@
|
|||||||
],
|
],
|
||||||
"alexaRank": 3,
|
"alexaRank": 3,
|
||||||
"urlMain": "https://www.youtube.com/",
|
"urlMain": "https://www.youtube.com/",
|
||||||
"url": "https://www.youtube.com/@{username}",
|
"url": "https://www.youtube.com/@{username}/about",
|
||||||
"usernameClaimed": "test",
|
"usernameClaimed": "test",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis777"
|
"usernameUnclaimed": "noonewouldeverusethis777"
|
||||||
},
|
},
|
||||||
@@ -100,7 +100,7 @@
|
|||||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||||
"x-guest-token": "2045154491230572773"
|
"x-guest-token": "2048070238281826593"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Bad guest token": "x-guest-token update required"
|
"Bad guest token": "x-guest-token update required"
|
||||||
@@ -296,7 +296,7 @@
|
|||||||
"method": "vimeo"
|
"method": "vimeo"
|
||||||
},
|
},
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzY0Mzg3MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNjY0OWY3ZWItMThjZS00ODU1LWIzNmEtNWY3MzRkOGZhNjAyIn0.l1SRcr5UqvxqYLveW7MTECKSfkgsbh1y9QZqZmBX1EI"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzcxMzM4ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZjFiMGJjNWUtMjIyOC00Y2I1LWFlNmItODk0YjZhNGNmODJhIn0.YCPXekRrHnJy8iH1gX4iVuNURiw6sU_FlmsfHnM2oug"
|
||||||
},
|
},
|
||||||
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
|
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
|
||||||
"checkType": "status_code",
|
"checkType": "status_code",
|
||||||
@@ -1339,6 +1339,9 @@
|
|||||||
"did not match any articles",
|
"did not match any articles",
|
||||||
"not match"
|
"not match"
|
||||||
],
|
],
|
||||||
|
"errors": {
|
||||||
|
"Our systems have detected unusual traffic": "Google rate-limit / captcha"
|
||||||
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
"education",
|
"education",
|
||||||
"research"
|
"research"
|
||||||
@@ -5639,8 +5642,8 @@
|
|||||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
||||||
"user-id": "0",
|
"user-id": "0",
|
||||||
"x-bc": "0a106d301866494c873ae3a05bc3c97cee59a749",
|
"x-bc": "0a106d301866494c873ae3a05bc3c97cee59a749",
|
||||||
"time": "1776959404882",
|
"time": "1777132991121",
|
||||||
"sign": "57203:46ddb95bceab303946739ba884f008f6a2118657:646:69cfa6d8",
|
"sign": "57203:3723aa7d500e76eabca29df74e4e97c483f14204:66d:69cfa6d8",
|
||||||
"referer": "https://onlyfans.com/",
|
"referer": "https://onlyfans.com/",
|
||||||
"cookie": "__cf_bm=YovfzPN0T_wg6F60L5eZKPOQvlGESws3UDGgEkmPb9A-1776790253-1.0.1.1-KRZgptNe5P9epBZSdITa12VfTEDlDdLckPY3I2FDAacvCPxOj0PqeK86J5mcC7UQ_TM8_O24bAh21ElYINovqk2386EoJYyLmknHJ5UsFts"
|
"cookie": "__cf_bm=YovfzPN0T_wg6F60L5eZKPOQvlGESws3UDGgEkmPb9A-1776790253-1.0.1.1-KRZgptNe5P9epBZSdITa12VfTEDlDdLckPY3I2FDAacvCPxOj0PqeK86J5mcC7UQ_TM8_O24bAh21ElYINovqk2386EoJYyLmknHJ5UsFts"
|
||||||
},
|
},
|
||||||
@@ -11185,6 +11188,7 @@
|
|||||||
"alexaRank": 14969,
|
"alexaRank": 14969,
|
||||||
"urlMain": "https://www.vivino.com/",
|
"urlMain": "https://www.vivino.com/",
|
||||||
"url": "https://www.vivino.com/users/{username}",
|
"url": "https://www.vivino.com/users/{username}",
|
||||||
|
"urlProbe": "https://api.vivino.com/users/{username}",
|
||||||
"usernameClaimed": "adam",
|
"usernameClaimed": "adam",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
{
|
{
|
||||||
"version": 1,
|
"version": 1,
|
||||||
"updated_at": "2026-04-23T19:44:45Z",
|
"updated_at": "2026-04-25T16:11:27Z",
|
||||||
"sites_count": 3139,
|
"sites_count": 3139,
|
||||||
"min_maigret_version": "0.6.0",
|
"min_maigret_version": "0.6.0",
|
||||||
"data_sha256": "35bfbb1271a50890c78a03d8e9d9f8d07f78de0e140c8232626de2f6eb124bae",
|
"data_sha256": "c51ecaa6c0736c5e1e7ca91aaf111445b3ac9ce9541a472d97db2dcc3ff8aa17",
|
||||||
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
||||||
}
|
}
|
||||||
@@ -3143,7 +3143,7 @@ Rank data fetched from Majestic Million by domains.
|
|||||||
1.  [flarum.es (https://flarum.es)](https://flarum.es)*: top 100M, es, forum*
|
1.  [flarum.es (https://flarum.es)](https://flarum.es)*: top 100M, es, forum*
|
||||||
1.  [forum.fibra.click (https://forum.fibra.click)](https://forum.fibra.click)*: top 100M, forum, it*
|
1.  [forum.fibra.click (https://forum.fibra.click)](https://forum.fibra.click)*: top 100M, forum, it*
|
||||||
|
|
||||||
The list was updated at (2026-04-23)
|
The list was updated at (2026-04-25)
|
||||||
## Statistics
|
## Statistics
|
||||||
|
|
||||||
Enabled/total sites: 2510/3139 = 79.96%
|
Enabled/total sites: 2510/3139 = 79.96%
|
||||||
@@ -3154,7 +3154,7 @@ Status code checks: 625/2510 = 24.9% (false positive risks)
|
|||||||
|
|
||||||
False positive risk (total): 37.53%
|
False positive risk (total): 37.53%
|
||||||
|
|
||||||
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit (disabled), Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
|
Sites with probing: 500px, Armchairgm, BinarySearch (disabled), BleachFandom, Bluesky, BongaCams, Boosty, BuyMeACoffee, Calendly, Cent, Chess, Code Sandbox, Code Snippet Wiki, DailyMotion, Discord, Diskusjon.no, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, GitHub, GitLab, Google Plus (archived), Gravatar, HackTheBox, Hackerrank, Hashnode, Holopin, Imgur, Issuu, Keybase, Kick, Kvinneguiden, LeetCode, Lesswrong, Livejasmin, LocalCryptos (disabled), Medium, MicrosoftLearn, MixCloud, Monkeytype, NPM, Niftygateway, Omg.lol, OnlyFans, Paragraph, Picsart, Plurk, Polarsteps, Rarible, Reddit (disabled), Reddit Search (Pushshift) (disabled), Revolut.me, RoyalCams, Scratch, Soop, SportsTracker, Spotify, StackOverflow, Substack, TAP'D, Topcoder, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Vivino, Warframe Market, Warpcast, Weibo, Wikipedia, Yapisal (disabled), YouNow, en.brickimedia.org, nightbot, notabug.org, qiwi.me (disabled)
|
||||||
|
|
||||||
Sites with activation: OnlyFans, Twitter, Vimeo, Weibo
|
Sites with activation: OnlyFans, Twitter, Vimeo, Weibo
|
||||||
|
|
||||||
@@ -3169,7 +3169,7 @@ Top 20 profile URLs:
|
|||||||
- (116) `/u/{username}`
|
- (116) `/u/{username}`
|
||||||
- (93) `/users/{username}`
|
- (93) `/users/{username}`
|
||||||
- (87) `{urlMain}/u/{username}/summary (Discourse)`
|
- (87) `{urlMain}/u/{username}/summary (Discourse)`
|
||||||
- (70) `/@{username}`
|
- (68) `/@{username}`
|
||||||
- (55) `/wiki/User:{username}`
|
- (55) `/wiki/User:{username}`
|
||||||
- (45) `SUBDOMAIN`
|
- (45) `SUBDOMAIN`
|
||||||
- (38) `/members/?username={username}`
|
- (38) `/members/?username={username}`
|
||||||
|
|||||||
Reference in New Issue
Block a user