diff --git a/maigret/resources/data.json b/maigret/resources/data.json
index 820aad4..5222056 100644
--- a/maigret/resources/data.json
+++ b/maigret/resources/data.json
@@ -2757,16 +2757,27 @@
],
"checkType": "message",
"absenceStrs": [
- "\nnull null\n"
+ "error_404",
+ "c-error404",
+ "Author not found",
+ "c-error404_back",
+ "c-error404_header"
],
"presenseStrs": [
- "Joined CNET:"
+ "},firstName:",
+ "#email",
+ ",cmsDisplayName:",
+ "og:title",
+ "c-pageProfile"
],
"alexaRank": 181,
- "urlMain": "https://www.cnet.com/",
+ "urlMain": "https://www.cnet.com",
"url": "https://www.cnet.com/profiles/{username}/",
"usernameClaimed": "leadicicco",
- "usernameUnclaimed": "noonewouldeverusethis"
+ "usernameUnclaimed": "chexowcxzm",
+ "headers": {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36"
+ }
},
"CORSAIR": {
"urlSubpath": "/v3",
@@ -3187,16 +3198,27 @@
],
"checkType": "message",
"absenceStrs": [
- "The page you are looking for doesn\u2019t exist. (404)"
+ "error image",
+ " 404 Page not found\n",
+ "_404-header",
+ "_404-inner-container",
+ " no-nav "
],
"presenseStrs": [
- "Full Stats"
+ "profile-top",
+ "og:title",
+ " style=",
+ "view-profile",
+ " data-username="
],
"alexaRank": 211,
- "urlMain": "https://www.chess.com/",
+ "urlMain": "https://www.chess.com",
"url": "https://www.chess.com/member/{username}",
- "usernameClaimed": "blue",
- "usernameUnclaimed": "noonewouldeverusethis7"
+ "usernameClaimed": "sexytwerker69",
+ "usernameUnclaimed": "aublurbrxm",
+ "headers": {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36"
+ }
},
"Chess-russia": {
"tags": [
@@ -3832,18 +3854,31 @@
},
"DailyMotion": {
"tags": [
- "us",
"video"
],
"checkType": "message",
"presenseStrs": [
- "al:ios:app_name"
+ " style=",
+ "",
+ "og:title",
+ "Twitter",
+ "og:site_name"
],
"alexaRank": 263,
- "urlMain": "https://www.dailymotion.com/",
+ "urlMain": "https://www.dailymotion.com",
"url": "https://www.dailymotion.com/{username}",
"usernameClaimed": "blue",
- "usernameUnclaimed": "noonewouldeverusethis7"
+ "usernameUnclaimed": "rstnodkwzr",
+ "absenceStrs": [
+ "Page not found",
+ "profile",
+ "error404",
+ "bodyall",
+ "No matches found"
+ ],
+ "headers": {
+ "User-Agent": ""
+ }
},
"Dalnoboi": {
"tags": [
@@ -10583,13 +10618,27 @@
],
"checkType": "message",
"absenceStrs": [
- "Page Not Found | Mozilla"
+ ">Page Not Found",
+ "error-page",
+ "sumo-page-intro",
+ "search-results-visible page-not-found",
+ "search-empty"
],
"alexaRank": 172,
"urlMain": "https://support.mozilla.org",
- "url": "https://support.mozilla.org/en-US/user/{username}",
- "usernameClaimed": "adam",
- "usernameUnclaimed": "noonewouldeverusethis7"
+ "url": "https://support.mozilla.org/en-US/user/{username}/",
+ "usernameClaimed": "derekmarable",
+ "usernameUnclaimed": "tasgcxxxcz",
+ "presenseStrs": [
+ "user-nav",
+ "",
+ "sidebar-nav",
+ "noindex",
+ "sidebar-nav--item"
+ ],
+ "headers": {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36"
+ }
},
"Mpgh": {
"urlSubpath": "/forum",
@@ -17422,7 +17471,7 @@
"method": "vimeo"
},
"headers": {
- "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4NzUyMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiM2MxNWE0NDUtMjVlYy00NzJhLTg5NzgtMjIzMWJiMmQ1Y2Q0In0.-hmhKFIcM0SyYtDadKAU2eqQhcYvfFGPR8vvuzLNbWM"
+ "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4NzYyODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiMDk0ZjY5MjctZDJhMy00ZTY3LWI1N2EtN2IwMjBlOTcyZjQ5In0.dxgGrY7vQs6DW3sfKaOJy4UL8MKjMK-ssr_kndr9_vY"
},
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
"checkType": "status_code",
@@ -30423,6 +30472,9 @@
"presenseStrs": [
"collectionName"
],
+ "errors": {
+ "recaptchaKey": "Captcha detected"
+ },
"url": "https://www.istockphoto.com/ru/portfolio/{username}",
"urlMain": "https://www.istockphoto.com",
"usernameClaimed": "leowilde",
diff --git a/maigret/submit.py b/maigret/submit.py
index 9ba95c9..31750ec 100644
--- a/maigret/submit.py
+++ b/maigret/submit.py
@@ -509,6 +509,8 @@ class Submitter:
supposed_username = self.extract_username_dialog(url_exists)
self.logger.info(f"Supposed username: {supposed_username}")
+ # TODO: pass status_codes to this step, check them here, and
+ # suggest enabling (or automatically enable) redirects
presence_list, absence_list, status, non_exist_username = (
await self.check_features_manually(
username=supposed_username,
@@ -598,8 +600,11 @@ class Submitter:
self.logger.info(f"New site name is {new_name}")
chosen_site.name = new_name
- # TODO: remove empty tags
- new_tags = input(f"{Fore.GREEN}[?] Site tags: {Style.RESET_ALL}")
+ default_tags_str = ""
+ if old_site:
+ default_tags_str = f' [{", ".join(old_site.tags)}]'
+
+ new_tags = input(f"{Fore.GREEN}[?] Site tags{default_tags_str}: {Style.RESET_ALL}")
if new_tags:
chosen_site.tags = list(map(str.strip, new_tags.split(',')))
else:
diff --git a/sites.md b/sites.md
index e3382cd..f0fd2db 100644
--- a/sites.md
+++ b/sites.md
@@ -96,18 +96,18 @@ Rank data fetched from Alexa by domains.
1.  [TheGuardian (https://theguardian.com)](https://theguardian.com)*: top 500, news, us*, search is disabled
1.  [Trello (https://trello.com/)](https://trello.com/)*: top 500, tasks*
1.  [Mozilla Support (https://support.mozilla.org)](https://support.mozilla.org)*: top 500, us*
-1.  [CNET (https://www.cnet.com/)](https://www.cnet.com/)*: top 500, news, tech, us*
+1.  [CNET (https://www.cnet.com)](https://www.cnet.com)*: top 500, news, tech, us*
1.  [Shutterstock (https://www.shutterstock.com)](https://www.shutterstock.com)*: top 500, music, photo, stock, us*
1.  [Wix (https://wix.com/)](https://wix.com/)*: top 500, us*
1.  [Slack (https://slack.com)](https://slack.com)*: top 500, messaging*
-1.  [Chess (https://www.chess.com/)](https://www.chess.com/)*: top 500, gaming, hobby*
+1.  [Chess (https://www.chess.com)](https://www.chess.com)*: top 500, gaming, hobby*
1.  [upwork.com (https://upwork.com)](https://upwork.com)*: top 500, us*
1.  [Archive.org (https://archive.org)](https://archive.org)*: top 500*, search is disabled
1.  [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, design*
1.  [iStock (https://www.istockphoto.com)](https://www.istockphoto.com)*: top 500, photo, stock*
1.  [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading*
1.  [opensea.io (https://opensea.io)](https://opensea.io)*: top 500, us*
-1.  [DailyMotion (https://www.dailymotion.com/)](https://www.dailymotion.com/)*: top 500, us, video*
+1.  [DailyMotion (https://www.dailymotion.com)](https://www.dailymotion.com)*: top 500, video*
1.  [Behance (https://www.behance.net/)](https://www.behance.net/)*: top 500, business*
1.  [Yelp (http://www.yelp.com)](http://www.yelp.com)*: top 500, review*, search is disabled
1.  [Yelp (by id) (https://www.yelp.com)](https://www.yelp.com)*: top 500, review*
@@ -3141,16 +3141,16 @@ Rank data fetched from Alexa by domains.
1.  [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
1.  [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
-The list was updated at (2024-12-10)
+The list was updated at (2024-12-11)
## Statistics
Enabled/total sites: 2693/3137 = 85.85%
-Incomplete message checks: 397/2693 = 14.74% (false positive risks)
+Incomplete message checks: 395/2693 = 14.67% (false positive risks)
Status code checks: 616/2693 = 22.87% (false positive risks)
-False positive risk (total): 37.61%
+False positive risk (total): 37.54%
Sites with probing: 500px, Aparat, BinarySearch (disabled), BongaCams, BuyMeACoffee, Cent, Disqus, Docker Hub, Duolingo, Gab, GitHub, GitLab, Google Plus (archived), Gravatar, Imgur, Issuu, Keybase, Livejasmin, LocalCryptos (disabled), MixCloud, Niftygateway, Reddit Search (Pushshift) (disabled), SportsTracker, Spotify (disabled), TAP'D, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Weibo, Yapisal (disabled), YouNow, nightbot, notabug.org, polarsteps, qiwi.me (disabled)