Fixed Vimeo, activation/probing mechanisms improvements

This commit is contained in:
Soxoj
2024-12-11 00:56:00 +01:00
parent 81a817a39f
commit 127d9032c3
8 changed files with 79 additions and 35 deletions
+2 -1
View File
@@ -1,2 +1,3 @@
#!/bin/sh #!/bin/sh
python3 ./utils/update_site_data.py echo 'Activating update_sitesmd hook script...'
poetry run update_sitesmd
+2
View File
@@ -1,3 +1,4 @@
import json
from http.cookiejar import MozillaCookieJar from http.cookiejar import MozillaCookieJar
from http.cookies import Morsel from http.cookies import Morsel
@@ -25,6 +26,7 @@ class ParsingActivator:
import requests import requests
r = requests.get(site.activation["url"], headers=headers) r = requests.get(site.activation["url"], headers=headers)
logger.debug(f"Vimeo viewer activation: {json.dumps(r.json(), indent=4)}")
jwt_token = r.json()["jwt"] jwt_token = r.json()["jwt"]
site.headers["Authorization"] = "jwt " + jwt_token site.headers["Authorization"] = "jwt " + jwt_token
+7 -8
View File
@@ -7037,7 +7037,7 @@
"alexaRank": 1, "alexaRank": 1,
"urlMain": "https://play.google.com/store", "urlMain": "https://play.google.com/store",
"url": "https://play.google.com/store/apps/developer?id={username}", "url": "https://play.google.com/store/apps/developer?id={username}",
"usernameClaimed": "OpenAI", "usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Gorod.dp.ua": { "Gorod.dp.ua": {
@@ -17412,26 +17412,25 @@
}, },
"Vimeo": { "Vimeo": {
"tags": [ "tags": [
"us",
"video" "video"
], ],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4MzkwODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiOWNjMjk0ZjktZGZhOS00NDI0LWE0OGEtN2JjYzkwYjM2NTMyIn0.wG0kC7fWtrdKI9ccS-LE81lVgQRfYobrqCAPWxr1wzc"
},
"activation": { "activation": {
"url": "https://vimeo.com/_rv/viewer", "url": "https://vimeo.com/_rv/viewer",
"marks": [ "marks": [
"Something strange occurred. Please get in touch with the app's creator." "Something strange occurred. Please get in touch"
], ],
"method": "vimeo" "method": "vimeo"
}, },
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4NzUyMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiM2MxNWE0NDUtMjVlYy00NzJhLTg5NzgtMjIzMWJiMmQ1Y2Q0In0.-hmhKFIcM0SyYtDadKAU2eqQhcYvfFGPR8vvuzLNbWM"
},
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1", "urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 148, "alexaRank": 148,
"urlMain": "https://vimeo.com/", "urlMain": "https://vimeo.com",
"url": "https://vimeo.com/{username}", "url": "https://vimeo.com/{username}",
"usernameClaimed": "blue", "usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "smbepezbrg"
}, },
"Virgool": { "Virgool": {
"disabled": true, "disabled": true,
+13
View File
@@ -560,6 +560,17 @@ class MaigretDatabase:
checks_perc = round(100 * message_checks_one_factor / enabled_count, 2) checks_perc = round(100 * message_checks_one_factor / enabled_count, 2)
status_checks_perc = round(100 * status_checks / enabled_count, 2) status_checks_perc = round(100 * status_checks / enabled_count, 2)
# Sites with probing and activation (kinda special cases, let's watch them)
site_with_probing = []
site_with_activation = []

def get_site_label(site):
    """Return the site's name, suffixed with ' (disabled)' if the site is disabled."""
    return f"{site.name}{' (disabled)' if site.disabled else ''}"

# The label helper is defined once above instead of being re-created on
# every loop iteration; the collected labels are unchanged.
for site in sites_dict.values():
    # A site may appear in both lists: the two conditions are independent.
    if site.url_probe:
        site_with_probing.append(get_site_label(site))
    if site.activation:
        site_with_activation.append(get_site_label(site))
# Format output # Format output
separator = "\n\n" separator = "\n\n"
output = [ output = [
@@ -567,6 +578,8 @@ class MaigretDatabase:
f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)", f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)",
f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)", f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)",
f"False positive risk (total): {checks_perc + status_checks_perc:.2f}%", f"False positive risk (total): {checks_perc + status_checks_perc:.2f}%",
f"Sites with probing: {', '.join(sorted(site_with_probing))}",
f"Sites with activation: {', '.join(sorted(site_with_activation))}",
self._format_top_items("profile URLs", urls, 20, is_markdown), self._format_top_items("profile URLs", urls, 20, is_markdown),
self._format_top_items("tags", tags, 20, is_markdown, self._tags), self._format_top_items("tags", tags, 20, is_markdown, self._tags),
] ]
+18 -1
View File
@@ -184,7 +184,7 @@ class Submitter:
url_parts = url.rstrip("/").split("/") url_parts = url.rstrip("/").split("/")
supposed_username = url_parts[-1].strip('@') supposed_username = url_parts[-1].strip('@')
entered_username = input( entered_username = input(
f'Is "{supposed_username}" a valid username? If not, write it manually: ' f"{Fore.GREEN}[?] Is \"{supposed_username}\" a valid username? If not, write it manually: {Style.RESET_ALL}"
) )
return entered_username if entered_username else supposed_username return entered_username if entered_username else supposed_username
@@ -390,6 +390,13 @@ class Submitter:
} }
async def dialog(self, url_exists, cookie_file): async def dialog(self, url_exists, cookie_file):
"""
An implementation of the submit mode:
- User provides a URL of an existing social media account
- Maigret tries to detect the site engine and understand how to check
for account presence with HTTP responses analysis
- If detection succeeds, Maigret generates a new site entry or replaces the old one in the database
"""
old_site = None old_site = None
additional_options_enabled = self.logger.level in ( additional_options_enabled = self.logger.level in (
logging.DEBUG, logging.DEBUG,
@@ -444,6 +451,15 @@ class Submitter:
f'{Fore.GREEN}[+] We will update site "{old_site.name}" in case of success.{Style.RESET_ALL}' f'{Fore.GREEN}[+] We will update site "{old_site.name}" in case of success.{Style.RESET_ALL}'
) )
# Check if the site check is ordinary or not
if old_site and (old_site.url_probe or old_site.activation):
skip = input(f"{Fore.RED}[!] The site check depends on activation / probing mechanism! Consider to update it manually. Continue? [yN]{Style.RESET_ALL}")
if skip.lower() in ['n', '']:
return False
# TODO: urlProbe support
# TODO: activation support
url_mainpage = self.extract_mainpage_url(url_exists) url_mainpage = self.extract_mainpage_url(url_exists)
# headers update # headers update
@@ -511,6 +527,7 @@ class Submitter:
"urlMain": url_mainpage, "urlMain": url_mainpage,
"usernameClaimed": supposed_username, "usernameClaimed": supposed_username,
"usernameUnclaimed": non_exist_username, "usernameUnclaimed": non_exist_username,
"headers": custom_headers,
"checkType": "message", "checkType": "message",
} }
self.logger.info(json.dumps(site_data, indent=4)) self.logger.info(json.dumps(site_data, indent=4))
+1
View File
@@ -90,3 +90,4 @@ coverage = "^7.6.9"
[tool.poetry.scripts] [tool.poetry.scripts]
# Run with: poetry run maigret <username> # Run with: poetry run maigret <username>
maigret = "maigret.maigret:run" maigret = "maigret.maigret:run"
update_sitesmd = "utils.update_site_data:main"
+28 -23
View File
@@ -88,7 +88,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog* 1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock* 1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock*
1. ![](https://www.google.com/s2/favicons?domain=https://vimeo.com/) [Vimeo (https://vimeo.com/)](https://vimeo.com/)*: top 500, us, video* 1. ![](https://www.google.com/s2/favicons?domain=https://vimeo.com) [Vimeo (https://vimeo.com)](https://vimeo.com)*: top 500, video*
1. ![](https://www.google.com/s2/favicons?domain=https://www.pinterest.com/) [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://www.pinterest.com/) [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://www.fiverr.com/) [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.fiverr.com/) [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
1. ![](https://www.google.com/s2/favicons?domain=https://t.me/) [Telegram (https://t.me/)](https://t.me/)*: top 500, messaging* 1. ![](https://www.google.com/s2/favicons?domain=https://t.me/) [Telegram (https://t.me/)](https://t.me/)*: top 500, messaging*
@@ -3148,9 +3148,13 @@ Enabled/total sites: 2693/3137 = 85.85%
Incomplete message checks: 397/2693 = 14.74% (false positive risks) Incomplete message checks: 397/2693 = 14.74% (false positive risks)
Status code checks: 719/2693 = 26.7% (false positive risks) Status code checks: 616/2693 = 22.87% (false positive risks)
False positive risk (total): 41.44% False positive risk (total): 37.61%
Sites with probing: 500px, Aparat, BinarySearch (disabled), BongaCams, BuyMeACoffee, Cent, Disqus, Docker Hub, Duolingo, Gab, GitHub, GitLab, Google Plus (archived), Gravatar, Imgur, Issuu, Keybase, Livejasmin, LocalCryptos (disabled), MixCloud, Niftygateway, Reddit Search (Pushshift) (disabled), SportsTracker, Spotify (disabled), TAP'D, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Weibo, Yapisal (disabled), YouNow, nightbot, notabug.org, polarsteps, qiwi.me (disabled)
Sites with activation: Spotify (disabled), Twitter, Vimeo, Weibo
Top 20 profile URLs: Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)` - (796) `{urlMain}/index/8-0-{username} (uCoz)`
@@ -3174,24 +3178,25 @@ Top 20 profile URLs:
- (17) `/forum/members/?username={username}` - (17) `/forum/members/?username={username}`
- (17) `/search.php?keywords=&terms=all&author={username}` - (17) `/search.php?keywords=&terms=all&author={username}`
Top 20 tags: Top 20 tags:
- (328) `NO_TAGS` (non-standard) - (1105) `NO_TAGS` (non-standard)
- (307) `forum` - (735) `forum`
- (50) `gaming` - (92) `gaming`
- (26) `coding` - (48) `photo`
- (21) `photo` - (41) `coding`
- (20) `blog` - (30) `tech`
- (19) `news` - (29) `news`
- (15) `music` - (28) `blog`
- (14) `tech` - (23) `music`
- (12) `freelance` - (19) `finance`
- (12) `finance` - (18) `crypto`
- (11) `sharing` - (16) `sharing`
- (10) `dating` - (16) `freelance`
- (10) `art` - (15) `art`
- (10) `shopping` - (15) `shopping`
- (10) `movies` - (13) `sport`
- (8) `crypto` - (13) `business`
- (7) `sport` - (12) `movies`
- (7) `hobby` - (11) `hobby`
- (7) `hacking` - (11) `education`
+8 -2
View File
@@ -67,7 +67,7 @@ def get_step_rank(rank):
return get_readable_rank(list(filter(lambda x: x >= rank, valid_step_ranks))[0]) return get_readable_rank(list(filter(lambda x: x >= rank, valid_step_ranks))[0])
if __name__ == '__main__': def main():
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
) )
parser.add_argument("--base","-b", metavar="BASE_FILE", parser.add_argument("--base","-b", metavar="BASE_FILE",
@@ -86,6 +86,8 @@ if __name__ == '__main__':
db = MaigretDatabase() db = MaigretDatabase()
sites_subset = db.load_from_file(args.base_file).sites sites_subset = db.load_from_file(args.base_file).sites
print(f"\nUpdating supported sites list (don't worry, it's needed)...")
with open("sites.md", "w") as site_file: with open("sites.md", "w") as site_file:
site_file.write(f""" site_file.write(f"""
## List of supported sites (search methods): total {len(sites_subset)}\n ## List of supported sites (search methods): total {len(sites_subset)}\n
@@ -144,4 +146,8 @@ Rank data fetched from Alexa by domains.
site_file.write('## Statistics\n\n') site_file.write('## Statistics\n\n')
site_file.write(statistics_text) site_file.write(statistics_text)
print("\nFinished updating supported site listing!") print("Finished updating supported site listing!")
if __name__ == '__main__':
main()