From 0ad2cdef2c0330f39e95ed9aa1f12b6f30c7fb78 Mon Sep 17 00:00:00 2001 From: Soxoj <31013580+soxoj@users.noreply.github.com> Date: Sun, 16 Apr 2023 18:24:29 +0200 Subject: [PATCH] Fixed false positives, updated networkx dep, some lint fixes (#894) * Fixed false positives, updated networkx dep, some lint fixes * Downgraded networkx version --- maigret/checking.py | 16 ++++++++++---- maigret/resources/data.json | 43 ++++++++++++++++++++++++------------- maigret/sites.py | 30 ++++++++++++++++++-------- maigret/submit.py | 20 ++++++++++++----- requirements.txt | 2 +- 5 files changed, 77 insertions(+), 34 deletions(-) diff --git a/maigret/checking.py b/maigret/checking.py index 3385329..8a6e795 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -529,7 +529,9 @@ def make_site_result( async def check_site_for_username( site, username, options: QueryOptions, logger, query_notify, *args, **kwargs ) -> Tuple[str, QueryResultWrapper]: - default_result = make_site_result(site, username, options, logger, retry=kwargs.get('retry')) + default_result = make_site_result( + site, username, options, logger, retry=kwargs.get('retry') + ) future = default_result.get("future") if not future: return site.name, default_result @@ -667,8 +669,11 @@ async def maigret( executor = AsyncioSimpleExecutor(logger=logger) else: executor = AsyncioProgressbarQueueExecutor( - logger=logger, in_parallel=max_connections, timeout=timeout + 0.5, - *args, **kwargs + logger=logger, + in_parallel=max_connections, + timeout=timeout + 0.5, + *args, + **kwargs, ) # make options objects for all the requests @@ -710,7 +715,10 @@ async def maigret( tasks_dict[sitename] = ( check_site_for_username, [site, username, options, logger, query_notify], - {'default': (sitename, default_result), 'retry': retries-attempts+1}, + { + 'default': (sitename, default_result), + 'retry': retries - attempts + 1, + }, ) cur_results = await executor.run(tasks_dict.values()) diff --git a/maigret/resources/data.json b/maigret/resources/data.json index afd195d..be284c1 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -1255,8 +1255,14 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "Arduino": { - "checkType": "status_code", - "url": "https://create.arduino.cc/projecthub/{username}", + "checkType": "message", + "presenseStrs": [ + "Arduino Project Hub" + ], + "absenceStrs": [ + "Arduino Project Hub" + ], + "url": "https://projecthub.arduino.cc/{username}", "usernameClaimed": "uehkon", "usernameUnclaimed": "noonewouldeverusethis7" }, @@ -1639,11 +1645,14 @@ ], "checkType": "message", "absenceStrs": [ - "\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u0432\u0432\u0435\u0434\u0438\u0442\u0435 \u0434\u0440\u0443\u0433\u043e\u0435 \u0438\u043c\u044f." + "Go to the homepage" + ], + "presenseStrs": [ + "\u041f\u043e\u0441\u0442\u044b \u043e\u0442 " ], "alexaRank": 8074009, "urlMain": "https://automania.ru", - "url": "https://automania.ru/forums/members/?username={username}", + "url": "https://automania.ru/author/{username}/", "usernameClaimed": "Bones", "usernameUnclaimed": "noonewouldeverusethis7" }, @@ -2024,7 +2033,7 @@ "This user page is currently not available" ], "presenseStrs": [ - "BentBox photos and videos" + "id=\"followingUser\"" ], "url": "https://bentbox.co/{username}", "usernameClaimed": "uehkon", @@ -4269,7 +4278,7 @@ ], "checkType": "message", "absenceStrs": [ - "https://www.donationalerts.com/img/404.svg" + "/img/404.svg" ], "alexaRank": 19188, "urlMain": "https://www.donationalerts.com/", @@ -6928,6 +6937,7 @@ }, "Gothic": { "urlSubpath": "/forum", + "disabled": true, "tags": [ "forum", "ru" @@ -14260,6 +14270,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "Shikimori": { + "disabled": true, "tags": [ "ru" ], @@ -15554,7 +15565,7 @@ "regexCheck": "^[A-Za-z0-9_-]{3,16}$", "checkType": "message", "absenceStrs": [ - "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d" + "404 \u2014 Not found" ], "urlMain": "https://trashbox.ru/", "url": "https://trashbox.ru/users/{username}", @@ -17077,7 +17088,7 @@ "video" ], "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2NjI4NDkxODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.DAmAaaKixSd_WQ9-7PiTZDmyK61SHEYluYC-qdcJtkE" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2ODE2NjIxODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.T_MPgWQBmH-KaXWYmfRPS8XdgmCHn7kTOtzDlCOBlQg" }, "activation": { "url": "https://vimeo.com/_rv/viewer", @@ -29405,6 +29416,7 @@ "alexaRank": 153717 }, "forums.imore.com": { + "disabled": true, "urlMain": "https://forums.imore.com", "engine": "vBulletin", "usernameClaimed": "alex", @@ -31918,6 +31930,7 @@ "url": "https://discuss.kubernetes.io/u/{username}" }, "discuss.newrelic.com": { + "disabled": true, "checkType": "status_code", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7", @@ -33913,7 +33926,7 @@ ] }, "lyricsTraining": { - "taggs":[ + "tags": [ "music" ], "checkType": "message", @@ -33928,7 +33941,7 @@ "usernameUnclaimed": "noonewouldeverusethis12" }, "expoForum": { - "taggs":[ + "tags": [ "forum", "coding" ], @@ -33938,7 +33951,7 @@ "usernameUnclaimed": "noonewouldeverusethis12" }, "rawg.io": { - "taggs":[ + "tags": [ "gaming" ], "checkType": "status_code", @@ -33947,7 +33960,7 @@ "usernameUnclaimed": "noonewouldeverusethis12" }, "SchemeColor": { - "taggs":[ + "tags": [ "art", "design" ], @@ -33957,7 +33970,7 @@ "usernameUnclaimed": "noonewouldeverusethis12" }, "aetherhub": { - "taggs":[ + "tags": [ "gaming" ], "checkType": "status_code", @@ -33966,7 +33979,7 @@ "usernameUnclaimed": "noonewouldeverusethis12" }, "bugbounty": { - "taggs":[ + "tags": [ "hacking" ], "checkType": "status_code", @@ -33975,7 +33988,7 @@ "usernameUnclaimed": "noonewouldeverusethis12" }, "universocraft": { - "taggs":[ + "tags": [ "gaming" ], "checkType": "message", diff --git a/maigret/sites.py b/maigret/sites.py index 65312ff..bc71f84 100644 --- a/maigret/sites.py +++ b/maigret/sites.py @@ -455,31 +455,43 @@ class MaigretDatabase: for tag in filter(lambda x: not is_country_tag(x), site.tags): tags[tag] = tags.get(tag, 0) + 1 - enabled_count = total_count-disabled_count - enabled_perc = round(100*enabled_count/total_count, 2) - output += f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n" + enabled_count = total_count - disabled_count + enabled_perc = round(100 * enabled_count / total_count, 2) + output += ( + f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n" + ) - checks_perc = round(100*message_checks_one_factor/enabled_count, 2) + checks_perc = round(100 * message_checks_one_factor / enabled_count, 2) output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n" - status_checks_perc = round(100*status_checks/enabled_count, 2) + status_checks_perc = round(100 * status_checks / enabled_count, 2) output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n" - output += f"False positive risk (total): {checks_perc+status_checks_perc:.2f}%\n\n" + output += ( + f"False positive risk (total): {checks_perc+status_checks_perc:.2f}%\n\n" + ) top_urls_count = 20 output += f"Top {top_urls_count} profile URLs:\n" - for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:top_urls_count]: + for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[ + :top_urls_count + ]: if count == 1: break output += f"- ({count})\t`{url}`\n" if is_markdown else f"{count}\t{url}\n" top_tags_count = 20 output += f"\nTop {top_tags_count} tags:\n" - for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:top_tags_count]: + for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[ + :top_tags_count + ]: mark = "" if tag not in self._tags: mark = " (non-standard)" - output += f"- ({count})\t`{tag}`{mark}\n" if is_markdown else f"{count}\t{tag}{mark}\n" + output += ( + f"- ({count})\t`{tag}`{mark}\n" + if is_markdown + else f"{count}\t{tag}{mark}\n" + ) return output diff --git a/maigret/submit.py b/maigret/submit.py index 7f2baaf..21e6706 100644 --- a/maigret/submit.py +++ b/maigret/submit.py @@ -36,6 +36,7 @@ class CloudflareSession: async def close(self): pass + class Submitter: HEADERS = { "User-Agent": get_random_user_agent(), @@ -54,6 +55,7 @@ class Submitter: self.logger = logger from aiohttp_socks import ProxyConnector + proxy = self.args.proxy cookie_jar = None if args.cookie_file: @@ -163,7 +165,9 @@ class Submitter: fields['urlSubpath'] = f'/{subpath}' return fields - async def detect_known_engine(self, url_exists, url_mainpage) -> [List[MaigretSite], str]: + async def detect_known_engine( + self, url_exists, url_mainpage + ) -> [List[MaigretSite], str]: resp_text = '' try: r = await self.session.get(url_mainpage) @@ -366,9 +370,10 @@ class Submitter: except KeyboardInterrupt: print('Engine detect process is interrupted.') - if 'cloudflare' in text.lower(): - print('Cloudflare protection detected. I will use cloudscraper for futher work') + print( + 'Cloudflare protection detected. I will use cloudscraper for futher work' + ) # self.session = CloudflareSession() if not sites: @@ -376,11 +381,16 @@ class Submitter: redirects = False if self.args.verbose: - redirects = 'y' in input('Should we do redirects automatically? [yN] ').lower() + redirects = ( + 'y' in input('Should we do redirects automatically? [yN] ').lower() + ) sites = [ await self.check_features_manually( - url_exists, url_mainpage, cookie_file, redirects, + url_exists, + url_mainpage, + cookie_file, + redirects, ) ] diff --git a/requirements.txt b/requirements.txt index dc153fc..4771b2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,7 +33,7 @@ webencodings==0.5.1 xhtml2pdf==0.2.8 XMind==1.2.0 yarl==1.8.1 -networkx==2.6 +networkx==2.6.3 pyvis==0.2.1 reportlab==3.6.12 cloudscraper==1.2.66