From 13c20afe5bd7b88ff19772b303013592392099ee Mon Sep 17 00:00:00 2001 From: Soxoj <31013580+soxoj@users.noreply.github.com> Date: Mon, 25 Nov 2024 18:27:59 +0100 Subject: [PATCH] Improved self-check mode (#1887) --- maigret/checking.py | 38 +++++++++++++++++++++----------- maigret/maigret.py | 10 +++++++-- maigret/notify.py | 4 ++++ maigret/resources/settings.json | 11 +++++++++- maigret/submit.py | 39 +++++++++++++++++++++++++-------- sites.md | 1 - tests/test_data.py | 3 +++ 7 files changed, 80 insertions(+), 26 deletions(-) diff --git a/maigret/checking.py b/maigret/checking.py index c88c5c2..54614a2 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -869,6 +869,11 @@ async def site_self_check( action = "Disabled" if site.disabled else "Enabled" print(f"{action} site {site.name}...") + # remove service tag "unchecked" + if "unchecked" in site.tags: + site.tags.remove("unchecked") + db.update_site(site) + return changes @@ -889,6 +894,7 @@ async def self_check( def disabled_count(lst): return len(list(filter(lambda x: x.disabled, lst))) + unchecked_old_count = len([site for site in all_sites.values() if "unchecked" in site.tags]) disabled_old_count = disabled_count(all_sites.values()) for _, site in all_sites.items(): @@ -898,22 +904,28 @@ async def self_check( future = asyncio.ensure_future(check_coro) tasks.append(future) - for f in tqdm.asyncio.tqdm.as_completed(tasks): - await f + if tasks: + for f in tqdm.asyncio.tqdm.as_completed(tasks): + await f + unchecked_new_count = len([site for site in all_sites.values() if "unchecked" in site.tags]) disabled_new_count = disabled_count(all_sites.values()) total_disabled = disabled_new_count - disabled_old_count - if total_disabled >= 0: - message = "Disabled" - else: - message = "Enabled" - total_disabled *= -1 + if total_disabled: + if total_disabled >= 0: + message = "Disabled" + else: + message = "Enabled" + total_disabled *= -1 - if not silent: - print( - f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. " - "Run with `--info` flag to get more information" - ) + if not silent: + print( + f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. " + "Run with `--info` flag to get more information" + ) - return total_disabled != 0 + if unchecked_new_count != unchecked_old_count: + print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}") + + return total_disabled != 0 or unchecked_new_count != unchecked_old_count diff --git a/maigret/maigret.py b/maigret/maigret.py index 4cd9d22..f717c94 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -569,7 +569,11 @@ async def main(): # Database self-checking if args.self_check: - print('Maigret sites database self-checking...') + if len(site_data) == 0: + query_notify.warning('No sites to self-check with the current filters! Exiting...') + return + + query_notify.success(f'Maigret sites database self-check started for {len(site_data)} sites...') is_need_update = await self_check( db, site_data, @@ -588,7 +592,9 @@ async def main(): print('Database was successfully updated.') else: print('Updates will be applied only for current search session.') - print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data))) + + if args.verbose or args.debug: + query_notify.info('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data))) # Database statistics if args.stats: diff --git a/maigret/notify.py b/maigret/notify.py index be86946..3e4840f 100644 --- a/maigret/notify.py +++ b/maigret/notify.py @@ -211,6 +211,10 @@ class QueryNotifyPrint(QueryNotify): else: print(msg) + def success(self, message, symbol="+"): + msg = f"[{symbol}] {message}" + self._colored_print(Fore.GREEN, msg) + def warning(self, message, symbol="-"): msg = f"[{symbol}] {message}" self._colored_print(Fore.YELLOW, msg) diff --git a/maigret/resources/settings.json b/maigret/resources/settings.json index fe04807..6235704 100644 --- a/maigret/resources/settings.json +++ b/maigret/resources/settings.json @@ -1,16 +1,25 @@ { "presence_strings": [ + "user not found", + "404", + "Page not found", + "error 404", "username", "not found", "пользователь", "profile", "lastname", "firstname", + "DisplayName", "biography", + "title", "birthday", "репутация", "информация", - "e-mail" + "e-mail", + "body", + "html", + "style" ], "supposed_usernames": [ "alex", "god", "admin", "red", "blue", "john" diff --git a/maigret/submit.py b/maigret/submit.py index 3327abc..e623ed7 100644 --- a/maigret/submit.py +++ b/maigret/submit.py @@ -43,7 +43,7 @@ class Submitter: "User-Agent": get_random_user_agent(), } - SEPARATORS = "\"'" + SEPARATORS = "\"'\n" RATIO = 0.6 TOP_FEATURES = 5 @@ -138,17 +138,19 @@ class Submitter: if status == QueryStatus.CLAIMED: changes["disabled"] = True elif status == QueryStatus.CLAIMED: - self.logger.warning( - f"Not found `{username}` in {site.name}, must be claimed" + print( + f"{Fore.YELLOW}[!] Not found `{username}` in {site.name}, must be claimed{Style.RESET_ALL}" ) - self.logger.info(results_dict[site.name]) + self.logger.warning(site.json) changes["disabled"] = True else: - self.logger.warning( - f"Found `{username}` in {site.name}, must be available" + print( + f"{Fore.YELLOW}[!] Found `{username}` in {site.name}, must be available{Style.RESET_ALL}" ) - self.logger.info(results_dict[site.name]) + self.logger.warning(site.json) changes["disabled"] = True + else: + print(f"{Fore.GREEN}[+] {username} is successfully checked: {status} in {site.name}{Style.RESET_ALL}") self.logger.info(f"Site {site.name} checking is finished") @@ -286,6 +288,10 @@ class Submitter: a_minus_b = tokens_a.difference(tokens_b) b_minus_a = tokens_b.difference(tokens_a) + # additional filtering by html response + a_minus_b = [t for t in a_minus_b if not t in non_exists_resp_text] + b_minus_a = [t for t in b_minus_a if not t in exists_resp_text] + if len(a_minus_b) == len(b_minus_a) == 0: print("The pages for existing and non-existing account are the same!") @@ -302,6 +308,8 @@ class Submitter: :top_features_count ] + self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list]) + print("Detected text features of existing account: " + ", ".join(presence_list)) features = input("If features was not detected correctly, write it manually: ") @@ -311,6 +319,8 @@ class Submitter: absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[ :top_features_count ] + self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list]) + print( "Detected text features of non-existing account: " + ", ".join(absence_list) ) @@ -338,7 +348,6 @@ class Submitter: async def add_site(self, site): sem = asyncio.Semaphore(1) print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}") - print(site.json) result = await self.site_self_check(site, sem) if result["disabled"]: @@ -369,6 +378,7 @@ class Submitter: print("0. finish editing") print("10. reject and block domain") + print("11. invalid params, remove") choice = input("\nSelect field number to edit (0-8): ").strip() @@ -381,6 +391,12 @@ class Submitter: "reason": "manual block", } + if choice == '11': + return { + "valid": False, + "reason": "remove", + } + if choice in editable_fields: field = editable_fields[choice] current_value = getattr(site, field) @@ -477,7 +493,7 @@ class Submitter: if not found: print( - f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}." + f"{Fore.RED}[!] The check for site '{chosen_site.name}' failed!{Style.RESET_ALL}" ) print( "Try to run this mode again and increase features count or choose others." @@ -510,4 +526,9 @@ class Submitter: site_data = chosen_site.strip_engine_data() self.logger.debug(site_data.json) self.db.update_site(site_data) + + if self.args.db: + print(f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}") + self.db.save_to_file(self.args.db) + return True diff --git a/sites.md b/sites.md index 714a417..7c6c26a 100644 --- a/sites.md +++ b/sites.md @@ -3117,7 +3117,6 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://www.stopstalk.com) [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://www.polywork.com) [www.polywork.com (https://www.polywork.com)](https://www.polywork.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://oshwlab.com) [oshwlab.com (https://oshwlab.com)](https://oshwlab.com)*: top 100M* -1. ![](https://www.google.com/s2/favicons?domain=https://www.xshaker.net) [www.xshaker.net (https://www.xshaker.net)](https://www.xshaker.net)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://chaturbator.su) [chaturbator.su (https://chaturbator.su)](https://chaturbator.su)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://imgflip.com) [imgflip.com (https://imgflip.com)](https://imgflip.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://www.flickr.com) [www.flickr.com (https://www.flickr.com)](https://www.flickr.com)*: top 100M* diff --git a/tests/test_data.py b/tests/test_data.py index 538aacf..b296def 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -13,4 +13,7 @@ def test_tags_validity(default_db): if tag not in tags: unknown_tags.add(tag) + # make sure all tags are known + # if you see "unchecked" tag error, please, do + # maigret --db `pwd`/maigret/resources/data.json --self-check --tag unchecked --use-disabled-sites assert unknown_tags == set()