Improved self-check mode (#1887)

This commit is contained in:
Soxoj
2024-11-25 18:27:59 +01:00
committed by GitHub
parent d8a05807ba
commit 13c20afe5b
7 changed files with 80 additions and 26 deletions
+25 -13
View File
@@ -869,6 +869,11 @@ async def site_self_check(
action = "Disabled" if site.disabled else "Enabled" action = "Disabled" if site.disabled else "Enabled"
print(f"{action} site {site.name}...") print(f"{action} site {site.name}...")
# remove service tag "unchecked"
if "unchecked" in site.tags:
site.tags.remove("unchecked")
db.update_site(site)
return changes return changes
@@ -889,6 +894,7 @@ async def self_check(
def disabled_count(lst): def disabled_count(lst):
return len(list(filter(lambda x: x.disabled, lst))) return len(list(filter(lambda x: x.disabled, lst)))
unchecked_old_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
disabled_old_count = disabled_count(all_sites.values()) disabled_old_count = disabled_count(all_sites.values())
for _, site in all_sites.items(): for _, site in all_sites.items():
@@ -898,22 +904,28 @@ async def self_check(
future = asyncio.ensure_future(check_coro) future = asyncio.ensure_future(check_coro)
tasks.append(future) tasks.append(future)
for f in tqdm.asyncio.tqdm.as_completed(tasks): if tasks:
await f for f in tqdm.asyncio.tqdm.as_completed(tasks):
await f
unchecked_new_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
disabled_new_count = disabled_count(all_sites.values()) disabled_new_count = disabled_count(all_sites.values())
total_disabled = disabled_new_count - disabled_old_count total_disabled = disabled_new_count - disabled_old_count
if total_disabled >= 0: if total_disabled:
message = "Disabled" if total_disabled >= 0:
else: message = "Disabled"
message = "Enabled" else:
total_disabled *= -1 message = "Enabled"
total_disabled *= -1
if not silent: if not silent:
print( print(
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. " f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
"Run with `--info` flag to get more information" "Run with `--info` flag to get more information"
) )
return total_disabled != 0 if unchecked_new_count != unchecked_old_count:
print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}")
return total_disabled != 0 or unchecked_new_count != unchecked_old_count
+8 -2
View File
@@ -569,7 +569,11 @@ async def main():
# Database self-checking # Database self-checking
if args.self_check: if args.self_check:
print('Maigret sites database self-checking...') if len(site_data) == 0:
query_notify.warning('No sites to self-check with the current filters! Exiting...')
return
query_notify.success(f'Maigret sites database self-check started for {len(site_data)} sites...')
is_need_update = await self_check( is_need_update = await self_check(
db, db,
site_data, site_data,
@@ -588,7 +592,9 @@ async def main():
print('Database was successfully updated.') print('Database was successfully updated.')
else: else:
print('Updates will be applied only for current search session.') print('Updates will be applied only for current search session.')
print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
if args.verbose or args.debug:
query_notify.info('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
# Database statistics # Database statistics
if args.stats: if args.stats:
+4
View File
@@ -211,6 +211,10 @@ class QueryNotifyPrint(QueryNotify):
else: else:
print(msg) print(msg)
def success(self, message, symbol="+"):
    """Emit a success notice.

    The text is formatted as ``[<symbol>] <message>`` and handed to
    ``self._colored_print`` together with ``Fore.GREEN``.

    :param message: text of the notice.
    :param symbol: marker placed inside the square brackets, ``+`` by default.
    """
    self._colored_print(Fore.GREEN, f"[{symbol}] {message}")
def warning(self, message, symbol="-"): def warning(self, message, symbol="-"):
msg = f"[{symbol}] {message}" msg = f"[{symbol}] {message}"
self._colored_print(Fore.YELLOW, msg) self._colored_print(Fore.YELLOW, msg)
+10 -1
View File
@@ -1,16 +1,25 @@
{ {
"presence_strings": [ "presence_strings": [
"user not found",
"404",
"Page not found",
"error 404",
"username", "username",
"not found", "not found",
"пользователь", "пользователь",
"profile", "profile",
"lastname", "lastname",
"firstname", "firstname",
"DisplayName",
"biography", "biography",
"title",
"birthday", "birthday",
"репутация", "репутация",
"информация", "информация",
"e-mail" "e-mail",
"body",
"html",
"style"
], ],
"supposed_usernames": [ "supposed_usernames": [
"alex", "god", "admin", "red", "blue", "john" "alex", "god", "admin", "red", "blue", "john"
+30 -9
View File
@@ -43,7 +43,7 @@ class Submitter:
"User-Agent": get_random_user_agent(), "User-Agent": get_random_user_agent(),
} }
SEPARATORS = "\"'" SEPARATORS = "\"'\n"
RATIO = 0.6 RATIO = 0.6
TOP_FEATURES = 5 TOP_FEATURES = 5
@@ -138,17 +138,19 @@ class Submitter:
if status == QueryStatus.CLAIMED: if status == QueryStatus.CLAIMED:
changes["disabled"] = True changes["disabled"] = True
elif status == QueryStatus.CLAIMED: elif status == QueryStatus.CLAIMED:
self.logger.warning( print(
f"Not found `{username}` in {site.name}, must be claimed" f"{Fore.YELLOW}[!] Not found `{username}` in {site.name}, must be claimed{Style.RESET_ALL}"
) )
self.logger.info(results_dict[site.name]) self.logger.warning(site.json)
changes["disabled"] = True changes["disabled"] = True
else: else:
self.logger.warning( print(
f"Found `{username}` in {site.name}, must be available" f"{Fore.YELLOW}[!] Found `{username}` in {site.name}, must be available{Style.RESET_ALL}"
) )
self.logger.info(results_dict[site.name]) self.logger.warning(site.json)
changes["disabled"] = True changes["disabled"] = True
else:
print(f"{Fore.GREEN}[+] {username} is successfully checked: {status} in {site.name}{Style.RESET_ALL}")
self.logger.info(f"Site {site.name} checking is finished") self.logger.info(f"Site {site.name} checking is finished")
@@ -286,6 +288,10 @@ class Submitter:
a_minus_b = tokens_a.difference(tokens_b) a_minus_b = tokens_a.difference(tokens_b)
b_minus_a = tokens_b.difference(tokens_a) b_minus_a = tokens_b.difference(tokens_a)
# additional filtering by html response
a_minus_b = [t for t in a_minus_b if not t in non_exists_resp_text]
b_minus_a = [t for t in b_minus_a if not t in exists_resp_text]
if len(a_minus_b) == len(b_minus_a) == 0: if len(a_minus_b) == len(b_minus_a) == 0:
print("The pages for existing and non-existing account are the same!") print("The pages for existing and non-existing account are the same!")
@@ -302,6 +308,8 @@ class Submitter:
:top_features_count :top_features_count
] ]
self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list])
print("Detected text features of existing account: " + ", ".join(presence_list)) print("Detected text features of existing account: " + ", ".join(presence_list))
features = input("If features was not detected correctly, write it manually: ") features = input("If features was not detected correctly, write it manually: ")
@@ -311,6 +319,8 @@ class Submitter:
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[ absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
:top_features_count :top_features_count
] ]
self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list])
print( print(
"Detected text features of non-existing account: " + ", ".join(absence_list) "Detected text features of non-existing account: " + ", ".join(absence_list)
) )
@@ -338,7 +348,6 @@ class Submitter:
async def add_site(self, site): async def add_site(self, site):
sem = asyncio.Semaphore(1) sem = asyncio.Semaphore(1)
print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}") print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}")
print(site.json)
result = await self.site_self_check(site, sem) result = await self.site_self_check(site, sem)
if result["disabled"]: if result["disabled"]:
@@ -369,6 +378,7 @@ class Submitter:
print("0. finish editing") print("0. finish editing")
print("10. reject and block domain") print("10. reject and block domain")
print("11. invalid params, remove")
choice = input("\nSelect field number to edit (0-8): ").strip() choice = input("\nSelect field number to edit (0-8): ").strip()
@@ -381,6 +391,12 @@ class Submitter:
"reason": "manual block", "reason": "manual block",
} }
if choice == '11':
return {
"valid": False,
"reason": "remove",
}
if choice in editable_fields: if choice in editable_fields:
field = editable_fields[choice] field = editable_fields[choice]
current_value = getattr(site, field) current_value = getattr(site, field)
@@ -477,7 +493,7 @@ class Submitter:
if not found: if not found:
print( print(
f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}." f"{Fore.RED}[!] The check for site '{chosen_site.name}' failed!{Style.RESET_ALL}"
) )
print( print(
"Try to run this mode again and increase features count or choose others." "Try to run this mode again and increase features count or choose others."
@@ -510,4 +526,9 @@ class Submitter:
site_data = chosen_site.strip_engine_data() site_data = chosen_site.strip_engine_data()
self.logger.debug(site_data.json) self.logger.debug(site_data.json)
self.db.update_site(site_data) self.db.update_site(site_data)
if self.args.db:
print(f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}")
self.db.save_to_file(self.args.db)
return True return True
-1
View File
@@ -3117,7 +3117,6 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.stopstalk.com) [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://www.stopstalk.com) [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://www.polywork.com) [www.polywork.com (https://www.polywork.com)](https://www.polywork.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://www.polywork.com) [www.polywork.com (https://www.polywork.com)](https://www.polywork.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://oshwlab.com) [oshwlab.com (https://oshwlab.com)](https://oshwlab.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://oshwlab.com) [oshwlab.com (https://oshwlab.com)](https://oshwlab.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://www.xshaker.net) [www.xshaker.net (https://www.xshaker.net)](https://www.xshaker.net)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://chaturbator.su) [chaturbator.su (https://chaturbator.su)](https://chaturbator.su)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://chaturbator.su) [chaturbator.su (https://chaturbator.su)](https://chaturbator.su)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://imgflip.com) [imgflip.com (https://imgflip.com)](https://imgflip.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://imgflip.com) [imgflip.com (https://imgflip.com)](https://imgflip.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://www.flickr.com) [www.flickr.com (https://www.flickr.com)](https://www.flickr.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://www.flickr.com) [www.flickr.com (https://www.flickr.com)](https://www.flickr.com)*: top 100M*
+3
View File
@@ -13,4 +13,7 @@ def test_tags_validity(default_db):
if tag not in tags: if tag not in tags:
unknown_tags.add(tag) unknown_tags.add(tag)
# make sure all tags are known
# if you see an error about the "unchecked" tag, please run:
# maigret --db `pwd`/maigret/resources/data.json --self-check --tag unchecked --use-disabled-sites
assert unknown_tags == set() assert unknown_tags == set()