mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Improved self-check mode (#1887)
This commit is contained in:
+25
-13
@@ -869,6 +869,11 @@ async def site_self_check(
|
||||
action = "Disabled" if site.disabled else "Enabled"
|
||||
print(f"{action} site {site.name}...")
|
||||
|
||||
# remove service tag "unchecked"
|
||||
if "unchecked" in site.tags:
|
||||
site.tags.remove("unchecked")
|
||||
db.update_site(site)
|
||||
|
||||
return changes
|
||||
|
||||
|
||||
@@ -889,6 +894,7 @@ async def self_check(
|
||||
def disabled_count(lst):
|
||||
return len(list(filter(lambda x: x.disabled, lst)))
|
||||
|
||||
unchecked_old_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
|
||||
disabled_old_count = disabled_count(all_sites.values())
|
||||
|
||||
for _, site in all_sites.items():
|
||||
@@ -898,22 +904,28 @@ async def self_check(
|
||||
future = asyncio.ensure_future(check_coro)
|
||||
tasks.append(future)
|
||||
|
||||
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
||||
await f
|
||||
if tasks:
|
||||
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
||||
await f
|
||||
|
||||
unchecked_new_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
|
||||
disabled_new_count = disabled_count(all_sites.values())
|
||||
total_disabled = disabled_new_count - disabled_old_count
|
||||
|
||||
if total_disabled >= 0:
|
||||
message = "Disabled"
|
||||
else:
|
||||
message = "Enabled"
|
||||
total_disabled *= -1
|
||||
if total_disabled:
|
||||
if total_disabled >= 0:
|
||||
message = "Disabled"
|
||||
else:
|
||||
message = "Enabled"
|
||||
total_disabled *= -1
|
||||
|
||||
if not silent:
|
||||
print(
|
||||
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
||||
"Run with `--info` flag to get more information"
|
||||
)
|
||||
if not silent:
|
||||
print(
|
||||
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
||||
"Run with `--info` flag to get more information"
|
||||
)
|
||||
|
||||
return total_disabled != 0
|
||||
if unchecked_new_count != unchecked_old_count:
|
||||
print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}")
|
||||
|
||||
return total_disabled != 0 or unchecked_new_count != unchecked_old_count
|
||||
|
||||
+8
-2
@@ -569,7 +569,11 @@ async def main():
|
||||
|
||||
# Database self-checking
|
||||
if args.self_check:
|
||||
print('Maigret sites database self-checking...')
|
||||
if len(site_data) == 0:
|
||||
query_notify.warning('No sites to self-check with the current filters! Exiting...')
|
||||
return
|
||||
|
||||
query_notify.success(f'Maigret sites database self-check started for {len(site_data)} sites...')
|
||||
is_need_update = await self_check(
|
||||
db,
|
||||
site_data,
|
||||
@@ -588,7 +592,9 @@ async def main():
|
||||
print('Database was successfully updated.')
|
||||
else:
|
||||
print('Updates will be applied only for current search session.')
|
||||
print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
|
||||
|
||||
if args.verbose or args.debug:
|
||||
query_notify.info('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
|
||||
|
||||
# Database statistics
|
||||
if args.stats:
|
||||
|
||||
@@ -211,6 +211,10 @@ class QueryNotifyPrint(QueryNotify):
|
||||
else:
|
||||
print(msg)
|
||||
|
||||
def success(self, message, symbol="+"):
    """Report a success message.

    The text is rendered as ``[<symbol>] <message>`` and handed to
    ``self._colored_print`` together with ``Fore.GREEN``.

    :param message: text to report.
    :param symbol: marker placed inside the square brackets.
    """
    self._colored_print(Fore.GREEN, f"[{symbol}] {message}")
|
||||
|
||||
def warning(self, message, symbol="-"):
    """Report a warning message.

    The text is rendered as ``[<symbol>] <message>`` and handed to
    ``self._colored_print`` together with ``Fore.YELLOW``.

    :param message: text to report.
    :param symbol: marker placed inside the square brackets.
    """
    self._colored_print(Fore.YELLOW, f"[{symbol}] {message}")
|
||||
|
||||
@@ -1,16 +1,25 @@
|
||||
{
|
||||
"presence_strings": [
|
||||
"user not found",
|
||||
"404",
|
||||
"Page not found",
|
||||
"error 404",
|
||||
"username",
|
||||
"not found",
|
||||
"пользователь",
|
||||
"profile",
|
||||
"lastname",
|
||||
"firstname",
|
||||
"DisplayName",
|
||||
"biography",
|
||||
"title",
|
||||
"birthday",
|
||||
"репутация",
|
||||
"информация",
|
||||
"e-mail"
|
||||
"e-mail",
|
||||
"body",
|
||||
"html",
|
||||
"style"
|
||||
],
|
||||
"supposed_usernames": [
|
||||
"alex", "god", "admin", "red", "blue", "john"
|
||||
|
||||
+30
-9
@@ -43,7 +43,7 @@ class Submitter:
|
||||
"User-Agent": get_random_user_agent(),
|
||||
}
|
||||
|
||||
SEPARATORS = "\"'"
|
||||
SEPARATORS = "\"'\n"
|
||||
|
||||
RATIO = 0.6
|
||||
TOP_FEATURES = 5
|
||||
@@ -138,17 +138,19 @@ class Submitter:
|
||||
if status == QueryStatus.CLAIMED:
|
||||
changes["disabled"] = True
|
||||
elif status == QueryStatus.CLAIMED:
|
||||
self.logger.warning(
|
||||
f"Not found `{username}` in {site.name}, must be claimed"
|
||||
print(
|
||||
f"{Fore.YELLOW}[!] Not found `{username}` in {site.name}, must be claimed{Style.RESET_ALL}"
|
||||
)
|
||||
self.logger.info(results_dict[site.name])
|
||||
self.logger.warning(site.json)
|
||||
changes["disabled"] = True
|
||||
else:
|
||||
self.logger.warning(
|
||||
f"Found `{username}` in {site.name}, must be available"
|
||||
print(
|
||||
f"{Fore.YELLOW}[!] Found `{username}` in {site.name}, must be available{Style.RESET_ALL}"
|
||||
)
|
||||
self.logger.info(results_dict[site.name])
|
||||
self.logger.warning(site.json)
|
||||
changes["disabled"] = True
|
||||
else:
|
||||
print(f"{Fore.GREEN}[+] {username} is successfully checked: {status} in {site.name}{Style.RESET_ALL}")
|
||||
|
||||
self.logger.info(f"Site {site.name} checking is finished")
|
||||
|
||||
@@ -286,6 +288,10 @@ class Submitter:
|
||||
a_minus_b = tokens_a.difference(tokens_b)
|
||||
b_minus_a = tokens_b.difference(tokens_a)
|
||||
|
||||
# additional filtering by html response
|
||||
a_minus_b = [t for t in a_minus_b if not t in non_exists_resp_text]
|
||||
b_minus_a = [t for t in b_minus_a if not t in exists_resp_text]
|
||||
|
||||
if len(a_minus_b) == len(b_minus_a) == 0:
|
||||
print("The pages for existing and non-existing account are the same!")
|
||||
|
||||
@@ -302,6 +308,8 @@ class Submitter:
|
||||
:top_features_count
|
||||
]
|
||||
|
||||
self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list])
|
||||
|
||||
print("Detected text features of existing account: " + ", ".join(presence_list))
|
||||
features = input("If features was not detected correctly, write it manually: ")
|
||||
|
||||
@@ -311,6 +319,8 @@ class Submitter:
|
||||
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
|
||||
:top_features_count
|
||||
]
|
||||
self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list])
|
||||
|
||||
print(
|
||||
"Detected text features of non-existing account: " + ", ".join(absence_list)
|
||||
)
|
||||
@@ -338,7 +348,6 @@ class Submitter:
|
||||
async def add_site(self, site):
|
||||
sem = asyncio.Semaphore(1)
|
||||
print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}")
|
||||
print(site.json)
|
||||
|
||||
result = await self.site_self_check(site, sem)
|
||||
if result["disabled"]:
|
||||
@@ -369,6 +378,7 @@ class Submitter:
|
||||
|
||||
print("0. finish editing")
|
||||
print("10. reject and block domain")
|
||||
print("11. invalid params, remove")
|
||||
|
||||
choice = input("\nSelect field number to edit (0-8): ").strip()
|
||||
|
||||
@@ -381,6 +391,12 @@ class Submitter:
|
||||
"reason": "manual block",
|
||||
}
|
||||
|
||||
if choice == '11':
|
||||
return {
|
||||
"valid": False,
|
||||
"reason": "remove",
|
||||
}
|
||||
|
||||
if choice in editable_fields:
|
||||
field = editable_fields[choice]
|
||||
current_value = getattr(site, field)
|
||||
@@ -477,7 +493,7 @@ class Submitter:
|
||||
|
||||
if not found:
|
||||
print(
|
||||
f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}."
|
||||
f"{Fore.RED}[!] The check for site '{chosen_site.name}' failed!{Style.RESET_ALL}"
|
||||
)
|
||||
print(
|
||||
"Try to run this mode again and increase features count or choose others."
|
||||
@@ -510,4 +526,9 @@ class Submitter:
|
||||
site_data = chosen_site.strip_engine_data()
|
||||
self.logger.debug(site_data.json)
|
||||
self.db.update_site(site_data)
|
||||
|
||||
if self.args.db:
|
||||
print(f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}")
|
||||
self.db.save_to_file(self.args.db)
|
||||
|
||||
return True
|
||||
|
||||
@@ -3117,7 +3117,6 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M*
|
||||
1.  [www.polywork.com (https://www.polywork.com)](https://www.polywork.com)*: top 100M*
|
||||
1.  [oshwlab.com (https://oshwlab.com)](https://oshwlab.com)*: top 100M*
|
||||
1.  [www.xshaker.net (https://www.xshaker.net)](https://www.xshaker.net)*: top 100M*
|
||||
1.  [chaturbator.su (https://chaturbator.su)](https://chaturbator.su)*: top 100M*
|
||||
1.  [imgflip.com (https://imgflip.com)](https://imgflip.com)*: top 100M*
|
||||
1.  [www.flickr.com (https://www.flickr.com)](https://www.flickr.com)*: top 100M*
|
||||
|
||||
@@ -13,4 +13,7 @@ def test_tags_validity(default_db):
|
||||
if tag not in tags:
|
||||
unknown_tags.add(tag)
|
||||
|
||||
# make sure all tags are known
|
||||
# if you see an "unchecked" tag error, run the following command:
|
||||
# maigret --db `pwd`/maigret/resources/data.json --self-check --tag unchecked --use-disabled-sites
|
||||
assert unknown_tags == set()
|
||||
|
||||
Reference in New Issue
Block a user