mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Improved self-check mode (#1887)
This commit is contained in:
+25
-13
@@ -869,6 +869,11 @@ async def site_self_check(
|
|||||||
action = "Disabled" if site.disabled else "Enabled"
|
action = "Disabled" if site.disabled else "Enabled"
|
||||||
print(f"{action} site {site.name}...")
|
print(f"{action} site {site.name}...")
|
||||||
|
|
||||||
|
# remove service tag "unchecked"
|
||||||
|
if "unchecked" in site.tags:
|
||||||
|
site.tags.remove("unchecked")
|
||||||
|
db.update_site(site)
|
||||||
|
|
||||||
return changes
|
return changes
|
||||||
|
|
||||||
|
|
||||||
@@ -889,6 +894,7 @@ async def self_check(
|
|||||||
def disabled_count(lst):
|
def disabled_count(lst):
|
||||||
return len(list(filter(lambda x: x.disabled, lst)))
|
return len(list(filter(lambda x: x.disabled, lst)))
|
||||||
|
|
||||||
|
unchecked_old_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
|
||||||
disabled_old_count = disabled_count(all_sites.values())
|
disabled_old_count = disabled_count(all_sites.values())
|
||||||
|
|
||||||
for _, site in all_sites.items():
|
for _, site in all_sites.items():
|
||||||
@@ -898,22 +904,28 @@ async def self_check(
|
|||||||
future = asyncio.ensure_future(check_coro)
|
future = asyncio.ensure_future(check_coro)
|
||||||
tasks.append(future)
|
tasks.append(future)
|
||||||
|
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
if tasks:
|
||||||
await f
|
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
||||||
|
await f
|
||||||
|
|
||||||
|
unchecked_new_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
|
||||||
disabled_new_count = disabled_count(all_sites.values())
|
disabled_new_count = disabled_count(all_sites.values())
|
||||||
total_disabled = disabled_new_count - disabled_old_count
|
total_disabled = disabled_new_count - disabled_old_count
|
||||||
|
|
||||||
if total_disabled >= 0:
|
if total_disabled:
|
||||||
message = "Disabled"
|
if total_disabled >= 0:
|
||||||
else:
|
message = "Disabled"
|
||||||
message = "Enabled"
|
else:
|
||||||
total_disabled *= -1
|
message = "Enabled"
|
||||||
|
total_disabled *= -1
|
||||||
|
|
||||||
if not silent:
|
if not silent:
|
||||||
print(
|
print(
|
||||||
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
||||||
"Run with `--info` flag to get more information"
|
"Run with `--info` flag to get more information"
|
||||||
)
|
)
|
||||||
|
|
||||||
return total_disabled != 0
|
if unchecked_new_count != unchecked_old_count:
|
||||||
|
print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}")
|
||||||
|
|
||||||
|
return total_disabled != 0 or unchecked_new_count != unchecked_old_count
|
||||||
|
|||||||
+8
-2
@@ -569,7 +569,11 @@ async def main():
|
|||||||
|
|
||||||
# Database self-checking
|
# Database self-checking
|
||||||
if args.self_check:
|
if args.self_check:
|
||||||
print('Maigret sites database self-checking...')
|
if len(site_data) == 0:
|
||||||
|
query_notify.warning('No sites to self-check with the current filters! Exiting...')
|
||||||
|
return
|
||||||
|
|
||||||
|
query_notify.success(f'Maigret sites database self-check started for {len(site_data)} sites...')
|
||||||
is_need_update = await self_check(
|
is_need_update = await self_check(
|
||||||
db,
|
db,
|
||||||
site_data,
|
site_data,
|
||||||
@@ -588,7 +592,9 @@ async def main():
|
|||||||
print('Database was successfully updated.')
|
print('Database was successfully updated.')
|
||||||
else:
|
else:
|
||||||
print('Updates will be applied only for current search session.')
|
print('Updates will be applied only for current search session.')
|
||||||
print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
|
|
||||||
|
if args.verbose or args.debug:
|
||||||
|
query_notify.info('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
|
||||||
|
|
||||||
# Database statistics
|
# Database statistics
|
||||||
if args.stats:
|
if args.stats:
|
||||||
|
|||||||
@@ -211,6 +211,10 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
else:
|
else:
|
||||||
print(msg)
|
print(msg)
|
||||||
|
|
||||||
|
def success(self, message, symbol="+"):
|
||||||
|
msg = f"[{symbol}] {message}"
|
||||||
|
self._colored_print(Fore.GREEN, msg)
|
||||||
|
|
||||||
def warning(self, message, symbol="-"):
|
def warning(self, message, symbol="-"):
|
||||||
msg = f"[{symbol}] {message}"
|
msg = f"[{symbol}] {message}"
|
||||||
self._colored_print(Fore.YELLOW, msg)
|
self._colored_print(Fore.YELLOW, msg)
|
||||||
|
|||||||
@@ -1,16 +1,25 @@
|
|||||||
{
|
{
|
||||||
"presence_strings": [
|
"presence_strings": [
|
||||||
|
"user not found",
|
||||||
|
"404",
|
||||||
|
"Page not found",
|
||||||
|
"error 404",
|
||||||
"username",
|
"username",
|
||||||
"not found",
|
"not found",
|
||||||
"пользователь",
|
"пользователь",
|
||||||
"profile",
|
"profile",
|
||||||
"lastname",
|
"lastname",
|
||||||
"firstname",
|
"firstname",
|
||||||
|
"DisplayName",
|
||||||
"biography",
|
"biography",
|
||||||
|
"title",
|
||||||
"birthday",
|
"birthday",
|
||||||
"репутация",
|
"репутация",
|
||||||
"информация",
|
"информация",
|
||||||
"e-mail"
|
"e-mail",
|
||||||
|
"body",
|
||||||
|
"html",
|
||||||
|
"style"
|
||||||
],
|
],
|
||||||
"supposed_usernames": [
|
"supposed_usernames": [
|
||||||
"alex", "god", "admin", "red", "blue", "john"
|
"alex", "god", "admin", "red", "blue", "john"
|
||||||
|
|||||||
+30
-9
@@ -43,7 +43,7 @@ class Submitter:
|
|||||||
"User-Agent": get_random_user_agent(),
|
"User-Agent": get_random_user_agent(),
|
||||||
}
|
}
|
||||||
|
|
||||||
SEPARATORS = "\"'"
|
SEPARATORS = "\"'\n"
|
||||||
|
|
||||||
RATIO = 0.6
|
RATIO = 0.6
|
||||||
TOP_FEATURES = 5
|
TOP_FEATURES = 5
|
||||||
@@ -138,17 +138,19 @@ class Submitter:
|
|||||||
if status == QueryStatus.CLAIMED:
|
if status == QueryStatus.CLAIMED:
|
||||||
changes["disabled"] = True
|
changes["disabled"] = True
|
||||||
elif status == QueryStatus.CLAIMED:
|
elif status == QueryStatus.CLAIMED:
|
||||||
self.logger.warning(
|
print(
|
||||||
f"Not found `{username}` in {site.name}, must be claimed"
|
f"{Fore.YELLOW}[!] Not found `{username}` in {site.name}, must be claimed{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
self.logger.info(results_dict[site.name])
|
self.logger.warning(site.json)
|
||||||
changes["disabled"] = True
|
changes["disabled"] = True
|
||||||
else:
|
else:
|
||||||
self.logger.warning(
|
print(
|
||||||
f"Found `{username}` in {site.name}, must be available"
|
f"{Fore.YELLOW}[!] Found `{username}` in {site.name}, must be available{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
self.logger.info(results_dict[site.name])
|
self.logger.warning(site.json)
|
||||||
changes["disabled"] = True
|
changes["disabled"] = True
|
||||||
|
else:
|
||||||
|
print(f"{Fore.GREEN}[+] {username} is successfully checked: {status} in {site.name}{Style.RESET_ALL}")
|
||||||
|
|
||||||
self.logger.info(f"Site {site.name} checking is finished")
|
self.logger.info(f"Site {site.name} checking is finished")
|
||||||
|
|
||||||
@@ -286,6 +288,10 @@ class Submitter:
|
|||||||
a_minus_b = tokens_a.difference(tokens_b)
|
a_minus_b = tokens_a.difference(tokens_b)
|
||||||
b_minus_a = tokens_b.difference(tokens_a)
|
b_minus_a = tokens_b.difference(tokens_a)
|
||||||
|
|
||||||
|
# additional filtering by html response
|
||||||
|
a_minus_b = [t for t in a_minus_b if not t in non_exists_resp_text]
|
||||||
|
b_minus_a = [t for t in b_minus_a if not t in exists_resp_text]
|
||||||
|
|
||||||
if len(a_minus_b) == len(b_minus_a) == 0:
|
if len(a_minus_b) == len(b_minus_a) == 0:
|
||||||
print("The pages for existing and non-existing account are the same!")
|
print("The pages for existing and non-existing account are the same!")
|
||||||
|
|
||||||
@@ -302,6 +308,8 @@ class Submitter:
|
|||||||
:top_features_count
|
:top_features_count
|
||||||
]
|
]
|
||||||
|
|
||||||
|
self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list])
|
||||||
|
|
||||||
print("Detected text features of existing account: " + ", ".join(presence_list))
|
print("Detected text features of existing account: " + ", ".join(presence_list))
|
||||||
features = input("If features was not detected correctly, write it manually: ")
|
features = input("If features was not detected correctly, write it manually: ")
|
||||||
|
|
||||||
@@ -311,6 +319,8 @@ class Submitter:
|
|||||||
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
|
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
|
||||||
:top_features_count
|
:top_features_count
|
||||||
]
|
]
|
||||||
|
self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list])
|
||||||
|
|
||||||
print(
|
print(
|
||||||
"Detected text features of non-existing account: " + ", ".join(absence_list)
|
"Detected text features of non-existing account: " + ", ".join(absence_list)
|
||||||
)
|
)
|
||||||
@@ -338,7 +348,6 @@ class Submitter:
|
|||||||
async def add_site(self, site):
|
async def add_site(self, site):
|
||||||
sem = asyncio.Semaphore(1)
|
sem = asyncio.Semaphore(1)
|
||||||
print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}")
|
print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}")
|
||||||
print(site.json)
|
|
||||||
|
|
||||||
result = await self.site_self_check(site, sem)
|
result = await self.site_self_check(site, sem)
|
||||||
if result["disabled"]:
|
if result["disabled"]:
|
||||||
@@ -369,6 +378,7 @@ class Submitter:
|
|||||||
|
|
||||||
print("0. finish editing")
|
print("0. finish editing")
|
||||||
print("10. reject and block domain")
|
print("10. reject and block domain")
|
||||||
|
print("11. invalid params, remove")
|
||||||
|
|
||||||
choice = input("\nSelect field number to edit (0-8): ").strip()
|
choice = input("\nSelect field number to edit (0-8): ").strip()
|
||||||
|
|
||||||
@@ -381,6 +391,12 @@ class Submitter:
|
|||||||
"reason": "manual block",
|
"reason": "manual block",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if choice == '11':
|
||||||
|
return {
|
||||||
|
"valid": False,
|
||||||
|
"reason": "remove",
|
||||||
|
}
|
||||||
|
|
||||||
if choice in editable_fields:
|
if choice in editable_fields:
|
||||||
field = editable_fields[choice]
|
field = editable_fields[choice]
|
||||||
current_value = getattr(site, field)
|
current_value = getattr(site, field)
|
||||||
@@ -477,7 +493,7 @@ class Submitter:
|
|||||||
|
|
||||||
if not found:
|
if not found:
|
||||||
print(
|
print(
|
||||||
f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}."
|
f"{Fore.RED}[!] The check for site '{chosen_site.name}' failed!{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
print(
|
print(
|
||||||
"Try to run this mode again and increase features count or choose others."
|
"Try to run this mode again and increase features count or choose others."
|
||||||
@@ -510,4 +526,9 @@ class Submitter:
|
|||||||
site_data = chosen_site.strip_engine_data()
|
site_data = chosen_site.strip_engine_data()
|
||||||
self.logger.debug(site_data.json)
|
self.logger.debug(site_data.json)
|
||||||
self.db.update_site(site_data)
|
self.db.update_site(site_data)
|
||||||
|
|
||||||
|
if self.args.db:
|
||||||
|
print(f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}")
|
||||||
|
self.db.save_to_file(self.args.db)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -3117,7 +3117,6 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M*
|
1.  [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M*
|
||||||
1.  [www.polywork.com (https://www.polywork.com)](https://www.polywork.com)*: top 100M*
|
1.  [www.polywork.com (https://www.polywork.com)](https://www.polywork.com)*: top 100M*
|
||||||
1.  [oshwlab.com (https://oshwlab.com)](https://oshwlab.com)*: top 100M*
|
1.  [oshwlab.com (https://oshwlab.com)](https://oshwlab.com)*: top 100M*
|
||||||
1.  [www.xshaker.net (https://www.xshaker.net)](https://www.xshaker.net)*: top 100M*
|
|
||||||
1.  [chaturbator.su (https://chaturbator.su)](https://chaturbator.su)*: top 100M*
|
1.  [chaturbator.su (https://chaturbator.su)](https://chaturbator.su)*: top 100M*
|
||||||
1.  [imgflip.com (https://imgflip.com)](https://imgflip.com)*: top 100M*
|
1.  [imgflip.com (https://imgflip.com)](https://imgflip.com)*: top 100M*
|
||||||
1.  [www.flickr.com (https://www.flickr.com)](https://www.flickr.com)*: top 100M*
|
1.  [www.flickr.com (https://www.flickr.com)](https://www.flickr.com)*: top 100M*
|
||||||
|
|||||||
@@ -13,4 +13,7 @@ def test_tags_validity(default_db):
|
|||||||
if tag not in tags:
|
if tag not in tags:
|
||||||
unknown_tags.add(tag)
|
unknown_tags.add(tag)
|
||||||
|
|
||||||
|
# make sure all tags are known
|
||||||
|
# if you see "unchecked" tag error, please, do
|
||||||
|
# maigret --db `pwd`/maigret/resources/data.json --self-check --tag unchecked --use-disabled-sites
|
||||||
assert unknown_tags == set()
|
assert unknown_tags == set()
|
||||||
|
|||||||
Reference in New Issue
Block a user