mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Refactoring and linting, added notifications about frequent search errors
This commit is contained in:
+235
-189
@@ -5,135 +5,138 @@ import re
|
||||
import ssl
|
||||
import sys
|
||||
import tqdm
|
||||
import time
|
||||
from typing import Tuple, Optional
|
||||
|
||||
import aiohttp
|
||||
import tqdm.asyncio
|
||||
from aiohttp_socks import ProxyConnector
|
||||
from mock import Mock
|
||||
from python_socks import _errors as proxy_errors
|
||||
from socid_extractor import extract
|
||||
|
||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||
from . import errors
|
||||
from .errors import CheckError
|
||||
from .executors import AsyncioSimpleExecutor, AsyncioProgressbarQueueExecutor
|
||||
from .result import QueryResult, QueryStatus
|
||||
from .sites import MaigretDatabase, MaigretSite
|
||||
from .types import CheckError
|
||||
from .utils import get_random_user_agent
|
||||
|
||||
|
||||
supported_recursive_search_ids = (
|
||||
'yandex_public_id',
|
||||
'gaia_id',
|
||||
'vk_id',
|
||||
'ok_id',
|
||||
'wikimapia_uid',
|
||||
'steam_id',
|
||||
'uidme_uguid',
|
||||
"yandex_public_id",
|
||||
"gaia_id",
|
||||
"vk_id",
|
||||
"ok_id",
|
||||
"wikimapia_uid",
|
||||
"steam_id",
|
||||
"uidme_uguid",
|
||||
)
|
||||
|
||||
common_errors = {
|
||||
'<title>Attention Required! | Cloudflare</title>': CheckError('Captcha', 'Cloudflare'),
|
||||
'Please stand by, while we are checking your browser': CheckError('Bot protection', 'Cloudflare'),
|
||||
'<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
|
||||
'document.getElementById(\'validate_form_submit\').disabled=true': CheckError('Captcha', 'Mail.ru'),
|
||||
'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': CheckError('Bot protection', 'Blazingfast'),
|
||||
'404</h1><p class="error-card__description">Мы не нашли страницу': CheckError('Resolving', 'MegaFon 404 page'),
|
||||
'Доступ к информационному ресурсу ограничен на основании Федерального закона': CheckError('Censorship', 'MGTS'),
|
||||
'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
|
||||
}
|
||||
|
||||
unsupported_characters = '#'
|
||||
unsupported_characters = "#"
|
||||
|
||||
|
||||
async def get_response(request_future, site_name, logger) -> (str, int, CheckError):
|
||||
async def get_response(
|
||||
request_future, site_name, logger
|
||||
) -> Tuple[str, int, Optional[CheckError]]:
|
||||
html_text = None
|
||||
status_code = 0
|
||||
error = CheckError('Error')
|
||||
error: Optional[CheckError] = CheckError("Error")
|
||||
|
||||
try:
|
||||
response = await request_future
|
||||
|
||||
status_code = response.status
|
||||
response_content = await response.content.read()
|
||||
charset = response.charset or 'utf-8'
|
||||
decoded_content = response_content.decode(charset, 'ignore')
|
||||
charset = response.charset or "utf-8"
|
||||
decoded_content = response_content.decode(charset, "ignore")
|
||||
html_text = decoded_content
|
||||
|
||||
if status_code == 0:
|
||||
error = CheckError('Connection lost')
|
||||
error = CheckError("Connection lost")
|
||||
else:
|
||||
error = None
|
||||
|
||||
logger.debug(html_text)
|
||||
|
||||
except asyncio.TimeoutError as e:
|
||||
error = CheckError('Request timeout', str(e))
|
||||
error = CheckError("Request timeout", str(e))
|
||||
except aiohttp.client_exceptions.ClientConnectorError as e:
|
||||
error = CheckError('Connecting failure', str(e))
|
||||
error = CheckError("Connecting failure", str(e))
|
||||
except aiohttp.http_exceptions.BadHttpMessage as e:
|
||||
error = CheckError('HTTP', str(e))
|
||||
error = CheckError("HTTP", str(e))
|
||||
except proxy_errors.ProxyError as e:
|
||||
error = CheckError('Proxy', str(e))
|
||||
error = CheckError("Proxy", str(e))
|
||||
except KeyboardInterrupt:
|
||||
error = CheckError("Interrupted")
|
||||
except Exception as e:
|
||||
# python-specific exceptions
|
||||
if sys.version_info.minor > 6:
|
||||
if isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError):
|
||||
error = CheckError('SSL', str(e))
|
||||
if isinstance(e, ssl.SSLCertVerificationError) or isinstance(
|
||||
e, ssl.SSLError
|
||||
):
|
||||
error = CheckError("SSL", str(e))
|
||||
else:
|
||||
logger.warning(f'Unhandled error while requesting {site_name}: {e}')
|
||||
logger.warning(f"Unhandled error while requesting {site_name}: {e}")
|
||||
logger.debug(e, exc_info=True)
|
||||
error = CheckError('Error', str(e))
|
||||
error = CheckError("Error", str(e))
|
||||
|
||||
# TODO: return only needed information
|
||||
return html_text, status_code, error
|
||||
return str(html_text), status_code, error
|
||||
|
||||
|
||||
async def update_site_dict_from_response(sitename, site_dict, results_info, logger, query_notify):
|
||||
async def update_site_dict_from_response(
|
||||
sitename, site_dict, results_info, logger, query_notify
|
||||
):
|
||||
site_obj = site_dict[sitename]
|
||||
future = site_obj.request_future
|
||||
if not future:
|
||||
# ignore: search by incompatible id type
|
||||
return
|
||||
|
||||
response = await get_response(request_future=future,
|
||||
site_name=sitename,
|
||||
logger=logger)
|
||||
response = await get_response(
|
||||
request_future=future, site_name=sitename, logger=logger
|
||||
)
|
||||
|
||||
return sitename, process_site_result(response, query_notify, logger, results_info, site_obj)
|
||||
return sitename, process_site_result(
|
||||
response, query_notify, logger, results_info, site_obj
|
||||
)
|
||||
|
||||
|
||||
# TODO: move to separate class
|
||||
def detect_error_page(html_text, status_code, fail_flags, ignore_403) -> CheckError:
|
||||
def detect_error_page(
|
||||
html_text, status_code, fail_flags, ignore_403
|
||||
) -> Optional[CheckError]:
|
||||
# Detect service restrictions such as a country restriction
|
||||
for flag, msg in fail_flags.items():
|
||||
if flag in html_text:
|
||||
return CheckError('Site-specific', msg)
|
||||
return CheckError("Site-specific", msg)
|
||||
|
||||
# Detect common restrictions such as provider censorship and bot protection
|
||||
for flag, err in common_errors.items():
|
||||
if flag in html_text:
|
||||
return err
|
||||
err = errors.detect(html_text)
|
||||
if err:
|
||||
return err
|
||||
|
||||
# Detect common site errors
|
||||
if status_code == 403 and not ignore_403:
|
||||
return CheckError('Access denied', '403 status code, use proxy/vpn')
|
||||
return CheckError("Access denied", "403 status code, use proxy/vpn")
|
||||
|
||||
elif status_code >= 500:
|
||||
return CheckError(f'Server', f'{status_code} status code')
|
||||
return CheckError("Server", f"{status_code} status code")
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
|
||||
def process_site_result(
|
||||
response, query_notify, logger, results_info, site: MaigretSite
|
||||
):
|
||||
if not response:
|
||||
return results_info
|
||||
|
||||
fulltags = site.tags
|
||||
|
||||
# Retrieve other site information again
|
||||
username = results_info['username']
|
||||
is_parsing_enabled = results_info['parsing_enabled']
|
||||
username = results_info["username"]
|
||||
is_parsing_enabled = results_info["parsing_enabled"]
|
||||
url = results_info.get("url_user")
|
||||
logger.debug(url)
|
||||
|
||||
@@ -147,7 +150,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
|
||||
# TODO: refactor
|
||||
if not response:
|
||||
logger.error(f'No response for {site.name}')
|
||||
logger.error(f"No response for {site.name}")
|
||||
return results_info
|
||||
|
||||
html_text, status_code, check_error = response
|
||||
@@ -156,28 +159,34 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
response_time = None
|
||||
|
||||
if logger.level == logging.DEBUG:
|
||||
with open('debug.txt', 'a') as f:
|
||||
status = status_code or 'No response'
|
||||
f.write(f'url: {url}\nerror: {check_error}\nr: {status}\n')
|
||||
with open("debug.txt", "a") as f:
|
||||
status = status_code or "No response"
|
||||
f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
|
||||
if html_text:
|
||||
f.write(f'code: {status}\nresponse: {str(html_text)}\n')
|
||||
f.write(f"code: {status}\nresponse: {str(html_text)}\n")
|
||||
|
||||
# additional check for errors
|
||||
if status_code and not check_error:
|
||||
check_error = detect_error_page(html_text, status_code, site.errors, site.ignore403)
|
||||
check_error = detect_error_page(
|
||||
html_text, status_code, site.errors, site.ignore403
|
||||
)
|
||||
|
||||
if site.activation and html_text:
|
||||
is_need_activation = any([s for s in site.activation['marks'] if s in html_text])
|
||||
is_need_activation = any(
|
||||
[s for s in site.activation["marks"] if s in html_text]
|
||||
)
|
||||
if is_need_activation:
|
||||
method = site.activation['method']
|
||||
method = site.activation["method"]
|
||||
try:
|
||||
activate_fun = getattr(ParsingActivator(), method)
|
||||
# TODO: async call
|
||||
activate_fun(site, logger)
|
||||
except AttributeError:
|
||||
logger.warning(f'Activation method {method} for site {site.name} not found!')
|
||||
logger.warning(
|
||||
f"Activation method {method} for site {site.name} not found!"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f'Failed activation {method} for site {site.name}: {e}')
|
||||
logger.warning(f"Failed activation {method} for site {site.name}: {e}")
|
||||
|
||||
site_name = site.pretty_name
|
||||
# presense flags
|
||||
@@ -187,56 +196,75 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
if html_text:
|
||||
if not presense_flags:
|
||||
is_presense_detected = True
|
||||
site.stats['presense_flag'] = None
|
||||
site.stats["presense_flag"] = None
|
||||
else:
|
||||
for presense_flag in presense_flags:
|
||||
if presense_flag in html_text:
|
||||
is_presense_detected = True
|
||||
site.stats['presense_flag'] = presense_flag
|
||||
site.stats["presense_flag"] = presense_flag
|
||||
logger.debug(presense_flag)
|
||||
break
|
||||
|
||||
if check_error:
|
||||
logger.debug(check_error)
|
||||
result = QueryResult(username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.UNKNOWN,
|
||||
query_time=response_time,
|
||||
error=check_error,
|
||||
context=str(CheckError), tags=fulltags)
|
||||
result = QueryResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.UNKNOWN,
|
||||
query_time=response_time,
|
||||
error=check_error,
|
||||
context=str(CheckError),
|
||||
tags=fulltags,
|
||||
)
|
||||
elif check_type == "message":
|
||||
absence_flags = site.absence_strs
|
||||
is_absence_flags_list = isinstance(absence_flags, list)
|
||||
absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
|
||||
absence_flags_set = (
|
||||
set(absence_flags) if is_absence_flags_list else {absence_flags}
|
||||
)
|
||||
# Checks if the error message is in the HTML
|
||||
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
|
||||
is_absence_detected = any(
|
||||
[(absence_flag in html_text) for absence_flag in absence_flags_set]
|
||||
)
|
||||
if not is_absence_detected and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time, tags=fulltags)
|
||||
result = QueryResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time,
|
||||
tags=fulltags,
|
||||
)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time, tags=fulltags)
|
||||
result = QueryResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time,
|
||||
tags=fulltags,
|
||||
)
|
||||
elif check_type == "status_code":
|
||||
# Checks if the status code of the response is 2XX
|
||||
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time, tags=fulltags)
|
||||
result = QueryResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time,
|
||||
tags=fulltags,
|
||||
)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time, tags=fulltags)
|
||||
result = QueryResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time,
|
||||
tags=fulltags,
|
||||
)
|
||||
elif check_type == "response_url":
|
||||
# For this detection method, we have turned off the redirect.
|
||||
# So, there is no need to check the response URL: it will always
|
||||
@@ -244,21 +272,28 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
# code indicates that the request was successful (i.e. no 404, or
|
||||
# forward to some odd redirect).
|
||||
if 200 <= status_code < 300 and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time, tags=fulltags)
|
||||
result = QueryResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time,
|
||||
tags=fulltags,
|
||||
)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time, tags=fulltags)
|
||||
result = QueryResult(
|
||||
username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time,
|
||||
tags=fulltags,
|
||||
)
|
||||
else:
|
||||
# It should be impossible to ever get here...
|
||||
raise ValueError(f"Unknown check type '{check_type}' for "
|
||||
f"site '{site.name}'")
|
||||
raise ValueError(
|
||||
f"Unknown check type '{check_type}' for " f"site '{site.name}'"
|
||||
)
|
||||
|
||||
extracted_ids_data = {}
|
||||
|
||||
@@ -266,39 +301,49 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
try:
|
||||
extracted_ids_data = extract(html_text)
|
||||
except Exception as e:
|
||||
logger.warning(f'Error while parsing {site.name}: {e}', exc_info=True)
|
||||
logger.warning(f"Error while parsing {site.name}: {e}", exc_info=True)
|
||||
|
||||
if extracted_ids_data:
|
||||
new_usernames = {}
|
||||
for k, v in extracted_ids_data.items():
|
||||
if 'username' in k:
|
||||
new_usernames[v] = 'username'
|
||||
if "username" in k:
|
||||
new_usernames[v] = "username"
|
||||
if k in supported_recursive_search_ids:
|
||||
new_usernames[v] = k
|
||||
|
||||
results_info['ids_usernames'] = new_usernames
|
||||
results_info['ids_links'] = eval(extracted_ids_data.get('links', '[]'))
|
||||
results_info["ids_usernames"] = new_usernames
|
||||
results_info["ids_links"] = eval(extracted_ids_data.get("links", "[]"))
|
||||
result.ids_data = extracted_ids_data
|
||||
|
||||
# Notify caller about results of query.
|
||||
query_notify.update(result, site.similar_search)
|
||||
|
||||
# Save status of request
|
||||
results_info['status'] = result
|
||||
results_info["status"] = result
|
||||
|
||||
# Save results from request
|
||||
results_info['http_status'] = status_code
|
||||
results_info['is_similar'] = site.similar_search
|
||||
results_info["http_status"] = status_code
|
||||
results_info["is_similar"] = site.similar_search
|
||||
# results_site['response_text'] = html_text
|
||||
results_info['rank'] = site.alexa_rank
|
||||
results_info["rank"] = site.alexa_rank
|
||||
return results_info
|
||||
|
||||
|
||||
async def maigret(username, site_dict, logger, query_notify=None,
|
||||
proxy=None, timeout=None, is_parsing_enabled=False,
|
||||
id_type='username', debug=False, forced=False,
|
||||
max_connections=100, no_progressbar=False,
|
||||
cookies=None):
|
||||
async def maigret(
|
||||
username,
|
||||
site_dict,
|
||||
logger,
|
||||
query_notify=None,
|
||||
proxy=None,
|
||||
timeout=None,
|
||||
is_parsing_enabled=False,
|
||||
id_type="username",
|
||||
debug=False,
|
||||
forced=False,
|
||||
max_connections=100,
|
||||
no_progressbar=False,
|
||||
cookies=None,
|
||||
):
|
||||
"""Main search func
|
||||
|
||||
Checks for existence of username on certain sites.
|
||||
@@ -342,24 +387,28 @@ async def maigret(username, site_dict, logger, query_notify=None,
|
||||
query_notify.start(username, id_type)
|
||||
|
||||
# TODO: connector
|
||||
connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
|
||||
connector = (
|
||||
ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
|
||||
)
|
||||
# connector = aiohttp.TCPConnector(ssl=False)
|
||||
connector.verify_ssl = False
|
||||
|
||||
cookie_jar = None
|
||||
if cookies:
|
||||
logger.debug(f'Using cookies jar file {cookies}')
|
||||
logger.debug(f"Using cookies jar file {cookies}")
|
||||
cookie_jar = await import_aiohttp_cookies(cookies)
|
||||
|
||||
session = aiohttp.ClientSession(connector=connector, trust_env=True, cookie_jar=cookie_jar)
|
||||
session = aiohttp.ClientSession(
|
||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||
)
|
||||
|
||||
if logger.level == logging.DEBUG:
|
||||
future = session.get(url='https://icanhazip.com')
|
||||
future = session.get(url="https://icanhazip.com")
|
||||
ip, status, check_error = await get_response(future, None, logger)
|
||||
if ip:
|
||||
logger.debug(f'My IP is: {ip.strip()}')
|
||||
logger.debug(f"My IP is: {ip.strip()}")
|
||||
else:
|
||||
logger.debug(f'IP requesting {check_error[0]}: {check_error[1]}')
|
||||
logger.debug(f"IP requesting {check_error[0]}: {check_error[1]}")
|
||||
|
||||
# Results from analysis of all sites
|
||||
results_total = {}
|
||||
@@ -371,46 +420,45 @@ async def maigret(username, site_dict, logger, query_notify=None,
|
||||
continue
|
||||
|
||||
if site.disabled and not forced:
|
||||
logger.debug(f'Site {site.name} is disabled, skipping...')
|
||||
logger.debug(f"Site {site.name} is disabled, skipping...")
|
||||
continue
|
||||
|
||||
# Results from analysis of this specific site
|
||||
results_site = {}
|
||||
|
||||
# Record URL of main site and username
|
||||
results_site['username'] = username
|
||||
results_site['parsing_enabled'] = is_parsing_enabled
|
||||
results_site['url_main'] = site.url_main
|
||||
results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
|
||||
results_site["username"] = username
|
||||
results_site["parsing_enabled"] = is_parsing_enabled
|
||||
results_site["url_main"] = site.url_main
|
||||
results_site["cookies"] = (
|
||||
cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
|
||||
)
|
||||
|
||||
headers = {
|
||||
'User-Agent': get_random_user_agent(),
|
||||
"User-Agent": get_random_user_agent(),
|
||||
}
|
||||
|
||||
headers.update(site.headers)
|
||||
|
||||
if 'url' not in site.__dict__:
|
||||
logger.error('No URL for site %s', site.name)
|
||||
if "url" not in site.__dict__:
|
||||
logger.error("No URL for site %s", site.name)
|
||||
# URL of user on site (if it exists)
|
||||
url = site.url.format(
|
||||
urlMain=site.url_main,
|
||||
urlSubpath=site.url_subpath,
|
||||
username=username
|
||||
urlMain=site.url_main, urlSubpath=site.url_subpath, username=username
|
||||
)
|
||||
# workaround to prevent slash errors
|
||||
url = re.sub('(?<!:)/+', '/', url)
|
||||
url = re.sub("(?<!:)/+", "/", url)
|
||||
|
||||
# Don't make request if username is invalid for the site
|
||||
if site.regex_check and re.search(site.regex_check, username) is None:
|
||||
# No need to do the check at the site: this user name is not allowed.
|
||||
results_site['status'] = QueryResult(username,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.ILLEGAL)
|
||||
results_site["status"] = QueryResult(
|
||||
username, site_name, url, QueryStatus.ILLEGAL
|
||||
)
|
||||
results_site["url_user"] = ""
|
||||
results_site['http_status'] = ""
|
||||
results_site['response_text'] = ""
|
||||
query_notify.update(results_site['status'])
|
||||
results_site["http_status"] = ""
|
||||
results_site["response_text"] = ""
|
||||
query_notify.update(results_site["status"])
|
||||
else:
|
||||
# URL of user on site (if it exists)
|
||||
results_site["url_user"] = url
|
||||
@@ -428,9 +476,9 @@ async def maigret(username, site_dict, logger, query_notify=None,
|
||||
)
|
||||
|
||||
for k, v in site.get_params.items():
|
||||
url_probe += f'&{k}={v}'
|
||||
url_probe += f"&{k}={v}"
|
||||
|
||||
if site.check_type == 'status_code' and site.request_head_only:
|
||||
if site.check_type == "status_code" and site.request_head_only:
|
||||
# In most cases when we are detecting by status code,
|
||||
# it is not necessary to get the entire body: we can
|
||||
# detect fine with just the HEAD response.
|
||||
@@ -451,10 +499,12 @@ async def maigret(username, site_dict, logger, query_notify=None,
|
||||
# The final result of the request will be what is available.
|
||||
allow_redirects = True
|
||||
|
||||
future = request_method(url=url_probe, headers=headers,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
)
|
||||
future = request_method(
|
||||
url=url_probe,
|
||||
headers=headers,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
# Store future in data for access later
|
||||
# TODO: move to separate obj
|
||||
@@ -465,35 +515,25 @@ async def maigret(username, site_dict, logger, query_notify=None,
|
||||
|
||||
coroutines = []
|
||||
for sitename, result_obj in results_total.items():
|
||||
coroutines.append((update_site_dict_from_response, [sitename, site_dict, result_obj, logger, query_notify], {}))
|
||||
coroutines.append(
|
||||
(
|
||||
update_site_dict_from_response,
|
||||
[sitename, site_dict, result_obj, logger, query_notify],
|
||||
{},
|
||||
)
|
||||
)
|
||||
|
||||
if no_progressbar:
|
||||
executor = AsyncioSimpleExecutor(logger=logger)
|
||||
else:
|
||||
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=max_connections, timeout=timeout+0.5)
|
||||
executor = AsyncioProgressbarQueueExecutor(
|
||||
logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
|
||||
)
|
||||
|
||||
results = await executor.run(coroutines)
|
||||
|
||||
await session.close()
|
||||
|
||||
# TODO: move to separate function
|
||||
errors = {}
|
||||
for el in results:
|
||||
if not el:
|
||||
continue
|
||||
_, r = el
|
||||
if r and isinstance(r, dict) and r.get('status'):
|
||||
if not isinstance(r['status'], QueryResult):
|
||||
continue
|
||||
|
||||
err = r['status'].error
|
||||
if not err:
|
||||
continue
|
||||
errors[err.type] = errors.get(err.type, 0) + 1
|
||||
|
||||
for err, count in sorted(errors.items(), key=lambda x: x[1], reverse=True):
|
||||
logger.warning(f'Errors of type "{err}": {count}')
|
||||
|
||||
# Notify caller that all queries are finished.
|
||||
query_notify.finish()
|
||||
|
||||
@@ -537,7 +577,7 @@ def timeout_check(value):
|
||||
|
||||
async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
|
||||
changes = {
|
||||
'disabled': False,
|
||||
"disabled": False,
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -550,7 +590,7 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
|
||||
logger.error(site.__dict__)
|
||||
check_data = []
|
||||
|
||||
logger.info(f'Checking {site.name}...')
|
||||
logger.info(f"Checking {site.name}...")
|
||||
|
||||
for username, status in check_data:
|
||||
async with semaphore:
|
||||
@@ -568,10 +608,10 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
|
||||
# TODO: make normal checking
|
||||
if site.name not in results_dict:
|
||||
logger.info(results_dict)
|
||||
changes['disabled'] = True
|
||||
changes["disabled"] = True
|
||||
continue
|
||||
|
||||
result = results_dict[site.name]['status']
|
||||
result = results_dict[site.name]["status"]
|
||||
|
||||
site_status = result.status
|
||||
|
||||
@@ -580,33 +620,37 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
|
||||
msgs = site.absence_strs
|
||||
etype = site.check_type
|
||||
logger.warning(
|
||||
f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
|
||||
f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}"
|
||||
)
|
||||
# don't disable in case of available username
|
||||
if status == QueryStatus.CLAIMED:
|
||||
changes['disabled'] = True
|
||||
changes["disabled"] = True
|
||||
elif status == QueryStatus.CLAIMED:
|
||||
logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
|
||||
logger.warning(
|
||||
f"Not found `{username}` in {site.name}, must be claimed"
|
||||
)
|
||||
logger.info(results_dict[site.name])
|
||||
changes['disabled'] = True
|
||||
changes["disabled"] = True
|
||||
else:
|
||||
logger.warning(f'Found `{username}` in {site.name}, must be available')
|
||||
logger.warning(f"Found `{username}` in {site.name}, must be available")
|
||||
logger.info(results_dict[site.name])
|
||||
changes['disabled'] = True
|
||||
changes["disabled"] = True
|
||||
|
||||
logger.info(f'Site {site.name} checking is finished')
|
||||
logger.info(f"Site {site.name} checking is finished")
|
||||
|
||||
if changes['disabled'] != site.disabled:
|
||||
site.disabled = changes['disabled']
|
||||
if changes["disabled"] != site.disabled:
|
||||
site.disabled = changes["disabled"]
|
||||
db.update_site(site)
|
||||
if not silent:
|
||||
action = 'Disabled' if site.disabled else 'Enabled'
|
||||
print(f'{action} site {site.name}...')
|
||||
action = "Disabled" if site.disabled else "Enabled"
|
||||
print(f"{action} site {site.name}...")
|
||||
|
||||
return changes
|
||||
|
||||
|
||||
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
|
||||
max_connections=10) -> bool:
|
||||
async def self_check(
|
||||
db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10
|
||||
) -> bool:
|
||||
sem = asyncio.Semaphore(max_connections)
|
||||
tasks = []
|
||||
all_sites = site_data
|
||||
@@ -628,13 +672,15 @@ async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
|
||||
total_disabled = disabled_new_count - disabled_old_count
|
||||
|
||||
if total_disabled >= 0:
|
||||
message = 'Disabled'
|
||||
message = "Disabled"
|
||||
else:
|
||||
message = 'Enabled'
|
||||
message = "Enabled"
|
||||
total_disabled *= -1
|
||||
|
||||
if not silent:
|
||||
print(
|
||||
f'{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. Run with `--info` flag to get more information')
|
||||
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
||||
"Run with `--info` flag to get more information"
|
||||
)
|
||||
|
||||
return total_disabled != 0
|
||||
|
||||
Reference in New Issue
Block a user