mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Merge pull request #113 from soxoj/errors-stats
Errors stats MVP, some fp fixes
This commit is contained in:
+71
-173
@@ -6,7 +6,6 @@ import ssl
|
|||||||
import sys
|
import sys
|
||||||
import tqdm
|
import tqdm
|
||||||
import time
|
import time
|
||||||
from typing import Callable, Any, Iterable, Tuple
|
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import tqdm.asyncio
|
import tqdm.asyncio
|
||||||
@@ -16,8 +15,11 @@ from python_socks import _errors as proxy_errors
|
|||||||
from socid_extractor import extract
|
from socid_extractor import extract
|
||||||
|
|
||||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||||
|
from .executors import AsyncioSimpleExecutor, AsyncioProgressbarQueueExecutor
|
||||||
from .result import QueryResult, QueryStatus
|
from .result import QueryResult, QueryStatus
|
||||||
from .sites import MaigretDatabase, MaigretSite
|
from .sites import MaigretDatabase, MaigretSite
|
||||||
|
from .types import CheckError
|
||||||
|
|
||||||
|
|
||||||
supported_recursive_search_ids = (
|
supported_recursive_search_ids = (
|
||||||
'yandex_public_id',
|
'yandex_public_id',
|
||||||
@@ -30,139 +32,23 @@ supported_recursive_search_ids = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
common_errors = {
|
common_errors = {
|
||||||
'<title>Attention Required! | Cloudflare</title>': 'Cloudflare captcha',
|
'<title>Attention Required! | Cloudflare</title>': CheckError('Captcha', 'Cloudflare'),
|
||||||
'Please stand by, while we are checking your browser': 'Cloudflare captcha',
|
'Please stand by, while we are checking your browser': CheckError('Bot protection', 'Cloudflare'),
|
||||||
'<title>Доступ ограничен</title>': 'Rostelecom censorship',
|
'<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
|
||||||
'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha',
|
'document.getElementById(\'validate_form_submit\').disabled=true': CheckError('Captcha', 'Mail.ru'),
|
||||||
'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection',
|
'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': CheckError('Bot protection', 'Blazingfast'),
|
||||||
'404</h1><p class="error-card__description">Мы не нашли страницу': 'MegaFon 404 page',
|
'404</h1><p class="error-card__description">Мы не нашли страницу': CheckError('Resolving', 'MegaFon 404 page'),
|
||||||
'Доступ к информационному ресурсу ограничен на основании Федерального закона': 'MGTS censorship',
|
'Доступ к информационному ресурсу ограничен на основании Федерального закона': CheckError('Censorship', 'MGTS'),
|
||||||
'Incapsula incident ID': 'Incapsula antibot protection',
|
'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
|
||||||
}
|
}
|
||||||
|
|
||||||
unsupported_characters = '#'
|
unsupported_characters = '#'
|
||||||
|
|
||||||
QueryDraft = Tuple[Callable, Any, Any]
|
|
||||||
QueriesDraft = Iterable[QueryDraft]
|
|
||||||
|
|
||||||
|
async def get_response(request_future, site_name, logger) -> (str, int, CheckError):
|
||||||
def create_task_func():
|
|
||||||
if sys.version_info.minor > 6:
|
|
||||||
create_asyncio_task = asyncio.create_task
|
|
||||||
else:
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
create_asyncio_task = loop.create_task
|
|
||||||
return create_asyncio_task
|
|
||||||
|
|
||||||
class AsyncExecutor:
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
self.logger = kwargs['logger']
|
|
||||||
|
|
||||||
async def run(self, tasks: QueriesDraft):
|
|
||||||
start_time = time.time()
|
|
||||||
results = await self._run(tasks)
|
|
||||||
self.execution_time = time.time() - start_time
|
|
||||||
self.logger.debug(f'Spent time: {self.execution_time}')
|
|
||||||
return results
|
|
||||||
|
|
||||||
async def _run(self, tasks: QueriesDraft):
|
|
||||||
await asyncio.sleep(0)
|
|
||||||
|
|
||||||
|
|
||||||
class AsyncioSimpleExecutor(AsyncExecutor):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
async def _run(self, tasks: QueriesDraft):
|
|
||||||
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
|
||||||
return await asyncio.gather(*futures)
|
|
||||||
|
|
||||||
|
|
||||||
class AsyncioProgressbarExecutor(AsyncExecutor):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
async def _run(self, tasks: QueriesDraft):
|
|
||||||
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
|
||||||
results = []
|
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(futures):
|
|
||||||
results.append(await f)
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1))
|
|
||||||
|
|
||||||
async def _run(self, tasks: QueriesDraft):
|
|
||||||
async def _wrap_query(q: QueryDraft):
|
|
||||||
async with self.semaphore:
|
|
||||||
f, args, kwargs = q
|
|
||||||
return await f(*args, **kwargs)
|
|
||||||
|
|
||||||
async def semaphore_gather(tasks: QueriesDraft):
|
|
||||||
coros = [_wrap_query(q) for q in tasks]
|
|
||||||
results = []
|
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(coros):
|
|
||||||
results.append(await f)
|
|
||||||
return results
|
|
||||||
|
|
||||||
return await semaphore_gather(tasks)
|
|
||||||
|
|
||||||
|
|
||||||
class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
self.workers_count = kwargs.get('in_parallel', 10)
|
|
||||||
self.progress_func = kwargs.get('progress_func', tqdm.tqdm)
|
|
||||||
self.queue = asyncio.Queue(self.workers_count)
|
|
||||||
self.timeout = kwargs.get('timeout')
|
|
||||||
|
|
||||||
async def worker(self):
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
f, args, kwargs = self.queue.get_nowait()
|
|
||||||
except asyncio.QueueEmpty:
|
|
||||||
return
|
|
||||||
|
|
||||||
query_future = f(*args, **kwargs)
|
|
||||||
query_task = create_task_func()(query_future)
|
|
||||||
try:
|
|
||||||
result = await asyncio.wait_for(query_task, timeout=self.timeout)
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
result = None
|
|
||||||
|
|
||||||
self.results.append(result)
|
|
||||||
self.progress.update(1)
|
|
||||||
self.queue.task_done()
|
|
||||||
|
|
||||||
async def _run(self, queries: QueriesDraft):
|
|
||||||
self.results = []
|
|
||||||
|
|
||||||
queries_list = list(queries)
|
|
||||||
|
|
||||||
min_workers = min(len(queries_list), self.workers_count)
|
|
||||||
|
|
||||||
workers = [create_task_func()(self.worker())
|
|
||||||
for _ in range(min_workers)]
|
|
||||||
|
|
||||||
self.progress = self.progress_func(total=len(queries_list))
|
|
||||||
for t in queries_list:
|
|
||||||
await self.queue.put(t)
|
|
||||||
await self.queue.join()
|
|
||||||
for w in workers:
|
|
||||||
w.cancel()
|
|
||||||
self.progress.close()
|
|
||||||
return self.results
|
|
||||||
|
|
||||||
|
|
||||||
async def get_response(request_future, site_name, logger):
|
|
||||||
html_text = None
|
html_text = None
|
||||||
status_code = 0
|
status_code = 0
|
||||||
|
error = CheckError('Error')
|
||||||
error_text = "General Unknown Error"
|
|
||||||
expection_text = None
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await request_future
|
response = await request_future
|
||||||
@@ -173,37 +59,33 @@ async def get_response(request_future, site_name, logger):
|
|||||||
decoded_content = response_content.decode(charset, 'ignore')
|
decoded_content = response_content.decode(charset, 'ignore')
|
||||||
html_text = decoded_content
|
html_text = decoded_content
|
||||||
|
|
||||||
if status_code > 0:
|
if status_code == 0:
|
||||||
error_text = None
|
error = CheckError('Connection lost')
|
||||||
|
else:
|
||||||
|
error = None
|
||||||
|
|
||||||
logger.debug(html_text)
|
logger.debug(html_text)
|
||||||
|
|
||||||
except asyncio.TimeoutError as errt:
|
except asyncio.TimeoutError as e:
|
||||||
error_text = "Timeout Error"
|
error = CheckError('Request timeout', str(e))
|
||||||
expection_text = str(errt)
|
except aiohttp.client_exceptions.ClientConnectorError as e:
|
||||||
except aiohttp.client_exceptions.ClientConnectorError as err:
|
error = CheckError('Connecting failure', str(e))
|
||||||
error_text = "Error Connecting"
|
except aiohttp.http_exceptions.BadHttpMessage as e:
|
||||||
expection_text = str(err)
|
error = CheckError('HTTP', str(e))
|
||||||
except aiohttp.http_exceptions.BadHttpMessage as err:
|
except proxy_errors.ProxyError as e:
|
||||||
error_text = "HTTP Error"
|
error = CheckError('Proxy', str(e))
|
||||||
expection_text = str(err)
|
except Exception as e:
|
||||||
except proxy_errors.ProxyError as err:
|
|
||||||
error_text = "Proxy Error"
|
|
||||||
expection_text = str(err)
|
|
||||||
except Exception as err:
|
|
||||||
# python-specific exceptions
|
# python-specific exceptions
|
||||||
if sys.version_info.minor > 6:
|
if sys.version_info.minor > 6:
|
||||||
if isinstance(err, ssl.SSLCertVerificationError) or isinstance(err, ssl.SSLError):
|
if isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError):
|
||||||
error_text = "SSL Error"
|
error = CheckError('SSL', str(e))
|
||||||
expection_text = str(err)
|
|
||||||
else:
|
else:
|
||||||
logger.warning(f'Unhandled error while requesting {site_name}: {err}')
|
logger.warning(f'Unhandled error while requesting {site_name}: {e}')
|
||||||
logger.debug(err, exc_info=True)
|
logger.debug(e, exc_info=True)
|
||||||
error_text = "Some Error"
|
error = CheckError('Error', str(e))
|
||||||
expection_text = str(err)
|
|
||||||
|
|
||||||
# TODO: return only needed information
|
# TODO: return only needed information
|
||||||
return html_text, status_code, error_text, expection_text
|
return html_text, status_code, error
|
||||||
|
|
||||||
|
|
||||||
async def update_site_dict_from_response(sitename, site_dict, results_info, logger, query_notify):
|
async def update_site_dict_from_response(sitename, site_dict, results_info, logger, query_notify):
|
||||||
@@ -221,24 +103,25 @@ async def update_site_dict_from_response(sitename, site_dict, results_info, logg
|
|||||||
|
|
||||||
|
|
||||||
# TODO: move to separate class
|
# TODO: move to separate class
|
||||||
def detect_error_page(html_text, status_code, fail_flags, ignore_403):
|
def detect_error_page(html_text, status_code, fail_flags, ignore_403) -> CheckError:
|
||||||
# Detect service restrictions such as a country restriction
|
# Detect service restrictions such as a country restriction
|
||||||
for flag, msg in fail_flags.items():
|
for flag, msg in fail_flags.items():
|
||||||
if flag in html_text:
|
if flag in html_text:
|
||||||
return 'Some site error', msg
|
return CheckError('Site-specific', msg)
|
||||||
|
|
||||||
# Detect common restrictions such as provider censorship and bot protection
|
# Detect common restrictions such as provider censorship and bot protection
|
||||||
for flag, msg in common_errors.items():
|
for flag, err in common_errors.items():
|
||||||
if flag in html_text:
|
if flag in html_text:
|
||||||
return 'Error', msg
|
return err
|
||||||
|
|
||||||
# Detect common site errors
|
# Detect common site errors
|
||||||
if status_code == 403 and not ignore_403:
|
if status_code == 403 and not ignore_403:
|
||||||
return 'Access denied', 'Access denied, use proxy/vpn'
|
return CheckError('Access denied', '403 status code, use proxy/vpn')
|
||||||
elif status_code >= 500:
|
|
||||||
return f'Error {status_code}', f'Site error {status_code}'
|
|
||||||
|
|
||||||
return None, None
|
elif status_code >= 500:
|
||||||
|
return CheckError(f'Server', f'{status_code} status code')
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
|
def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
|
||||||
@@ -261,16 +144,12 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
|||||||
# Get the expected check type
|
# Get the expected check type
|
||||||
check_type = site.check_type
|
check_type = site.check_type
|
||||||
|
|
||||||
# Get the failure messages and comments
|
|
||||||
failure_errors = site.errors
|
|
||||||
|
|
||||||
# TODO: refactor
|
# TODO: refactor
|
||||||
if not response:
|
if not response:
|
||||||
logger.error(f'No response for {site.name}')
|
logger.error(f'No response for {site.name}')
|
||||||
return results_info
|
return results_info
|
||||||
|
|
||||||
html_text, status_code, error_text, expection_text = response
|
html_text, status_code, check_error = response
|
||||||
site_error_text = '?'
|
|
||||||
|
|
||||||
# TODO: add elapsed request time counting
|
# TODO: add elapsed request time counting
|
||||||
response_time = None
|
response_time = None
|
||||||
@@ -278,13 +157,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
|||||||
if logger.level == logging.DEBUG:
|
if logger.level == logging.DEBUG:
|
||||||
with open('debug.txt', 'a') as f:
|
with open('debug.txt', 'a') as f:
|
||||||
status = status_code or 'No response'
|
status = status_code or 'No response'
|
||||||
f.write(f'url: {url}\nerror: {str(error_text)}\nr: {status}\n')
|
f.write(f'url: {url}\nerror: {check_error}\nr: {status}\n')
|
||||||
if html_text:
|
if html_text:
|
||||||
f.write(f'code: {status}\nresponse: {str(html_text)}\n')
|
f.write(f'code: {status}\nresponse: {str(html_text)}\n')
|
||||||
|
|
||||||
if status_code and not error_text:
|
# additional check for errors
|
||||||
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
|
if status_code and not check_error:
|
||||||
site.ignore403)
|
check_error = detect_error_page(html_text, status_code, site.errors, site.ignore403)
|
||||||
|
|
||||||
if site.activation and html_text:
|
if site.activation and html_text:
|
||||||
is_need_activation = any([s for s in site.activation['marks'] if s in html_text])
|
is_need_activation = any([s for s in site.activation['marks'] if s in html_text])
|
||||||
@@ -312,17 +191,18 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
|||||||
if presense_flag in html_text:
|
if presense_flag in html_text:
|
||||||
is_presense_detected = True
|
is_presense_detected = True
|
||||||
site.stats['presense_flag'] = presense_flag
|
site.stats['presense_flag'] = presense_flag
|
||||||
logger.info(presense_flag)
|
logger.debug(presense_flag)
|
||||||
break
|
break
|
||||||
|
|
||||||
if error_text is not None:
|
if check_error:
|
||||||
logger.debug(error_text)
|
logger.debug(check_error)
|
||||||
result = QueryResult(username,
|
result = QueryResult(username,
|
||||||
site.name,
|
site.name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.UNKNOWN,
|
QueryStatus.UNKNOWN,
|
||||||
query_time=response_time,
|
query_time=response_time,
|
||||||
context=f'{error_text}: {site_error_text}', tags=fulltags)
|
error=check_error,
|
||||||
|
context=str(CheckError), tags=fulltags)
|
||||||
elif check_type == "message":
|
elif check_type == "message":
|
||||||
absence_flags = site.absence_strs
|
absence_flags = site.absence_strs
|
||||||
is_absence_flags_list = isinstance(absence_flags, list)
|
is_absence_flags_list = isinstance(absence_flags, list)
|
||||||
@@ -473,11 +353,11 @@ async def maigret(username, site_dict, logger, query_notify=None,
|
|||||||
|
|
||||||
if logger.level == logging.DEBUG:
|
if logger.level == logging.DEBUG:
|
||||||
future = session.get(url='https://icanhazip.com')
|
future = session.get(url='https://icanhazip.com')
|
||||||
ip, status, error, expection = await get_response(future, None, logger)
|
ip, status, check_error = await get_response(future, None, logger)
|
||||||
if ip:
|
if ip:
|
||||||
logger.debug(f'My IP is: {ip.strip()}')
|
logger.debug(f'My IP is: {ip.strip()}')
|
||||||
else:
|
else:
|
||||||
logger.debug(f'IP requesting {error}: {expection}')
|
logger.debug(f'IP requesting {check_error[0]}: {check_error[1]}')
|
||||||
|
|
||||||
# Results from analysis of all sites
|
# Results from analysis of all sites
|
||||||
results_total = {}
|
results_total = {}
|
||||||
@@ -594,6 +474,24 @@ async def maigret(username, site_dict, logger, query_notify=None,
|
|||||||
|
|
||||||
await session.close()
|
await session.close()
|
||||||
|
|
||||||
|
# TODO: move to separate function
|
||||||
|
errors = {}
|
||||||
|
for el in results:
|
||||||
|
if not el:
|
||||||
|
continue
|
||||||
|
_, r = el
|
||||||
|
if r and isinstance(r, dict) and r.get('status'):
|
||||||
|
if not isinstance(r['status'], QueryResult):
|
||||||
|
continue
|
||||||
|
|
||||||
|
err = r['status'].error
|
||||||
|
if not err:
|
||||||
|
continue
|
||||||
|
errors[err.type] = errors.get(err.type, 0) + 1
|
||||||
|
|
||||||
|
for err, count in sorted(errors.items(), key=lambda x: x[1], reverse=True):
|
||||||
|
logger.warning(f'Errors of type "{err}": {count}')
|
||||||
|
|
||||||
# Notify caller that all queries are finished.
|
# Notify caller that all queries are finished.
|
||||||
query_notify.finish()
|
query_notify.finish()
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,119 @@
|
|||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
import tqdm
|
||||||
|
import sys
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
from .types import QueryDraft
|
||||||
|
|
||||||
|
|
||||||
|
def create_task_func():
|
||||||
|
if sys.version_info.minor > 6:
|
||||||
|
create_asyncio_task = asyncio.create_task
|
||||||
|
else:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
create_asyncio_task = loop.create_task
|
||||||
|
return create_asyncio_task
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncExecutor:
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
self.logger = kwargs['logger']
|
||||||
|
|
||||||
|
async def run(self, tasks: Iterable[QueryDraft]):
|
||||||
|
start_time = time.time()
|
||||||
|
results = await self._run(tasks)
|
||||||
|
self.execution_time = time.time() - start_time
|
||||||
|
self.logger.debug(f'Spent time: {self.execution_time}')
|
||||||
|
return results
|
||||||
|
|
||||||
|
async def _run(self, tasks: Iterable[QueryDraft]):
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncioSimpleExecutor(AsyncExecutor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
async def _run(self, tasks: Iterable[QueryDraft]):
|
||||||
|
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
||||||
|
return await asyncio.gather(*futures)
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncioProgressbarExecutor(AsyncExecutor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
async def _run(self, tasks: Iterable[QueryDraft]):
|
||||||
|
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
|
||||||
|
results = []
|
||||||
|
for f in tqdm.asyncio.tqdm.as_completed(futures):
|
||||||
|
results.append(await f)
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1))
|
||||||
|
|
||||||
|
async def _run(self, tasks: Iterable[QueryDraft]):
|
||||||
|
async def _wrap_query(q: QueryDraft):
|
||||||
|
async with self.semaphore:
|
||||||
|
f, args, kwargs = q
|
||||||
|
return await f(*args, **kwargs)
|
||||||
|
|
||||||
|
async def semaphore_gather(tasks: Iterable[QueryDraft]):
|
||||||
|
coros = [_wrap_query(q) for q in tasks]
|
||||||
|
results = []
|
||||||
|
for f in tqdm.asyncio.tqdm.as_completed(coros):
|
||||||
|
results.append(await f)
|
||||||
|
return results
|
||||||
|
|
||||||
|
return await semaphore_gather(tasks)
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.workers_count = kwargs.get('in_parallel', 10)
|
||||||
|
self.progress_func = kwargs.get('progress_func', tqdm.tqdm)
|
||||||
|
self.queue = asyncio.Queue(self.workers_count)
|
||||||
|
self.timeout = kwargs.get('timeout')
|
||||||
|
|
||||||
|
async def worker(self):
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
f, args, kwargs = self.queue.get_nowait()
|
||||||
|
except asyncio.QueueEmpty:
|
||||||
|
return
|
||||||
|
|
||||||
|
query_future = f(*args, **kwargs)
|
||||||
|
query_task = create_task_func()(query_future)
|
||||||
|
try:
|
||||||
|
result = await asyncio.wait_for(query_task, timeout=self.timeout)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
result = None
|
||||||
|
|
||||||
|
self.results.append(result)
|
||||||
|
self.progress.update(1)
|
||||||
|
self.queue.task_done()
|
||||||
|
|
||||||
|
async def _run(self, queries: Iterable[QueryDraft]):
|
||||||
|
self.results = []
|
||||||
|
|
||||||
|
queries_list = list(queries)
|
||||||
|
|
||||||
|
min_workers = min(len(queries_list), self.workers_count)
|
||||||
|
|
||||||
|
workers = [create_task_func()(self.worker())
|
||||||
|
for _ in range(min_workers)]
|
||||||
|
|
||||||
|
self.progress = self.progress_func(total=len(queries_list))
|
||||||
|
for t in queries_list:
|
||||||
|
await self.queue.put(t)
|
||||||
|
await self.queue.join()
|
||||||
|
for w in workers:
|
||||||
|
w.cancel()
|
||||||
|
self.progress.close()
|
||||||
|
return self.results
|
||||||
+1
-1
@@ -241,7 +241,7 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
self.color,
|
self.color,
|
||||||
'?', result.site_name,
|
'?', result.site_name,
|
||||||
Fore.RED, Fore.RED,
|
Fore.RED, Fore.RED,
|
||||||
self.result.context + ids_data_text
|
str(self.result.error) + ids_data_text
|
||||||
)
|
)
|
||||||
elif result.status == QueryStatus.ILLEGAL:
|
elif result.status == QueryStatus.ILLEGAL:
|
||||||
if not self.print_found_only:
|
if not self.print_found_only:
|
||||||
|
|||||||
@@ -294,9 +294,10 @@
|
|||||||
],
|
],
|
||||||
"errors": {
|
"errors": {
|
||||||
"INTERNAL_SERVER_ERROR": "Site error",
|
"INTERNAL_SERVER_ERROR": "Site error",
|
||||||
"Something just went wrong": "Site error"
|
"Something just went wrong": "Site error",
|
||||||
|
"PersistedQueryNotFound": "Site error"
|
||||||
},
|
},
|
||||||
"urlProbe": "https://api.500px.com/graphql?operationName=ProfileRendererQuery&variables=%7B%22username%22%3A%22{username}%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22105058632482dd2786fd5775745908dc928f537b28e28356b076522757d65c19%22%7D%7D",
|
"urlProbe": "https://api.500px.com/graphql?operationName=ProfileRendererQuery&variables=%7B%22username%22%3A%22{username}%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22fcecc7028c308115b0defebc63acec3fe3c12df86a602c3e1785ba5cfb8fff47%22%7D%7D",
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": "No message available",
|
"absenceStrs": "No message available",
|
||||||
"alexaRank": 3029,
|
"alexaRank": 3029,
|
||||||
@@ -1919,6 +1920,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"Blu-ray": {
|
"Blu-ray": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
@@ -12149,7 +12151,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"authorization": "Bearer BQC38O_gP1qY7X8Ui7RsgyutAuZ2QissgeFgsDEX6siaE_dAFmzV0mWMSziGB_dLErQwtfJZa7qM9IsmNHI"
|
"authorization": "Bearer BQAjb32z4TLh0t19LDuYfk2BV3gUXCpqyUuy2gBOyJTN_2xoZlN4AW1B6ZVmdKMDcI3Hc8agrrQsKbQZE90"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||||
@@ -13453,7 +13455,7 @@
|
|||||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||||
"x-guest-token": "1383863637358432258"
|
"x-guest-token": "1386060728566681601"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Bad guest token": "x-guest-token update required"
|
"Bad guest token": "x-guest-token update required"
|
||||||
@@ -13830,7 +13832,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTg3NzQ2ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.q3cuo2lPiamc4wj5NgWOl3JPr_d33y5C06epHB92thk"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTkzMDI0NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.fN8PQIEkzQjfu7znGoIaLEP9Qr6bV8JbA2ZwpBSFI5E"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
|
|||||||
+2
-1
@@ -34,7 +34,7 @@ class QueryResult():
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, username, site_name, site_url_user, status, ids_data=None,
|
def __init__(self, username, site_name, site_url_user, status, ids_data=None,
|
||||||
query_time=None, context=None, tags=[]):
|
query_time=None, context=None, error=None, tags=[]):
|
||||||
"""Create Query Result Object.
|
"""Create Query Result Object.
|
||||||
|
|
||||||
Contains information about a specific method of detecting usernames on
|
Contains information about a specific method of detecting usernames on
|
||||||
@@ -73,6 +73,7 @@ class QueryResult():
|
|||||||
self.context = context
|
self.context = context
|
||||||
self.ids_data = ids_data
|
self.ids_data = ids_data
|
||||||
self.tags = tags
|
self.tags = tags
|
||||||
|
self.error = error
|
||||||
|
|
||||||
def json(self):
|
def json(self):
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
from typing import Callable, Any, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
# search query
|
||||||
|
QueryDraft = Tuple[Callable, Any, Any]
|
||||||
|
|
||||||
|
# error got as a result of completed search query
|
||||||
|
class CheckError:
|
||||||
|
_type = 'Unknown'
|
||||||
|
_desc = ''
|
||||||
|
|
||||||
|
def __init__(self, typename, desc=''):
|
||||||
|
self._type = typename
|
||||||
|
self._desc = desc
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
if not self._desc:
|
||||||
|
return f'{self._type} error'
|
||||||
|
|
||||||
|
return f'{self._type} error: {self._desc}'
|
||||||
|
|
||||||
|
@property
|
||||||
|
def type(self):
|
||||||
|
return self._type
|
||||||
|
|
||||||
|
@property
|
||||||
|
def desc(self):
|
||||||
|
return self._desc
|
||||||
@@ -2,7 +2,8 @@
|
|||||||
import pytest
|
import pytest
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
from maigret.checking import AsyncioSimpleExecutor, AsyncioProgressbarExecutor, AsyncioProgressbarSemaphoreExecutor, AsyncioProgressbarQueueExecutor
|
from maigret.executors import AsyncioSimpleExecutor, AsyncioProgressbarExecutor, \
|
||||||
|
AsyncioProgressbarSemaphoreExecutor, AsyncioProgressbarQueueExecutor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
Reference in New Issue
Block a user