From a76f95858f14d650828738ba6bbdad74aeebc141 Mon Sep 17 00:00:00 2001 From: Soxoj Date: Thu, 20 May 2021 23:26:02 +0300 Subject: [PATCH] Added checker of Tor sites --- maigret/activation.py | 2 +- maigret/checking.py | 177 ++++++++++++++++++++++++------------ maigret/maigret.py | 8 ++ maigret/report.py | 5 +- maigret/resources/data.json | 17 ++++ maigret/sites.py | 3 + tests/test_activation.py | 2 +- tests/test_checking.py | 4 + tests/test_cli.py | 1 + tests/test_maigret.py | 1 + 10 files changed, 157 insertions(+), 63 deletions(-) diff --git a/maigret/activation.py b/maigret/activation.py index 82a0376..77fe394 100644 --- a/maigret/activation.py +++ b/maigret/activation.py @@ -35,7 +35,7 @@ class ParsingActivator: site.headers["authorization"] = f"Bearer {bearer_token}" -async def import_aiohttp_cookies(cookiestxt_filename): +def import_aiohttp_cookies(cookiestxt_filename): cookies_obj = MozillaCookieJar(cookiestxt_filename) cookies_obj.load(ignore_discard=True, ignore_expires=True) diff --git a/maigret/checking.py b/maigret/checking.py index 2b7ce01..79e31df 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -43,49 +43,96 @@ SUPPORTED_IDS = ( BAD_CHARS = "#" -async def get_response(request_future, logger) -> Tuple[str, int, Optional[CheckError]]: - html_text = None - status_code = 0 - error: Optional[CheckError] = CheckError("Unknown") +class SimpleAiohttpChecker: + def __init__(self, *args, **kwargs): + proxy = kwargs.get('proxy') + cookie_jar = kwargs.get('cookie_jar') + self.logger = kwargs.get('logger', Mock()) - try: - response = await request_future + # make http client session + connector = ( + ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False) + ) + connector.verify_ssl = False + self.session = aiohttp.ClientSession( + connector=connector, trust_env=True, cookie_jar=cookie_jar + ) - status_code = response.status - response_content = await response.content.read() - charset = response.charset or "utf-8" - decoded_content = response_content.decode(charset, "ignore") - html_text = decoded_content - - error = None - if status_code == 0: - error = CheckError("Connection lost") - - logger.debug(html_text) - - except asyncio.TimeoutError as e: - error = CheckError("Request timeout", str(e)) - except ClientConnectorError as e: - error = CheckError("Connecting failure", str(e)) - except ServerDisconnectedError as e: - error = CheckError("Server disconnected", str(e)) - except aiohttp.http_exceptions.BadHttpMessage as e: - error = CheckError("HTTP", str(e)) - except proxy_errors.ProxyError as e: - error = CheckError("Proxy", str(e)) - except KeyboardInterrupt: - error = CheckError("Interrupted") - except Exception as e: - # python-specific exceptions - if sys.version_info.minor > 6 and ( - isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError) - ): - error = CheckError("SSL", str(e)) + def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'): + if method == 'get': + request_method = self.session.get else: - logger.debug(e, exc_info=True) - error = CheckError("Unexpected", str(e)) + request_method = self.session.head - return str(html_text), status_code, error + future = request_method( + url=url, + headers=headers, + allow_redirects=allow_redirects, + timeout=timeout, + ) + + return future + + async def close(self): + await self.session.close() + + async def check(self, future) -> Tuple[str, int, Optional[CheckError]]: + html_text = None + status_code = 0 + error: Optional[CheckError] = CheckError("Unknown") + + try: + response = await future + + status_code = response.status + response_content = await response.content.read() + charset = response.charset or "utf-8" + decoded_content = response_content.decode(charset, "ignore") + html_text = decoded_content + + error = None + if status_code == 0: + error = CheckError("Connection lost") + + self.logger.debug(html_text) + + except asyncio.TimeoutError as e: + error = CheckError("Request timeout", str(e)) + except ClientConnectorError as e: + error = CheckError("Connecting failure", str(e)) + except ServerDisconnectedError as e: + error = CheckError("Server disconnected", str(e)) + except aiohttp.http_exceptions.BadHttpMessage as e: + error = CheckError("HTTP", str(e)) + except proxy_errors.ProxyError as e: + error = CheckError("Proxy", str(e)) + except KeyboardInterrupt: + error = CheckError("Interrupted") + except Exception as e: + # python-specific exceptions + if sys.version_info.minor > 6 and ( + isinstance(e, ssl.SSLCertVerificationError) + or isinstance(e, ssl.SSLError) + ): + error = CheckError("SSL", str(e)) + else: + self.logger.debug(e, exc_info=True) + error = CheckError("Unexpected", str(e)) + + return str(html_text), status_code, error + + +class TorAiohttpChecker(SimpleAiohttpChecker): + def __init__(self, *args, **kwargs): + proxy = kwargs.get('proxy') + cookie_jar = kwargs.get('cookie_jar') + self.logger = kwargs.get('logger', Mock()) + + connector = ProxyConnector.from_url(proxy) + connector.verify_ssl = False + self.session = aiohttp.ClientSession( + connector=connector, trust_env=True, cookie_jar=cookie_jar + ) # TODO: move to separate class @@ -322,7 +369,8 @@ def make_site_result( # workaround to prevent slash errors url = re.sub("(?