diff --git a/maigret/maigret.py b/maigret/maigret.py index c3c2c96..8240106 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -16,6 +16,8 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter from http.cookies import SimpleCookie import aiohttp +from aiohttp_socks import ProxyConnector +from python_socks import _errors as proxy_errors import requests from mock import Mock from socid_extractor import parse, extract @@ -41,6 +43,7 @@ common_errors = { 'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha', 'Verifying your browser, please wait...
DDoS Protection by Blazingfast.io': 'Blazingfast protection', '404

Мы не нашли страницу': 'MegaFon 404 page', + 'Доступ к информационному ресурсу ограничен на основании Федерального закона': 'MGTS censorship', } unsupported_characters = '#' @@ -81,6 +84,9 @@ async def get_response(request_future, error_type, social_network, logger): except aiohttp.http_exceptions.BadHttpMessage as err: error_text = "HTTP Error" expection_text = str(err) + except proxy_errors.ProxyError as err: + error_text = "Proxy Error" + expection_text = str(err) except Exception as err: logger.warning(f'Unhandled error while requesting {social_network}: {err}') logger.debug(err, exc_info=True) @@ -166,9 +172,20 @@ async def maigret(username, site_data, query_notify, logger, query_notify.start(username, id_type) # TODO: connector - connector = aiohttp.TCPConnector(ssl=False) + connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False) + # connector = aiohttp.TCPConnector(ssl=False) + connector.verify_ssl=False session = aiohttp.ClientSession(connector=connector) + if logger.level == logging.DEBUG: + future = session.get(url='https://icanhazip.com') + ip, status, error, expection = await get_response(future, None, 'probe', logger) + if ip: + logger.debug(f'My IP is: {ip.strip()}') + else: + logger.debug(f'IP requesting {error}: {expection}') + + # Results from analysis of all sites results_total = {} @@ -270,19 +287,10 @@ async def maigret(username, site_data, query_notify, logger, else: cookies_obj = [] - # This future starts running the request in a new thread, doesn't block the main thread - if proxy is not None: - proxies = {"http": proxy, "https": proxy} - future = request_method(url=url_probe, headers=headers, - proxies=proxies, - allow_redirects=allow_redirects, - timeout=timeout, - ) - else: - future = request_method(url=url_probe, headers=headers, - allow_redirects=allow_redirects, - timeout=timeout, - ) + future = request_method(url=url_probe, headers=headers, + allow_redirects=allow_redirects, + timeout=timeout, + ) # Store future in data for access later net_info["request_future"] = future @@ -331,6 +339,7 @@ async def maigret(username, site_data, query_notify, logger, continue html_text, status_code, error_text, expection_text = resp + site_error_text = '?' # TODO: add elapsed request time counting response_time = None @@ -359,7 +368,7 @@ async def maigret(username, site_data, query_notify, logger, url, QueryStatus.UNKNOWN, query_time=response_time, - context=error_text) + context=f'{error_text}: {site_error_text}') elif error_type == "message": absence_flags = net_info.get("errorMsg") is_absence_flags_list = isinstance(absence_flags, list) @@ -572,7 +581,7 @@ async def main(): f"Python: {platform.python_version()}" parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, - description=f"Maigret v{__version__})" + description=f"Maigret v{__version__}" ) parser.add_argument("--version", action="version", version=version_string, diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 5908950..ee79d46 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -2028,18 +2028,21 @@ "username_unclaimed": "noonewouldeverusethis7" }, "Bobrdobr": { - "errorType": "status_code", + "errorType": "message", + "errorMsg": ["Страница не найдена."], + "presenseStrs": ["Закладки пользователя"], "rank": 440113, "tags": [ - "az", - "in", - "ru", - "ua" + "az", + "in", + "ru", + "ua" ], "url": "https://bobrdobr.ru/people/{username}/", "urlMain": "https://bobrdobr.ru", "username_claimed": "igrozona", - "username_unclaimed": "noonewouldeverusethis7" + "username_unclaimed": "noonewouldeverusethis7", + "disabled": false }, "BodyBuilding": { "errorType": "response_url", @@ -5952,7 +5955,7 @@ "username_unclaimed": "noonewouldeverusethis7" }, "Gps-data-team": { - "disabled": false, + "disabled": true, "errorMsg": "", "errorType": "message", "rank": 1286248, @@ -8266,6 +8269,9 @@ }, "Medikforum": { "errorMsg": "\u041f\u043e\u0434\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0442\u0435\u043c \u0438\u043b\u0438 \u0441\u043e\u043e\u0431\u0449\u0435\u043d\u0438\u0439 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u043e.", + "errors": { + "Вы не можете произвести поиск сразу после предыдущего": "Rate limit" + }, "errorType": "message", "rank": 51249, "tags": [ @@ -8273,6 +8279,7 @@ "nl", "ru" ], + "request_head_only": false, "url": "https://www.medikforum.ru/forum/search.php?keywords=&terms=all&author={username}", "urlMain": "https://www.medikforum.ru", "username_claimed": "adam", @@ -15228,16 +15235,19 @@ "username_unclaimed": "noonewouldeverusethis7" }, "YouPorn": { - "errorType": "status_code", + "errorType": "message", + "errorMsg": ["BUT CAN'T FIND WHAT YOU'RE LOOKING FOR."], + "presenseStrs": ["Videos uploaded by"], "rank": 459, "tags": [ - "porno", - "us" + "porno", + "us" ], "url": "https://youporn.com/uservids/{username}", "urlMain": "https://youporn.com", "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis77777" + "username_unclaimed": "noonewouldeverusethis77777", + "disabled": false }, "YouTube": { "errorMsg": "Not Found",