Fixed proxy usage and some false positives

This commit is contained in:
Soxoj
2020-12-11 03:21:11 +03:00
parent 36e06c73db
commit dbf8de8075
2 changed files with 46 additions and 27 deletions
+25 -16
View File
@@ -16,6 +16,8 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
from http.cookies import SimpleCookie from http.cookies import SimpleCookie
import aiohttp import aiohttp
from aiohttp_socks import ProxyConnector
from python_socks import _errors as proxy_errors
import requests import requests
from mock import Mock from mock import Mock
from socid_extractor import parse, extract from socid_extractor import parse, extract
@@ -41,6 +43,7 @@ common_errors = {
'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha', 'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha',
'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection', 'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection',
'404</h1><p class="error-card__description">Мы&nbsp;не&nbsp;нашли страницу': 'MegaFon 404 page', '404</h1><p class="error-card__description">Мы&nbsp;не&nbsp;нашли страницу': 'MegaFon 404 page',
'Доступ к информационному ресурсу ограничен на основании Федерального закона': 'MGTS censorship',
} }
unsupported_characters = '#' unsupported_characters = '#'
@@ -81,6 +84,9 @@ async def get_response(request_future, error_type, social_network, logger):
except aiohttp.http_exceptions.BadHttpMessage as err: except aiohttp.http_exceptions.BadHttpMessage as err:
error_text = "HTTP Error" error_text = "HTTP Error"
expection_text = str(err) expection_text = str(err)
except proxy_errors.ProxyError as err:
error_text = "Proxy Error"
expection_text = str(err)
except Exception as err: except Exception as err:
logger.warning(f'Unhandled error while requesting {social_network}: {err}') logger.warning(f'Unhandled error while requesting {social_network}: {err}')
logger.debug(err, exc_info=True) logger.debug(err, exc_info=True)
@@ -166,9 +172,20 @@ async def maigret(username, site_data, query_notify, logger,
query_notify.start(username, id_type) query_notify.start(username, id_type)
# TODO: connector # TODO: connector
connector = aiohttp.TCPConnector(ssl=False) connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
# connector = aiohttp.TCPConnector(ssl=False)
connector.verify_ssl=False
session = aiohttp.ClientSession(connector=connector) session = aiohttp.ClientSession(connector=connector)
if logger.level == logging.DEBUG:
future = session.get(url='https://icanhazip.com')
ip, status, error, expection = await get_response(future, None, 'probe', logger)
if ip:
logger.debug(f'My IP is: {ip.strip()}')
else:
logger.debug(f'IP requesting {error}: {expection}')
# Results from analysis of all sites # Results from analysis of all sites
results_total = {} results_total = {}
@@ -270,19 +287,10 @@ async def maigret(username, site_data, query_notify, logger,
else: else:
cookies_obj = [] cookies_obj = []
# This future starts running the request in a new thread, doesn't block the main thread future = request_method(url=url_probe, headers=headers,
if proxy is not None: allow_redirects=allow_redirects,
proxies = {"http": proxy, "https": proxy} timeout=timeout,
future = request_method(url=url_probe, headers=headers, )
proxies=proxies,
allow_redirects=allow_redirects,
timeout=timeout,
)
else:
future = request_method(url=url_probe, headers=headers,
allow_redirects=allow_redirects,
timeout=timeout,
)
# Store future in data for access later # Store future in data for access later
net_info["request_future"] = future net_info["request_future"] = future
@@ -331,6 +339,7 @@ async def maigret(username, site_data, query_notify, logger,
continue continue
html_text, status_code, error_text, expection_text = resp html_text, status_code, error_text, expection_text = resp
site_error_text = '?'
# TODO: add elapsed request time counting # TODO: add elapsed request time counting
response_time = None response_time = None
@@ -359,7 +368,7 @@ async def maigret(username, site_data, query_notify, logger,
url, url,
QueryStatus.UNKNOWN, QueryStatus.UNKNOWN,
query_time=response_time, query_time=response_time,
context=error_text) context=f'{error_text}: {site_error_text}')
elif error_type == "message": elif error_type == "message":
absence_flags = net_info.get("errorMsg") absence_flags = net_info.get("errorMsg")
is_absence_flags_list = isinstance(absence_flags, list) is_absence_flags_list = isinstance(absence_flags, list)
@@ -572,7 +581,7 @@ async def main():
f"Python: {platform.python_version()}" f"Python: {platform.python_version()}"
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description=f"Maigret v{__version__})" description=f"Maigret v{__version__}"
) )
parser.add_argument("--version", parser.add_argument("--version",
action="version", version=version_string, action="version", version=version_string,
+21 -11
View File
@@ -2028,18 +2028,21 @@
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
"Bobrdobr": { "Bobrdobr": {
"errorType": "status_code", "errorType": "message",
"errorMsg": ["Страница не найдена."],
"presenseStrs": ["Закладки пользователя"],
"rank": 440113, "rank": 440113,
"tags": [ "tags": [
"az", "az",
"in", "in",
"ru", "ru",
"ua" "ua"
], ],
"url": "https://bobrdobr.ru/people/{username}/", "url": "https://bobrdobr.ru/people/{username}/",
"urlMain": "https://bobrdobr.ru", "urlMain": "https://bobrdobr.ru",
"username_claimed": "igrozona", "username_claimed": "igrozona",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7",
"disabled": false
}, },
"BodyBuilding": { "BodyBuilding": {
"errorType": "response_url", "errorType": "response_url",
@@ -5952,7 +5955,7 @@
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
"Gps-data-team": { "Gps-data-team": {
"disabled": false, "disabled": true,
"errorMsg": "<title></title>", "errorMsg": "<title></title>",
"errorType": "message", "errorType": "message",
"rank": 1286248, "rank": 1286248,
@@ -8266,6 +8269,9 @@
}, },
"Medikforum": { "Medikforum": {
"errorMsg": "\u041f\u043e\u0434\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0442\u0435\u043c \u0438\u043b\u0438 \u0441\u043e\u043e\u0431\u0449\u0435\u043d\u0438\u0439 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u043e.", "errorMsg": "\u041f\u043e\u0434\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0442\u0435\u043c \u0438\u043b\u0438 \u0441\u043e\u043e\u0431\u0449\u0435\u043d\u0438\u0439 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u043e.",
"errors": {
"Вы не можете произвести поиск сразу после предыдущего": "Rate limit"
},
"errorType": "message", "errorType": "message",
"rank": 51249, "rank": 51249,
"tags": [ "tags": [
@@ -8273,6 +8279,7 @@
"nl", "nl",
"ru" "ru"
], ],
"request_head_only": false,
"url": "https://www.medikforum.ru/forum/search.php?keywords=&terms=all&author={username}", "url": "https://www.medikforum.ru/forum/search.php?keywords=&terms=all&author={username}",
"urlMain": "https://www.medikforum.ru", "urlMain": "https://www.medikforum.ru",
"username_claimed": "adam", "username_claimed": "adam",
@@ -15228,16 +15235,19 @@
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
"YouPorn": { "YouPorn": {
"errorType": "status_code", "errorType": "message",
"errorMsg": ["BUT CAN'T FIND WHAT YOU'RE LOOKING FOR."],
"presenseStrs": ["Videos uploaded by"],
"rank": 459, "rank": 459,
"tags": [ "tags": [
"porno", "porno",
"us" "us"
], ],
"url": "https://youporn.com/uservids/{username}", "url": "https://youporn.com/uservids/{username}",
"urlMain": "https://youporn.com", "urlMain": "https://youporn.com",
"username_claimed": "blue", "username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis77777" "username_unclaimed": "noonewouldeverusethis77777",
"disabled": false
}, },
"YouTube": { "YouTube": {
"errorMsg": "Not Found", "errorMsg": "Not Found",