Added some new sites and updated engines

This commit is contained in:
Soxoj
2021-05-03 03:16:02 +03:00
parent bdfb4911ce
commit 66d6c7a93c
6 changed files with 3580 additions and 2395 deletions
-18
View File
@@ -34,24 +34,6 @@ class ParsingActivator:
bearer_token = r.json()["accessToken"]
site.headers["authorization"] = f"Bearer {bearer_token}"
@staticmethod
def xssis(site, logger, cookies=None):
    """Activate xss.is parsing by obtaining a CSRF token.

    Sends a form-encoded POST to ``site.activation["url"]`` with the given
    cookies and stores the ``csrf`` value of the JSON response in
    ``site.get_params["_xfToken"]``.

    Args:
        site: object exposing ``headers``, ``activation`` and ``get_params``.
        logger: logger used to report that cookies are missing.
        cookies: cookies sent with the request. When empty or ``None`` a
            debug message is logged and no request is made.

    Raises:
        KeyError: if the JSON response has no ``csrf`` key. Errors raised by
            the HTTP request or JSON decoding propagate unchanged.
    """
    # The default is ``None`` rather than ``{}`` so that no mutable object is
    # shared between calls; both are falsy, so callers see no difference.
    if not cookies:
        logger.debug("You must have cookies to activate xss.is parsing!")
        return

    # Work on a copy so the extra header is not written into site.headers.
    headers = dict(site.headers)
    headers["content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"
    post_data = {
        "_xfResponseType": "json",
        # NOTE(review): hard-coded token value; presumably a placeholder that
        # the server replaces with a fresh one in the response; confirm.
        "_xfToken": "1611177919,a2710362e45dad9aa1da381e21941a38",
    }

    r = requests.post(
        site.activation["url"], headers=headers, cookies=cookies, data=post_data
    )
    csrf = r.json()["csrf"]
    site.get_params["_xfToken"] = csrf
async def import_aiohttp_cookies(cookiestxt_filename):
cookies_obj = MozillaCookieJar(cookiestxt_filename)
+4 -3
View File
@@ -121,7 +121,7 @@ def process_site_result(
username = results_info["username"]
is_parsing_enabled = results_info["parsing_enabled"]
url = results_info.get("url_user")
logger.debug(url)
logger.info(url)
status = results_info.get("status")
if status is not None:
@@ -169,7 +169,8 @@ def process_site_result(
f"Activation method {method} for site {site.name} not found!"
)
except Exception as e:
logger.warning(f"Failed activation {method} for site {site.name}: {e}")
logger.warning(f"Failed activation {method} for site {site.name}: {str(e)}", exc_info=True)
# TODO: temporary check error
site_name = site.pretty_name
# presence flags
@@ -200,7 +201,7 @@ def process_site_result(
)
if check_error:
logger.debug(check_error)
logger.warning(check_error)
result = QueryResult(
username,
site_name,
+12
View File
@@ -34,6 +34,12 @@ COMMON_ERRORS = {
'Please stand by, while we are checking your browser': CheckError(
'Bot protection', 'Cloudflare'
),
'<span data-translate="checking_browser">Checking your browser before accessing</span>': CheckError(
'Bot protection', 'Cloudflare'
),
'This website is using a security service to protect itself from online attacks.': CheckError(
'Access denied', 'Cloudflare'
),
'<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
'document.getElementById(\'validate_form_submit\').disabled=true': CheckError(
'Captcha', 'Mail.ru'
@@ -48,6 +54,7 @@ COMMON_ERRORS = {
'Censorship', 'MGTS'
),
'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
'Сайт заблокирован хостинг-провайдером': CheckError('Site-specific', 'Site is disabled (Beget)'),
}
ERRORS_TYPES = {
@@ -57,6 +64,11 @@ ERRORS_TYPES = {
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
}
# Maps an error reason string to a suggested remedy string.
# TODO: checking for reason
ERRORS_REASONS = {
'Login required': 'Add authorization cookies through `--cookies-jar-file` (see cookies.txt)',
}
TEMPORARY_ERRORS_TYPES = [
'Request timeout',
'Unknown',
+2851 -1778
View File
File diff suppressed because it is too large Load Diff
+708 -591
View File
File diff suppressed because it is too large Load Diff
+5 -5
View File
@@ -40,13 +40,13 @@ def test_case_convert_camel_with_digits_to_snake():
# Checks is_country_tag: 'ru', 'FR' and 'global' are expected to yield True,
# 'a1' and 'dating' False. Each assertion appears twice in this view, once
# using `==` and once using `is`.
def test_is_country_tag():
assert is_country_tag('ru') == True
assert is_country_tag('FR') == True
assert is_country_tag('ru') is True
assert is_country_tag('FR') is True
assert is_country_tag('a1') == False
assert is_country_tag('dating') == False
assert is_country_tag('a1') is False
assert is_country_tag('dating') is False
assert is_country_tag('global') == True
assert is_country_tag('global') is True
def test_enrich_link_str():