Added some new sites and updated engines

This commit is contained in:
Soxoj
2021-05-03 03:16:02 +03:00
parent bdfb4911ce
commit 66d6c7a93c
6 changed files with 3580 additions and 2395 deletions
-18
View File
@@ -34,24 +34,6 @@ class ParsingActivator:
bearer_token = r.json()["accessToken"] bearer_token = r.json()["accessToken"]
site.headers["authorization"] = f"Bearer {bearer_token}" site.headers["authorization"] = f"Bearer {bearer_token}"
@staticmethod
def xssis(site, logger, cookies={}):
if not cookies:
logger.debug("You must have cookies to activate xss.is parsing!")
return
headers = dict(site.headers)
post_data = {
"_xfResponseType": "json",
"_xfToken": "1611177919,a2710362e45dad9aa1da381e21941a38",
}
headers["content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"
r = requests.post(
site.activation["url"], headers=headers, cookies=cookies, data=post_data
)
csrf = r.json()["csrf"]
site.get_params["_xfToken"] = csrf
async def import_aiohttp_cookies(cookiestxt_filename): async def import_aiohttp_cookies(cookiestxt_filename):
cookies_obj = MozillaCookieJar(cookiestxt_filename) cookies_obj = MozillaCookieJar(cookiestxt_filename)
+4 -3
View File
@@ -121,7 +121,7 @@ def process_site_result(
username = results_info["username"] username = results_info["username"]
is_parsing_enabled = results_info["parsing_enabled"] is_parsing_enabled = results_info["parsing_enabled"]
url = results_info.get("url_user") url = results_info.get("url_user")
logger.debug(url) logger.info(url)
status = results_info.get("status") status = results_info.get("status")
if status is not None: if status is not None:
@@ -169,7 +169,8 @@ def process_site_result(
f"Activation method {method} for site {site.name} not found!" f"Activation method {method} for site {site.name} not found!"
) )
except Exception as e: except Exception as e:
logger.warning(f"Failed activation {method} for site {site.name}: {e}") logger.warning(f"Failed activation {method} for site {site.name}: {str(e)}", exc_info=True)
# TODO: temporary check error
site_name = site.pretty_name site_name = site.pretty_name
# presence flags # presence flags
@@ -200,7 +201,7 @@ def process_site_result(
) )
if check_error: if check_error:
logger.debug(check_error) logger.warning(check_error)
result = QueryResult( result = QueryResult(
username, username,
site_name, site_name,
+12
View File
@@ -34,6 +34,12 @@ COMMON_ERRORS = {
'Please stand by, while we are checking your browser': CheckError( 'Please stand by, while we are checking your browser': CheckError(
'Bot protection', 'Cloudflare' 'Bot protection', 'Cloudflare'
), ),
'<span data-translate="checking_browser">Checking your browser before accessing</span>': CheckError(
'Bot protection', 'Cloudflare'
),
'This website is using a security service to protect itself from online attacks.': CheckError(
'Access denied', 'Cloudflare'
),
'<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'), '<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
'document.getElementById(\'validate_form_submit\').disabled=true': CheckError( 'document.getElementById(\'validate_form_submit\').disabled=true': CheckError(
'Captcha', 'Mail.ru' 'Captcha', 'Mail.ru'
@@ -48,6 +54,7 @@ COMMON_ERRORS = {
'Censorship', 'MGTS' 'Censorship', 'MGTS'
), ),
'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'), 'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
'Сайт заблокирован хостинг-провайдером': CheckError('Site-specific', 'Site is disabled (Beget)'),
} }
ERRORS_TYPES = { ERRORS_TYPES = {
@@ -57,6 +64,11 @@ ERRORS_TYPES = {
'Request timeout': 'Try to increase timeout or to switch to another internet service provider', 'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
} }
# Maps an error reason to a suggested fix for the user.
# TODO: implement checking for the reason (presumably so that these hints
# actually get used -- confirm against the error-handling code).
ERRORS_REASONS = {
'Login required': 'Add authorization cookies through `--cookies-jar-file` (see cookies.txt)',
}
TEMPORARY_ERRORS_TYPES = [ TEMPORARY_ERRORS_TYPES = [
'Request timeout', 'Request timeout',
'Unknown', 'Unknown',
+2851 -1778
View File
File diff suppressed because it is too large Load Diff
+708 -591
View File
File diff suppressed because it is too large Load Diff
+5 -5
View File
@@ -40,13 +40,13 @@ def test_case_convert_camel_with_digits_to_snake():
def test_is_country_tag(): def test_is_country_tag():
assert is_country_tag('ru') == True assert is_country_tag('ru') is True
assert is_country_tag('FR') == True assert is_country_tag('FR') is True
assert is_country_tag('a1') == False assert is_country_tag('a1') is False
assert is_country_tag('dating') == False assert is_country_tag('dating') is False
assert is_country_tag('global') == True assert is_country_tag('global') is True
def test_enrich_link_str(): def test_enrich_link_str():