mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
Added some new sites, engines updates
This commit is contained in:
@@ -34,24 +34,6 @@ class ParsingActivator:
|
||||
bearer_token = r.json()["accessToken"]
|
||||
site.headers["authorization"] = f"Bearer {bearer_token}"
|
||||
|
||||
@staticmethod
def xssis(site, logger, cookies=None):
    """Fetch a fresh ``_xfToken`` for xss.is and store it on ``site``.

    Posts a form to ``site.activation["url"]`` with the supplied cookies and
    copies the ``csrf`` field of the JSON reply into
    ``site.get_params["_xfToken"]``.

    Args:
        site: Site object. ``site.headers`` and ``site.activation["url"]``
            are read; ``site.get_params`` is updated in place.
        logger: Logger that receives the debug message when no cookies
            are available.
        cookies: Cookies sent with the activation request. When omitted or
            empty, no request is made and ``site`` is left untouched.

    Returns:
        None.

    Raises:
        KeyError: If the JSON reply has no ``csrf`` field. Errors raised by
            the HTTP request or by JSON decoding propagate unchanged.
    """
    if not cookies:
        logger.debug("You must have cookies to activate xss.is parsing!")
        return

    # Work on a copy so the content-type override stays out of site.headers.
    headers = dict(site.headers)
    headers["content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"

    # NOTE(review): this token is hard-coded; its first part looks like a
    # Unix timestamp, so it may be stale - confirm the server still takes it.
    post_data = {
        "_xfResponseType": "json",
        "_xfToken": "1611177919,a2710362e45dad9aa1da381e21941a38",
    }

    # NOTE(review): no timeout is passed, so this call can block for as long
    # as the server keeps the connection open - consider adding one.
    r = requests.post(
        site.activation["url"], headers=headers, cookies=cookies, data=post_data
    )
    site.get_params["_xfToken"] = r.json()["csrf"]
|
||||
|
||||
|
||||
async def import_aiohttp_cookies(cookiestxt_filename):
|
||||
cookies_obj = MozillaCookieJar(cookiestxt_filename)
|
||||
|
||||
+4
-3
@@ -121,7 +121,7 @@ def process_site_result(
|
||||
username = results_info["username"]
|
||||
is_parsing_enabled = results_info["parsing_enabled"]
|
||||
url = results_info.get("url_user")
|
||||
logger.debug(url)
|
||||
logger.info(url)
|
||||
|
||||
status = results_info.get("status")
|
||||
if status is not None:
|
||||
@@ -169,7 +169,8 @@ def process_site_result(
|
||||
f"Activation method {method} for site {site.name} not found!"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed activation {method} for site {site.name}: {e}")
|
||||
logger.warning(f"Failed activation {method} for site {site.name}: {str(e)}", exc_info=True)
|
||||
# TODO: temporary check error
|
||||
|
||||
site_name = site.pretty_name
|
||||
# presence flags
|
||||
@@ -200,7 +201,7 @@ def process_site_result(
|
||||
)
|
||||
|
||||
if check_error:
|
||||
logger.debug(check_error)
|
||||
logger.warning(check_error)
|
||||
result = QueryResult(
|
||||
username,
|
||||
site_name,
|
||||
|
||||
@@ -34,6 +34,12 @@ COMMON_ERRORS = {
|
||||
'Please stand by, while we are checking your browser': CheckError(
|
||||
'Bot protection', 'Cloudflare'
|
||||
),
|
||||
'<span data-translate="checking_browser">Checking your browser before accessing</span>': CheckError(
|
||||
'Bot protection', 'Cloudflare'
|
||||
),
|
||||
'This website is using a security service to protect itself from online attacks.': CheckError(
|
||||
'Access denied', 'Cloudflare'
|
||||
),
|
||||
'<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
|
||||
'document.getElementById(\'validate_form_submit\').disabled=true': CheckError(
|
||||
'Captcha', 'Mail.ru'
|
||||
@@ -48,6 +54,7 @@ COMMON_ERRORS = {
|
||||
'Censorship', 'MGTS'
|
||||
),
|
||||
'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
|
||||
'Сайт заблокирован хостинг-провайдером': CheckError('Site-specific', 'Site is disabled (Beget)'),
|
||||
}
|
||||
|
||||
ERRORS_TYPES = {
|
||||
@@ -57,6 +64,11 @@ ERRORS_TYPES = {
|
||||
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
||||
}
|
||||
|
||||
# TODO: checking for reason
|
||||
ERRORS_REASONS = {
|
||||
'Login required': 'Add authorization cookies through `--cookies-jar-file` (see cookies.txt)',
|
||||
}
|
||||
|
||||
TEMPORARY_ERRORS_TYPES = [
|
||||
'Request timeout',
|
||||
'Unknown',
|
||||
|
||||
+2851
-1778
File diff suppressed because it is too large
Load Diff
+5
-5
@@ -40,13 +40,13 @@ def test_case_convert_camel_with_digits_to_snake():
|
||||
|
||||
|
||||
def test_is_country_tag():
    """Check that is_country_tag returns strict booleans for sample tags."""
    # Two-letter codes are accepted in both lower and upper case.
    assert is_country_tag('ru') is True
    assert is_country_tag('FR') is True

    # A tag containing a digit and a longer category tag are rejected.
    assert is_country_tag('a1') is False
    assert is_country_tag('dating') is False

    # 'global' is accepted as a country tag.
    assert is_country_tag('global') is True
|
||||
|
||||
|
||||
def test_enrich_link_str():
|
||||
|
||||
Reference in New Issue
Block a user