From 66d6c7a93c850054fc55219d6c9099c7c8416746 Mon Sep 17 00:00:00 2001 From: Soxoj Date: Mon, 3 May 2021 03:16:02 +0300 Subject: [PATCH] Added some new sites, engines updates --- maigret/activation.py | 18 - maigret/checking.py | 7 +- maigret/errors.py | 12 + maigret/resources/data.json | 4629 +++++++++++++++++++++-------------- sites.md | 1299 +++++----- tests/test_utils.py | 10 +- 6 files changed, 3580 insertions(+), 2395 deletions(-) diff --git a/maigret/activation.py b/maigret/activation.py index fe22a87..82a0376 100644 --- a/maigret/activation.py +++ b/maigret/activation.py @@ -34,24 +34,6 @@ class ParsingActivator: bearer_token = r.json()["accessToken"] site.headers["authorization"] = f"Bearer {bearer_token}" - @staticmethod - def xssis(site, logger, cookies={}): - if not cookies: - logger.debug("You must have cookies to activate xss.is parsing!") - return - - headers = dict(site.headers) - post_data = { - "_xfResponseType": "json", - "_xfToken": "1611177919,a2710362e45dad9aa1da381e21941a38", - } - headers["content-type"] = "application/x-www-form-urlencoded; charset=UTF-8" - r = requests.post( - site.activation["url"], headers=headers, cookies=cookies, data=post_data - ) - csrf = r.json()["csrf"] - site.get_params["_xfToken"] = csrf - async def import_aiohttp_cookies(cookiestxt_filename): cookies_obj = MozillaCookieJar(cookiestxt_filename) diff --git a/maigret/checking.py b/maigret/checking.py index a3ff22b..50e05c9 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -121,7 +121,7 @@ def process_site_result( username = results_info["username"] is_parsing_enabled = results_info["parsing_enabled"] url = results_info.get("url_user") - logger.debug(url) + logger.info(url) status = results_info.get("status") if status is not None: @@ -169,7 +169,8 @@ def process_site_result( f"Activation method {method} for site {site.name} not found!" ) except Exception as e: - logger.warning(f"Failed activation {method} for site {site.name}: {e}") + logger.warning(f"Failed activation {method} for site {site.name}: {str(e)}", exc_info=True) + # TODO: temporary check error site_name = site.pretty_name # presense flags @@ -200,7 +201,7 @@ def process_site_result( ) if check_error: - logger.debug(check_error) + logger.warning(check_error) result = QueryResult( username, site_name, diff --git a/maigret/errors.py b/maigret/errors.py index f2e4d2f..77413ee 100644 --- a/maigret/errors.py +++ b/maigret/errors.py @@ -34,6 +34,12 @@ COMMON_ERRORS = { 'Please stand by, while we are checking your browser': CheckError( 'Bot protection', 'Cloudflare' ), + 'Checking your browser before accessing': CheckError( + 'Bot protection', 'Cloudflare' + ), + 'This website is using a security service to protect itself from online attacks.': CheckError( + 'Access denied', 'Cloudflare' + ), 'Доступ ограничен': CheckError('Censorship', 'Rostelecom'), 'document.getElementById(\'validate_form_submit\').disabled=true': CheckError( 'Captcha', 'Mail.ru' @@ -48,6 +54,7 @@ COMMON_ERRORS = { 'Censorship', 'MGTS' ), 'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'), + 'Сайт заблокирован хостинг-провайдером': CheckError('Site-specific', 'Site is disabled (Beget)'), } ERRORS_TYPES = { @@ -57,6 +64,11 @@ ERRORS_TYPES = { 'Request timeout': 'Try to increase timeout or to switch to another internet service provider', } +# TODO: checking for reason +ERRORS_REASONS = { + 'Login required': 'Add authorization cookies through `--cookies-jar-file` (see cookies.txt)', +} + TEMPORARY_ERRORS_TYPES = [ 'Request timeout', 'Unknown', diff --git a/maigret/resources/data.json b/maigret/resources/data.json index b919d44..4fbbf9f 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -5,7 +5,7 @@ "ru" ], "engine": "XenForo", - "alexaRank": 5838790, + "alexaRank": 5789058, "urlMain": "http://0-3.ru", "usernameClaimed": "donna", "usernameUnclaimed": "noonewouldeverusethis7" @@ -15,7 +15,7 @@ "ru" ], "engine": "uCoz", - "alexaRank": 9075623, + "alexaRank": 9005599, "urlMain": "http://0k.clan.su", "usernameClaimed": "eruzz", "usernameUnclaimed": "noonewouldeverusethis7" @@ -29,7 +29,7 @@ "absenceStrs": [ "\u042d\u0442\u043e\u0442 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442, \u0438\u043b\u0438 \u0437\u0430\u0431\u043b\u043e\u043a\u0438\u0440\u043e\u0432\u0430\u043d." ], - "alexaRank": 1189349, + "alexaRank": 1178877, "urlMain": "http://1001mem.ru", "url": "http://1001mem.ru/{username}", "usernameClaimed": "adam", @@ -46,7 +46,7 @@ "absenceStrs": [ "Sorry, the requested user is not valid!" ], - "alexaRank": 37423, + "alexaRank": 37269, "urlMain": "https://www.1001tracklists.com", "url": "https://www.1001tracklists.com/user/{username}/index.html", "usernameClaimed": "JacoWilles", @@ -58,14 +58,14 @@ "ru" ], "engine": "XenForo", - "alexaRank": 67279, + "alexaRank": 66162, "urlMain": "https://forum-ru.101xp.com", "usernameClaimed": "aida", "usernameUnclaimed": "noonewouldeverusethis7" }, "11x2": { "checkType": "status_code", - "alexaRank": 1661135, + "alexaRank": 1646492, "urlMain": "https://11x2.com", "url": "https://11x2.com/user/home/{username}", "usernameClaimed": "hazelamy", @@ -78,7 +78,7 @@ "us" ], "checkType": "response_url", - "alexaRank": 1117, + "alexaRank": 1119, "urlMain": "https://ru.123rf.com", "url": "https://ru.123rf.com/profile_{username}", "usernameClaimed": "rawpixel", @@ -104,7 +104,7 @@ "ru" ], "engine": "vBulletin", - "alexaRank": 1277869, + "alexaRank": 1267171, "urlMain": "https://1xforum.com", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" @@ -115,7 +115,7 @@ "us" ], "checkType": "status_code", - "alexaRank": 2012, + "alexaRank": 2011, "urlMain": "https://247sports.com", "url": "https://247sports.com/user/{username}/", "usernameClaimed": "adam", @@ -129,7 +129,7 @@ "us" ], "checkType": "status_code", - "alexaRank": 44559, + "alexaRank": 44433, "urlMain": "https://24open.ru", "url": "https://24open.ru/user/{username}/", "usernameClaimed": "niko3193", @@ -137,7 +137,7 @@ }, "2Dimensions": { "checkType": "status_code", - "alexaRank": 7364364, + "alexaRank": 7307514, "urlMain": "https://2Dimensions.com/", "url": "https://2Dimensions.com/a/{username}", "usernameClaimed": "blue", @@ -151,7 +151,7 @@ "absenceStrs": [ "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d" ], - "alexaRank": 975416, + "alexaRank": 993896, "urlMain": "https://2berega.spb.ru", "url": "https://2berega.spb.ru/user/{username}", "usernameClaimed": "adam", @@ -162,7 +162,7 @@ "ru" ], "checkType": "status_code", - "alexaRank": 494051, + "alexaRank": 506974, "urlMain": "https://www.2d-3d.ru", "url": "https://www.2d-3d.ru/user/{username}/", "usernameClaimed": "adam", @@ -176,7 +176,7 @@ "absenceStrs": [ "Deze gebruiker is niet geregistreerd, zodat je zijn of haar profiel niet kunt bekijken." ], - "alexaRank": 1297652, + "alexaRank": 1286759, "urlMain": "https://www.2fast4u.be", "url": "https://www.2fast4u.be/members/?username={username}", "usernameClaimed": "Schussboelie", @@ -195,7 +195,7 @@ "absenceStrs": [ "\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0442\u0430\u043a\u043e\u0433\u043e \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044f \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442" ], - "alexaRank": 1201752, + "alexaRank": 1191182, "urlMain": "http://33bru.com/", "url": "http://{username}.33bru.com/", "usernameClaimed": "adam", @@ -219,7 +219,7 @@ "absenceStrs": [ "The specified member cannot be found" ], - "alexaRank": 1032185, + "alexaRank": 1023505, "urlMain": "https://www.3dcadforums.com/", "url": "https://www.3dcadforums.com/members/?username={username}", "usernameClaimed": "adam", @@ -230,7 +230,7 @@ "ru" ], "checkType": "status_code", - "alexaRank": 11233, + "alexaRank": 11178, "urlMain": "https://3ddd.ru", "url": "https://3ddd.ru/users/{username}", "usernameClaimed": "adam", @@ -241,7 +241,7 @@ "ru" ], "engine": "vBulletin", - "alexaRank": 6214, + "alexaRank": 6125, "urlMain": "http://forum.3dnews.ru/", "usernameClaimed": "red", "usernameUnclaimed": "noonewouldeverusethis7" @@ -251,7 +251,7 @@ "ru" ], "checkType": "response_url", - "alexaRank": 80286, + "alexaRank": 80128, "urlMain": "https://3dtoday.ru/", "url": "https://3dtoday.ru/blogs/{username}", "usernameClaimed": "adam", @@ -262,7 +262,7 @@ "ru" ], "engine": "vBulletin", - "alexaRank": 254345, + "alexaRank": 254696, "urlMain": "https://4cheat.ru", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" @@ -273,7 +273,7 @@ "ru" ], "engine": "XenForo", - "alexaRank": 77752, + "alexaRank": 76277, "urlMain": "https://4gameforum.com", "usernameClaimed": "persty", "usernameUnclaimed": "noonewouldeverusethis7" @@ -286,7 +286,7 @@ "absenceStrs": [ "\u041a \u0441\u043e\u0436\u0430\u043b\u0435\u043d\u0438\u044e, \u0412\u0430\u0448 \u043f\u043e\u0438\u0441\u043a \u043d\u0435 \u0434\u0430\u043b \u043d\u0438\u043a\u0430\u043a\u0438\u0445 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u0432." ], - "alexaRank": 3220, + "alexaRank": 3210, "urlMain": "https://4pda.ru/", "url": "https://4pda.ru/forum/index.php?act=search&source=pst&noform=1&username={username}", "usernameClaimed": "green", @@ -297,7 +297,7 @@ "ru" ], "checkType": "status_code", - "alexaRank": 293861, + "alexaRank": 292124, "urlMain": "https://4stor.ru", "url": "https://4stor.ru/user/{username}", "usernameClaimed": "adam", @@ -317,7 +317,7 @@ "absenceStrs": [ "No message available" ], - "alexaRank": 2898, + "alexaRank": 2868, "urlMain": "https://500px.com/", "url": "https://500px.com/p/{username}", "usernameClaimed": "blue", @@ -348,7 +348,7 @@ "us" ], "checkType": "status_code", - "alexaRank": 49793, + "alexaRank": 50413, "urlMain": "https://www.7cups.com/", "url": "https://www.7cups.com/@{username}", "usernameClaimed": "blue", @@ -359,7 +359,7 @@ "ru" ], "checkType": "status_code", - "alexaRank": 16183, + "alexaRank": 16095, "urlMain": "https://7dach.ru/", "url": "https://7dach.ru/profile/{username}", "usernameClaimed": "adam", @@ -370,7 +370,7 @@ "ru" ], "checkType": "status_code", - "alexaRank": 37296, + "alexaRank": 37168, "urlMain": "https://blog.7ya.ru", "url": "https://blog.7ya.ru/{username}/", "usernameClaimed": "trotter", @@ -382,7 +382,7 @@ "us" ], "checkType": "status_code", - "alexaRank": 457, + "alexaRank": 458, "urlMain": "https://www.9gag.com/", "url": "https://www.9gag.com/u/{username}", "usernameClaimed": "blue", @@ -410,7 +410,7 @@ "absenceStrs": [ "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u0441 \u0442\u0430\u043a\u0438\u043c \u0438\u043c\u0435\u043d\u0435\u043c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d." ], - "alexaRank": 8512255, + "alexaRank": 8449116, "urlMain": "https://aback.com.ua", "url": "https://aback.com.ua/user/{username}", "usernameClaimed": "adam", @@ -422,7 +422,7 @@ "in" ], "checkType": "status_code", - "alexaRank": 7646, + "alexaRank": 7733, "urlMain": "https://about.me/", "url": "https://about.me/{username}", "usernameClaimed": "blue", @@ -436,7 +436,7 @@ "absenceStrs": [ "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u0437\u0430\u0440\u0435\u0433\u0438\u0441\u0442\u0440\u0438\u0440\u043e\u0432\u0430\u043d \u0438 \u043d\u0435 \u0438\u043c\u0435\u0435\u0442 \u043f\u0440\u043e\u0444\u0438\u043b\u044f \u0434\u043b\u044f \u043f\u0440\u043e\u0441\u043c\u043e\u0442\u0440\u0430." ], - "alexaRank": 2506366, + "alexaRank": 2484621, "urlMain": "http://aboutcar.ru", "url": "http://aboutcar.ru/members/{username}.html", "usernameClaimed": "krolenya", @@ -459,7 +459,7 @@ "ru" ], "checkType": "status_code", - "alexaRank": 156851, + "alexaRank": 157110, "urlMain": "https://acomics.ru", "url": "https://acomics.ru/-{username}", "usernameClaimed": "Garage", @@ -474,7 +474,7 @@ "absenceStrs": [ "