diff --git a/maigret/checking.py b/maigret/checking.py index 9b4b5a8..6e9fea5 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -178,6 +178,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig except Exception as e: logger.warning(f'Failed activation {method} for site {site.name}: {e}') + site_name = site.pretty_name # presense flags # True by default presense_flags = site.presense_strs @@ -197,7 +198,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig if check_error: logger.debug(check_error) result = QueryResult(username, - site.name, + site_name, url, QueryStatus.UNKNOWN, query_time=response_time, @@ -211,13 +212,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set]) if not is_absence_detected and is_presense_detected: result = QueryResult(username, - site.name, + site_name, url, QueryStatus.CLAIMED, query_time=response_time, tags=fulltags) else: result = QueryResult(username, - site.name, + site_name, url, QueryStatus.AVAILABLE, query_time=response_time, tags=fulltags) @@ -225,13 +226,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig # Checks if the status code of the response is 2XX if (not status_code >= 300 or status_code < 200) and is_presense_detected: result = QueryResult(username, - site.name, + site_name, url, QueryStatus.CLAIMED, query_time=response_time, tags=fulltags) else: result = QueryResult(username, - site.name, + site_name, url, QueryStatus.AVAILABLE, query_time=response_time, tags=fulltags) @@ -243,13 +244,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig # forward to some odd redirect). if 200 <= status_code < 300 and is_presense_detected: result = QueryResult(username, - site.name, + site_name, url, QueryStatus.CLAIMED, query_time=response_time, tags=fulltags) else: result = QueryResult(username, - site.name, + site_name, url, QueryStatus.AVAILABLE, query_time=response_time, tags=fulltags) diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 5a8d458..297b529 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -9835,6 +9835,7 @@ "Error 404" ], "alexaRank": 2076, + "source": "Instagram", "url": "https://www.picuki.com/profile/{username}", "urlMain": "https://www.picuki.com/", "usernameClaimed": "adam", @@ -12151,7 +12152,7 @@ "us" ], "headers": { - "authorization": "Bearer BQAjb32z4TLh0t19LDuYfk2BV3gUXCpqyUuy2gBOyJTN_2xoZlN4AW1B6ZVmdKMDcI3Hc8agrrQsKbQZE90" + "authorization": "Bearer BQAEeuyBT6S535Anlx4wU-pfPjjgiE8r2e7j0eOSnwZjSvjFvQgDzxwV__03-WNbwxPKyGehoJ5pQCBwUqs" }, "errors": { "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" @@ -13455,7 +13456,7 @@ "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", - "x-guest-token": "1386060728566681601" + "x-guest-token": "1387733472027070474" }, "errors": { "Bad guest token": "x-guest-token update required" @@ -13832,7 +13833,7 @@ "video" ], "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTkzMDI0NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.fN8PQIEkzQjfu7znGoIaLEP9Qr6bV8JbA2ZwpBSFI5E" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk2OTczNjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.yLRq0lhenTYfe0EKKJsk5HZJZt3ykUVNBGuiMCC5HR4" }, "activation": { "url": "https://vimeo.com/_rv/viewer", @@ -23602,6 +23603,80 @@ "urlMain": "https://tapd.co", "usernameClaimed": "blue", "usernameUnclaimed": "noonewouldeverusethis7" + }, + "wblitz.net": { + "checkType": "message", + "presenseStrs": [ + "profileBlock", + "tournaments", + "serverna", + " role=", + " name=" + ], + "absenceStrs": [ + "404 \u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430

404 \u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430

" + ], + "url": "https://wblitz.net/stat/ru/{username}", + "urlMain": "https://wblitz.net", + "usernameClaimed": "lucklev12", + "usernameUnclaimed": "noonewouldeverusethis7" + }, + "unc.ua": { + "checkType": "message", + "presenseStrs": [ + "page-user_profile" + ], + "absenceStrs": [ + "Error Site" + ], + "url": "https://unc.ua/{username}", + "urlMain": "https://unc.ua", + "usernameClaimed": "admin", + "usernameUnclaimed": "noonewouldeverusethis7" + }, + "kloomba.com": { + "checkType": "message", + "presenseStrs": [ + "name", + " role=", + " main" + ], + "absenceStrs": [ + "error-page" + ], + "url": "https://kloomba.com/users/{username}", + "urlMain": "https://kloomba.com", + "usernameClaimed": "dima", + "usernameUnclaimed": "noonewouldeverusethis7" + }, + "nevrotic.net": { + "checkType": "message", + "presenseStrs": [ + "profile-tabs", + " profile-rating" + ], + "absenceStrs": [ + "table-404" + ], + "url": "http://nevrotic.net/user/{username}", + "urlMain": "http://nevrotic.net", + "usernameClaimed": "admin", + "usernameUnclaimed": "noonewouldeverusethis7" + }, + "pikabu.monster": { + "checkType": "message", + "presenseStrs": [ + "usertotalcomments", + " usertotalposts" + ], + "absenceStrs": [ + "\u041e\u0448\u0438\u0431\u043a\u0430" + ], + "source": "Pikabu", + "url": "https://pikabu.monster/user/{username}-summary", + "urlMain": "https://pikabu.monster", + "usernameClaimed": "Avezenit", + "usernameUnclaimed": "noonewouldeverusethis7" } }, "engines": { diff --git a/maigret/sites.py b/maigret/sites.py index f52055d..39b33c9 100644 --- a/maigret/sites.py +++ b/maigret/sites.py @@ -69,6 +69,7 @@ class MaigretSite: self.engine_obj = None self.request_future = None self.alexa_rank = None + self.source = None for k, v in information.items(): self.__dict__[CaseConverter.camel_to_snake(k)] = v @@ -99,6 +100,12 @@ class MaigretSite: return None + @property + def pretty_name(self): + if self.source: + return f'{self.name} [{self.source}]' + return self.name + @property def json(self): result = {} diff --git a/maigret/submit.py b/maigret/submit.py index c5865f2..94bd90b 100644 --- a/maigret/submit.py +++ b/maigret/submit.py @@ -10,6 +10,10 @@ DESIRED_STRINGS = ["username", "not found", "пользователь", "profile SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john'] +HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0', +} + RATIO = 0.6 TOP_FEATURES = 5 URL_RE = re.compile(r'https?://(www\.)?') @@ -121,7 +125,7 @@ async def detect_known_engine(db, url_exists, url_mainpage): return None -async def check_features_manually(db, url_exists, url_mainpage, cookie_file): +async def check_features_manually(db, url_exists, url_mainpage, cookie_file, redirects=False): url_parts = url_exists.split('/') supposed_username = url_parts[-1] new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ') @@ -138,8 +142,11 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file): cookie_jar = await import_aiohttp_cookies(cookie_file) cookie_dict = {c.key: c.value for c in cookie_jar} - a = requests.get(url_exists, cookies=cookie_dict).text - b = requests.get(url_not_exists, cookies=cookie_dict).text + exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects) + non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects) + + a = exists_resp.text + b = non_exists_resp.text tokens_a = set(a.split('"')) tokens_b = set(b.split('"')) @@ -147,6 +154,9 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file): a_minus_b = tokens_a.difference(tokens_b) b_minus_a = tokens_b.difference(tokens_a) + if len(a_minus_b) == len(b_minus_a) == 0: + print('The pages for existing and non-existing account are the same!') + top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES) presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]