diff --git a/maigret/checking.py b/maigret/checking.py
index 9b4b5a8..6e9fea5 100644
--- a/maigret/checking.py
+++ b/maigret/checking.py
@@ -178,6 +178,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
except Exception as e:
logger.warning(f'Failed activation {method} for site {site.name}: {e}')
+ site_name = site.pretty_name
# presense flags
# True by default
presense_flags = site.presense_strs
@@ -197,7 +198,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
if check_error:
logger.debug(check_error)
result = QueryResult(username,
- site.name,
+ site_name,
url,
QueryStatus.UNKNOWN,
query_time=response_time,
@@ -211,13 +212,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
if not is_absence_detected and is_presense_detected:
result = QueryResult(username,
- site.name,
+ site_name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
- site.name,
+ site_name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
@@ -225,13 +226,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
# Checks if the status code of the response is 2XX
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
result = QueryResult(username,
- site.name,
+ site_name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
- site.name,
+ site_name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
@@ -243,13 +244,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
# forward to some odd redirect).
if 200 <= status_code < 300 and is_presense_detected:
result = QueryResult(username,
- site.name,
+ site_name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
- site.name,
+ site_name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
diff --git a/maigret/resources/data.json b/maigret/resources/data.json
index 5a8d458..297b529 100644
--- a/maigret/resources/data.json
+++ b/maigret/resources/data.json
@@ -9835,6 +9835,7 @@
"
Error 404"
],
"alexaRank": 2076,
+ "source": "Instagram",
"url": "https://www.picuki.com/profile/{username}",
"urlMain": "https://www.picuki.com/",
"usernameClaimed": "adam",
@@ -12151,7 +12152,7 @@
"us"
],
"headers": {
- "authorization": "Bearer BQAjb32z4TLh0t19LDuYfk2BV3gUXCpqyUuy2gBOyJTN_2xoZlN4AW1B6ZVmdKMDcI3Hc8agrrQsKbQZE90"
+ "authorization": "Bearer BQAEeuyBT6S535Anlx4wU-pfPjjgiE8r2e7j0eOSnwZjSvjFvQgDzxwV__03-WNbwxPKyGehoJ5pQCBwUqs"
},
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -13455,7 +13456,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
- "x-guest-token": "1386060728566681601"
+ "x-guest-token": "1387733472027070474"
},
"errors": {
"Bad guest token": "x-guest-token update required"
@@ -13832,7 +13833,7 @@
"video"
],
"headers": {
- "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTkzMDI0NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.fN8PQIEkzQjfu7znGoIaLEP9Qr6bV8JbA2ZwpBSFI5E"
+ "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk2OTczNjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.yLRq0lhenTYfe0EKKJsk5HZJZt3ykUVNBGuiMCC5HR4"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -23602,6 +23603,80 @@
"urlMain": "https://tapd.co",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "wblitz.net": {
+ "checkType": "message",
+ "presenseStrs": [
+ "profileBlock",
+ "tournaments",
+ "serverna",
+ " role=",
+ " name="
+ ],
+ "absenceStrs": [
+ "404 \u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430404 \u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430
"
+ ],
+ "url": "https://wblitz.net/stat/ru/{username}",
+ "urlMain": "https://wblitz.net",
+ "usernameClaimed": "lucklev12",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "unc.ua": {
+ "checkType": "message",
+ "presenseStrs": [
+ "page-user_profile"
+ ],
+ "absenceStrs": [
+ "Error Site"
+ ],
+ "url": "https://unc.ua/{username}",
+ "urlMain": "https://unc.ua",
+ "usernameClaimed": "admin",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "kloomba.com": {
+ "checkType": "message",
+ "presenseStrs": [
+ "name",
+ " role=",
+ " main"
+ ],
+ "absenceStrs": [
+ "error-page"
+ ],
+ "url": "https://kloomba.com/users/{username}",
+ "urlMain": "https://kloomba.com",
+ "usernameClaimed": "dima",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "nevrotic.net": {
+ "checkType": "message",
+ "presenseStrs": [
+ "profile-tabs",
+ " profile-rating"
+ ],
+ "absenceStrs": [
+ "table-404"
+ ],
+ "url": "http://nevrotic.net/user/{username}",
+ "urlMain": "http://nevrotic.net",
+ "usernameClaimed": "admin",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "pikabu.monster": {
+ "checkType": "message",
+ "presenseStrs": [
+ "usertotalcomments",
+ " usertotalposts"
+ ],
+ "absenceStrs": [
+ "\u041e\u0448\u0438\u0431\u043a\u0430"
+ ],
+ "source": "Pikabu",
+ "url": "https://pikabu.monster/user/{username}-summary",
+ "urlMain": "https://pikabu.monster",
+ "usernameClaimed": "Avezenit",
+ "usernameUnclaimed": "noonewouldeverusethis7"
}
},
"engines": {
diff --git a/maigret/sites.py b/maigret/sites.py
index f52055d..39b33c9 100644
--- a/maigret/sites.py
+++ b/maigret/sites.py
@@ -69,6 +69,7 @@ class MaigretSite:
self.engine_obj = None
self.request_future = None
self.alexa_rank = None
+ self.source = None
for k, v in information.items():
self.__dict__[CaseConverter.camel_to_snake(k)] = v
@@ -99,6 +100,12 @@ class MaigretSite:
return None
+ @property
+ def pretty_name(self):
+ if self.source:
+ return f'{self.name} [{self.source}]'
+ return self.name
+
@property
def json(self):
result = {}
diff --git a/maigret/submit.py b/maigret/submit.py
index c5865f2..94bd90b 100644
--- a/maigret/submit.py
+++ b/maigret/submit.py
@@ -10,6 +10,10 @@ DESIRED_STRINGS = ["username", "not found", "пользователь", "profile
SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
+HEADERS = {
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
+}
+
RATIO = 0.6
TOP_FEATURES = 5
URL_RE = re.compile(r'https?://(www\.)?')
@@ -121,7 +125,7 @@ async def detect_known_engine(db, url_exists, url_mainpage):
return None
-async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
+async def check_features_manually(db, url_exists, url_mainpage, cookie_file, redirects=False):
url_parts = url_exists.split('/')
supposed_username = url_parts[-1]
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
@@ -138,8 +142,11 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
cookie_jar = await import_aiohttp_cookies(cookie_file)
cookie_dict = {c.key: c.value for c in cookie_jar}
- a = requests.get(url_exists, cookies=cookie_dict).text
- b = requests.get(url_not_exists, cookies=cookie_dict).text
+ exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
+ non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
+
+ a = exists_resp.text
+ b = non_exists_resp.text
tokens_a = set(a.split('"'))
tokens_b = set(b.split('"'))
@@ -147,6 +154,9 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
a_minus_b = tokens_a.difference(tokens_b)
b_minus_a = tokens_b.difference(tokens_a)
+ if len(a_minus_b) == len(b_minus_a) == 0:
+ print('The pages for existing and non-existing account are the same!')
+
top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]