mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
Merge pull request #114 from soxoj/new-sites-source-feature
Added some new sites and introduced 'source' feature
This commit is contained in:
+8
-7
@@ -178,6 +178,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
except Exception as e:
|
||||
logger.warning(f'Failed activation {method} for site {site.name}: {e}')
|
||||
|
||||
site_name = site.pretty_name
|
||||
# presence flags
|
||||
# True by default
|
||||
presense_flags = site.presense_strs
|
||||
@@ -197,7 +198,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
if check_error:
|
||||
logger.debug(check_error)
|
||||
result = QueryResult(username,
|
||||
site.name,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.UNKNOWN,
|
||||
query_time=response_time,
|
||||
@@ -211,13 +212,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
|
||||
if not is_absence_detected and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
site.name,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time, tags=fulltags)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
site.name,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time, tags=fulltags)
|
||||
@@ -225,13 +226,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
# Intended to check that the status code of the response is 2XX (note: the condition below reduces to status_code < 300, so 1XX codes also pass)
|
||||
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
site.name,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time, tags=fulltags)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
site.name,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time, tags=fulltags)
|
||||
@@ -243,13 +244,13 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
# forward to some odd redirect).
|
||||
if 200 <= status_code < 300 and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
site.name,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time, tags=fulltags)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
site.name,
|
||||
site_name,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time, tags=fulltags)
|
||||
|
||||
@@ -9835,6 +9835,7 @@
|
||||
"<title>Error 404</title>"
|
||||
],
|
||||
"alexaRank": 2076,
|
||||
"source": "Instagram",
|
||||
"url": "https://www.picuki.com/profile/{username}",
|
||||
"urlMain": "https://www.picuki.com/",
|
||||
"usernameClaimed": "adam",
|
||||
@@ -12151,7 +12152,7 @@
|
||||
"us"
|
||||
],
|
||||
"headers": {
|
||||
"authorization": "Bearer BQAjb32z4TLh0t19LDuYfk2BV3gUXCpqyUuy2gBOyJTN_2xoZlN4AW1B6ZVmdKMDcI3Hc8agrrQsKbQZE90"
|
||||
"authorization": "Bearer BQAEeuyBT6S535Anlx4wU-pfPjjgiE8r2e7j0eOSnwZjSvjFvQgDzxwV__03-WNbwxPKyGehoJ5pQCBwUqs"
|
||||
},
|
||||
"errors": {
|
||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||
@@ -13455,7 +13456,7 @@
|
||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||
"x-guest-token": "1386060728566681601"
|
||||
"x-guest-token": "1387733472027070474"
|
||||
},
|
||||
"errors": {
|
||||
"Bad guest token": "x-guest-token update required"
|
||||
@@ -13832,7 +13833,7 @@
|
||||
"video"
|
||||
],
|
||||
"headers": {
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTkzMDI0NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.fN8PQIEkzQjfu7znGoIaLEP9Qr6bV8JbA2ZwpBSFI5E"
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk2OTczNjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.yLRq0lhenTYfe0EKKJsk5HZJZt3ykUVNBGuiMCC5HR4"
|
||||
},
|
||||
"activation": {
|
||||
"url": "https://vimeo.com/_rv/viewer",
|
||||
@@ -23602,6 +23603,80 @@
|
||||
"urlMain": "https://tapd.co",
|
||||
"usernameClaimed": "blue",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"wblitz.net": {
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"profileBlock",
|
||||
"tournaments",
|
||||
"serverna",
|
||||
" role=",
|
||||
" name="
|
||||
],
|
||||
"absenceStrs": [
|
||||
"<html><head><title>404 \u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430</title></head><body><h2>404 \u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430</h2></body></html>"
|
||||
],
|
||||
"url": "https://wblitz.net/stat/ru/{username}",
|
||||
"urlMain": "https://wblitz.net",
|
||||
"usernameClaimed": "lucklev12",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"unc.ua": {
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"page-user_profile"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"Error Site"
|
||||
],
|
||||
"url": "https://unc.ua/{username}",
|
||||
"urlMain": "https://unc.ua",
|
||||
"usernameClaimed": "admin",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"kloomba.com": {
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"name",
|
||||
" role=",
|
||||
" main"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"error-page"
|
||||
],
|
||||
"url": "https://kloomba.com/users/{username}",
|
||||
"urlMain": "https://kloomba.com",
|
||||
"usernameClaimed": "dima",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"nevrotic.net": {
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"profile-tabs",
|
||||
" profile-rating"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"table-404"
|
||||
],
|
||||
"url": "http://nevrotic.net/user/{username}",
|
||||
"urlMain": "http://nevrotic.net",
|
||||
"usernameClaimed": "admin",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"pikabu.monster": {
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"usertotalcomments",
|
||||
" usertotalposts"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"<title>\u041e\u0448\u0438\u0431\u043a\u0430</title>"
|
||||
],
|
||||
"source": "Pikabu",
|
||||
"url": "https://pikabu.monster/user/{username}-summary",
|
||||
"urlMain": "https://pikabu.monster",
|
||||
"usernameClaimed": "Avezenit",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
}
|
||||
},
|
||||
"engines": {
|
||||
|
||||
@@ -69,6 +69,7 @@ class MaigretSite:
|
||||
self.engine_obj = None
|
||||
self.request_future = None
|
||||
self.alexa_rank = None
|
||||
self.source = None
|
||||
|
||||
for k, v in information.items():
|
||||
self.__dict__[CaseConverter.camel_to_snake(k)] = v
|
||||
@@ -99,6 +100,12 @@ class MaigretSite:
|
||||
|
||||
return None
|
||||
|
||||
@property
|
||||
def pretty_name(self):
|
||||
if self.source:
|
||||
return f'{self.name} [{self.source}]'
|
||||
return self.name
|
||||
|
||||
@property
|
||||
def json(self):
|
||||
result = {}
|
||||
|
||||
+13
-3
@@ -10,6 +10,10 @@ DESIRED_STRINGS = ["username", "not found", "пользователь", "profile
|
||||
|
||||
SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
|
||||
|
||||
HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
|
||||
}
|
||||
|
||||
RATIO = 0.6
|
||||
TOP_FEATURES = 5
|
||||
URL_RE = re.compile(r'https?://(www\.)?')
|
||||
@@ -121,7 +125,7 @@ async def detect_known_engine(db, url_exists, url_mainpage):
|
||||
return None
|
||||
|
||||
|
||||
async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
|
||||
async def check_features_manually(db, url_exists, url_mainpage, cookie_file, redirects=False):
|
||||
url_parts = url_exists.split('/')
|
||||
supposed_username = url_parts[-1]
|
||||
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
|
||||
@@ -138,8 +142,11 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
|
||||
cookie_jar = await import_aiohttp_cookies(cookie_file)
|
||||
cookie_dict = {c.key: c.value for c in cookie_jar}
|
||||
|
||||
a = requests.get(url_exists, cookies=cookie_dict).text
|
||||
b = requests.get(url_not_exists, cookies=cookie_dict).text
|
||||
exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
|
||||
non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
|
||||
|
||||
a = exists_resp.text
|
||||
b = non_exists_resp.text
|
||||
|
||||
tokens_a = set(a.split('"'))
|
||||
tokens_b = set(b.split('"'))
|
||||
@@ -147,6 +154,9 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
|
||||
a_minus_b = tokens_a.difference(tokens_b)
|
||||
b_minus_a = tokens_b.difference(tokens_a)
|
||||
|
||||
if len(a_minus_b) == len(b_minus_a) == 0:
|
||||
print('The pages for existing and non-existing account are the same!')
|
||||
|
||||
top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
|
||||
|
||||
presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]
|
||||
|
||||
Reference in New Issue
Block a user