mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-13 18:05:39 +00:00
Added some new sites
This commit is contained in:
+193
-18
@@ -5795,19 +5795,6 @@
|
||||
"usernameClaimed": "adam",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Giphy": {
|
||||
"tags": [
|
||||
"photo",
|
||||
"us",
|
||||
"video"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 653,
|
||||
"urlMain": "https://giphy.com/",
|
||||
"url": "https://giphy.com/{username}",
|
||||
"usernameClaimed": "blue",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"GipsysTeam": {
|
||||
"tags": [
|
||||
"ru"
|
||||
@@ -8230,6 +8217,7 @@
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"\u0417\u0430\u043f\u0440\u043e\u0448\u0435\u043d\u043d\u0430\u044f \u0432\u0430\u043c\u0438 \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430.",
|
||||
"\u0414\u0430\u043d\u043d\u044b\u0435 \u043e \u0432\u044b\u0431\u0440\u0430\u043d\u043d\u043e\u043c \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u0435 \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u044e\u0442",
|
||||
"Information on selected user does not exist"
|
||||
],
|
||||
@@ -13035,7 +13023,7 @@
|
||||
"us"
|
||||
],
|
||||
"headers": {
|
||||
"authorization": "Bearer BQB2-7eTXELo9F-na1La0I286JG5MpvElF5fQE_teYchfGXgxlVCie_wD4tGR7b6XedgiH7cOQY_PG4YC5Y"
|
||||
"authorization": "Bearer BQBKzy1QSQQO4wR2vRVROUOaj8T9gr0Vkjup9wUkLh0MZDtMEVZ0WEtyoZ_tTc4utIhyvvn9V7URwVWGeuU"
|
||||
},
|
||||
"errors": {
|
||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||
@@ -14463,7 +14451,7 @@
|
||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||
"x-guest-token": "1397282274475978756"
|
||||
"x-guest-token": "1397644352072163331"
|
||||
},
|
||||
"errors": {
|
||||
"Bad guest token": "x-guest-token update required"
|
||||
@@ -14870,7 +14858,7 @@
|
||||
"video"
|
||||
],
|
||||
"headers": {
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjE5NzM5NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.te2LwkItSxRZMIfFYGRKj5ZUpyZaCIgnBpxgfjT2RTA"
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjIwNjAyODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.RBYc81QRYfs9m7yzcGkUXhyA3rGPhQJaoAG8dnt61I4"
|
||||
},
|
||||
"activation": {
|
||||
"url": "https://vimeo.com/_rv/viewer",
|
||||
@@ -16275,8 +16263,8 @@
|
||||
},
|
||||
"author.today": {
|
||||
"tags": [
|
||||
"ru",
|
||||
"reading"
|
||||
"reading",
|
||||
"ru"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 12218,
|
||||
@@ -27769,6 +27757,193 @@
|
||||
"usernameClaimed": "soxoj",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "status_code"
|
||||
},
|
||||
"Ameblo": {
|
||||
"absenceStrs": [
|
||||
"THROW_NOT_FOUND_EXCEPTION"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"profile"
|
||||
],
|
||||
"url": "https://ameblo.jp/{username}",
|
||||
"urlMain": "https://ameblo.jp",
|
||||
"usernameClaimed": "senpai",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 374,
|
||||
"tags": [
|
||||
"blog",
|
||||
"jp"
|
||||
]
|
||||
},
|
||||
"Observable": {
|
||||
"absenceStrs": [
|
||||
"<title>Observable</title>"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"profile_email"
|
||||
],
|
||||
"url": "https://observablehq.com/@{username}",
|
||||
"urlMain": "https://observablehq.com",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 25120,
|
||||
"tags": [
|
||||
"sharing"
|
||||
]
|
||||
},
|
||||
"galactictalk.org": {
|
||||
"urlMain": "https://galactictalk.org",
|
||||
"engine": "Flarum",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"discuss.bootstrapped.fm": {
|
||||
"urlMain": "https://discuss.bootstrapped.fm",
|
||||
"engine": "Discourse",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"discourse.mozilla.org": {
|
||||
"urlMain": "https://discourse.mozilla.org",
|
||||
"engine": "Discourse",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"ipinit.in": {
|
||||
"urlMain": "http://ipinit.in",
|
||||
"engine": "Wordpress/Author",
|
||||
"usernameClaimed": "god",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"donorbox": {
|
||||
"absenceStrs": [
|
||||
"/orgs/new"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"donation_first_name"
|
||||
],
|
||||
"url": "https://donorbox.org/{username}",
|
||||
"urlMain": "https://donorbox.org",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 19812,
|
||||
"tags": [
|
||||
"finance"
|
||||
]
|
||||
},
|
||||
"telescope.ac": {
|
||||
"absenceStrs": [
|
||||
">Not found</h1>"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"og:site_name",
|
||||
"alternate",
|
||||
"article",
|
||||
"project",
|
||||
"og:title"
|
||||
],
|
||||
"url": "https://telescope.ac/{username}",
|
||||
"urlMain": "https://telescope.ac",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 167480,
|
||||
"tags": [
|
||||
"blog"
|
||||
]
|
||||
},
|
||||
"sessionize.com": {
|
||||
"absenceStrs": [
|
||||
"Page Not Found</h3>"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"role=",
|
||||
"filter"
|
||||
],
|
||||
"url": "https://sessionize.com/{username}/",
|
||||
"urlMain": "https://sessionize.com",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 132025,
|
||||
"tags": [
|
||||
"business"
|
||||
]
|
||||
},
|
||||
"getmakerlog.com": {
|
||||
"absenceStrs": [
|
||||
"<title>Home | Makerlog</title>"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"profile",
|
||||
"first_name",
|
||||
"username\\"
|
||||
],
|
||||
"url": "https://getmakerlog.com/@{username}",
|
||||
"urlMain": "https://getmakerlog.com",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 224990,
|
||||
"tags": [
|
||||
"business"
|
||||
]
|
||||
},
|
||||
"giphy.com": {
|
||||
"absenceStrs": [
|
||||
"404 Not Found"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"Giphy",
|
||||
"al:ios:app_name"
|
||||
],
|
||||
"url": "https://giphy.com/channel/{username}",
|
||||
"urlMain": "https://giphy.com",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 695,
|
||||
"tags": [
|
||||
"video"
|
||||
]
|
||||
},
|
||||
"clarity.fm": {
|
||||
"absenceStrs": [
|
||||
"On Demand Business Advice</title"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"user-profile-image"
|
||||
],
|
||||
"url": "https://clarity.fm/{username}",
|
||||
"urlMain": "https://clarity.fm",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 31250,
|
||||
"tags": [
|
||||
"business"
|
||||
]
|
||||
},
|
||||
"videohive.net": {
|
||||
"absenceStrs": [
|
||||
"Page Not Found | VideoHive"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"user-info",
|
||||
"user-info__badges"
|
||||
],
|
||||
"url": "https://videohive.net/user/{username}",
|
||||
"urlMain": "https://videohive.net",
|
||||
"usernameClaimed": "theabbie",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "message",
|
||||
"alexaRank": 4270,
|
||||
"tags": [
|
||||
"video"
|
||||
]
|
||||
}
|
||||
},
|
||||
"engines": {
|
||||
|
||||
+26
-7
@@ -32,6 +32,8 @@ HEADERS = {
|
||||
"User-Agent": get_random_user_agent(),
|
||||
}
|
||||
|
||||
SEPARATORS = "\"'"
|
||||
|
||||
RATIO = 0.6
|
||||
TOP_FEATURES = 5
|
||||
URL_RE = re.compile(r"https?://(www\.)?")
|
||||
@@ -195,7 +197,7 @@ async def detect_known_engine(
|
||||
|
||||
def extract_username_dialog(url):
|
||||
url_parts = url.rstrip("/").split("/")
|
||||
supposed_username = url_parts[-1]
|
||||
supposed_username = url_parts[-1].strip('@')
|
||||
entered_username = input(
|
||||
f'Is "{supposed_username}" a valid username? If not, write it manually: '
|
||||
)
|
||||
@@ -203,38 +205,51 @@ def extract_username_dialog(url):
|
||||
|
||||
|
||||
async def check_features_manually(
|
||||
db, url_exists, url_mainpage, cookie_file, logger, redirects=True
|
||||
db, url_exists, url_mainpage, cookie_file, logger, redirects=False
|
||||
):
|
||||
custom_headers = {}
|
||||
while True:
|
||||
header_key = input('Specify custom header if you need or just press Enter to skip. Header name: ')
|
||||
if not header_key:
|
||||
break
|
||||
header_value = input('Header value: ')
|
||||
custom_headers[header_key.strip()] = header_value.strip()
|
||||
|
||||
supposed_username = extract_username_dialog(url_exists)
|
||||
non_exist_username = "noonewouldeverusethis7"
|
||||
|
||||
url_user = url_exists.replace(supposed_username, "{username}")
|
||||
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
|
||||
|
||||
headers = dict(HEADERS)
|
||||
headers.update(custom_headers)
|
||||
|
||||
# cookies
|
||||
cookie_dict = None
|
||||
if cookie_file:
|
||||
logger.info(f'Use {cookie_file} for cookies')
|
||||
cookie_jar = await import_aiohttp_cookies(cookie_file)
|
||||
cookie_jar = import_aiohttp_cookies(cookie_file)
|
||||
cookie_dict = {c.key: c.value for c in cookie_jar}
|
||||
|
||||
exists_resp = requests.get(
|
||||
url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects
|
||||
url_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
|
||||
)
|
||||
logger.debug(url_exists)
|
||||
logger.debug(exists_resp.status_code)
|
||||
logger.debug(exists_resp.text)
|
||||
|
||||
non_exists_resp = requests.get(
|
||||
url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects
|
||||
url_not_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
|
||||
)
|
||||
logger.debug(url_not_exists)
|
||||
logger.debug(non_exists_resp.status_code)
|
||||
logger.debug(non_exists_resp.text)
|
||||
|
||||
a = exists_resp.text
|
||||
b = non_exists_resp.text
|
||||
|
||||
tokens_a = set(a.split('"'))
|
||||
tokens_b = set(b.split('"'))
|
||||
tokens_a = set(re.split(f'[{SEPARATORS}]', a))
|
||||
tokens_b = set(re.split(f'[{SEPARATORS}]', b))
|
||||
|
||||
a_minus_b = tokens_a.difference(tokens_b)
|
||||
b_minus_a = tokens_b.difference(tokens_a)
|
||||
@@ -276,6 +291,9 @@ async def check_features_manually(
|
||||
"checkType": "message",
|
||||
}
|
||||
|
||||
if headers != HEADERS:
|
||||
site_data['headers'] = headers
|
||||
|
||||
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
||||
return site
|
||||
|
||||
@@ -283,6 +301,7 @@ async def check_features_manually(
|
||||
async def submit_dialog(db, url_exists, cookie_file, logger):
|
||||
domain_raw = URL_RE.sub("", url_exists).strip().strip("/")
|
||||
domain_raw = domain_raw.split("/")[0]
|
||||
logger.info('Domain is %s', domain_raw)
|
||||
|
||||
# check for existence
|
||||
matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
|
||||
|
||||
Reference in New Issue
Block a user