mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Merge pull request #115 from soxoj/submit-source-improving
Added some new sites, implemented filtering by source site with `--na…
This commit is contained in:
+2
-1
@@ -19,6 +19,7 @@ from .executors import AsyncioSimpleExecutor, AsyncioProgressbarQueueExecutor
|
||||
from .result import QueryResult, QueryStatus
|
||||
from .sites import MaigretDatabase, MaigretSite
|
||||
from .types import CheckError
|
||||
from .utils import get_random_user_agent
|
||||
|
||||
|
||||
supported_recursive_search_ids = (
|
||||
@@ -383,7 +384,7 @@ async def maigret(username, site_dict, logger, query_notify=None,
|
||||
results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
|
||||
'User-Agent': get_random_user_agent(),
|
||||
}
|
||||
|
||||
headers.update(site.headers)
|
||||
|
||||
+1
-1
@@ -275,7 +275,7 @@ async def main():
|
||||
site_data = get_top_sites_for_id(args.id_type)
|
||||
|
||||
if args.new_site_to_submit:
|
||||
is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file)
|
||||
is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file, logger)
|
||||
if is_submitted:
|
||||
db.save_to_file(args.db_file)
|
||||
|
||||
|
||||
+111
-39
@@ -5436,13 +5436,12 @@
|
||||
},
|
||||
"Gitmemory": {
|
||||
"tags": [
|
||||
"coding",
|
||||
"github",
|
||||
"in"
|
||||
"coding"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": "Oops,404",
|
||||
"alexaRank": 6827,
|
||||
"source": "GitHub",
|
||||
"url": "https://www.gitmemory.com/{username}",
|
||||
"urlMain": "https://www.gitmemory.com",
|
||||
"usernameClaimed": "adam",
|
||||
@@ -5746,12 +5745,11 @@
|
||||
},
|
||||
"Gramho": {
|
||||
"tags": [
|
||||
"instagram",
|
||||
"jp",
|
||||
"photo"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 4445,
|
||||
"source": "Instagram",
|
||||
"url": "https://gramho.com/explore-hashtag/{username}",
|
||||
"urlMain": "https://gramho.com/",
|
||||
"usernameClaimed": "adam",
|
||||
@@ -7228,13 +7226,12 @@
|
||||
},
|
||||
"Libraries": {
|
||||
"tags": [
|
||||
"coding",
|
||||
"github",
|
||||
"in"
|
||||
"coding"
|
||||
],
|
||||
"regexCheck": "^[^\\.]+$",
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 65552,
|
||||
"source": "GitHub",
|
||||
"url": "https://libraries.io/github/{username}/",
|
||||
"urlMain": "https://libraries.io",
|
||||
"usernameClaimed": "snooppr",
|
||||
@@ -9825,9 +9822,7 @@
|
||||
},
|
||||
"Picuki": {
|
||||
"tags": [
|
||||
"instagram",
|
||||
"photo",
|
||||
"us"
|
||||
"photo"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
@@ -11722,8 +11717,9 @@
|
||||
},
|
||||
"Shutterstock": {
|
||||
"tags": [
|
||||
"fi",
|
||||
"us"
|
||||
"photo",
|
||||
"music",
|
||||
"stock"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": "T\u00e4m\u00e4p\u00e4 yll\u00e4tt\u00e4v\u00e4\u00e4...",
|
||||
@@ -12244,9 +12240,7 @@
|
||||
},
|
||||
"Steam": {
|
||||
"tags": [
|
||||
"gaming",
|
||||
"steam",
|
||||
"us"
|
||||
"gaming"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": "The specified profile could not be found",
|
||||
@@ -12256,14 +12250,28 @@
|
||||
"usernameClaimed": "blue",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"SteamGroup": {
|
||||
"Steam (by id)": {
|
||||
"tags": [
|
||||
"steam",
|
||||
"us"
|
||||
"gaming"
|
||||
],
|
||||
"type": "steam_id",
|
||||
"checkType": "message",
|
||||
"absenceStrs": "The specified profile could not be found",
|
||||
"alexaRank": 370,
|
||||
"source": "Steam",
|
||||
"url": "https://steamcommunity.com/profiles/{username}",
|
||||
"urlMain": "https://steamcommunity.com/",
|
||||
"usernameClaimed": "76561197960287930",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Steam (Group)": {
|
||||
"tags": [
|
||||
"gaming"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": "No group could be retrieved for the given URL",
|
||||
"alexaRank": 370,
|
||||
"source": "Steam",
|
||||
"url": "https://steamcommunity.com/groups/{username}",
|
||||
"urlMain": "https://steamcommunity.com/",
|
||||
"usernameClaimed": "blue",
|
||||
@@ -12271,14 +12279,12 @@
|
||||
},
|
||||
"Steamid": {
|
||||
"tags": [
|
||||
"eg",
|
||||
"gaming",
|
||||
"steam",
|
||||
"us"
|
||||
"gaming"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": "<div class=\"alert alert-warning\">Profile not found</div>",
|
||||
"alexaRank": 302717,
|
||||
"source": "Steam",
|
||||
"url": "https://steamid.uk/profile/{username}",
|
||||
"urlMain": "https://steamid.uk/",
|
||||
"usernameClaimed": "blue",
|
||||
@@ -12286,15 +12292,13 @@
|
||||
},
|
||||
"Steamid (by id)": {
|
||||
"tags": [
|
||||
"eg",
|
||||
"gaming",
|
||||
"steam",
|
||||
"us"
|
||||
"gaming"
|
||||
],
|
||||
"type": "steam_id",
|
||||
"checkType": "message",
|
||||
"absenceStrs": "<div class=\"alert alert-warning\">Profile not found</div>",
|
||||
"alexaRank": 302717,
|
||||
"source": "Steam",
|
||||
"url": "https://steamid.uk/profile/{username}",
|
||||
"urlMain": "https://steamid.uk/",
|
||||
"usernameClaimed": "76561197982198022",
|
||||
@@ -12302,9 +12306,7 @@
|
||||
},
|
||||
"Steamidfinder": {
|
||||
"tags": [
|
||||
"gaming",
|
||||
"steam",
|
||||
"us"
|
||||
"gaming"
|
||||
],
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
@@ -12314,6 +12316,7 @@
|
||||
"could not be found."
|
||||
],
|
||||
"alexaRank": 72851,
|
||||
"source": "Steam",
|
||||
"url": "https://steamidfinder.com/lookup/{username}",
|
||||
"urlMain": "https://steamidfinder.com",
|
||||
"usernameClaimed": "channel",
|
||||
@@ -12321,9 +12324,7 @@
|
||||
},
|
||||
"Steamidfinder (by id)": {
|
||||
"tags": [
|
||||
"gaming",
|
||||
"steam",
|
||||
"us"
|
||||
"gaming"
|
||||
],
|
||||
"type": "steam_id",
|
||||
"checkType": "message",
|
||||
@@ -12334,6 +12335,7 @@
|
||||
"could not be found."
|
||||
],
|
||||
"alexaRank": 72851,
|
||||
"source": "Steam",
|
||||
"url": "https://steamidfinder.com/lookup/{username}",
|
||||
"urlMain": "https://steamidfinder.com",
|
||||
"usernameClaimed": "76561197982198022",
|
||||
@@ -14688,6 +14690,7 @@
|
||||
"\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u0441\u043a\u0440\u044b\u043b \u0441\u0432\u043e\u044e \u043f\u0443\u0431\u043b\u0438\u0447\u043d\u0443\u044e \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0443"
|
||||
],
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://reviews.yandex.ru/user/{username}",
|
||||
"urlMain": "https://yandex.ru/",
|
||||
"usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c",
|
||||
@@ -14700,6 +14703,7 @@
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://yandex.ru/bugbounty/researchers/{username}/",
|
||||
"urlMain": "https://yandex.ru/bugbounty/",
|
||||
"usernameClaimed": "pyrk1",
|
||||
@@ -14722,18 +14726,21 @@
|
||||
],
|
||||
"absenceStrs": "cl-not-found-content__title",
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://yandex.ru/collections/user/{username}",
|
||||
"urlMain": "https://yandex.ru/collections/",
|
||||
"usernameClaimed": "yandex",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"YandexLocal": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"ru"
|
||||
],
|
||||
"type": "yandex_public_id",
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://local.yandex.ru/users/{username}",
|
||||
"urlMain": "https://local.yandex.ru/",
|
||||
"usernameClaimed": "gp7v6ufryzw3m1nvdj4ycexa8g",
|
||||
@@ -14747,6 +14754,7 @@
|
||||
"checkType": "message",
|
||||
"absenceStrs": "//yastatic.net/market-export/_/i/zero-state/404.svg",
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://market.yandex.ru/user/{username}",
|
||||
"urlMain": "https://market.yandex.ru/",
|
||||
"usernameClaimed": "6j2uh4rhp5d9gqgbynaqy2p75m",
|
||||
@@ -14763,6 +14771,7 @@
|
||||
"urlProbe": "https://music.yandex.ru/handlers/library.jsx?owner={username}",
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://music.yandex.ru/users/{username}/playlists",
|
||||
"urlMain": "https://music.yandex.ru/",
|
||||
"usernameClaimed": "YandexMusic",
|
||||
@@ -14785,6 +14794,7 @@
|
||||
"type": "yandex_public_id",
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://yandex.ru/q/profile/{username}",
|
||||
"urlMain": "https://yandex.ru/q/",
|
||||
"usernameClaimed": "blue",
|
||||
@@ -14796,6 +14806,7 @@
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://zen.yandex.ru/{username}",
|
||||
"urlMain": "https://zen.yandex.ru",
|
||||
"usernameClaimed": "tema",
|
||||
@@ -14808,6 +14819,7 @@
|
||||
"type": "yandex_public_id",
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 48,
|
||||
"source": "Yandex",
|
||||
"url": "https://zen.yandex.ru/user/{username}",
|
||||
"urlMain": "https://zen.yandex.ru",
|
||||
"usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c",
|
||||
@@ -18124,8 +18136,7 @@
|
||||
"tracr.co": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"gaming",
|
||||
"discord"
|
||||
"gaming"
|
||||
],
|
||||
"errors": {
|
||||
"502 - Bad Gateway": "Site error",
|
||||
@@ -18134,6 +18145,7 @@
|
||||
"regexCheck": "^[A-Za-z0-9]{2,32}$",
|
||||
"checkType": "message",
|
||||
"absenceStrs": "No search results",
|
||||
"source": "Discord",
|
||||
"url": "https://tracr.co/users/1/{username}",
|
||||
"urlMain": "https://tracr.co/",
|
||||
"usernameClaimed": "blue",
|
||||
@@ -18171,8 +18183,7 @@
|
||||
},
|
||||
"uID.me (by username)": {
|
||||
"tags": [
|
||||
"ru",
|
||||
"ucoz"
|
||||
"ru"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 24715,
|
||||
@@ -18183,8 +18194,7 @@
|
||||
},
|
||||
"uID.me (by uguid)": {
|
||||
"tags": [
|
||||
"ru",
|
||||
"ucoz"
|
||||
"ru"
|
||||
],
|
||||
"type": "uidme_uguid",
|
||||
"checkType": "status_code",
|
||||
@@ -22825,6 +22835,7 @@
|
||||
],
|
||||
"engine": "engineRedirect",
|
||||
"alexaRank": 72,
|
||||
"source": "GitHub",
|
||||
"url": "https://gist.github.com/{username}",
|
||||
"urlMain": "https://gist.github.com",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
@@ -23664,6 +23675,9 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"pikabu.monster": {
|
||||
"tags": [
|
||||
"ru"
|
||||
],
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"usertotalcomments",
|
||||
@@ -23677,6 +23691,64 @@
|
||||
"urlMain": "https://pikabu.monster",
|
||||
"usernameClaimed": "Avezenit",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"steamdb.info": {
|
||||
"tags": [
|
||||
"gaming"
|
||||
],
|
||||
"type": "steam_id",
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"profileForm",
|
||||
" player-name",
|
||||
" progress",
|
||||
" data-not-game="
|
||||
],
|
||||
"absenceStrs": [
|
||||
"error-page",
|
||||
" Error 404"
|
||||
],
|
||||
"source": "Steam",
|
||||
"url": "https://steamdb.info/calculator/{username}",
|
||||
"urlMain": "https://steamdb.info",
|
||||
"usernameClaimed": "76561197978866368",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Niftygateway": {
|
||||
"urlProbe": "https://api.niftygateway.com/user/profile-and-offchain-nifties-by-url/?profile_url={username}",
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"profile_url",
|
||||
"name",
|
||||
"profile_pic_url",
|
||||
"verified",
|
||||
"bio"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"not_found",
|
||||
" User profile not located in our system."
|
||||
],
|
||||
"url": "https://niftygateway.com/profile/{username}",
|
||||
"urlMain": "https://api.niftygateway.com",
|
||||
"usernameClaimed": "admin",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"opensea.io": {
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"username\\",
|
||||
"lastSale",
|
||||
"publicUsername",
|
||||
"name",
|
||||
"user"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"><div width="
|
||||
],
|
||||
"url": "https://opensea.io/accounts/{username}",
|
||||
"urlMain": "https://opensea.io",
|
||||
"usernameClaimed": "admin",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
}
|
||||
},
|
||||
"engines": {
|
||||
|
||||
+3
-2
@@ -15,7 +15,7 @@ SUPPORTED_TAGS = [
|
||||
'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport',
|
||||
'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified',
|
||||
'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent',
|
||||
'science', 'medicine',
|
||||
'science', 'medicine', 'reading', 'stock',
|
||||
]
|
||||
|
||||
|
||||
@@ -199,13 +199,14 @@ class MaigretDatabase:
|
||||
normalized_tags = list(map(str.lower, tags))
|
||||
|
||||
is_name_ok = lambda x: x.name.lower() in normalized_names
|
||||
is_source_ok = lambda x: x.source and x.source.lower() in normalized_names
|
||||
is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
|
||||
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
|
||||
is_disabled_needed = lambda x: not x.disabled or ('disabled' in tags or disabled)
|
||||
is_id_type_ok = lambda x: x.type == id_type
|
||||
|
||||
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
|
||||
filter_names_fun = lambda x: not names or is_name_ok(x)
|
||||
filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
|
||||
|
||||
filter_fun = lambda x: filter_tags_engines_fun(x) and filter_names_fun(x) \
|
||||
and is_disabled_needed(x) and is_id_type_ok(x)
|
||||
|
||||
+15
-15
@@ -3,6 +3,7 @@ import difflib
|
||||
import requests
|
||||
|
||||
from .checking import *
|
||||
from .utils import get_random_user_agent
|
||||
|
||||
|
||||
DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
|
||||
@@ -11,7 +12,7 @@ DESIRED_STRINGS = ["username", "not found", "пользователь", "profile
|
||||
SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
|
||||
|
||||
HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
|
||||
'User-Agent': get_random_user_agent(),
|
||||
}
|
||||
|
||||
RATIO = 0.6
|
||||
@@ -125,7 +126,7 @@ async def detect_known_engine(db, url_exists, url_mainpage):
|
||||
return None
|
||||
|
||||
|
||||
async def check_features_manually(db, url_exists, url_mainpage, cookie_file, redirects=False):
|
||||
async def check_features_manually(db, url_exists, url_mainpage, cookie_file, logger, redirects=True):
|
||||
url_parts = url_exists.split('/')
|
||||
supposed_username = url_parts[-1]
|
||||
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
|
||||
@@ -143,7 +144,13 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file, red
|
||||
cookie_dict = {c.key: c.value for c in cookie_jar}
|
||||
|
||||
exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
|
||||
logger.debug(exists_resp.status_code)
|
||||
logger.debug(exists_resp.text)
|
||||
|
||||
non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
|
||||
logger.debug(non_exists_resp.status_code)
|
||||
logger.debug(non_exists_resp.text)
|
||||
|
||||
|
||||
a = exists_resp.text
|
||||
b = non_exists_resp.text
|
||||
@@ -187,7 +194,8 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file, red
|
||||
site = MaigretSite(url_mainpage.split('/')[-1], site_data)
|
||||
return site
|
||||
|
||||
async def submit_dialog(db, url_exists, cookie_file):
|
||||
|
||||
async def submit_dialog(db, url_exists, cookie_file, logger):
|
||||
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
|
||||
domain_raw = domain_raw.split('/')[0]
|
||||
|
||||
@@ -208,19 +216,11 @@ async def submit_dialog(db, url_exists, cookie_file):
|
||||
sites = await detect_known_engine(db, url_exists, url_mainpage)
|
||||
if not sites:
|
||||
print('Unable to detect site engine, lets generate checking features')
|
||||
sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file)]
|
||||
sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file, logger)]
|
||||
|
||||
print(sites[0].__dict__)
|
||||
logger.debug(sites[0].__dict__)
|
||||
|
||||
sem = asyncio.Semaphore(1)
|
||||
log_level = logging.INFO
|
||||
logging.basicConfig(
|
||||
format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
|
||||
datefmt='%H:%M:%S',
|
||||
level=log_level
|
||||
)
|
||||
logger = logging.getLogger('site-submit')
|
||||
logger.setLevel(log_level)
|
||||
|
||||
found = False
|
||||
chosen_site = None
|
||||
@@ -236,9 +236,9 @@ async def submit_dialog(db, url_exists, cookie_file):
|
||||
print('Try to run this mode again and increase features count or choose others.')
|
||||
else:
|
||||
if input(f'Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y':
|
||||
print(chosen_site.json)
|
||||
logger.debug(chosen_site.json)
|
||||
site_data = chosen_site.strip_engine_data()
|
||||
print(site_data.json)
|
||||
logger.debug(site_data.json)
|
||||
db.update_site(site_data)
|
||||
return True
|
||||
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import re
|
||||
import random
|
||||
|
||||
|
||||
DEFAULT_USER_AGENTS = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
|
||||
]
|
||||
|
||||
|
||||
class CaseConverter:
|
||||
@@ -76,3 +82,7 @@ def get_dict_ascii_tree(items, prepend='', new_line=True):
|
||||
text = text[1:]
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def get_random_user_agent():
    """Return one User-Agent string picked at random from DEFAULT_USER_AGENTS."""
    agents = DEFAULT_USER_AGENTS
    return random.choice(agents)
|
||||
|
||||
Reference in New Issue
Block a user