Merge pull request #115 from soxoj/submit-source-improving

Added some new sites, implemented filtering by source site with `--na…
This commit is contained in:
soxoj
2021-04-29 17:18:31 +03:00
committed by GitHub
6 changed files with 142 additions and 58 deletions
+2 -1
View File
@@ -19,6 +19,7 @@ from .executors import AsyncioSimpleExecutor, AsyncioProgressbarQueueExecutor
from .result import QueryResult, QueryStatus from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite from .sites import MaigretDatabase, MaigretSite
from .types import CheckError from .types import CheckError
from .utils import get_random_user_agent
supported_recursive_search_ids = ( supported_recursive_search_ids = (
@@ -383,7 +384,7 @@ async def maigret(username, site_dict, logger, query_notify=None,
results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0', 'User-Agent': get_random_user_agent(),
} }
headers.update(site.headers) headers.update(site.headers)
+1 -1
View File
@@ -275,7 +275,7 @@ async def main():
site_data = get_top_sites_for_id(args.id_type) site_data = get_top_sites_for_id(args.id_type)
if args.new_site_to_submit: if args.new_site_to_submit:
is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file) is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file, logger)
if is_submitted: if is_submitted:
db.save_to_file(args.db_file) db.save_to_file(args.db_file)
+111 -39
View File
@@ -5436,13 +5436,12 @@
}, },
"Gitmemory": { "Gitmemory": {
"tags": [ "tags": [
"coding", "coding"
"github",
"in"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": "Oops,404", "absenceStrs": "Oops,404",
"alexaRank": 6827, "alexaRank": 6827,
"source": "GitHub",
"url": "https://www.gitmemory.com/{username}", "url": "https://www.gitmemory.com/{username}",
"urlMain": "https://www.gitmemory.com", "urlMain": "https://www.gitmemory.com",
"usernameClaimed": "adam", "usernameClaimed": "adam",
@@ -5746,12 +5745,11 @@
}, },
"Gramho": { "Gramho": {
"tags": [ "tags": [
"instagram",
"jp",
"photo" "photo"
], ],
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 4445, "alexaRank": 4445,
"source": "Instagram",
"url": "https://gramho.com/explore-hashtag/{username}", "url": "https://gramho.com/explore-hashtag/{username}",
"urlMain": "https://gramho.com/", "urlMain": "https://gramho.com/",
"usernameClaimed": "adam", "usernameClaimed": "adam",
@@ -7228,13 +7226,12 @@
}, },
"Libraries": { "Libraries": {
"tags": [ "tags": [
"coding", "coding"
"github",
"in"
], ],
"regexCheck": "^[^\\.]+$", "regexCheck": "^[^\\.]+$",
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 65552, "alexaRank": 65552,
"source": "GitHub",
"url": "https://libraries.io/github/{username}/", "url": "https://libraries.io/github/{username}/",
"urlMain": "https://libraries.io", "urlMain": "https://libraries.io",
"usernameClaimed": "snooppr", "usernameClaimed": "snooppr",
@@ -9825,9 +9822,7 @@
}, },
"Picuki": { "Picuki": {
"tags": [ "tags": [
"instagram", "photo"
"photo",
"us"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
@@ -11722,8 +11717,9 @@
}, },
"Shutterstock": { "Shutterstock": {
"tags": [ "tags": [
"fi", "photo",
"us" "music",
"stock"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": "T\u00e4m\u00e4p\u00e4 yll\u00e4tt\u00e4v\u00e4\u00e4...", "absenceStrs": "T\u00e4m\u00e4p\u00e4 yll\u00e4tt\u00e4v\u00e4\u00e4...",
@@ -12244,9 +12240,7 @@
}, },
"Steam": { "Steam": {
"tags": [ "tags": [
"gaming", "gaming"
"steam",
"us"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": "The specified profile could not be found", "absenceStrs": "The specified profile could not be found",
@@ -12256,14 +12250,28 @@
"usernameClaimed": "blue", "usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"SteamGroup": { "Steam (by id)": {
"tags": [ "tags": [
"steam", "gaming"
"us" ],
"type": "steam_id",
"checkType": "message",
"absenceStrs": "The specified profile could not be found",
"alexaRank": 370,
"source": "Steam",
"url": "https://steamcommunity.com/profiles/{username}",
"urlMain": "https://steamcommunity.com/",
"usernameClaimed": "76561197960287930",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Steam (Group)": {
"tags": [
"gaming"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": "No group could be retrieved for the given URL", "absenceStrs": "No group could be retrieved for the given URL",
"alexaRank": 370, "alexaRank": 370,
"source": "Steam",
"url": "https://steamcommunity.com/groups/{username}", "url": "https://steamcommunity.com/groups/{username}",
"urlMain": "https://steamcommunity.com/", "urlMain": "https://steamcommunity.com/",
"usernameClaimed": "blue", "usernameClaimed": "blue",
@@ -12271,14 +12279,12 @@
}, },
"Steamid": { "Steamid": {
"tags": [ "tags": [
"eg", "gaming"
"gaming",
"steam",
"us"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": "<div class=\"alert alert-warning\">Profile not found</div>", "absenceStrs": "<div class=\"alert alert-warning\">Profile not found</div>",
"alexaRank": 302717, "alexaRank": 302717,
"source": "Steam",
"url": "https://steamid.uk/profile/{username}", "url": "https://steamid.uk/profile/{username}",
"urlMain": "https://steamid.uk/", "urlMain": "https://steamid.uk/",
"usernameClaimed": "blue", "usernameClaimed": "blue",
@@ -12286,15 +12292,13 @@
}, },
"Steamid (by id)": { "Steamid (by id)": {
"tags": [ "tags": [
"eg", "gaming"
"gaming",
"steam",
"us"
], ],
"type": "steam_id", "type": "steam_id",
"checkType": "message", "checkType": "message",
"absenceStrs": "<div class=\"alert alert-warning\">Profile not found</div>", "absenceStrs": "<div class=\"alert alert-warning\">Profile not found</div>",
"alexaRank": 302717, "alexaRank": 302717,
"source": "Steam",
"url": "https://steamid.uk/profile/{username}", "url": "https://steamid.uk/profile/{username}",
"urlMain": "https://steamid.uk/", "urlMain": "https://steamid.uk/",
"usernameClaimed": "76561197982198022", "usernameClaimed": "76561197982198022",
@@ -12302,9 +12306,7 @@
}, },
"Steamidfinder": { "Steamidfinder": {
"tags": [ "tags": [
"gaming", "gaming"
"steam",
"us"
], ],
"checkType": "message", "checkType": "message",
"presenseStrs": [ "presenseStrs": [
@@ -12314,6 +12316,7 @@
"could not be found." "could not be found."
], ],
"alexaRank": 72851, "alexaRank": 72851,
"source": "Steam",
"url": "https://steamidfinder.com/lookup/{username}", "url": "https://steamidfinder.com/lookup/{username}",
"urlMain": "https://steamidfinder.com", "urlMain": "https://steamidfinder.com",
"usernameClaimed": "channel", "usernameClaimed": "channel",
@@ -12321,9 +12324,7 @@
}, },
"Steamidfinder (by id)": { "Steamidfinder (by id)": {
"tags": [ "tags": [
"gaming", "gaming"
"steam",
"us"
], ],
"type": "steam_id", "type": "steam_id",
"checkType": "message", "checkType": "message",
@@ -12334,6 +12335,7 @@
"could not be found." "could not be found."
], ],
"alexaRank": 72851, "alexaRank": 72851,
"source": "Steam",
"url": "https://steamidfinder.com/lookup/{username}", "url": "https://steamidfinder.com/lookup/{username}",
"urlMain": "https://steamidfinder.com", "urlMain": "https://steamidfinder.com",
"usernameClaimed": "76561197982198022", "usernameClaimed": "76561197982198022",
@@ -14688,6 +14690,7 @@
"\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u0441\u043a\u0440\u044b\u043b \u0441\u0432\u043e\u044e \u043f\u0443\u0431\u043b\u0438\u0447\u043d\u0443\u044e \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0443" "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u0441\u043a\u0440\u044b\u043b \u0441\u0432\u043e\u044e \u043f\u0443\u0431\u043b\u0438\u0447\u043d\u0443\u044e \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0443"
], ],
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://reviews.yandex.ru/user/{username}", "url": "https://reviews.yandex.ru/user/{username}",
"urlMain": "https://yandex.ru/", "urlMain": "https://yandex.ru/",
"usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c", "usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c",
@@ -14700,6 +14703,7 @@
], ],
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://yandex.ru/bugbounty/researchers/{username}/", "url": "https://yandex.ru/bugbounty/researchers/{username}/",
"urlMain": "https://yandex.ru/bugbounty/", "urlMain": "https://yandex.ru/bugbounty/",
"usernameClaimed": "pyrk1", "usernameClaimed": "pyrk1",
@@ -14722,18 +14726,21 @@
], ],
"absenceStrs": "cl-not-found-content__title", "absenceStrs": "cl-not-found-content__title",
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://yandex.ru/collections/user/{username}", "url": "https://yandex.ru/collections/user/{username}",
"urlMain": "https://yandex.ru/collections/", "urlMain": "https://yandex.ru/collections/",
"usernameClaimed": "yandex", "usernameClaimed": "yandex",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"YandexLocal": { "YandexLocal": {
"disabled": true,
"tags": [ "tags": [
"ru" "ru"
], ],
"type": "yandex_public_id", "type": "yandex_public_id",
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://local.yandex.ru/users/{username}", "url": "https://local.yandex.ru/users/{username}",
"urlMain": "https://local.yandex.ru/", "urlMain": "https://local.yandex.ru/",
"usernameClaimed": "gp7v6ufryzw3m1nvdj4ycexa8g", "usernameClaimed": "gp7v6ufryzw3m1nvdj4ycexa8g",
@@ -14747,6 +14754,7 @@
"checkType": "message", "checkType": "message",
"absenceStrs": "//yastatic.net/market-export/_/i/zero-state/404.svg", "absenceStrs": "//yastatic.net/market-export/_/i/zero-state/404.svg",
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://market.yandex.ru/user/{username}", "url": "https://market.yandex.ru/user/{username}",
"urlMain": "https://market.yandex.ru/", "urlMain": "https://market.yandex.ru/",
"usernameClaimed": "6j2uh4rhp5d9gqgbynaqy2p75m", "usernameClaimed": "6j2uh4rhp5d9gqgbynaqy2p75m",
@@ -14763,6 +14771,7 @@
"urlProbe": "https://music.yandex.ru/handlers/library.jsx?owner={username}", "urlProbe": "https://music.yandex.ru/handlers/library.jsx?owner={username}",
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://music.yandex.ru/users/{username}/playlists", "url": "https://music.yandex.ru/users/{username}/playlists",
"urlMain": "https://music.yandex.ru/", "urlMain": "https://music.yandex.ru/",
"usernameClaimed": "YandexMusic", "usernameClaimed": "YandexMusic",
@@ -14785,6 +14794,7 @@
"type": "yandex_public_id", "type": "yandex_public_id",
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://yandex.ru/q/profile/{username}", "url": "https://yandex.ru/q/profile/{username}",
"urlMain": "https://yandex.ru/q/", "urlMain": "https://yandex.ru/q/",
"usernameClaimed": "blue", "usernameClaimed": "blue",
@@ -14796,6 +14806,7 @@
], ],
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://zen.yandex.ru/{username}", "url": "https://zen.yandex.ru/{username}",
"urlMain": "https://zen.yandex.ru", "urlMain": "https://zen.yandex.ru",
"usernameClaimed": "tema", "usernameClaimed": "tema",
@@ -14808,6 +14819,7 @@
"type": "yandex_public_id", "type": "yandex_public_id",
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 48, "alexaRank": 48,
"source": "Yandex",
"url": "https://zen.yandex.ru/user/{username}", "url": "https://zen.yandex.ru/user/{username}",
"urlMain": "https://zen.yandex.ru", "urlMain": "https://zen.yandex.ru",
"usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c", "usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c",
@@ -18124,8 +18136,7 @@
"tracr.co": { "tracr.co": {
"disabled": true, "disabled": true,
"tags": [ "tags": [
"gaming", "gaming"
"discord"
], ],
"errors": { "errors": {
"502 - Bad Gateway": "Site error", "502 - Bad Gateway": "Site error",
@@ -18134,6 +18145,7 @@
"regexCheck": "^[A-Za-z0-9]{2,32}$", "regexCheck": "^[A-Za-z0-9]{2,32}$",
"checkType": "message", "checkType": "message",
"absenceStrs": "No search results", "absenceStrs": "No search results",
"source": "Discord",
"url": "https://tracr.co/users/1/{username}", "url": "https://tracr.co/users/1/{username}",
"urlMain": "https://tracr.co/", "urlMain": "https://tracr.co/",
"usernameClaimed": "blue", "usernameClaimed": "blue",
@@ -18171,8 +18183,7 @@
}, },
"uID.me (by username)": { "uID.me (by username)": {
"tags": [ "tags": [
"ru", "ru"
"ucoz"
], ],
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 24715, "alexaRank": 24715,
@@ -18183,8 +18194,7 @@
}, },
"uID.me (by uguid)": { "uID.me (by uguid)": {
"tags": [ "tags": [
"ru", "ru"
"ucoz"
], ],
"type": "uidme_uguid", "type": "uidme_uguid",
"checkType": "status_code", "checkType": "status_code",
@@ -22825,6 +22835,7 @@
], ],
"engine": "engineRedirect", "engine": "engineRedirect",
"alexaRank": 72, "alexaRank": 72,
"source": "GitHub",
"url": "https://gist.github.com/{username}", "url": "https://gist.github.com/{username}",
"urlMain": "https://gist.github.com", "urlMain": "https://gist.github.com",
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
@@ -23664,6 +23675,9 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"pikabu.monster": { "pikabu.monster": {
"tags": [
"ru"
],
"checkType": "message", "checkType": "message",
"presenseStrs": [ "presenseStrs": [
"usertotalcomments", "usertotalcomments",
@@ -23677,6 +23691,64 @@
"urlMain": "https://pikabu.monster", "urlMain": "https://pikabu.monster",
"usernameClaimed": "Avezenit", "usernameClaimed": "Avezenit",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
},
"steamdb.info": {
"tags": [
"gaming"
],
"type": "steam_id",
"checkType": "message",
"presenseStrs": [
"profileForm",
" player-name",
" progress",
" data-not-game="
],
"absenceStrs": [
"error-page",
" Error 404"
],
"source": "Steam",
"url": "https://steamdb.info/calculator/{username}",
"urlMain": "https://steamdb.info",
"usernameClaimed": "76561197978866368",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Niftygateway": {
"urlProbe": "https://api.niftygateway.com/user/profile-and-offchain-nifties-by-url/?profile_url={username}",
"checkType": "message",
"presenseStrs": [
"profile_url",
"name",
"profile_pic_url",
"verified",
"bio"
],
"absenceStrs": [
"not_found",
" User profile not located in our system."
],
"url": "https://niftygateway.com/profile/{username}",
"urlMain": "https://api.niftygateway.com",
"usernameClaimed": "admin",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"opensea.io": {
"checkType": "message",
"presenseStrs": [
"username\\",
"lastSale",
"publicUsername",
"name",
"user"
],
"absenceStrs": [
"><div width="
],
"url": "https://opensea.io/accounts/{username}",
"urlMain": "https://opensea.io",
"usernameClaimed": "admin",
"usernameUnclaimed": "noonewouldeverusethis7"
} }
}, },
"engines": { "engines": {
+3 -2
View File
@@ -15,7 +15,7 @@ SUPPORTED_TAGS = [
'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport', 'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport',
'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified', 'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified',
'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent', 'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent',
'science', 'medicine', 'science', 'medicine', 'reading', 'stock',
] ]
@@ -199,13 +199,14 @@ class MaigretDatabase:
normalized_tags = list(map(str.lower, tags)) normalized_tags = list(map(str.lower, tags))
is_name_ok = lambda x: x.name.lower() in normalized_names is_name_ok = lambda x: x.name.lower() in normalized_names
is_source_ok = lambda x: x.source and x.source.lower() in normalized_names
is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags)) is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
is_disabled_needed = lambda x: not x.disabled or ('disabled' in tags or disabled) is_disabled_needed = lambda x: not x.disabled or ('disabled' in tags or disabled)
is_id_type_ok = lambda x: x.type == id_type is_id_type_ok = lambda x: x.type == id_type
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x) filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
filter_names_fun = lambda x: not names or is_name_ok(x) filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
filter_fun = lambda x: filter_tags_engines_fun(x) and filter_names_fun(x) \ filter_fun = lambda x: filter_tags_engines_fun(x) and filter_names_fun(x) \
and is_disabled_needed(x) and is_id_type_ok(x) and is_disabled_needed(x) and is_id_type_ok(x)
+15 -15
View File
@@ -3,6 +3,7 @@ import difflib
import requests import requests
from .checking import * from .checking import *
from .utils import get_random_user_agent
DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography", DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
@@ -11,7 +12,7 @@ DESIRED_STRINGS = ["username", "not found", "пользователь", "profile
SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john'] SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
HEADERS = { HEADERS = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0', 'User-Agent': get_random_user_agent(),
} }
RATIO = 0.6 RATIO = 0.6
@@ -125,7 +126,7 @@ async def detect_known_engine(db, url_exists, url_mainpage):
return None return None
async def check_features_manually(db, url_exists, url_mainpage, cookie_file, redirects=False): async def check_features_manually(db, url_exists, url_mainpage, cookie_file, logger, redirects=True):
url_parts = url_exists.split('/') url_parts = url_exists.split('/')
supposed_username = url_parts[-1] supposed_username = url_parts[-1]
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ') new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
@@ -143,7 +144,13 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file, red
cookie_dict = {c.key: c.value for c in cookie_jar} cookie_dict = {c.key: c.value for c in cookie_jar}
exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects) exists_resp = requests.get(url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
logger.debug(exists_resp.status_code)
logger.debug(exists_resp.text)
non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects) non_exists_resp = requests.get(url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects)
logger.debug(non_exists_resp.status_code)
logger.debug(non_exists_resp.text)
a = exists_resp.text a = exists_resp.text
b = non_exists_resp.text b = non_exists_resp.text
@@ -187,7 +194,8 @@ async def check_features_manually(db, url_exists, url_mainpage, cookie_file, red
site = MaigretSite(url_mainpage.split('/')[-1], site_data) site = MaigretSite(url_mainpage.split('/')[-1], site_data)
return site return site
async def submit_dialog(db, url_exists, cookie_file):
async def submit_dialog(db, url_exists, cookie_file, logger):
domain_raw = URL_RE.sub('', url_exists).strip().strip('/') domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
domain_raw = domain_raw.split('/')[0] domain_raw = domain_raw.split('/')[0]
@@ -208,19 +216,11 @@ async def submit_dialog(db, url_exists, cookie_file):
sites = await detect_known_engine(db, url_exists, url_mainpage) sites = await detect_known_engine(db, url_exists, url_mainpage)
if not sites: if not sites:
print('Unable to detect site engine, lets generate checking features') print('Unable to detect site engine, lets generate checking features')
sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file)] sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file, logger)]
print(sites[0].__dict__) logger.debug(sites[0].__dict__)
sem = asyncio.Semaphore(1) sem = asyncio.Semaphore(1)
log_level = logging.INFO
logging.basicConfig(
format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
datefmt='%H:%M:%S',
level=log_level
)
logger = logging.getLogger('site-submit')
logger.setLevel(log_level)
found = False found = False
chosen_site = None chosen_site = None
@@ -236,9 +236,9 @@ async def submit_dialog(db, url_exists, cookie_file):
print('Try to run this mode again and increase features count or choose others.') print('Try to run this mode again and increase features count or choose others.')
else: else:
if input(f'Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y': if input(f'Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y':
print(chosen_site.json) logger.debug(chosen_site.json)
site_data = chosen_site.strip_engine_data() site_data = chosen_site.strip_engine_data()
print(site_data.json) logger.debug(site_data.json)
db.update_site(site_data) db.update_site(site_data)
return True return True
+10
View File
@@ -1,4 +1,10 @@
import re import re
import random
# Pool of User-Agent strings; get_random_user_agent() returns one of these
# entries chosen with random.choice.
DEFAULT_USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
]
class CaseConverter: class CaseConverter:
@@ -76,3 +82,7 @@ def get_dict_ascii_tree(items, prepend='', new_line=True):
text = text[1:] text = text[1:]
return text return text
def get_random_user_agent():
    """Return one User-Agent string picked at random from DEFAULT_USER_AGENTS."""
    candidates = DEFAULT_USER_AGENTS
    chosen = random.choice(candidates)
    return chosen