mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
Added some photo sites, improved error detection
This commit is contained in:
+5
-2
@@ -13,6 +13,7 @@ import tqdm.asyncio
|
|||||||
from aiohttp_socks import ProxyConnector
|
from aiohttp_socks import ProxyConnector
|
||||||
from python_socks import _errors as proxy_errors
|
from python_socks import _errors as proxy_errors
|
||||||
from socid_extractor import extract
|
from socid_extractor import extract
|
||||||
|
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
|
||||||
|
|
||||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||||
from . import errors
|
from . import errors
|
||||||
@@ -64,8 +65,10 @@ async def get_response(request_future, logger) -> Tuple[str, int, Optional[Check
|
|||||||
|
|
||||||
except asyncio.TimeoutError as e:
|
except asyncio.TimeoutError as e:
|
||||||
error = CheckError("Request timeout", str(e))
|
error = CheckError("Request timeout", str(e))
|
||||||
except aiohttp.client_exceptions.ClientConnectorError as e:
|
except ClientConnectorError as e:
|
||||||
error = CheckError("Connecting failure", str(e))
|
error = CheckError("Connecting failure", str(e))
|
||||||
|
except ServerDisconnectedError as e:
|
||||||
|
error = CheckError("Server disconnected", str(e))
|
||||||
except aiohttp.http_exceptions.BadHttpMessage as e:
|
except aiohttp.http_exceptions.BadHttpMessage as e:
|
||||||
error = CheckError("HTTP", str(e))
|
error = CheckError("HTTP", str(e))
|
||||||
except proxy_errors.ProxyError as e:
|
except proxy_errors.ProxyError as e:
|
||||||
@@ -155,7 +158,7 @@ def process_site_result(
|
|||||||
# additional check for errors
|
# additional check for errors
|
||||||
if status_code and not check_error:
|
if status_code and not check_error:
|
||||||
check_error = detect_error_page(
|
check_error = detect_error_page(
|
||||||
html_text, status_code, site.errors, site.ignore403
|
html_text, status_code, site.errors_dict, site.ignore403
|
||||||
)
|
)
|
||||||
|
|
||||||
# parsing activation
|
# parsing activation
|
||||||
|
|||||||
+1
-1
@@ -48,7 +48,7 @@ def notify_about_errors(search_results: QueryResultWrapper, query_notify):
|
|||||||
text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
|
text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
|
||||||
solution = errors.solution_of(e['err'])
|
solution = errors.solution_of(e['err'])
|
||||||
if solution:
|
if solution:
|
||||||
text = '. '.join([text, solution])
|
text = '. '.join([text, solution.capitalize()])
|
||||||
|
|
||||||
query_notify.warning(text, '!')
|
query_notify.warning(text, '!')
|
||||||
was_errs_displayed = True
|
was_errs_displayed = True
|
||||||
|
|||||||
+213
-19
@@ -8676,8 +8676,7 @@
|
|||||||
},
|
},
|
||||||
"Mobypicture": {
|
"Mobypicture": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"in",
|
"photo"
|
||||||
"nl"
|
|
||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
@@ -9477,16 +9476,17 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"Newgrounds": {
|
"Newgrounds": {
|
||||||
"tags": [
|
"absenceStrs": [
|
||||||
"us"
|
"icon-steam"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"user-header-name"
|
||||||
],
|
],
|
||||||
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
|
|
||||||
"checkType": "status_code",
|
|
||||||
"alexaRank": 5748,
|
|
||||||
"urlMain": "https://newgrounds.com",
|
|
||||||
"url": "https://{username}.newgrounds.com",
|
"url": "https://{username}.newgrounds.com",
|
||||||
"usernameClaimed": "blue",
|
"urlMain": "https://newgrounds.com",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameClaimed": "john",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message"
|
||||||
},
|
},
|
||||||
"Newreporter": {
|
"Newreporter": {
|
||||||
"tags": [
|
"tags": [
|
||||||
@@ -10308,8 +10308,7 @@
|
|||||||
},
|
},
|
||||||
"Picsart": {
|
"Picsart": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"in",
|
"photo"
|
||||||
"us"
|
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"checkType": "status_code",
|
||||||
"alexaRank": 9103,
|
"alexaRank": 9103,
|
||||||
@@ -12755,7 +12754,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"authorization": "Bearer BQC9jhqCGwU7SIBfusXOot9I_1wQ2F_4pS7ySsOwO20H9jWZE2ICJ-6rQ6gITvtIrXu6WbDLBTBUB4Ms1wI"
|
"authorization": "Bearer BQB2AjQkezkQ8gs4IPTf-DaiUD5xf0_agBYgfI8wGAI-tdC7GANMDZMwiYBls8kfD_bIhmtQoYwNfUgB3rg"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||||
@@ -14140,7 +14139,7 @@
|
|||||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||||
"x-guest-token": "1390788770346147842"
|
"x-guest-token": "1391077876263444481"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Bad guest token": "x-guest-token update required"
|
"Bad guest token": "x-guest-token update required"
|
||||||
@@ -14540,7 +14539,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjA0MjU3NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.hn10YMXkg1jk0qy4XBCxdfeN5kbWzjzJcYD8J84mjWk"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjA0OTU2NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.p2UxuvEPXrKk6wE_qMze9n9KeZIGo_uZ6nAE-UFKGJs"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
@@ -17068,7 +17067,8 @@
|
|||||||
},
|
},
|
||||||
"gfycat": {
|
"gfycat": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"sharing"
|
"sharing",
|
||||||
|
"photo"
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"checkType": "status_code",
|
||||||
"alexaRank": 2217,
|
"alexaRank": 2217,
|
||||||
@@ -24424,9 +24424,7 @@
|
|||||||
},
|
},
|
||||||
"1x": {
|
"1x": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"in",
|
"photo"
|
||||||
"photo",
|
|
||||||
"us"
|
|
||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"presenseStrs": [
|
"presenseStrs": [
|
||||||
@@ -26245,6 +26243,202 @@
|
|||||||
"Cookie": "SUB=_2AkMXyuc_f8NxqwJRmP8SyWPrbo13zAvEieKhlhbkJRMxHRl-123"
|
"Cookie": "SUB=_2AkMXyuc_f8NxqwJRmP8SyWPrbo13zAvEieKhlhbkJRMxHRl-123"
|
||||||
},
|
},
|
||||||
"checkType": "message"
|
"checkType": "message"
|
||||||
|
},
|
||||||
|
"Hatena": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"404 Not Found"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile",
|
||||||
|
"myprofile",
|
||||||
|
"profile-dt",
|
||||||
|
"profile-dd",
|
||||||
|
"hatena-profile"
|
||||||
|
],
|
||||||
|
"url": "http://profile.hatena.com/{username}/",
|
||||||
|
"urlMain": "http://profile.hatena.com",
|
||||||
|
"usernameClaimed": "john",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message"
|
||||||
|
},
|
||||||
|
"angel.co": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"render_not_found"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"Profile",
|
||||||
|
"profiles",
|
||||||
|
"User profile",
|
||||||
|
"name",
|
||||||
|
"layouts/profile"
|
||||||
|
],
|
||||||
|
"url": "https://angel.co/u/{username}",
|
||||||
|
"urlMain": "https://angel.co",
|
||||||
|
"usernameClaimed": "john",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message"
|
||||||
|
},
|
||||||
|
"nelubit.ru": {
|
||||||
|
"urlMain": "https://nelubit.ru",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"actual-porn.org": {
|
||||||
|
"urlMain": "http://actual-porn.org",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"Aminus3": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"Expires",
|
||||||
|
" no-cache"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"image/ico",
|
||||||
|
" title="
|
||||||
|
],
|
||||||
|
"url": "https://{username}.aminus3.com/",
|
||||||
|
"urlMain": "https://aminus3.com",
|
||||||
|
"usernameClaimed": "beautifulworld",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lomography": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"<title>404 \u00b7 Lomography</title>"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"Lomography",
|
||||||
|
" @lomography"
|
||||||
|
],
|
||||||
|
"url": "https://www.lomography.com/homes/{username}",
|
||||||
|
"urlMain": "https://www.lomography.com",
|
||||||
|
"usernameClaimed": "steved7755",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"jAlbum.net": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"section",
|
||||||
|
" error_head"
|
||||||
|
],
|
||||||
|
"regexCheck": "^[^\\.]+$",
|
||||||
|
"presenseStrs": [
|
||||||
|
"alternate",
|
||||||
|
" og:image"
|
||||||
|
],
|
||||||
|
"url": "https://{username}.jalbum.net/",
|
||||||
|
"urlMain": "https://jalbum.net",
|
||||||
|
"usernameClaimed": "laza",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"23hq": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"my-modal",
|
||||||
|
" enable",
|
||||||
|
" modal"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"frame",
|
||||||
|
"first active",
|
||||||
|
"user",
|
||||||
|
"last",
|
||||||
|
"country-name"
|
||||||
|
],
|
||||||
|
"url": "http://www.23hq.com/{username}",
|
||||||
|
"urlMain": "http://www.23hq.com",
|
||||||
|
"usernameClaimed": "nellyb",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"bliphoto": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"<title>Your photo journal | Blipfoto</title>"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"biography",
|
||||||
|
"biography-full",
|
||||||
|
"profile-sidebar",
|
||||||
|
"profile-content",
|
||||||
|
"state"
|
||||||
|
],
|
||||||
|
"url": "https://www.blipfoto.com/{username}",
|
||||||
|
"urlMain": "https://www.blipfoto.com",
|
||||||
|
"usernameClaimed": "Wildstar",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Fotki": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"'404 - Member Not Found'"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile-cities",
|
||||||
|
"profile-friends",
|
||||||
|
"profile-aboutme",
|
||||||
|
"profile-country",
|
||||||
|
"user_profile_info"
|
||||||
|
],
|
||||||
|
"url": "https://members.fotki.com/{username}/about/",
|
||||||
|
"urlMain": "https://fotki.com",
|
||||||
|
"usernameClaimed": "normargab",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"viewbug": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"missing-photos"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile",
|
||||||
|
" profile_content"
|
||||||
|
],
|
||||||
|
"url": "https://www.viewbug.com/member/{username}",
|
||||||
|
"urlMain": "https://www.viewbug.com",
|
||||||
|
"usernameClaimed": "melaniejwood",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"photo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Piccsy": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"my-modal"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"Username"
|
||||||
|
],
|
||||||
|
"regexCheck": "^[^\\.]+$",
|
||||||
|
"url": "http://{username}.piccsy.com/",
|
||||||
|
"urlMain": "http://piccsy.com",
|
||||||
|
"usernameClaimed": "orientcement",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"Photo"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|||||||
@@ -182,6 +182,14 @@ class MaigretSite:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@property
|
||||||
|
def errors_dict(self) -> dict:
|
||||||
|
errors: Dict[str, str] = {}
|
||||||
|
if self.engine_obj:
|
||||||
|
errors.update(self.engine_obj.site.get('errors', {}))
|
||||||
|
errors.update(self.errors)
|
||||||
|
return errors
|
||||||
|
|
||||||
def get_url_type(self) -> str:
|
def get_url_type(self) -> str:
|
||||||
url = URLMatcher.extract_main_part(self.url)
|
url = URLMatcher.extract_main_part(self.url)
|
||||||
if url.startswith("{username}"):
|
if url.startswith("{username}"):
|
||||||
|
|||||||
+10
-4
@@ -200,6 +200,7 @@ async def check_features_manually(
|
|||||||
# cookies
|
# cookies
|
||||||
cookie_dict = None
|
cookie_dict = None
|
||||||
if cookie_file:
|
if cookie_file:
|
||||||
|
logger.info(f'Use {cookie_file} for cookies')
|
||||||
cookie_jar = await import_aiohttp_cookies(cookie_file)
|
cookie_jar = await import_aiohttp_cookies(cookie_file)
|
||||||
cookie_dict = {c.key: c.value for c in cookie_jar}
|
cookie_dict = {c.key: c.value for c in cookie_jar}
|
||||||
|
|
||||||
@@ -328,17 +329,22 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
|
|||||||
print(
|
print(
|
||||||
"Try to run this mode again and increase features count or choose others."
|
"Try to run this mode again and increase features count or choose others."
|
||||||
)
|
)
|
||||||
|
return False
|
||||||
else:
|
else:
|
||||||
if (
|
if (
|
||||||
input(
|
input(
|
||||||
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
|
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
|
||||||
).lower()
|
)
|
||||||
in "y"
|
.lower()
|
||||||
|
.strip("y")
|
||||||
):
|
):
|
||||||
|
return False
|
||||||
|
|
||||||
|
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
||||||
|
chosen_site.tags = input("Site tags: ").split(',')
|
||||||
|
|
||||||
logger.debug(chosen_site.json)
|
logger.debug(chosen_site.json)
|
||||||
site_data = chosen_site.strip_engine_data()
|
site_data = chosen_site.strip_engine_data()
|
||||||
logger.debug(site_data.json)
|
logger.debug(site_data.json)
|
||||||
db.update_site(site_data)
|
db.update_site(site_data)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
|
||||||
|
|||||||
@@ -103,6 +103,7 @@ def test_saving_site_error():
|
|||||||
|
|
||||||
amperka = db.sites[0]
|
amperka = db.sites[0]
|
||||||
assert len(amperka.errors) == 2
|
assert len(amperka.errors) == 2
|
||||||
|
assert len(amperka.errors_dict) == 2
|
||||||
|
|
||||||
assert amperka.strip_engine_data().errors == {'error1': 'text1'}
|
assert amperka.strip_engine_data().errors == {'error1': 'text1'}
|
||||||
assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}
|
assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}
|
||||||
|
|||||||
Reference in New Issue
Block a user