Added some photo sites, improved error detection

This commit is contained in:
Soxoj
2021-05-08 20:37:34 +03:00
parent d4234036c0
commit b696b982f4
6 changed files with 242 additions and 30 deletions
+5 -2
View File
@@ -13,6 +13,7 @@ import tqdm.asyncio
from aiohttp_socks import ProxyConnector from aiohttp_socks import ProxyConnector
from python_socks import _errors as proxy_errors from python_socks import _errors as proxy_errors
from socid_extractor import extract from socid_extractor import extract
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
from .activation import ParsingActivator, import_aiohttp_cookies from .activation import ParsingActivator, import_aiohttp_cookies
from . import errors from . import errors
@@ -64,8 +65,10 @@ async def get_response(request_future, logger) -> Tuple[str, int, Optional[Check
except asyncio.TimeoutError as e: except asyncio.TimeoutError as e:
error = CheckError("Request timeout", str(e)) error = CheckError("Request timeout", str(e))
except aiohttp.client_exceptions.ClientConnectorError as e: except ClientConnectorError as e:
error = CheckError("Connecting failure", str(e)) error = CheckError("Connecting failure", str(e))
except ServerDisconnectedError as e:
error = CheckError("Server disconnected", str(e))
except aiohttp.http_exceptions.BadHttpMessage as e: except aiohttp.http_exceptions.BadHttpMessage as e:
error = CheckError("HTTP", str(e)) error = CheckError("HTTP", str(e))
except proxy_errors.ProxyError as e: except proxy_errors.ProxyError as e:
@@ -155,7 +158,7 @@ def process_site_result(
# additional check for errors # additional check for errors
if status_code and not check_error: if status_code and not check_error:
check_error = detect_error_page( check_error = detect_error_page(
html_text, status_code, site.errors, site.ignore403 html_text, status_code, site.errors_dict, site.ignore403
) )
# parsing activation # parsing activation
+1 -1
View File
@@ -48,7 +48,7 @@ def notify_about_errors(search_results: QueryResultWrapper, query_notify):
text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)' text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
solution = errors.solution_of(e['err']) solution = errors.solution_of(e['err'])
if solution: if solution:
text = '. '.join([text, solution]) text = '. '.join([text, solution.capitalize()])
query_notify.warning(text, '!') query_notify.warning(text, '!')
was_errs_displayed = True was_errs_displayed = True
+213 -19
View File
@@ -8676,8 +8676,7 @@
}, },
"Mobypicture": { "Mobypicture": {
"tags": [ "tags": [
"in", "photo"
"nl"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
@@ -9477,16 +9476,17 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Newgrounds": { "Newgrounds": {
"tags": [ "absenceStrs": [
"us" "icon-steam"
],
"presenseStrs": [
"user-header-name"
], ],
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"checkType": "status_code",
"alexaRank": 5748,
"urlMain": "https://newgrounds.com",
"url": "https://{username}.newgrounds.com", "url": "https://{username}.newgrounds.com",
"usernameClaimed": "blue", "urlMain": "https://newgrounds.com",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameClaimed": "john",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message"
}, },
"Newreporter": { "Newreporter": {
"tags": [ "tags": [
@@ -10308,8 +10308,7 @@
}, },
"Picsart": { "Picsart": {
"tags": [ "tags": [
"in", "photo"
"us"
], ],
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 9103, "alexaRank": 9103,
@@ -12755,7 +12754,7 @@
"us" "us"
], ],
"headers": { "headers": {
"authorization": "Bearer BQC9jhqCGwU7SIBfusXOot9I_1wQ2F_4pS7ySsOwO20H9jWZE2ICJ-6rQ6gITvtIrXu6WbDLBTBUB4Ms1wI" "authorization": "Bearer BQB2AjQkezkQ8gs4IPTf-DaiUD5xf0_agBYgfI8wGAI-tdC7GANMDZMwiYBls8kfD_bIhmtQoYwNfUgB3rg"
}, },
"errors": { "errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14140,7 +14139,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1390788770346147842" "x-guest-token": "1391077876263444481"
}, },
"errors": { "errors": {
"Bad guest token": "x-guest-token update required" "Bad guest token": "x-guest-token update required"
@@ -14540,7 +14539,7 @@
"video" "video"
], ],
"headers": { "headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjA0MjU3NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.hn10YMXkg1jk0qy4XBCxdfeN5kbWzjzJcYD8J84mjWk" "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjA0OTU2NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.p2UxuvEPXrKk6wE_qMze9n9KeZIGo_uZ6nAE-UFKGJs"
}, },
"activation": { "activation": {
"url": "https://vimeo.com/_rv/viewer", "url": "https://vimeo.com/_rv/viewer",
@@ -17068,7 +17067,8 @@
}, },
"gfycat": { "gfycat": {
"tags": [ "tags": [
"sharing" "sharing",
"photo"
], ],
"checkType": "status_code", "checkType": "status_code",
"alexaRank": 2217, "alexaRank": 2217,
@@ -24424,9 +24424,7 @@
}, },
"1x": { "1x": {
"tags": [ "tags": [
"in", "photo"
"photo",
"us"
], ],
"checkType": "message", "checkType": "message",
"presenseStrs": [ "presenseStrs": [
@@ -26245,6 +26243,202 @@
"Cookie": "SUB=_2AkMXyuc_f8NxqwJRmP8SyWPrbo13zAvEieKhlhbkJRMxHRl-123" "Cookie": "SUB=_2AkMXyuc_f8NxqwJRmP8SyWPrbo13zAvEieKhlhbkJRMxHRl-123"
}, },
"checkType": "message" "checkType": "message"
},
"Hatena": {
"absenceStrs": [
"404 Not Found"
],
"presenseStrs": [
"profile",
"myprofile",
"profile-dt",
"profile-dd",
"hatena-profile"
],
"url": "http://profile.hatena.com/{username}/",
"urlMain": "http://profile.hatena.com",
"usernameClaimed": "john",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message"
},
"angel.co": {
"absenceStrs": [
"render_not_found"
],
"presenseStrs": [
"Profile",
"profiles",
"User profile",
"name",
"layouts/profile"
],
"url": "https://angel.co/u/{username}",
"urlMain": "https://angel.co",
"usernameClaimed": "john",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message"
},
"nelubit.ru": {
"urlMain": "https://nelubit.ru",
"engine": "phpBB/Search",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"actual-porn.org": {
"urlMain": "http://actual-porn.org",
"engine": "vBulletin",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Aminus3": {
"absenceStrs": [
"Expires",
" no-cache"
],
"presenseStrs": [
"image/ico",
" title="
],
"url": "https://{username}.aminus3.com/",
"urlMain": "https://aminus3.com",
"usernameClaimed": "beautifulworld",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"photo"
]
},
"lomography": {
"absenceStrs": [
"<title>404 \u00b7 Lomography</title>"
],
"presenseStrs": [
"Lomography",
" @lomography"
],
"url": "https://www.lomography.com/homes/{username}",
"urlMain": "https://www.lomography.com",
"usernameClaimed": "steved7755",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"photo"
]
},
"jAlbum.net": {
"absenceStrs": [
"section",
" error_head"
],
"regexCheck": "^[^\\.]+$",
"presenseStrs": [
"alternate",
" og:image"
],
"url": "https://{username}.jalbum.net/",
"urlMain": "https://jalbum.net",
"usernameClaimed": "laza",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"photo"
]
},
"23hq": {
"absenceStrs": [
"my-modal",
" enable",
" modal"
],
"presenseStrs": [
"frame",
"first active",
"user",
"last",
"country-name"
],
"url": "http://www.23hq.com/{username}",
"urlMain": "http://www.23hq.com",
"usernameClaimed": "nellyb",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"photo"
]
},
"bliphoto": {
"absenceStrs": [
"<title>Your photo journal | Blipfoto</title>"
],
"presenseStrs": [
"biography",
"biography-full",
"profile-sidebar",
"profile-content",
"state"
],
"url": "https://www.blipfoto.com/{username}",
"urlMain": "https://www.blipfoto.com",
"usernameClaimed": "Wildstar",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"photo"
]
},
"Fotki": {
"absenceStrs": [
"'404 - Member Not Found'"
],
"presenseStrs": [
"profile-cities",
"profile-friends",
"profile-aboutme",
"profile-country",
"user_profile_info"
],
"url": "https://members.fotki.com/{username}/about/",
"urlMain": "https://fotki.com",
"usernameClaimed": "normargab",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"photo"
]
},
"viewbug": {
"absenceStrs": [
"missing-photos"
],
"presenseStrs": [
"profile",
" profile_content"
],
"url": "https://www.viewbug.com/member/{username}",
"urlMain": "https://www.viewbug.com",
"usernameClaimed": "melaniejwood",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"photo"
]
},
"Piccsy": {
"absenceStrs": [
"my-modal"
],
"presenseStrs": [
"Username"
],
"regexCheck": "^[^\\.]+$",
"url": "http://{username}.piccsy.com/",
"urlMain": "http://piccsy.com",
"usernameClaimed": "orientcement",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"tags": [
"photo"
]
} }
}, },
"engines": { "engines": {
+8
View File
@@ -182,6 +182,14 @@ class MaigretSite:
return result return result
@property
def errors_dict(self) -> dict:
    """Return the combined error mapping for this site.

    Starts from the ``'errors'`` entry of ``self.engine_obj.site`` (when
    ``self.engine_obj`` is truthy) and then layers ``self.errors`` on top,
    so entries from ``self.errors`` win when the same key is present in
    both. A new dict is built on every access.
    """
    engine = self.engine_obj
    # Engine-level errors form the base layer, if an engine is attached.
    base = engine.site.get('errors', {}) if engine else {}

    merged: Dict[str, str] = dict(base)
    # Site-level entries are applied last so they override the engine's.
    merged.update(self.errors)
    return merged
def get_url_type(self) -> str: def get_url_type(self) -> str:
url = URLMatcher.extract_main_part(self.url) url = URLMatcher.extract_main_part(self.url)
if url.startswith("{username}"): if url.startswith("{username}"):
+10 -4
View File
@@ -200,6 +200,7 @@ async def check_features_manually(
# cookies # cookies
cookie_dict = None cookie_dict = None
if cookie_file: if cookie_file:
logger.info(f'Use {cookie_file} for cookies')
cookie_jar = await import_aiohttp_cookies(cookie_file) cookie_jar = await import_aiohttp_cookies(cookie_file)
cookie_dict = {c.key: c.value for c in cookie_jar} cookie_dict = {c.key: c.value for c in cookie_jar}
@@ -328,17 +329,22 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
print( print(
"Try to run this mode again and increase features count or choose others." "Try to run this mode again and increase features count or choose others."
) )
return False
else: else:
if ( if (
input( input(
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] " f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
).lower() )
in "y" .lower()
.strip("y")
): ):
return False
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
chosen_site.tags = input("Site tags: ").split(',')
logger.debug(chosen_site.json) logger.debug(chosen_site.json)
site_data = chosen_site.strip_engine_data() site_data = chosen_site.strip_engine_data()
logger.debug(site_data.json) logger.debug(site_data.json)
db.update_site(site_data) db.update_site(site_data)
return True return True
return False
+1
View File
@@ -103,6 +103,7 @@ def test_saving_site_error():
amperka = db.sites[0] amperka = db.sites[0]
assert len(amperka.errors) == 2 assert len(amperka.errors) == 2
assert len(amperka.errors_dict) == 2
assert amperka.strip_engine_data().errors == {'error1': 'text1'} assert amperka.strip_engine_data().errors == {'error1': 'text1'}
assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'} assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}