Fixed BongaCams, links parsing improved (#297)

* Fixed BongaCams, links parsing improved

* Fixed tests
This commit is contained in:
Soxoj
2022-01-08 03:45:49 +03:00
committed by GitHub
parent 3e884d4b76
commit 5c05cfa5bc
4 changed files with 13 additions and 7 deletions
+9 -3
View File
@@ -2144,11 +2144,17 @@
"cz", "cz",
"webcam" "webcam"
], ],
"checkType": "status_code", "absenceStrs": [
"<link rel=\"preconnect\""
],
"presenseStrs": [
"<title>Informa\u00e7\u00e3o e p\u00e1gina"
],
"checkType": "message",
"alexaRank": 30, "alexaRank": 30,
"urlMain": "https://pt.bongacams.com", "urlMain": "https://pt.bongacams.com",
"url": "https://pt.bongacams.com/profile/{username}", "url": "https://pt.bongacams.com/profile/{username}",
"usernameClaimed": "asuna-black", "usernameClaimed": "Icehotangel",
"usernameUnclaimed": "noonewouldeverusethis77777" "usernameUnclaimed": "noonewouldeverusethis77777"
}, },
"Bookandreader": { "Bookandreader": {
@@ -13126,7 +13132,7 @@
"checkType": "response_url", "checkType": "response_url",
"alexaRank": 82345, "alexaRank": 82345,
"urlMain": "http://sprashivai.ru", "urlMain": "http://sprashivai.ru",
"url": "http://sprashivai.ru/{username}?sl", "url": "http://sprashivai.ru/{username}",
"usernameClaimed": "red", "usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
+2 -2
View File
@@ -42,7 +42,7 @@ def enrich_link_str(link: str) -> str:
class URLMatcher: class URLMatcher:
_HTTP_URL_RE_STR = "^https?://(www.)?(.+)$" _HTTP_URL_RE_STR = "^https?://(www.|m.)?(.+)$"
HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR) HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
UNSAFE_SYMBOLS = ".?" UNSAFE_SYMBOLS = ".?"
@@ -66,7 +66,7 @@ class URLMatcher:
) )
regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp) regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
return re.compile(regexp_str) return re.compile(regexp_str, re.IGNORECASE)
def ascii_data_display(data: str) -> Any: def ascii_data_display(data: str) -> Any:
+1 -1
View File
@@ -115,7 +115,7 @@ def test_site_url_detector():
assert ( assert (
db.sites[0].url_regexp.pattern db.sites[0].url_regexp.pattern
== r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$' == r'^https?://(www.|m.)?forum\.amperka\.ru/members/\?username=(.+?)$'
) )
assert ( assert (
db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test') db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
+1 -1
View File
@@ -98,7 +98,7 @@ def test_url_make_profile_url_regexp():
# ensure all combinations match pattern # ensure all combinations match pattern
assert ( assert (
URLMatcher.make_profile_url_regexp(url).pattern URLMatcher.make_profile_url_regexp(url).pattern
== r'^https?://(www.)?flickr\.com/photos/(.+?)$' == r'^https?://(www.|m.)?flickr\.com/photos/(.+?)$'
) )