Fixed BongaCams, links parsing improved (#297)

* Fixed BongaCams, links parsing improved

* Fixed tests
This commit is contained in:
Soxoj
2022-01-08 03:45:49 +03:00
committed by GitHub
parent 3e884d4b76
commit 5c05cfa5bc
4 changed files with 13 additions and 7 deletions
+9 -3
View File
@@ -2144,11 +2144,17 @@
"cz",
"webcam"
],
"checkType": "status_code",
"absenceStrs": [
"<link rel=\"preconnect\""
],
"presenseStrs": [
"<title>Informa\u00e7\u00e3o e p\u00e1gina"
],
"checkType": "message",
"alexaRank": 30,
"urlMain": "https://pt.bongacams.com",
"url": "https://pt.bongacams.com/profile/{username}",
"usernameClaimed": "asuna-black",
"usernameClaimed": "Icehotangel",
"usernameUnclaimed": "noonewouldeverusethis77777"
},
"Bookandreader": {
@@ -13126,7 +13132,7 @@
"checkType": "response_url",
"alexaRank": 82345,
"urlMain": "http://sprashivai.ru",
"url": "http://sprashivai.ru/{username}?sl",
"url": "http://sprashivai.ru/{username}",
"usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7"
},
+2 -2
View File
@@ -42,7 +42,7 @@ def enrich_link_str(link: str) -> str:
class URLMatcher:
_HTTP_URL_RE_STR = "^https?://(www.)?(.+)$"
_HTTP_URL_RE_STR = "^https?://(www.|m.)?(.+)$"
HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
UNSAFE_SYMBOLS = ".?"
@@ -66,7 +66,7 @@ class URLMatcher:
)
regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
return re.compile(regexp_str)
return re.compile(regexp_str, re.IGNORECASE)
def ascii_data_display(data: str) -> Any:
+1 -1
View File
@@ -115,7 +115,7 @@ def test_site_url_detector():
assert (
db.sites[0].url_regexp.pattern
== r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
== r'^https?://(www.|m.)?forum\.amperka\.ru/members/\?username=(.+?)$'
)
assert (
db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
+1 -1
View File
@@ -98,7 +98,7 @@ def test_url_make_profile_url_regexp():
# ensure all combinations match pattern
assert (
URLMatcher.make_profile_url_regexp(url).pattern
== r'^https?://(www.)?flickr\.com/photos/(.+?)$'
== r'^https?://(www.|m.)?flickr\.com/photos/(.+?)$'
)