mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Fixed BongaCams, links parsing improved (#297)
* Fixed BongaCams, links parsing improved
* Fixed tests
This commit is contained in:
@@ -2144,11 +2144,17 @@
|
|||||||
"cz",
|
"cz",
|
||||||
"webcam"
|
"webcam"
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"absenceStrs": [
|
||||||
|
"<link rel=\"preconnect\""
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"<title>Informa\u00e7\u00e3o e p\u00e1gina"
|
||||||
|
],
|
||||||
|
"checkType": "message",
|
||||||
"alexaRank": 30,
|
"alexaRank": 30,
|
||||||
"urlMain": "https://pt.bongacams.com",
|
"urlMain": "https://pt.bongacams.com",
|
||||||
"url": "https://pt.bongacams.com/profile/{username}",
|
"url": "https://pt.bongacams.com/profile/{username}",
|
||||||
"usernameClaimed": "asuna-black",
|
"usernameClaimed": "Icehotangel",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis77777"
|
"usernameUnclaimed": "noonewouldeverusethis77777"
|
||||||
},
|
},
|
||||||
"Bookandreader": {
|
"Bookandreader": {
|
||||||
@@ -13126,7 +13132,7 @@
|
|||||||
"checkType": "response_url",
|
"checkType": "response_url",
|
||||||
"alexaRank": 82345,
|
"alexaRank": 82345,
|
||||||
"urlMain": "http://sprashivai.ru",
|
"urlMain": "http://sprashivai.ru",
|
||||||
"url": "http://sprashivai.ru/{username}?sl",
|
"url": "http://sprashivai.ru/{username}",
|
||||||
"usernameClaimed": "red",
|
"usernameClaimed": "red",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
|
|||||||
+2
-2
@@ -42,7 +42,7 @@ def enrich_link_str(link: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
class URLMatcher:
|
class URLMatcher:
|
||||||
_HTTP_URL_RE_STR = "^https?://(www.)?(.+)$"
|
_HTTP_URL_RE_STR = "^https?://(www.|m.)?(.+)$"
|
||||||
HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
|
HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
|
||||||
UNSAFE_SYMBOLS = ".?"
|
UNSAFE_SYMBOLS = ".?"
|
||||||
|
|
||||||
@@ -66,7 +66,7 @@ class URLMatcher:
|
|||||||
)
|
)
|
||||||
regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
|
regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
|
||||||
|
|
||||||
return re.compile(regexp_str)
|
return re.compile(regexp_str, re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
def ascii_data_display(data: str) -> Any:
|
def ascii_data_display(data: str) -> Any:
|
||||||
|
|||||||
+1
-1
@@ -115,7 +115,7 @@ def test_site_url_detector():
|
|||||||
|
|
||||||
assert (
|
assert (
|
||||||
db.sites[0].url_regexp.pattern
|
db.sites[0].url_regexp.pattern
|
||||||
== r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
|
== r'^https?://(www.|m.)?forum\.amperka\.ru/members/\?username=(.+?)$'
|
||||||
)
|
)
|
||||||
assert (
|
assert (
|
||||||
db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
|
db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
|
||||||
|
|||||||
+1
-1
@@ -98,7 +98,7 @@ def test_url_make_profile_url_regexp():
|
|||||||
# ensure all combinations match pattern
|
# ensure all combinations match pattern
|
||||||
assert (
|
assert (
|
||||||
URLMatcher.make_profile_url_regexp(url).pattern
|
URLMatcher.make_profile_url_regexp(url).pattern
|
||||||
== r'^https?://(www.)?flickr\.com/photos/(.+?)$'
|
== r'^https?://(www.|m.)?flickr\.com/photos/(.+?)$'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user