mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
Fix ID extraction crash when regex groups are optional (#2572)
* Fix ID extraction crash when regex groups are optional: handle None capture groups in username/id extraction and add regression coverage for optional trailing groups. * Remove leftover line that overwrote the safe _id in extract_id_from_url.
This commit is contained in:
+19
-3
@@ -181,7 +181,15 @@ class MaigretSite:
|
|||||||
if self.url_regexp:
|
if self.url_regexp:
|
||||||
match_groups = self.url_regexp.match(url)
|
match_groups = self.url_regexp.match(url)
|
||||||
if match_groups:
|
if match_groups:
|
||||||
return match_groups.groups()[-1].rstrip("/")
|
username = next(
|
||||||
|
(
|
||||||
|
group.rstrip("/")
|
||||||
|
for group in reversed(match_groups.groups())
|
||||||
|
if isinstance(group, str) and group
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
return username
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -196,8 +204,16 @@ class MaigretSite:
|
|||||||
match_groups = self.url_regexp.match(url)
|
match_groups = self.url_regexp.match(url)
|
||||||
if not match_groups:
|
if not match_groups:
|
||||||
return None
|
return None
|
||||||
|
_id = next(
|
||||||
_id = match_groups.groups()[-1].rstrip("/")
|
(
|
||||||
|
group.rstrip("/")
|
||||||
|
for group in reversed(match_groups.groups())
|
||||||
|
if isinstance(group, str) and group
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
if _id is None:
|
||||||
|
return None
|
||||||
_type = self.type
|
_type = self.type
|
||||||
|
|
||||||
return _id, _type
|
return _id, _type
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
"""Maigret Database test functions"""
|
"""Maigret Database test functions"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from typing import Any, Dict
|
from typing import Any, Dict
|
||||||
|
|
||||||
from maigret.sites import MaigretDatabase, MaigretSite
|
from maigret.sites import MaigretDatabase, MaigretSite
|
||||||
@@ -126,6 +128,22 @@ def test_site_url_detector():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_id_from_url_skips_none_groups():
    """Regression test: unmatched optional capture groups must be skipped.

    The pattern below has an optional trailing group, which is ``None``
    when the URL carries no extra path segment. ``extract_id_from_url``
    is expected to return the identifier from the first group, paired
    with the site's type.
    """
    site_info = {
        "urlMain": "https://example.com",
        "url": "https://example.com/{username}",
    }
    example_site = MaigretSite("Example", site_info)

    # Override the URL pattern with one whose last group is optional, so
    # the final entry of ``groups()`` is None for a bare profile URL.
    example_site.url_regexp = re.compile(
        r"^https://example\.com/([^/?#]+)(?:/(.*))?$"
    )

    extracted = example_site.extract_id_from_url("https://example.com/username")

    assert extracted == ("username", "username")
|
||||||
|
|
||||||
|
|
||||||
def test_ranked_sites_dict():
|
def test_ranked_sites_dict():
|
||||||
db = MaigretDatabase()
|
db = MaigretDatabase()
|
||||||
db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))
|
db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))
|
||||||
|
|||||||
Reference in New Issue
Block a user