Fix ID extraction crash when regex groups are optional (#2572)

* Fix ID extraction crash when regex groups are optional

Handle None capture groups in username/id extraction and add regression coverage for optional trailing groups.

* Remove the leftover line that overwrote the None-safe _id in extract_id_from_url
This commit is contained in:
egrezeli
2026-04-30 19:14:40 -03:00
committed by GitHub
parent 533884bad5
commit 9dbefcef11
2 changed files with 37 additions and 3 deletions
+19 -3
View File
@@ -181,7 +181,15 @@ class MaigretSite:
if self.url_regexp:
match_groups = self.url_regexp.match(url)
if match_groups:
return match_groups.groups()[-1].rstrip("/")
username = next(
(
group.rstrip("/")
for group in reversed(match_groups.groups())
if isinstance(group, str) and group
),
None,
)
return username
return None
@@ -196,8 +204,16 @@ class MaigretSite:
match_groups = self.url_regexp.match(url)
if not match_groups:
return None
_id = match_groups.groups()[-1].rstrip("/")
_id = next(
(
group.rstrip("/")
for group in reversed(match_groups.groups())
if isinstance(group, str) and group
),
None,
)
if _id is None:
return None
_type = self.type
return _id, _type
+18
View File
@@ -1,5 +1,7 @@
"""Maigret Database test functions"""
import re
from typing import Any, Dict
from maigret.sites import MaigretDatabase, MaigretSite
@@ -126,6 +128,22 @@ def test_site_url_detector():
)
def test_extract_id_from_url_skips_none_groups():
    """Regression test: an unmatched optional trailing group must not break ID extraction."""
    site_config = {
        "urlMain": "https://example.com",
        "url": "https://example.com/{username}",
    }
    example_site = MaigretSite("Example", site_config)
    # The trailing capture group is optional, so for a bare profile URL it
    # does not participate in the match and is reported as None.
    example_site.url_regexp = re.compile(
        r"^https://example\.com/([^/?#]+)(?:/(.*))?$"
    )

    extracted = example_site.extract_id_from_url("https://example.com/username")

    # The first (non-empty) group must be picked up instead of the None one.
    assert extracted == ("username", "username")
def test_ranked_sites_dict():
db = MaigretDatabase()
db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))