Fix ID extraction crash when regex groups are optional (#2572)

* Fix ID extraction crash when regex groups are optional

Handle None capture groups in username/id extraction and add regression coverage for optional trailing groups.

* Remove leftover line that overwrote safe _id in extract_id_from_url
This commit is contained in:
egrezeli
2026-04-30 19:14:40 -03:00
committed by GitHub
parent 533884bad5
commit 9dbefcef11
2 changed files with 37 additions and 3 deletions
+18
View File
@@ -1,5 +1,7 @@
"""Maigret Database test functions"""
import re
from typing import Any, Dict
from maigret.sites import MaigretDatabase, MaigretSite
@@ -126,6 +128,22 @@ def test_site_url_detector():
)
def test_extract_id_from_url_skips_none_groups():
    """Check ID extraction when an optional regex group does not match.

    The pattern installed on the site has a mandatory first capture group
    and an optional trailing one. For a URL with no trailing path segment
    the second group is ``None``; the extraction is expected to still
    return the ``("username", "username")`` tuple.
    """
    site_config = {
        "urlMain": "https://example.com",
        "url": "https://example.com/{username}",
    }
    site = MaigretSite("Example", site_config)

    # Override the site's URL pattern with one whose last group is optional,
    # so that group is None when the URL ends right after the first segment.
    optional_tail_pattern = re.compile(
        r"^https://example\.com/([^/?#]+)(?:/(.*))?$"
    )
    site.url_regexp = optional_tail_pattern

    extracted = site.extract_id_from_url("https://example.com/username")

    # NOTE(review): the second element presumably is the identifier type
    # reported by the site -- confirm against MaigretSite.
    expected = ("username", "username")
    assert extracted == expected
def test_ranked_sites_dict():
db = MaigretDatabase()
db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))