mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Fix ID extraction crash when regex groups are optional (#2572)
* Fix ID extraction crash when regex groups are optional

  Handle None capture groups in username/id extraction and add regression coverage for optional trailing groups.

* Remove leftover line that overwrote safe _id in extract_id_from_url
This commit is contained in:
+19
-3
@@ -181,7 +181,15 @@ class MaigretSite:
|
||||
if self.url_regexp:
|
||||
match_groups = self.url_regexp.match(url)
|
||||
if match_groups:
|
||||
return match_groups.groups()[-1].rstrip("/")
|
||||
username = next(
|
||||
(
|
||||
group.rstrip("/")
|
||||
for group in reversed(match_groups.groups())
|
||||
if isinstance(group, str) and group
|
||||
),
|
||||
None,
|
||||
)
|
||||
return username
|
||||
|
||||
return None
|
||||
|
||||
@@ -196,8 +204,16 @@ class MaigretSite:
|
||||
match_groups = self.url_regexp.match(url)
|
||||
if not match_groups:
|
||||
return None
|
||||
|
||||
_id = match_groups.groups()[-1].rstrip("/")
|
||||
_id = next(
|
||||
(
|
||||
group.rstrip("/")
|
||||
for group in reversed(match_groups.groups())
|
||||
if isinstance(group, str) and group
|
||||
),
|
||||
None,
|
||||
)
|
||||
if _id is None:
|
||||
return None
|
||||
_type = self.type
|
||||
|
||||
return _id, _type
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Maigret Database test functions"""
|
||||
|
||||
import re
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
from maigret.sites import MaigretDatabase, MaigretSite
|
||||
@@ -126,6 +128,22 @@ def test_site_url_detector():
|
||||
)
|
||||
|
||||
|
||||
def test_extract_id_from_url_skips_none_groups():
    """Check ID extraction when the URL pattern's optional trailing group is unmatched."""
    site_config = {
        "urlMain": "https://example.com",
        "url": "https://example.com/{username}",
    }
    site = MaigretSite("Example", site_config)

    # The second capture group sits inside an optional non-capturing group,
    # so it does not participate in the match for a URL that ends right
    # after the username.
    pattern = re.compile(r"^https://example\.com/([^/?#]+)(?:/(.*))?$")
    site.url_regexp = pattern

    extracted = site.extract_id_from_url("https://example.com/username")
    expected = ("username", "username")
    assert extracted == expected
|
||||
|
||||
|
||||
def test_ranked_sites_dict():
|
||||
db = MaigretDatabase()
|
||||
db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))
|
||||
|
||||
Reference in New Issue
Block a user