diff --git a/maigret/sites.py b/maigret/sites.py
index b30cfae..6ee6a0b 100644
--- a/maigret/sites.py
+++ b/maigret/sites.py
@@ -181,7 +181,18 @@ class MaigretSite:
         if self.url_regexp:
             match_groups = self.url_regexp.match(url)
             if match_groups:
-                return match_groups.groups()[-1].rstrip("/")
+                # Use the right-most group that is still non-empty after
+                # trimming slashes. Optional groups that did not take part
+                # in the match are None and are skipped; None if none fit.
+                username = next(
+                    (
+                        group.rstrip("/")
+                        for group in reversed(match_groups.groups())
+                        if isinstance(group, str) and group.rstrip("/")
+                    ),
+                    None,
+                )
+                return username
 
         return None
 
@@ -196,8 +207,19 @@ class MaigretSite:
         match_groups = self.url_regexp.match(url)
         if not match_groups:
             return None
 
-        _id = match_groups.groups()[-1].rstrip("/")
+        # Use the right-most group that is still non-empty after trimming
+        # slashes. Optional groups that did not take part in the match are
+        # None and are skipped; bail out when no group yields an id.
+        _id = next(
+            (
+                group.rstrip("/")
+                for group in reversed(match_groups.groups())
+                if isinstance(group, str) and group.rstrip("/")
+            ),
+            None,
+        )
+        if _id is None:
+            return None
         _type = self.type
 
         return _id, _type
diff --git a/tests/test_sites.py b/tests/test_sites.py
index 464b1cd..2f42ac5 100644
--- a/tests/test_sites.py
+++ b/tests/test_sites.py
@@ -1,5 +1,7 @@
 """Maigret Database test functions"""
 
+import re
+
 from typing import Any, Dict
 
 from maigret.sites import MaigretDatabase, MaigretSite
@@ -126,6 +128,29 @@ def test_site_url_detector():
     )
 
 
+def test_extract_id_from_url_skips_none_groups():
+    """Unmatched or slash-only trailing groups must not become the id."""
+    site = MaigretSite(
+        "Example",
+        {
+            "urlMain": "https://example.com",
+            "url": "https://example.com/{username}",
+        },
+    )
+    site.url_regexp = re.compile(r"^https://example\.com/([^/?#]+)(?:/(.*))?$")
+
+    # The optional trailing group did not take part in the match (None).
+    assert site.extract_id_from_url("https://example.com/username") == (
+        "username",
+        "username",
+    )
+    # The optional trailing group matched only "/", empty once trimmed.
+    assert site.extract_id_from_url("https://example.com/username//") == (
+        "username",
+        "username",
+    )
+
+
 def test_ranked_sites_dict():
     db = MaigretDatabase()
     db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))