diff --git a/maigret/resources/data.json b/maigret/resources/data.json index ae3fa6e..46d0f05 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -57,7 +57,8 @@ "\"routePath\":null" ], "errors": { - "Login • Instagram": "Login required" + "Login • Instagram": "Login required", + "\"routePath\":\"\\/\"": "Login required (rate-limited or session blocked)" }, "alexaRank": 4, "urlMain": "https://www.instagram.com/", diff --git a/maigret/resources/db_meta.json b/maigret/resources/db_meta.json index 08dbad9..a5335c6 100644 --- a/maigret/resources/db_meta.json +++ b/maigret/resources/db_meta.json @@ -1,8 +1,8 @@ { "version": 1, - "updated_at": "2026-05-16T15:49:11Z", + "updated_at": "2026-05-16T16:00:20Z", "sites_count": 3155, "min_maigret_version": "0.6.1", - "data_sha256": "df2ab3dbc96bdcdc8aa4e9da485df75ce6c3274814080f00a35e89f7f43783e1", + "data_sha256": "0997b68c05eedb6e714432ed79580688d4923c56ef1ebf46db69b90039ef00d7", "data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json" } \ No newline at end of file diff --git a/tests/test_checking.py b/tests/test_checking.py index 9db3ef7..12530a9 100644 --- a/tests/test_checking.py +++ b/tests/test_checking.py @@ -126,6 +126,40 @@ def test_detect_error_page_ok(): assert detect_error_page("hello world", 200, {}, ignore_403=False) is None +def test_detect_error_page_instagram_login_wall(): + """Regression for #11: when Instagram serves the login wall (typically the + response after rate-limiting an unauthenticated client), the JSON state + contains `"routePath":"\\/"` (root path) rather than a username route. The + Instagram entry in data.json carries this marker in `errors` so the result + surfaces as UNKNOWN instead of a false AVAILABLE. + """ + instagram_errors = { + "Login • Instagram": "Login required", + '"routePath":"\\/"': "Login required (rate-limited or session blocked)", + } + login_wall_html = '...{"routePath":"\\/"},"timeSpent":...' + err = detect_error_page(login_wall_html, 200, instagram_errors, ignore_403=False) + assert err is not None + assert err.type == "Site-specific" + assert "rate-limited" in err.desc + + +def test_detect_error_page_instagram_marker_no_false_positive_on_profile(): + """The login-wall marker must NOT match a real profile page. On a claimed + user page, `routePath` carries the user-route template + (`"routePath":"\\/{username}\\/..."`); the closing-quote form + `"routePath":"\\/"` only appears on the login wall. + """ + instagram_errors = { + '"routePath":"\\/"': "Login required (rate-limited or session blocked)", + } + profile_html = ( + 'foo,"routePath":"\\/{username}\\/{?tab}\\/{?view_type}\\/",bar' + ) + err = detect_error_page(profile_html, 200, instagram_errors, ignore_403=False) + assert err is None + + def test_parse_usernames_single_username(): logger = Mock() result = parse_usernames({"profile_username": "alice"}, logger)