mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
Fix failing test for custom DB path resolution (#2468)
* Fix `--db` bug * Fix test_resolve_db_path_custom_file to create the file before testing Agent-Logs-Url: https://github.com/soxoj/maigret/sessions/3ea7b2e8-0565-4fca-8ec2-eff8eb4ee617 Co-authored-by: soxoj <31013580+soxoj@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -82,11 +82,63 @@ id types, sites will be filtered automatically.
|
||||
ids. Useful for repeated scanning with found known irrelevant usernames.
|
||||
|
||||
``--db`` - Load the Maigret database from a local JSON file or a valid online
|
||||
JSON file.
|
||||
JSON file. See :ref:`custom-database` below.
|
||||
|
||||
``--no-autoupdate`` - Disable the automatic database update check that
|
||||
runs at startup. The currently cached (or bundled) database is used
|
||||
as-is.
|
||||
|
||||
``--force-update`` - Force a database update check at startup, ignoring
|
||||
the usual check interval. Implies ``--no-autoupdate`` for the rest of
|
||||
the run after the explicit update finishes.
|
||||
|
||||
``--retries RETRIES`` - Number of attempts to retry temporarily failed
|
||||
requests.
|
||||
|
||||
.. _custom-database:
|
||||
|
||||
Using a custom sites database
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The ``--db`` flag accepts three forms:
|
||||
|
||||
1. **HTTP(S) URL** — fetched as-is, e.g.
|
||||
``--db https://example.com/my_db.json``.
|
||||
2. **Local file path** — absolute (``--db /tmp/private.json``) or
|
||||
relative to the current working directory
|
||||
(``--db LLM/maigret_private_db.json``).
|
||||
3. **Module-relative path** — kept for backwards compatibility, resolved
|
||||
against the installed ``maigret/`` package directory (e.g. the
|
||||
default ``resources/data.json``).
|
||||
|
||||
Resolution order for local paths: the path is first tried as given
|
||||
(absolute or cwd-relative); if that file does not exist, Maigret falls
|
||||
back to the legacy module-relative resolution. If neither location
|
||||
contains the file, Maigret exits with an error rather than silently
|
||||
loading the bundled database.
|
||||
|
||||
When ``--db`` points to a custom file, automatic database updates are
|
||||
skipped — the file is used exactly as provided.
|
||||
|
||||
On every run Maigret prints the database it actually loaded, for
|
||||
example::
|
||||
|
||||
[+] Using sites database: /path/to/maigret_private_db.json (6 sites)
|
||||
|
||||
If loading the requested database fails for any other reason (corrupt
|
||||
JSON, missing required keys, …), Maigret prints a warning, falls back
|
||||
to the bundled database, and reports the fallback explicitly::
|
||||
|
||||
[-] Falling back to bundled database: /…/maigret/resources/data.json
|
||||
[+] Using sites database: /…/maigret/resources/data.json (3154 sites)
|
||||
|
||||
A typical invocation against a private database, with auto-update
|
||||
disabled and all sites scanned, looks like::
|
||||
|
||||
python3 -m maigret username \
|
||||
--db LLM/maigret_private_db.json \
|
||||
--no-autoupdate -a
|
||||
|
||||
Reports
|
||||
-------
|
||||
|
||||
|
||||
+13
-1
@@ -203,7 +203,19 @@ def resolve_db_path(
|
||||
if is_url:
|
||||
return db_file_arg
|
||||
if not is_default:
|
||||
return path.join(path.dirname(path.realpath(__file__)), db_file_arg)
|
||||
# Try the path as-is (absolute or relative to cwd) first.
|
||||
if path.isfile(db_file_arg):
|
||||
return path.abspath(db_file_arg)
|
||||
# Fall back to legacy behavior: resolve relative to the maigret module dir.
|
||||
module_relative = path.join(path.dirname(path.realpath(__file__)), db_file_arg)
|
||||
if module_relative != db_file_arg and path.isfile(module_relative):
|
||||
return module_relative
|
||||
if module_relative != db_file_arg:
|
||||
raise FileNotFoundError(
|
||||
f"Custom database file not found: {db_file_arg!r} "
|
||||
f"(also tried {module_relative!r})"
|
||||
)
|
||||
raise FileNotFoundError(f"Custom database file not found: {db_file_arg!r}")
|
||||
|
||||
# Auto-update disabled
|
||||
if no_autoupdate:
|
||||
|
||||
+18
-8
@@ -574,13 +574,17 @@ async def main():
|
||||
color=not args.no_color,
|
||||
)
|
||||
|
||||
db_file = resolve_db_path(
|
||||
db_file_arg=args.db_file,
|
||||
no_autoupdate=args.no_autoupdate or args.force_update,
|
||||
meta_url=settings.db_update_meta_url,
|
||||
check_interval_hours=settings.autoupdate_check_interval_hours,
|
||||
color=not args.no_color,
|
||||
)
|
||||
try:
|
||||
db_file = resolve_db_path(
|
||||
db_file_arg=args.db_file,
|
||||
no_autoupdate=args.no_autoupdate or args.force_update,
|
||||
meta_url=settings.db_update_meta_url,
|
||||
check_interval_hours=settings.autoupdate_check_interval_hours,
|
||||
color=not args.no_color,
|
||||
)
|
||||
except FileNotFoundError as e:
|
||||
logger.error(str(e))
|
||||
sys.exit(2)
|
||||
|
||||
if args.top_sites == 0 or args.all_sites:
|
||||
args.top_sites = sys.maxsize
|
||||
@@ -597,11 +601,17 @@ async def main():
|
||||
# Create object with all information about sites we are aware of.
|
||||
try:
|
||||
db = MaigretDatabase().load_from_path(db_file)
|
||||
query_notify.success(f'Using sites database: {db_file} ({len(db.sites)} sites)')
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load database from {db_file}: {e}")
|
||||
if db_file != BUNDLED_DB_PATH:
|
||||
logger.warning("Falling back to bundled database")
|
||||
query_notify.warning(
|
||||
f'Falling back to bundled database: {BUNDLED_DB_PATH}'
|
||||
)
|
||||
db = MaigretDatabase().load_from_path(BUNDLED_DB_PATH)
|
||||
query_notify.success(
|
||||
f'Using sites database: {BUNDLED_DB_PATH} ({len(db.sites)} sites)'
|
||||
)
|
||||
else:
|
||||
raise
|
||||
get_top_sites_for_id = lambda x: db.ranked_sites_dict(
|
||||
|
||||
@@ -293,7 +293,7 @@
|
||||
"method": "vimeo"
|
||||
},
|
||||
"headers": {
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzU1MDM2ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiMWVlMjg4ZTQtZGRkMC00ZWYyLTgyOWYtMDRmMjg3NjI1MTA5In0.FkO1cjuIS9jpn5nxkRWWp-jr0Meh_WUvRP1L46qVhcw"
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzU1Nzk3NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiYWQwNWIzMWUtMGU4NC00NDUzLThjZGEtZWFjNDkxNzYwOTVhIn0.Q7VI5NgbZ5rGsmgDQMxa8cKIxWiFwNYa3BLgIBIuj54"
|
||||
},
|
||||
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
|
||||
"checkType": "status_code",
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-07T16:18:18Z",
|
||||
"sites_count": 3155,
|
||||
"updated_at": "2026-04-07T22:38:58Z",
|
||||
"sites_count": 3154,
|
||||
"min_maigret_version": "0.5.0",
|
||||
"data_sha256": "279fb90280814cd11dcd711b1b8e6c6a99fefea4ce6ef05c9d64dced6ac795c0",
|
||||
"data_sha256": "1f1abd85bad2a358e4af1919f2d89d38bf374640e78f6f33b362ac620c55d47f",
|
||||
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
||||
}
|
||||
@@ -129,8 +129,11 @@ def test_resolve_db_path_custom_url():
|
||||
assert result == "https://example.com/db.json"
|
||||
|
||||
|
||||
def test_resolve_db_path_custom_file():
|
||||
result = resolve_db_path("custom/path.json")
|
||||
def test_resolve_db_path_custom_file(tmp_path):
    """An existing custom database file is resolved to its absolute path."""
    import os

    # The file must exist on disk: resolve_db_path only accepts a custom
    # local path after an isfile() check succeeds, otherwise it raises
    # FileNotFoundError.
    custom_db = tmp_path / "custom" / "path.json"
    custom_db.parent.mkdir(parents=True)
    custom_db.write_text("{}")

    result = resolve_db_path(str(custom_db))

    # Compare against the absolute path instead of a "/"-separated suffix,
    # which would not match on platforms that use a different separator.
    assert result == os.path.abspath(str(custom_db))
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user