diff --git a/docs/source/command-line-options.rst b/docs/source/command-line-options.rst index e7fe06f..b111d91 100644 --- a/docs/source/command-line-options.rst +++ b/docs/source/command-line-options.rst @@ -82,11 +82,63 @@ id types, sites will be filtered automatically. ids. Useful for repeated scanning with found known irrelevant usernames. ``--db`` - Load Maigret database from a JSON file or an online, valid, -JSON file. +JSON file. See :ref:`custom-database` below. + +``--no-autoupdate`` - Disable the automatic database update check that +runs at startup. The currently cached (or bundled) database is used +as-is. + +``--force-update`` - Force a database update check at startup, ignoring +the usual check interval. Implies ``--no-autoupdate`` for the rest of +the run after the explicit update finishes. ``--retries RETRIES`` - Count of attempts to restart temporarily failed requests. +.. _custom-database: + +Using a custom sites database +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``--db`` flag accepts three forms: + +1. **HTTP(S) URL** — fetched as-is, e.g. + ``--db https://example.com/my_db.json``. +2. **Local file path** — absolute (``--db /tmp/private.json``) or + relative to the current working directory + (``--db LLM/maigret_private_db.json``). +3. **Module-relative path** — kept for backwards compatibility, resolved + against the installed ``maigret/`` package directory (e.g. the + default ``resources/data.json``). + +Resolution order for local paths: the path is first tried as given +(absolute or cwd-relative); if that file does not exist, Maigret falls +back to the legacy module-relative resolution. If neither location +contains the file, Maigret exits with an error rather than silently +loading the bundled database. + +When ``--db`` points to a custom file, automatic database updates are +skipped — the file is used exactly as provided. + +On every run Maigret prints the database it actually loaded, for +example:: + + [+] Using sites database: /path/to/maigret_private_db.json (6 sites) + +If loading the requested database fails for any other reason (corrupt +JSON, missing required keys, …), Maigret prints a warning, falls back +to the bundled database, and reports the fallback explicitly:: + + [-] Falling back to bundled database: /…/maigret/resources/data.json + [+] Using sites database: /…/maigret/resources/data.json (3154 sites) + +A typical invocation against a private database, with auto-update +disabled and all sites scanned, looks like:: + + python3 -m maigret username \ + --db LLM/maigret_private_db.json \ + --no-autoupdate -a + Reports ------- diff --git a/maigret/db_updater.py b/maigret/db_updater.py index dc8e6bc..146fbee 100644 --- a/maigret/db_updater.py +++ b/maigret/db_updater.py @@ -203,7 +203,19 @@ def resolve_db_path( if is_url: return db_file_arg if not is_default: - return path.join(path.dirname(path.realpath(__file__)), db_file_arg) + # Try the path as-is (absolute or relative to cwd) first. + if path.isfile(db_file_arg): + return path.abspath(db_file_arg) + # Fall back to legacy behavior: resolve relative to the maigret module dir. + module_relative = path.join(path.dirname(path.realpath(__file__)), db_file_arg) + if module_relative != db_file_arg and path.isfile(module_relative): + return module_relative + if module_relative != db_file_arg: + raise FileNotFoundError( + f"Custom database file not found: {db_file_arg!r} " + f"(also tried {module_relative!r})" + ) + raise FileNotFoundError(f"Custom database file not found: {db_file_arg!r}") # Auto-update disabled if no_autoupdate: diff --git a/maigret/maigret.py b/maigret/maigret.py index b1595dc..cead5a0 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -574,13 +574,17 @@ async def main(): color=not args.no_color, ) - db_file = resolve_db_path( - db_file_arg=args.db_file, - no_autoupdate=args.no_autoupdate or args.force_update, - meta_url=settings.db_update_meta_url, - check_interval_hours=settings.autoupdate_check_interval_hours, - color=not args.no_color, - ) + try: + db_file = resolve_db_path( + db_file_arg=args.db_file, + no_autoupdate=args.no_autoupdate or args.force_update, + meta_url=settings.db_update_meta_url, + check_interval_hours=settings.autoupdate_check_interval_hours, + color=not args.no_color, + ) + except FileNotFoundError as e: + logger.error(str(e)) + sys.exit(2) if args.top_sites == 0 or args.all_sites: args.top_sites = sys.maxsize @@ -597,11 +601,17 @@ async def main(): # Create object with all information about sites we are aware of. try: db = MaigretDatabase().load_from_path(db_file) + query_notify.success(f'Using sites database: {db_file} ({len(db.sites)} sites)') except Exception as e: logger.warning(f"Failed to load database from {db_file}: {e}") if db_file != BUNDLED_DB_PATH: - logger.warning("Falling back to bundled database") + query_notify.warning( + f'Falling back to bundled database: {BUNDLED_DB_PATH}' + ) db = MaigretDatabase().load_from_path(BUNDLED_DB_PATH) + query_notify.success( + f'Using sites database: {BUNDLED_DB_PATH} ({len(db.sites)} sites)' + ) else: raise get_top_sites_for_id = lambda x: db.ranked_sites_dict( diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 173df8b..f826ce9 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -293,7 +293,7 @@ "method": "vimeo" }, "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzU1MDM2ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiMWVlMjg4ZTQtZGRkMC00ZWYyLTgyOWYtMDRmMjg3NjI1MTA5In0.FkO1cjuIS9jpn5nxkRWWp-jr0Meh_WUvRP1L46qVhcw" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzU1Nzk3NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiYWQwNWIzMWUtMGU4NC00NDUzLThjZGEtZWFjNDkxNzYwOTVhIn0.Q7VI5NgbZ5rGsmgDQMxa8cKIxWiFwNYa3BLgIBIuj54" }, "urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1", "checkType": "status_code", diff --git a/maigret/resources/db_meta.json b/maigret/resources/db_meta.json index 3d772c6..4e1eec9 100644 --- a/maigret/resources/db_meta.json +++ b/maigret/resources/db_meta.json @@ -1,8 +1,8 @@ { "version": 1, - "updated_at": "2026-04-07T16:18:18Z", - "sites_count": 3155, + "updated_at": "2026-04-07T22:38:58Z", + "sites_count": 3154, "min_maigret_version": "0.5.0", - "data_sha256": "279fb90280814cd11dcd711b1b8e6c6a99fefea4ce6ef05c9d64dced6ac795c0", + "data_sha256": "1f1abd85bad2a358e4af1919f2d89d38bf374640e78f6f33b362ac620c55d47f", "data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json" } \ No newline at end of file diff --git a/tests/test_db_updater.py b/tests/test_db_updater.py index a5cf8d1..5ae2b3e 100644 --- a/tests/test_db_updater.py +++ b/tests/test_db_updater.py @@ -129,8 +129,11 @@ def test_resolve_db_path_custom_url(): assert result == "https://example.com/db.json" -def test_resolve_db_path_custom_file(): - result = resolve_db_path("custom/path.json") +def test_resolve_db_path_custom_file(tmp_path): + custom_db = tmp_path / "custom" / "path.json" + custom_db.parent.mkdir(parents=True) + custom_db.write_text("{}") + result = resolve_db_path(str(custom_db)) assert result.endswith("custom/path.json")