Mirror of https://github.com/soxoj/maigret.git (synced 2026-05-06 22:19:01 +00:00)
@@ -1,3 +1,7 @@
 #!/bin/sh
 echo 'Activating update_sitesmd hook script...'
 poetry run update_sitesmd
+
+echo 'Regenerating db_meta.json...'
+python3 utils/generate_db_meta.py
+git add maigret/resources/db_meta.json
@@ -27,6 +27,9 @@ jobs:
           pip3 install .
           python3 ./utils/update_site_data.py --empty-only

+      - name: Regenerate db_meta.json
+        run: python3 utils/generate_db_meta.py
+
       - name: Remove ambiguous main tag
         run: git tag -d main || true

@@ -27,3 +27,77 @@ Missing any of these files is not an error.
 If the next settings file contains an already known option,
 that option is overwritten. So it is possible to make
 custom configurations for different users and directories.
+
+.. _database-auto-update:
+
+Database auto-update
+--------------------
+
+Maigret ships with a bundled site database, but it gets outdated between releases. To keep the database current, Maigret automatically checks for updates on startup.
+
+**How it works:**
+
+1. On startup, Maigret checks if more than 24 hours have passed since the last update check.
+2. If so, it fetches a lightweight metadata file (~200 bytes) from GitHub to see if a newer database is available.
+3. If a newer, compatible database exists, Maigret downloads it to ``~/.maigret/data.json`` and uses it instead of the bundled copy.
+4. If the download fails or the new database is incompatible with your Maigret version, the bundled database is used as a fallback.
+
+The downloaded database takes **higher priority** than the bundled one: it replaces it rather than overlaying it.
+
+**Status messages** are printed only when an action occurs:
+
+.. code-block:: text
+
+    [*] DB auto-update: checking for updates...
+    [+] DB auto-update: database updated successfully (3180 sites)
+    [*] DB auto-update: database is up to date (3157 sites)
+    [!] DB auto-update: latest database requires maigret >= 0.6.0, you have 0.5.0
+
+**Forcing an update:**
+
+Use the ``--force-update`` flag to check for updates immediately, ignoring the check interval:
+
+.. code-block:: console
+
+    maigret username --force-update
+
+The update happens at startup, then the search continues normally with the freshly downloaded database.
+
+**Disabling auto-update:**
+
+Use the ``--no-autoupdate`` flag to skip the update check entirely:
+
+.. code-block:: console
+
+    maigret username --no-autoupdate
+
+Or set it permanently in ``~/.maigret/settings.json``:
+
+.. code-block:: json
+
+    {
+        "no_autoupdate": true
+    }
+
+This is recommended for **Docker containers**, **CI pipelines**, and **air-gapped environments**.
+
+**Configuration options** (in ``settings.json``):
+
+.. list-table::
+   :header-rows: 1
+   :widths: 35 15 50
+
+   * - Setting
+     - Default
+     - Description
+   * - ``no_autoupdate``
+     - ``false``
+     - Disable auto-update entirely
+   * - ``autoupdate_check_interval_hours``
+     - ``24``
+     - How often to check for updates (in hours)
+   * - ``db_update_meta_url``
+     - GitHub raw URL
+     - URL of the metadata file (for custom mirrors)
+
+**Using a custom database** with ``--db`` always skips auto-update: you are explicitly choosing your data source.
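For reference, a minimal sketch of a user-level ``~/.maigret/settings.json`` that tunes all three options together. The keys and defaults mirror the bundled ``settings.json`` changes later in this diff; the mirror URL and 12-hour interval are purely illustrative:

.. code-block:: json

    {
        "no_autoupdate": false,
        "autoupdate_check_interval_hours": 12,
        "db_update_meta_url": "https://example.org/maigret-mirror/db_meta.json"
    }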
@@ -0,0 +1,330 @@
+"""
+Database auto-update logic for maigret.
+
+Checks a lightweight meta file to determine if a newer site database is available,
+downloads it if compatible, and caches it locally in ~/.maigret/.
+"""
+
+import hashlib
+import json
+import logging
+import os
+import os.path as path
+import tempfile
+from datetime import datetime, timezone
+from typing import Optional
+
+import requests
+from colorama import Fore, Style
+
+from .__version__ import __version__
+
+logger = logging.getLogger("maigret")
+
+_use_color = True
+
+
+def _print_info(msg: str) -> None:
+    text = f"[*] {msg}"
+    if _use_color:
+        print(Style.BRIGHT + Fore.GREEN + text + Style.RESET_ALL)
+    else:
+        print(text)
+
+
+def _print_success(msg: str) -> None:
+    text = f"[+] {msg}"
+    if _use_color:
+        print(Style.BRIGHT + Fore.GREEN + text + Style.RESET_ALL)
+    else:
+        print(text)
+
+
+def _print_warning(msg: str) -> None:
+    text = f"[!] {msg}"
+    if _use_color:
+        print(Style.BRIGHT + Fore.YELLOW + text + Style.RESET_ALL)
+    else:
+        print(text)
+
+
+DEFAULT_META_URL = (
+    "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/db_meta.json"
+)
+DEFAULT_CHECK_INTERVAL_HOURS = 24
+MAIGRET_HOME = path.expanduser("~/.maigret")
+CACHED_DB_PATH = path.join(MAIGRET_HOME, "data.json")
+STATE_PATH = path.join(MAIGRET_HOME, "autoupdate_state.json")
+BUNDLED_DB_PATH = path.join(path.dirname(path.realpath(__file__)), "resources", "data.json")
+
+
+def _parse_version(version_str: str) -> tuple:
+    """Parse a version string like '0.5.0' into a comparable tuple (0, 5, 0)."""
+    try:
+        return tuple(int(x) for x in version_str.strip().split("."))
+    except (ValueError, AttributeError):
+        return (0, 0, 0)
+
+
+def _ensure_maigret_home() -> None:
+    os.makedirs(MAIGRET_HOME, exist_ok=True)
+
+
+def _load_state() -> dict:
+    try:
+        with open(STATE_PATH, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except (FileNotFoundError, json.JSONDecodeError, OSError):
+        return {}
+
+
+def _save_state(state: dict) -> None:
+    _ensure_maigret_home()
+    tmp_path = STATE_PATH + ".tmp"
+    try:
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump(state, f, indent=2, ensure_ascii=False)
+        os.replace(tmp_path, STATE_PATH)
+    except OSError:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+
+
+def _needs_check(state: dict, interval_hours: int) -> bool:
+    last_check = state.get("last_check_at")
+    if not last_check:
+        return True
+    try:
+        last_dt = datetime.fromisoformat(last_check.replace("Z", "+00:00"))
+        elapsed = (datetime.now(timezone.utc) - last_dt).total_seconds() / 3600
+        return elapsed >= interval_hours
+    except (ValueError, TypeError):
+        return True
+
+
+def _fetch_meta(meta_url: str, timeout: int = 10) -> Optional[dict]:
+    try:
+        response = requests.get(meta_url, timeout=timeout)
+        if response.status_code == 200:
+            return response.json()
+    except Exception:
+        pass
+    return None
+
+
+def _is_version_compatible(meta: dict) -> bool:
+    min_ver = meta.get("min_maigret_version", "0.0.0")
+    return _parse_version(__version__) >= _parse_version(min_ver)
+
+
+def _is_update_available(meta: dict, state: dict) -> bool:
+    if not path.isfile(CACHED_DB_PATH):
+        return True
+    remote_date = meta.get("updated_at", "")
+    cached_date = state.get("last_meta", {}).get("updated_at", "")
+    return remote_date > cached_date
+
+
+def _download_and_verify(data_url: str, expected_sha256: str, timeout: int = 60) -> Optional[str]:
+    _ensure_maigret_home()
+    tmp_fd, tmp_path = tempfile.mkstemp(dir=MAIGRET_HOME, suffix=".json")
+    try:
+        response = requests.get(data_url, timeout=timeout)
+        if response.status_code != 200:
+            return None
+
+        content = response.content
+        actual_sha256 = hashlib.sha256(content).hexdigest()
+        if actual_sha256 != expected_sha256:
+            _print_warning("DB auto-update: SHA-256 mismatch, download rejected")
+            return None
+
+        # Validate JSON structure
+        data = json.loads(content)
+        if not all(k in data for k in ("sites", "engines", "tags")):
+            _print_warning("DB auto-update: invalid database structure")
+            return None
+
+        os.write(tmp_fd, content)
+        os.close(tmp_fd)
+        tmp_fd = None
+        os.replace(tmp_path, CACHED_DB_PATH)
+        return CACHED_DB_PATH
+    except Exception:
+        return None
+    finally:
+        if tmp_fd is not None:
+            os.close(tmp_fd)
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+
+
+def _best_local() -> str:
+    """Return cached DB if it exists and is valid, otherwise bundled."""
+    if path.isfile(CACHED_DB_PATH):
+        try:
+            with open(CACHED_DB_PATH, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if "sites" in data:
+                return CACHED_DB_PATH
+        except (json.JSONDecodeError, OSError):
+            pass
+    return BUNDLED_DB_PATH
+
+
+def _now_iso() -> str:
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def resolve_db_path(
+    db_file_arg: str,
+    no_autoupdate: bool = False,
+    meta_url: str = DEFAULT_META_URL,
+    check_interval_hours: int = DEFAULT_CHECK_INTERVAL_HOURS,
+    color: bool = True,
+) -> str:
+    """
+    Determine which database file to use, potentially downloading an update.
+
+    Returns the path to the database file that should be loaded.
+    """
+    global _use_color
+    _use_color = color
+
+    default_db_name = "resources/data.json"
+
+    # User specified a custom DB — skip auto-update
+    is_url = db_file_arg.startswith("http://") or db_file_arg.startswith("https://")
+    is_default = db_file_arg == default_db_name
+    if is_url:
+        return db_file_arg
+    if not is_default:
+        return path.join(path.dirname(path.realpath(__file__)), db_file_arg)
+
+    # Auto-update disabled
+    if no_autoupdate:
+        return _best_local()
+
+    # Check interval
+    _ensure_maigret_home()
+    state = _load_state()
+    if not _needs_check(state, check_interval_hours):
+        return _best_local()
+
+    # Time to check
+    _print_info("DB auto-update: checking for updates...")
+    meta = _fetch_meta(meta_url)
+    if meta is None:
+        _print_warning("DB auto-update: could not reach update server, using local database")
+        state["last_check_at"] = _now_iso()
+        _save_state(state)
+        return _best_local()
+
+    # Version compatibility
+    if not _is_version_compatible(meta):
+        min_ver = meta.get("min_maigret_version", "?")
+        _print_warning(
+            f"DB auto-update: latest database requires maigret >= {min_ver}, "
+            f"you have {__version__}. Please upgrade with: pip install -U maigret"
+        )
+        state["last_check_at"] = _now_iso()
+        _save_state(state)
+        return _best_local()
+
+    # Check if update available
+    if not _is_update_available(meta, state):
+        sites_count = meta.get("sites_count", "?")
+        _print_info(f"DB auto-update: database is up to date ({sites_count} sites)")
+        state["last_check_at"] = _now_iso()
+        state["last_meta"] = meta
+        _save_state(state)
+        return _best_local()
+
+    # Download update
+    new_count = meta.get("sites_count", "?")
+    old_count = state.get("last_meta", {}).get("sites_count")
+    if old_count:
+        _print_info(f"DB auto-update: downloading updated database ({new_count} sites, was {old_count})...")
+    else:
+        _print_info(f"DB auto-update: downloading database ({new_count} sites)...")
+
+    data_url = meta.get("data_url", "")
+    expected_sha = meta.get("data_sha256", "")
+    result = _download_and_verify(data_url, expected_sha)
+
+    if result is None:
+        _print_warning("DB auto-update: download failed, using local database")
+        state["last_check_at"] = _now_iso()
+        _save_state(state)
+        return _best_local()
+
+    _print_success(f"DB auto-update: database updated successfully ({new_count} sites)")
+    state["last_check_at"] = _now_iso()
+    state["last_meta"] = meta
+    state["cached_db_sha256"] = expected_sha
+    _save_state(state)
+    return CACHED_DB_PATH
+
+
+def force_update(
+    meta_url: str = DEFAULT_META_URL,
+    color: bool = True,
+) -> bool:
+    """
+    Force check for database updates and download if available.
+
+    Returns True if database was updated, False otherwise.
+    """
+    global _use_color
+    _use_color = color
+
+    _ensure_maigret_home()
+
+    _print_info("DB update: checking for updates...")
+    meta = _fetch_meta(meta_url)
+    if meta is None:
+        _print_warning("DB update: could not reach update server")
+        return False
+
+    if not _is_version_compatible(meta):
+        min_ver = meta.get("min_maigret_version", "?")
+        _print_warning(
+            f"DB update: latest database requires maigret >= {min_ver}, "
+            f"you have {__version__}. Please upgrade with: pip install -U maigret"
+        )
+        return False
+
+    state = _load_state()
+    new_count = meta.get("sites_count", "?")
+    old_count = state.get("last_meta", {}).get("sites_count")
+
+    if not _is_update_available(meta, state):
+        _print_info(f"DB update: database is already up to date ({new_count} sites)")
+        state["last_check_at"] = _now_iso()
+        state["last_meta"] = meta
+        _save_state(state)
+        return False
+
+    if old_count:
+        _print_info(f"DB update: downloading updated database ({new_count} sites, was {old_count})...")
+    else:
+        _print_info(f"DB update: downloading database ({new_count} sites)...")
+
+    data_url = meta.get("data_url", "")
+    expected_sha = meta.get("data_sha256", "")
+    result = _download_and_verify(data_url, expected_sha)
+
+    if result is None:
+        _print_warning("DB update: download failed")
+        return False
+
+    _print_success(f"DB update: database updated successfully ({new_count} sites)")
+    state["last_check_at"] = _now_iso()
+    state["last_meta"] = meta
+    state["cached_db_sha256"] = expected_sha
+    _save_state(state)
+    return True
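The new module exposes two public entry points, ``resolve_db_path()`` and ``force_update()``; it is imported as ``maigret.db_updater`` elsewhere in this changeset. A minimal standalone sketch of how a caller might drive them. The real wiring appears in the CLI changes below, and the argument values here are illustrative:

.. code-block:: python

    from maigret.db_updater import force_update, resolve_db_path

    # Force an immediate check-and-download; True only if a new
    # database was actually fetched and passed verification.
    updated = force_update(color=False)

    # Resolve which database file to load. The default name triggers the
    # auto-update path; a URL is returned as-is, and any other path is
    # resolved relative to the package directory.
    db_file = resolve_db_path(
        db_file_arg="resources/data.json",
        no_autoupdate=updated,  # skip a second network check if we just updated
        check_interval_hours=24,
        color=False,
    )
    print(db_file)  # either ~/.maigret/data.json or the bundled copy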
@@ -201,6 +201,20 @@ def setup_arguments_parser(settings: Settings):
         default=settings.sites_db_path,
         help="Load Maigret database from a JSON file or HTTP web resource.",
     )
+    parser.add_argument(
+        "--no-autoupdate",
+        action="store_true",
+        dest="no_autoupdate",
+        default=settings.no_autoupdate,
+        help="Disable automatic database updates on startup.",
+    )
+    parser.add_argument(
+        "--force-update",
+        action="store_true",
+        dest="force_update",
+        default=False,
+        help="Force check for database updates and download if available.",
+    )
     parser.add_argument(
         "--cookies-jar-file",
         metavar="COOKIE_FILE",
@@ -543,9 +557,21 @@ async def main():
     else:
         args.exclude_tags = []

-    db_file = args.db_file \
-        if (args.db_file.startswith("http://") or args.db_file.startswith("https://")) \
-        else path.join(path.dirname(path.realpath(__file__)), args.db_file)
+    from .db_updater import resolve_db_path, force_update, BUNDLED_DB_PATH
+
+    if args.force_update:
+        force_update(
+            meta_url=settings.db_update_meta_url,
+            color=not args.no_color,
+        )
+
+    db_file = resolve_db_path(
+        db_file_arg=args.db_file,
+        no_autoupdate=args.no_autoupdate or args.force_update,
+        meta_url=settings.db_update_meta_url,
+        check_interval_hours=settings.autoupdate_check_interval_hours,
+        color=not args.no_color,
+    )
+
     if args.top_sites == 0 or args.all_sites:
         args.top_sites = sys.maxsize
@@ -560,7 +586,15 @@ async def main():
     )

     # Create object with all information about sites we are aware of.
-    db = MaigretDatabase().load_from_path(db_file)
+    try:
+        db = MaigretDatabase().load_from_path(db_file)
+    except Exception as e:
+        logger.warning(f"Failed to load database from {db_file}: {e}")
+        if db_file != BUNDLED_DB_PATH:
+            logger.warning("Falling back to bundled database")
+            db = MaigretDatabase().load_from_path(BUNDLED_DB_PATH)
+        else:
+            raise
     get_top_sites_for_id = lambda x: db.ranked_sites_dict(
         top=args.top_sites,
         tags=args.tags,
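Note the guard in the ``resolve_db_path()`` call above: when ``--force-update`` has already run, ``no_autoupdate`` is forced to ``True``, so ``resolve_db_path()`` skips its own network check and simply picks up the freshly downloaded cache via ``_best_local()`` rather than contacting the server twice in one startup.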
@@ -29262,7 +29262,6 @@
             "usernameClaimed": "alex",
             "usernameUnclaimed": "noonewouldeverusethis7"
         },
-
        "izmailonline.com": {
            "tags": [
                "ua"
@@ -0,0 +1,8 @@
+{
+    "version": 1,
+    "updated_at": "2026-04-04T15:54:23Z",
+    "sites_count": 3157,
+    "min_maigret_version": "0.5.0",
+    "data_sha256": "880a56363cf5d71e13ca389330388fbc4796bff50d6e207a056112c4a5606f83",
+    "data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
+}
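The ``data_sha256`` field is what ``_download_and_verify()`` checks before installing a downloaded database. The same integrity check can be reproduced by hand; a sketch, assuming it is run from the repository root:

.. code-block:: python

    import hashlib
    import json

    with open("maigret/resources/db_meta.json", encoding="utf-8") as f:
        meta = json.load(f)

    with open("maigret/resources/data.json", "rb") as f:
        digest = hashlib.sha256(f.read()).hexdigest()

    # The bundled database must hash to the digest recorded in its meta file.
    assert digest == meta["data_sha256"], "data.json does not match db_meta.json"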
@@ -54,5 +54,8 @@
     "graph_report": false,
     "pdf_report": false,
     "html_report": false,
-    "web_interface_port": 5000
+    "web_interface_port": 5000,
+    "no_autoupdate": false,
+    "db_update_meta_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/db_meta.json",
+    "autoupdate_check_interval_hours": 24
 }
@@ -43,6 +43,9 @@ class Settings:
     html_report: bool
     graph_report: bool
     web_interface_port: int
+    no_autoupdate: bool
+    db_update_meta_url: str
+    autoupdate_check_interval_hours: int

     # submit mode settings
     presence_strings: list
@@ -48,6 +48,8 @@ DEFAULT_ARGS: Dict[str, Any] = {
     'web': None,
     'with_domains': False,
     'xmind': False,
+    'no_autoupdate': False,
+    'force_update': False,
 }

@@ -0,0 +1,233 @@
+"""Tests for the database auto-update system."""
+
+import json
+import os
+import hashlib
+from datetime import datetime, timezone, timedelta
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from maigret.db_updater import (
+    _parse_version,
+    _needs_check,
+    _is_version_compatible,
+    _is_update_available,
+    _load_state,
+    _save_state,
+    _best_local,
+    _now_iso,
+    resolve_db_path,
+    force_update,
+    CACHED_DB_PATH,
+    BUNDLED_DB_PATH,
+    STATE_PATH,
+    MAIGRET_HOME,
+)
+
+
+def test_parse_version():
+    assert _parse_version("0.5.0") == (0, 5, 0)
+    assert _parse_version("1.2.3") == (1, 2, 3)
+    assert _parse_version("bad") == (0, 0, 0)
+    assert _parse_version("") == (0, 0, 0)
+
+
+def test_needs_check_no_state():
+    assert _needs_check({}, 24) is True
+
+
+def test_needs_check_recent():
+    state = {"last_check_at": _now_iso()}
+    assert _needs_check(state, 24) is False
+
+
+def test_needs_check_expired():
+    old_time = (datetime.now(timezone.utc) - timedelta(hours=25)).strftime("%Y-%m-%dT%H:%M:%SZ")
+    state = {"last_check_at": old_time}
+    assert _needs_check(state, 24) is True
+
+
+def test_needs_check_corrupt():
+    state = {"last_check_at": "not-a-date"}
+    assert _needs_check(state, 24) is True
+
+
+def test_version_compatible():
+    with patch("maigret.db_updater.__version__", "0.5.0"):
+        assert _is_version_compatible({"min_maigret_version": "0.5.0"}) is True
+        assert _is_version_compatible({"min_maigret_version": "0.4.0"}) is True
+        assert _is_version_compatible({"min_maigret_version": "0.6.0"}) is False
+        assert _is_version_compatible({}) is True  # missing field = compatible
+
+
+def test_update_available_no_cache(tmp_path):
+    with patch("maigret.db_updater.CACHED_DB_PATH", str(tmp_path / "nonexistent.json")):
+        assert _is_update_available({"updated_at": "2026-01-01T00:00:00Z"}, {}) is True
+
+
+def test_update_available_newer(tmp_path):
+    cache = tmp_path / "data.json"
+    cache.write_text("{}")
+    with patch("maigret.db_updater.CACHED_DB_PATH", str(cache)):
+        state = {"last_meta": {"updated_at": "2026-01-01T00:00:00Z"}}
+        meta = {"updated_at": "2026-02-01T00:00:00Z"}
+        assert _is_update_available(meta, state) is True
+
+
+def test_update_available_same(tmp_path):
+    cache = tmp_path / "data.json"
+    cache.write_text("{}")
+    with patch("maigret.db_updater.CACHED_DB_PATH", str(cache)):
+        state = {"last_meta": {"updated_at": "2026-01-01T00:00:00Z"}}
+        meta = {"updated_at": "2026-01-01T00:00:00Z"}
+        assert _is_update_available(meta, state) is False
+
+
+def test_load_state_missing(tmp_path):
+    with patch("maigret.db_updater.STATE_PATH", str(tmp_path / "missing.json")):
+        assert _load_state() == {}
+
+
+def test_load_state_corrupt(tmp_path):
+    corrupt = tmp_path / "state.json"
+    corrupt.write_text("not json{{{")
+    with patch("maigret.db_updater.STATE_PATH", str(corrupt)):
+        assert _load_state() == {}
+
+
+def test_save_and_load_state(tmp_path):
+    state_file = tmp_path / "state.json"
+    with patch("maigret.db_updater.STATE_PATH", str(state_file)):
+        with patch("maigret.db_updater.MAIGRET_HOME", str(tmp_path)):
+            _save_state({"last_check_at": "2026-01-01T00:00:00Z"})
+            loaded = _load_state()
+            assert loaded["last_check_at"] == "2026-01-01T00:00:00Z"
+
+
+def test_best_local_with_valid_cache(tmp_path):
+    cache = tmp_path / "data.json"
+    cache.write_text('{"sites": {}, "engines": {}, "tags": []}')
+    with patch("maigret.db_updater.CACHED_DB_PATH", str(cache)):
+        assert _best_local() == str(cache)
+
+
+def test_best_local_with_corrupt_cache(tmp_path):
+    cache = tmp_path / "data.json"
+    cache.write_text("not json")
+    with patch("maigret.db_updater.CACHED_DB_PATH", str(cache)):
+        assert _best_local() == BUNDLED_DB_PATH
+
+
+def test_best_local_no_cache(tmp_path):
+    with patch("maigret.db_updater.CACHED_DB_PATH", str(tmp_path / "missing.json")):
+        assert _best_local() == BUNDLED_DB_PATH
+
+
+def test_resolve_db_path_custom_url():
+    result = resolve_db_path("https://example.com/db.json")
+    assert result == "https://example.com/db.json"
+
+
+def test_resolve_db_path_custom_file():
+    result = resolve_db_path("custom/path.json")
+    assert result.endswith("custom/path.json")
+
+
+def test_resolve_db_path_no_autoupdate(tmp_path):
+    with patch("maigret.db_updater.CACHED_DB_PATH", str(tmp_path / "missing.json")):
+        result = resolve_db_path("resources/data.json", no_autoupdate=True)
+        assert result == BUNDLED_DB_PATH
+
+
+def test_resolve_db_path_no_autoupdate_with_cache(tmp_path):
+    cache = tmp_path / "data.json"
+    cache.write_text('{"sites": {}, "engines": {}, "tags": []}')
+    with patch("maigret.db_updater.CACHED_DB_PATH", str(cache)):
+        result = resolve_db_path("resources/data.json", no_autoupdate=True)
+        assert result == str(cache)
+
+
+@patch("maigret.db_updater._fetch_meta")
+def test_resolve_db_path_network_failure(mock_fetch, tmp_path):
+    mock_fetch.return_value = None
+    with patch("maigret.db_updater.MAIGRET_HOME", str(tmp_path)):
+        with patch("maigret.db_updater.STATE_PATH", str(tmp_path / "state.json")):
+            with patch("maigret.db_updater.CACHED_DB_PATH", str(tmp_path / "missing.json")):
+                result = resolve_db_path("resources/data.json")
+                assert result == BUNDLED_DB_PATH
+
+
+# --- force_update tests ---
+
+
+@patch("maigret.db_updater._fetch_meta")
+def test_force_update_network_failure(mock_fetch, tmp_path):
+    mock_fetch.return_value = None
+    with patch("maigret.db_updater.MAIGRET_HOME", str(tmp_path)):
+        with patch("maigret.db_updater.STATE_PATH", str(tmp_path / "state.json")):
+            assert force_update() is False
+
+
+@patch("maigret.db_updater._fetch_meta")
+def test_force_update_incompatible_version(mock_fetch, tmp_path):
+    mock_fetch.return_value = {"min_maigret_version": "99.0.0", "sites_count": 100}
+    with patch("maigret.db_updater.MAIGRET_HOME", str(tmp_path)):
+        with patch("maigret.db_updater.STATE_PATH", str(tmp_path / "state.json")):
+            assert force_update() is False
+
+
+@patch("maigret.db_updater._download_and_verify")
+@patch("maigret.db_updater._fetch_meta")
+def test_force_update_success(mock_fetch, mock_download, tmp_path):
+    mock_fetch.return_value = {
+        "min_maigret_version": "0.1.0",
+        "sites_count": 3200,
+        "updated_at": "2099-01-01T00:00:00Z",
+        "data_url": "https://example.com/data.json",
+        "data_sha256": "abc123",
+    }
+    mock_download.return_value = str(tmp_path / "data.json")
+    with patch("maigret.db_updater.MAIGRET_HOME", str(tmp_path)):
+        with patch("maigret.db_updater.STATE_PATH", str(tmp_path / "state.json")):
+            with patch("maigret.db_updater.CACHED_DB_PATH", str(tmp_path / "missing.json")):
+                assert force_update() is True
+                state = _load_state()
+                assert state["last_meta"]["sites_count"] == 3200
+
+
+@patch("maigret.db_updater._fetch_meta")
+def test_force_update_already_up_to_date(mock_fetch, tmp_path):
+    cache = tmp_path / "data.json"
+    cache.write_text('{"sites": {}, "engines": {}, "tags": []}')
+    state_file = tmp_path / "state.json"
+    state_file.write_text(json.dumps({
+        "last_check_at": _now_iso(),
+        "last_meta": {"updated_at": "2026-01-01T00:00:00Z", "sites_count": 3000},
+    }))
+    mock_fetch.return_value = {
+        "min_maigret_version": "0.1.0",
+        "sites_count": 3000,
+        "updated_at": "2026-01-01T00:00:00Z",
+    }
+    with patch("maigret.db_updater.MAIGRET_HOME", str(tmp_path)):
+        with patch("maigret.db_updater.STATE_PATH", str(state_file)):
+            with patch("maigret.db_updater.CACHED_DB_PATH", str(cache)):
+                assert force_update() is False
+
+
+@patch("maigret.db_updater._download_and_verify")
+@patch("maigret.db_updater._fetch_meta")
+def test_force_update_download_fails(mock_fetch, mock_download, tmp_path):
+    mock_fetch.return_value = {
+        "min_maigret_version": "0.1.0",
+        "sites_count": 3200,
+        "updated_at": "2099-01-01T00:00:00Z",
+        "data_url": "https://example.com/data.json",
+        "data_sha256": "abc123",
+    }
+    mock_download.return_value = None
+    with patch("maigret.db_updater.MAIGRET_HOME", str(tmp_path)):
+        with patch("maigret.db_updater.STATE_PATH", str(tmp_path / "state.json")):
+            with patch("maigret.db_updater.CACHED_DB_PATH", str(tmp_path / "missing.json")):
+                assert force_update() is False
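The suite relies only on ``pytest`` fixtures (``tmp_path``) and ``unittest.mock.patch``, so it can run in isolation without network access; a sketch, with the file path assumed from the project's usual test layout:

.. code-block:: console

    pytest -q tests/test_db_updater.py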
@@ -0,0 +1,59 @@
+"""Generate db_meta.json from data.json for the auto-update system."""
+
+import argparse
+import hashlib
+import json
+import os.path as path
+import sys
+from datetime import datetime, timezone
+
+RESOURCES_DIR = path.join(path.dirname(path.dirname(path.abspath(__file__))), "maigret", "resources")
+DATA_JSON_PATH = path.join(RESOURCES_DIR, "data.json")
+META_JSON_PATH = path.join(RESOURCES_DIR, "db_meta.json")
+DEFAULT_DATA_URL = "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
+
+
+def get_current_version():
+    version_file = path.join(path.dirname(path.dirname(path.abspath(__file__))), "maigret", "__version__.py")
+    with open(version_file) as f:
+        for line in f:
+            if line.startswith("__version__"):
+                return line.split("=")[1].strip().strip("'\"")
+    return "0.0.0"
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate db_meta.json from data.json")
+    parser.add_argument("--min-version", default=None, help="Minimum compatible maigret version (default: current version)")
+    parser.add_argument("--data-url", default=DEFAULT_DATA_URL, help="URL where data.json can be downloaded")
+    args = parser.parse_args()
+
+    min_version = args.min_version or get_current_version()
+
+    with open(DATA_JSON_PATH, "rb") as f:
+        raw = f.read()
+    sha256 = hashlib.sha256(raw).hexdigest()
+
+    data = json.loads(raw)
+    sites_count = len(data.get("sites", {}))
+
+    meta = {
+        "version": 1,
+        "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "sites_count": sites_count,
+        "min_maigret_version": min_version,
+        "data_sha256": sha256,
+        "data_url": args.data_url,
+    }
+
+    with open(META_JSON_PATH, "w", encoding="utf-8") as f:
+        json.dump(meta, f, indent=4, ensure_ascii=False)
+
+    print(f"Generated {META_JSON_PATH}")
+    print(f"  sites: {sites_count}")
+    print(f"  sha256: {sha256[:16]}...")
+    print(f"  min_version: {min_version}")
+
+
+if __name__ == "__main__":
+    main()
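Invoked from the repository root, the script rewrites ``maigret/resources/db_meta.json`` in place. Illustrative output, with values taken from the ``db_meta.json`` shown earlier and the absolute printed path abbreviated:

.. code-block:: console

    $ python3 utils/generate_db_meta.py
    Generated .../maigret/resources/db_meta.json
      sites: 3157
      sha256: 880a56363cf5d71e...
      min_version: 0.5.0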
@@ -217,6 +217,13 @@ Rank data fetched from Majestic Million by domains.
     site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc).date()})\n')
     db.save_to_file(args.base_file)

+    # Regenerate db_meta.json to stay in sync with data.json
+    try:
+        from generate_db_meta import main as generate_meta
+        generate_meta()
+    except Exception as e:
+        print(f"Warning: could not regenerate db_meta.json: {e}")
+
     statistics_text = db.get_db_stats(is_markdown=True)
     site_file.write('## Statistics\n\n')
     site_file.write(statistics_text)