mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-15 19:05:43 +00:00
a7338e97f3
* Make xhtml2pdf optional, fix install on Linux without libcairo Move xhtml2pdf to the new [pdf] extra so default `pip install maigret` no longer pulls pycairo (which has no Linux/macOS wheels and breaks the build without libcairo2-dev). save_pdf_report now raises a clear RuntimeError pointing to `pip install 'maigret[pdf]'`, and the CLI turns it into a friendly warning instead of a crash. Adds tests covering the missing-extra path, plus per-OS install docs. Fix for #2657, #2534 * Make arabic-reshaper and python-bidi optional; idempotent update of db_meta.json and sites.md * Regenerated poerty.lock * Update CI workflow to cover minimal installation without PDF deps
103 lines
3.7 KiB
Python
103 lines
3.7 KiB
Python
"""Generate db_meta.json from data.json for the auto-update system."""
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
import os.path as path
|
|
from datetime import datetime, timezone
|
|
from typing import Optional, Tuple
|
|
|
|
RESOURCES_DIR = path.join(path.dirname(path.dirname(path.abspath(__file__))), "maigret", "resources")
|
|
DATA_JSON_PATH = path.join(RESOURCES_DIR, "data.json")
|
|
META_JSON_PATH = path.join(RESOURCES_DIR, "db_meta.json")
|
|
DEFAULT_DATA_URL = "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
|
|
|
_TIMESTAMP_KEY = "updated_at"
|
|
|
|
|
|
def get_current_version():
|
|
version_file = path.join(path.dirname(path.dirname(path.abspath(__file__))), "maigret", "__version__.py")
|
|
with open(version_file) as f:
|
|
for line in f:
|
|
if line.startswith("__version__"):
|
|
return line.split("=")[1].strip().strip("'\"")
|
|
return "0.0.0"
|
|
|
|
|
|
def build_meta(data_path: str, min_version: str, data_url: str, now: Optional[datetime] = None) -> dict:
|
|
"""Build a db_meta dict for the given data.json. Does not touch the filesystem."""
|
|
with open(data_path, "rb") as f:
|
|
raw = f.read()
|
|
data = json.loads(raw)
|
|
ts = (now or datetime.now(timezone.utc)).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
return {
|
|
"version": 1,
|
|
_TIMESTAMP_KEY: ts,
|
|
"sites_count": len(data.get("sites", {})),
|
|
"min_maigret_version": min_version,
|
|
"data_sha256": hashlib.sha256(raw).hexdigest(),
|
|
"data_url": data_url,
|
|
}
|
|
|
|
|
|
def meta_payload_equals(a: dict, b: dict) -> bool:
|
|
"""Compare two db_meta dicts ignoring the volatile 'updated_at' field."""
|
|
a_clean = {k: v for k, v in a.items() if k != _TIMESTAMP_KEY}
|
|
b_clean = {k: v for k, v in b.items() if k != _TIMESTAMP_KEY}
|
|
return a_clean == b_clean
|
|
|
|
|
|
def _read_meta(meta_path: str) -> Optional[dict]:
|
|
try:
|
|
with open(meta_path, "r", encoding="utf-8") as f:
|
|
return json.load(f)
|
|
except (OSError, ValueError):
|
|
return None
|
|
|
|
|
|
def write_meta_if_changed(
|
|
data_path: str,
|
|
meta_path: str,
|
|
min_version: str,
|
|
data_url: str,
|
|
now: Optional[datetime] = None,
|
|
) -> Tuple[dict, bool]:
|
|
"""Generate db_meta.json next to data.json. Skip the write entirely if
|
|
the only thing that would change is `updated_at` — keeps the file (and
|
|
git/precommit hooks) quiet when the underlying site database hasn't
|
|
actually moved.
|
|
|
|
Returns the meta dict that *would* be written and a bool indicating
|
|
whether a write happened.
|
|
"""
|
|
new_meta = build_meta(data_path, min_version, data_url, now=now)
|
|
existing = _read_meta(meta_path)
|
|
if existing is not None and meta_payload_equals(existing, new_meta):
|
|
return existing, False
|
|
|
|
with open(meta_path, "w", encoding="utf-8") as f:
|
|
json.dump(new_meta, f, indent=4, ensure_ascii=False)
|
|
return new_meta, True
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Generate db_meta.json from data.json")
|
|
parser.add_argument("--min-version", default=None, help="Minimum compatible maigret version (default: current version)")
|
|
parser.add_argument("--data-url", default=DEFAULT_DATA_URL, help="URL where data.json can be downloaded")
|
|
args = parser.parse_args()
|
|
|
|
min_version = args.min_version or get_current_version()
|
|
meta, written = write_meta_if_changed(DATA_JSON_PATH, META_JSON_PATH, min_version, args.data_url)
|
|
|
|
if written:
|
|
print(f"Generated {META_JSON_PATH}")
|
|
else:
|
|
print(f"Skipped {META_JSON_PATH}: nothing changed except timestamp")
|
|
print(f" sites: {meta['sites_count']}")
|
|
print(f" sha256: {meta['data_sha256'][:16]}...")
|
|
print(f" min_version: {meta['min_maigret_version']}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|