mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-16 03:15:40 +00:00
a7338e97f3
* Make xhtml2pdf optional, fix install on Linux without libcairo. Move xhtml2pdf to the new [pdf] extra so default `pip install maigret` no longer pulls pycairo (which has no Linux/macOS wheels and breaks the build without libcairo2-dev). save_pdf_report now raises a clear RuntimeError pointing to `pip install 'maigret[pdf]'`, and the CLI turns it into a friendly warning instead of a crash. Adds tests covering the missing-extra path, plus per-OS install docs. Fix for #2657, #2534 * Make arabic-reshaper and python-bidi optional; idempotent update of db_meta.json and sites.md * Regenerated poetry.lock * Update CI workflow to cover minimal installation without PDF deps
224 lines
7.4 KiB
Python
224 lines
7.4 KiB
Python
"""Tests for the 'don't rewrite files unless content actually changed' logic
in utils.generate_db_meta and utils.update_site_data. The point is to keep
sites.md and db_meta.json untouched when only the embedded timestamp/date
would change — so a pre-commit hook doesn't end up staging a no-op diff
every time someone runs the updater.
"""
|
|
|
|
import json
|
|
from datetime import datetime, timezone
|
|
|
|
from utils.generate_db_meta import (
|
|
build_meta,
|
|
meta_payload_equals,
|
|
write_meta_if_changed,
|
|
)
|
|
from utils.update_site_data import (
|
|
sites_md_payload_equals,
|
|
write_sites_md_if_changed,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# generate_db_meta
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _write_data_json(path, sites):
|
|
with open(path, "w", encoding="utf-8") as f:
|
|
json.dump({"sites": sites}, f)
|
|
|
|
|
|
def test_meta_payload_equals_ignores_timestamp():
    """Two metas that differ only in ``updated_at`` must compare as equal."""
    a = {"sites_count": 10, "data_sha256": "abc", "updated_at": "2026-01-01T00:00:00Z"}
    b = {"sites_count": 10, "data_sha256": "abc", "updated_at": "2027-12-31T23:59:59Z"}
    assert meta_payload_equals(a, b)
|
|
|
|
|
|
def test_meta_payload_equals_detects_real_change():
    """A differing ``sites_count`` must make the metas unequal, even with the same timestamp."""
    a = {"sites_count": 10, "data_sha256": "abc", "updated_at": "2026-01-01T00:00:00Z"}
    b = {"sites_count": 11, "data_sha256": "abc", "updated_at": "2026-01-01T00:00:00Z"}
    assert not meta_payload_equals(a, b)
|
|
|
|
|
|
def test_write_meta_creates_file_when_missing(tmp_path):
    """With no existing meta file, the writer must create it and report a write.

    Also checks that the file on disk reflects the single site in the
    fixture and carries the same ``updated_at`` as the returned meta.
    """
    data_path = tmp_path / "data.json"
    meta_path = tmp_path / "db_meta.json"
    _write_data_json(data_path, {"GitHub": {}})

    meta, written = write_meta_if_changed(
        str(data_path), str(meta_path), "0.6.0", "https://example/data.json"
    )

    assert written is True
    assert meta_path.exists()
    on_disk = json.loads(meta_path.read_text())
    assert on_disk["sites_count"] == 1
    assert on_disk["updated_at"] == meta["updated_at"]
|
|
|
|
|
|
def test_write_meta_skips_when_only_timestamp_would_change(tmp_path):
    """A second run over unchanged data must leave db_meta.json untouched.

    The meta file is seeded with an old ``now``; the writer is then called
    again with a newer ``now`` but the same data.json. The second call must
    report no write, and the file must stay byte-identical, old timestamp
    included.
    """
    data_path = tmp_path / "data.json"
    meta_path = tmp_path / "db_meta.json"
    _write_data_json(data_path, {"GitHub": {}})

    # First write seeds the file with an old timestamp.
    old = datetime(2026, 1, 1, tzinfo=timezone.utc)
    _, written_first = write_meta_if_changed(
        str(data_path), str(meta_path), "0.6.0", "https://example/data.json", now=old
    )
    assert written_first is True
    seeded_bytes = meta_path.read_bytes()

    # Second call with a NEW `now` but identical data.json — must be a no-op.
    new = datetime(2027, 6, 15, tzinfo=timezone.utc)
    _, written_second = write_meta_if_changed(
        str(data_path), str(meta_path), "0.6.0", "https://example/data.json", now=new
    )
    assert written_second is False
    # File on disk is byte-for-byte the same — including the OLD timestamp.
    assert meta_path.read_bytes() == seeded_bytes
    on_disk = json.loads(meta_path.read_text())
    assert on_disk["updated_at"] == "2026-01-01T00:00:00Z"
|
|
|
|
|
|
def test_write_meta_writes_when_data_sha256_changes(tmp_path):
    """A real change to data.json must trigger a rewrite with the new timestamp.

    After seeding the meta file, data.json gains a second site. The next
    call must report a write and persist both the new ``sites_count`` and
    the new ``updated_at``.
    """
    data_path = tmp_path / "data.json"
    meta_path = tmp_path / "db_meta.json"

    _write_data_json(data_path, {"GitHub": {}})
    write_meta_if_changed(
        str(data_path),
        str(meta_path),
        "0.6.0",
        "https://example/data.json",
        now=datetime(2026, 1, 1, tzinfo=timezone.utc),
    )

    # Real change to data.json — sha256 + sites_count both move.
    _write_data_json(data_path, {"GitHub": {}, "GitLab": {}})
    new_now = datetime(2027, 6, 15, tzinfo=timezone.utc)
    # The returned meta is not inspected here; only the on-disk result matters.
    _, written = write_meta_if_changed(
        str(data_path), str(meta_path), "0.6.0", "https://example/data.json", now=new_now
    )

    assert written is True
    on_disk = json.loads(meta_path.read_text())
    assert on_disk["sites_count"] == 2
    assert on_disk["updated_at"] == "2027-06-15T00:00:00Z"
|
|
|
|
|
|
def test_write_meta_writes_when_min_version_changes(tmp_path):
    """Bumping the version argument must trigger a rewrite on unchanged data.

    The meta file is seeded with "0.5.0"; the second call passes "0.6.0"
    over the same data.json and must persist it as ``min_maigret_version``.
    """
    data_path = tmp_path / "data.json"
    meta_path = tmp_path / "db_meta.json"
    _write_data_json(data_path, {"GitHub": {}})

    write_meta_if_changed(
        str(data_path),
        str(meta_path),
        "0.5.0",
        "https://example/data.json",
        now=datetime(2026, 1, 1, tzinfo=timezone.utc),
    )

    _, written = write_meta_if_changed(
        str(data_path),
        str(meta_path),
        "0.6.0",  # bumped
        "https://example/data.json",
        now=datetime(2026, 1, 2, tzinfo=timezone.utc),
    )

    assert written is True
    on_disk = json.loads(meta_path.read_text())
    assert on_disk["min_maigret_version"] == "0.6.0"
|
|
|
|
|
|
def test_write_meta_writes_when_existing_file_is_corrupt(tmp_path):
    """An unparseable existing meta file must be overwritten with valid JSON."""
    data_path = tmp_path / "data.json"
    meta_path = tmp_path / "db_meta.json"
    _write_data_json(data_path, {"GitHub": {}})
    meta_path.write_text("this is not valid json")

    _, written = write_meta_if_changed(
        str(data_path), str(meta_path), "0.6.0", "https://example/data.json"
    )

    assert written is True
    # json.loads raises on invalid input, so merely parsing is the assertion.
    json.loads(meta_path.read_text())  # now parseable
|
|
|
|
|
|
def test_build_meta_uses_provided_now(tmp_path):
    """``build_meta`` must render the injected ``now`` as ``updated_at``.

    The expected value is the given UTC datetime formatted as
    ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    data_path = tmp_path / "data.json"
    _write_data_json(data_path, {"GitHub": {}})
    fixed = datetime(2030, 7, 4, 12, 0, 0, tzinfo=timezone.utc)

    meta = build_meta(str(data_path), "0.6.0", "https://example/data.json", now=fixed)

    assert meta["updated_at"] == "2030-07-04T12:00:00Z"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# update_site_data
# ---------------------------------------------------------------------------
|
|
|
|
|
|
_SITES_MD_TEMPLATE = (
|
|
"## List of supported sites (search methods): total 1\n\n"
|
|
"Rank data fetched from Majestic Million by domains.\n\n"
|
|
"1. [GitHub](https://github.com/)*: top 100*\n"
|
|
"\nThe list was updated at ({date})\n"
|
|
"## Statistics\n\n"
|
|
"Some stats.\n"
|
|
)
|
|
|
|
|
|
def test_sites_md_payload_equals_ignores_date():
    """Two sites.md renderings that differ only in the date must compare as equal."""
    a = _SITES_MD_TEMPLATE.format(date="2026-01-01")
    b = _SITES_MD_TEMPLATE.format(date="2027-12-31")
    assert sites_md_payload_equals(a, b)
|
|
|
|
|
|
def test_sites_md_payload_equals_detects_body_change():
    """A changed site entry must make the documents unequal, even with the same date."""
    a = _SITES_MD_TEMPLATE.format(date="2026-01-01")
    b = a.replace("GitHub", "GitLab")
    assert not sites_md_payload_equals(a, b)
|
|
|
|
|
|
def test_write_sites_md_creates_file_when_missing(tmp_path):
    """With no existing sites.md, the writer must create it with the exact content."""
    target = tmp_path / "sites.md"
    content = _SITES_MD_TEMPLATE.format(date="2026-05-15")

    written = write_sites_md_if_changed(content, str(target))

    assert written is True
    assert target.read_text() == content
|
|
|
|
|
|
def test_write_sites_md_skips_when_only_date_would_change(tmp_path):
    """A date-only difference must not rewrite sites.md.

    The file is seeded with an old date. Offering the same body with a
    newer date must report no write and leave the seeded text in place.
    """
    target = tmp_path / "sites.md"
    seeded = _SITES_MD_TEMPLATE.format(date="2026-01-01")
    target.write_text(seeded)

    # New content has a different date but identical body.
    new_content = _SITES_MD_TEMPLATE.format(date="2027-12-31")
    written = write_sites_md_if_changed(new_content, str(target))

    assert written is False
    # File untouched, including the OLD date.
    assert target.read_text() == seeded
|
|
|
|
|
|
def test_write_sites_md_writes_when_body_changes(tmp_path):
    """A body change must rewrite sites.md even when the date is unchanged.

    The new content uses the same date as the seeded file but swaps
    "GitHub" for "GitLab"; the file must then contain only the new name.
    """
    target = tmp_path / "sites.md"
    target.write_text(_SITES_MD_TEMPLATE.format(date="2026-01-01"))

    new_content = _SITES_MD_TEMPLATE.format(date="2026-01-01").replace(
        "GitHub", "GitLab"
    )
    written = write_sites_md_if_changed(new_content, str(target))

    assert written is True
    assert "GitLab" in target.read_text()
    assert "GitHub" not in target.read_text()
|