From ffb4c1856ca61b992633d3f39893494855dc3dbe Mon Sep 17 00:00:00 2001 From: Soxoj Date: Fri, 15 May 2026 12:17:10 +0200 Subject: [PATCH] Make xhtml2pdf optional, fix install on Linux without libcairo Move xhtml2pdf to the new [pdf] extra so default `pip install maigret` no longer pulls pycairo (which has no Linux/macOS wheels and breaks the build without libcairo2-dev). save_pdf_report now raises a clear RuntimeError pointing to `pip install 'maigret[pdf]'`, and the CLI turns it into a friendly warning instead of a crash. Adds tests covering the missing-extra path, plus per-OS install docs. Fix for #2657, #2534 --- README.md | 2 + docs/source/installation.rst | 137 +++++++++++++++++++++++++++++++++ maigret/maigret.py | 10 ++- maigret/report.py | 16 +++- maigret/resources/db_meta.json | 2 +- pyproject.toml | 10 ++- sites.md | 2 +- tests/test_report.py | 65 ++++++++++++++++ 8 files changed, 236 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 0c4a248..dff917e 100644 --- a/README.md +++ b/README.md @@ -173,6 +173,8 @@ docker build --target web -t maigret-web . # Web UI image Build errors? See the [troubleshooting guide](https://maigret.readthedocs.io/en/latest/installation.html#troubleshooting). +PDF reports (`--pdf`) are an optional extra — install with `pip install 'maigret[pdf]'`. They need system-level graphics libraries on Linux/macOS; see the [PDF reports section](https://maigret.readthedocs.io/en/latest/installation.html#optional-pdf-reports-maigret-pdf) for per-OS install steps. + ## Usage ### Examples diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 43f4dbc..7f95b6b 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -58,6 +58,17 @@ Maigret ships with a bundled site database. After installation from PyPI (or any # usage maigret username +PDF report support is shipped as an **optional extra** because it relies on +system-level graphics libraries that pip cannot install for you. 
If you plan to +use ``--pdf``, install Maigret with the ``pdf`` extra: + +.. code-block:: bash + + pip3 install 'maigret[pdf]' + +See :ref:`pdf-extra` below for the full background on why PDF support is +optional and how to fix the most common build errors. + Development version (GitHub) ---------------------------- @@ -126,6 +137,132 @@ After installing the system dependencies, retry the maigret installation. If you continue to have issues, consider using Docker instead, which includes all necessary dependencies. +.. _pdf-extra: + +Optional: PDF reports (``maigret[pdf]``) +---------------------------------------- + +The ``--pdf`` report format is shipped as an optional extra. To enable it: + +.. code-block:: bash + + pip3 install 'maigret[pdf]' + +If PDF support is not installed and you pass ``--pdf``, Maigret prints a +warning and continues without crashing — every other output format +(``--html``, ``--json``, ``--csv``, ``--txt``, ``--xmind``, ``--graph``) +keeps working. + +Why is PDF optional? +~~~~~~~~~~~~~~~~~~~~ + +Maigret renders PDFs by converting an HTML template, and that conversion +pipeline ultimately depends on the ``cairo`` graphics library through a +chain of Python packages roughly shaped like:: + + maigret[pdf] → xhtml2pdf → svglib → rlPyCairo → pycairo → libcairo2 (system) + +The bottom of that chain is a C library — ``libcairo2`` — that has to exist +on the host *before* pip can build the Python bindings. The Python binding +package (``pycairo``) currently ships **only Windows wheels** on PyPI; on +Linux and macOS pip falls back to building from source, and the build fails +the moment ``pkg-config`` cannot find ``cairo``. The error looks like:: + + ../cairo/meson.build:31:12: ERROR: Dependency "cairo" not found (tried pkg-config) + note: This error originates from a subprocess, and is likely not a problem with pip. 
+ error: metadata-generation-failed + +Pulling this whole chain for every Maigret install just so the much smaller +group of users who actually want PDFs can have them is a poor trade — so +``xhtml2pdf`` is gated behind the ``pdf`` extra. + +Installing the system prerequisites +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Install the cairo headers, ``pkg-config``, and a working C toolchain +*before* running ``pip install 'maigret[pdf]'``. + +**Debian / Ubuntu / Linux Mint / Kali:** + +.. code-block:: bash + + sudo apt update + sudo apt install -y libcairo2-dev pkg-config python3-dev build-essential + pip3 install --upgrade pip setuptools wheel + pip3 install 'maigret[pdf]' + +**Fedora / RHEL / CentOS:** + +.. code-block:: bash + + sudo dnf install -y cairo-devel pkgconfig python3-devel gcc + pip3 install 'maigret[pdf]' + +**Arch Linux:** + +.. code-block:: bash + + sudo pacman -S cairo pkgconf base-devel + pip3 install 'maigret[pdf]' + +**Alpine Linux:** + +.. code-block:: bash + + sudo apk add cairo-dev pkgconf python3-dev build-base + pip3 install 'maigret[pdf]' + +**macOS (Homebrew):** + +.. code-block:: bash + + brew install cairo pkg-config + pip3 install --upgrade pip setuptools wheel + pip3 install 'maigret[pdf]' + +**Windows:** + +No system packages are needed — ``pycairo`` ships prebuilt wheels for +Windows. Just run: + +.. code-block:: bash + + pip install "maigret[pdf]" + +**Google Cloud Shell / Colab / Replit / generic CI:** + +These environments behave like Debian/Ubuntu — install the same +``libcairo2-dev pkg-config python3-dev build-essential`` packages before +``pip install 'maigret[pdf]'``. If you do not control the base image and +cannot ``apt install``, skip the extra and use ``--html`` reports instead; +HTML reports contain the same data and open in any browser. 
+ +``maigret: command not found`` after install +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If pip prints warnings like:: + + WARNING: The scripts maigret and update_sitesmd are installed in + '/home/USERNAME/.local/bin' which is not on PATH. + +…and ``maigret --version`` then fails with ``command not found``, your +``--user`` install put the entry-point script in a directory the shell does +not search. Add it to ``PATH``: + +.. code-block:: bash + + echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc + source ~/.bashrc + +Or install into a virtual environment, where the entry point lands in the +venv's ``bin/`` automatically: + +.. code-block:: bash + + python3 -m venv ~/.venvs/maigret + source ~/.venvs/maigret/bin/activate + pip install 'maigret[pdf]' # or just `pip install maigret` + Optional: Cloudflare bypass solver ---------------------------------- diff --git a/maigret/maigret.py b/maigret/maigret.py index d4a65bf..eeb4b81 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -908,8 +908,14 @@ async def main(): if args.pdf: username = username.replace('/', '_') filename = report_filepath_tpl.format(username=username, postfix='.pdf') - save_pdf_report(filename, report_context) - query_notify.warning(f'PDF report on all usernames saved in {filename}') + try: + save_pdf_report(filename, report_context) + except RuntimeError as e: + query_notify.warning(str(e)) + else: + query_notify.warning( + f'PDF report on all usernames saved in {filename}' + ) if args.md: username = username.replace('/', '_') diff --git a/maigret/report.py b/maigret/report.py index 3f79305..c79eef5 100644 --- a/maigret/report.py +++ b/maigret/report.py @@ -78,13 +78,23 @@ def save_html_report(filename: str, context: dict): f.write(filled_template) +PDF_EXTRA_HINT = ( + "PDF reports require the optional 'pdf' extra. 
" + "Install it with: pip install 'maigret[pdf]'" +) + + def save_pdf_report(filename: str, context: dict): + # Imported lazily so that users without the optional 'pdf' extra + # can still import maigret.report and use other report formats. + try: + from xhtml2pdf import pisa # type: ignore[import-untyped] + except ImportError as e: + raise RuntimeError(PDF_EXTRA_HINT) from e + template, css = generate_report_template(is_pdf=True) filled_template = template.render(**context) - # moved here to speed up the launch of Maigret - from xhtml2pdf import pisa # type: ignore[import-untyped] - with open(filename, "w+b") as f: pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css) diff --git a/maigret/resources/db_meta.json b/maigret/resources/db_meta.json index 6f8d610..5c79908 100644 --- a/maigret/resources/db_meta.json +++ b/maigret/resources/db_meta.json @@ -1,6 +1,6 @@ { "version": 1, - "updated_at": "2026-05-11T17:38:18Z", + "updated_at": "2026-05-15T10:17:13Z", "sites_count": 3154, "min_maigret_version": "0.6.0", "data_sha256": "1787a341c90d91a56507ae704c8471743709b56d85d6c3dfa8c56189dccbc6dd", diff --git a/pyproject.toml b/pyproject.toml index 6d24669..ca2a0e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ torrequest = "^0.1.0" alive_progress = "^3.2.0" typing-extensions = "^4.14.1" webencodings = "^0.5.1" -xhtml2pdf = "^0.2.11" +xhtml2pdf = {version = "^0.2.11", optional = true} XMind = "^1.2.0" yarl = "^1.20.1" networkx = "^2.6.3" @@ -82,6 +82,13 @@ platformdirs = "^4.3.8" curl-cffi = ">=0.14,<1.0" +[tool.poetry.extras] +# Install PDF support with: pip install 'maigret[pdf]' +# Skipped by default because the underlying `pycairo` has no Linux/macOS +# wheels on PyPI and requires system libcairo + pkg-config to build. 
+pdf = ["xhtml2pdf"] + + [tool.poetry.group.dev.dependencies] # How to add a new dev dependency: poetry add black --group dev # Install dev dependencies with: poetry install --with dev @@ -92,6 +99,7 @@ pytest-cov = ">=6,<8" pytest-httpserver = "^1.0.0" pytest-rerunfailures = ">=15.1,<17.0" reportlab = "^4.4.3" +xhtml2pdf = "^0.2.11" mypy = ">=1.14.1,<3.0.0" tuna = "^0.5.11" coverage = "^7.9.2" diff --git a/sites.md b/sites.md index 5d46c33..1c49b44 100644 --- a/sites.md +++ b/sites.md @@ -3158,7 +3158,7 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://app.airnfts.com) [AirNFTs (https://app.airnfts.com)](https://app.airnfts.com)*: top 100M, crypto, nft* 1. ![](https://www.google.com/s2/favicons?domain=https://greasyfork.org) [GreasyFork (https://greasyfork.org)](https://greasyfork.org)*: top 100M, coding* -The list was updated at (2026-05-11) +The list was updated at (2026-05-15) ## Statistics Enabled/total sites: 2524/3154 = 80.03% diff --git a/tests/test_report.py b/tests/test_report.py index 8e12408..2d05410 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -3,6 +3,9 @@ import copy import json import os +import subprocess +import sys +import textwrap import pytest from io import StringIO @@ -442,6 +445,68 @@ def test_pdf_report(): assert os.path.exists(report_name) +def test_save_pdf_report_raises_helpful_error_without_xhtml2pdf( + monkeypatch, tmp_path +): + # Setting an entry to None makes a subsequent `import` raise ImportError — + # this simulates the optional 'pdf' extra not being installed without + # actually uninstalling xhtml2pdf from the test environment. 
+ monkeypatch.setitem(sys.modules, 'xhtml2pdf', None) + monkeypatch.setitem(sys.modules, 'xhtml2pdf.pisa', None) + + context = generate_report_context(TEST) + target = tmp_path / "report.pdf" + + with pytest.raises(RuntimeError) as excinfo: + save_pdf_report(str(target), context) + + msg = str(excinfo.value) + assert "maigret[pdf]" in msg + assert "pip install" in msg + assert not target.exists() + + +def test_xhtml2pdf_is_not_module_level_dependency(): + # Guard against a regression where someone hoists `import xhtml2pdf` / + # `from xhtml2pdf import pisa` to the top of maigret/report.py — that + # would force every Maigret user to install the optional extra. + import maigret.report as report_module + + module_globals = vars(report_module) + assert 'xhtml2pdf' not in module_globals + assert 'pisa' not in module_globals + + +def test_import_maigret_without_xhtml2pdf(): + # End-to-end check: spawn a fresh interpreter where xhtml2pdf is blocked + # before any maigret module is loaded, and confirm the package, the + # report module, and save_pdf_report itself all import cleanly. Mirrors + # what a user without the [pdf] extra installed would experience. + code = textwrap.dedent( + """ + import sys + sys.modules['xhtml2pdf'] = None + sys.modules['xhtml2pdf.pisa'] = None + + import maigret + import maigret.report + from maigret.report import save_pdf_report + + assert callable(save_pdf_report) + print("OK") + """ + ) + result = subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + text=True, + ) + assert result.returncode == 0, ( + f"stdout={result.stdout!r} stderr={result.stderr!r}" + ) + assert "OK" in result.stdout + + def test_text_report(): context = generate_report_context(TEST) report_text = get_plaintext_report(context)