mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Add Markdown reports for LLM analysis (#2463)
This commit is contained in:
@@ -106,6 +106,9 @@ username).
|
|||||||
``-J``, ``--json`` - Generate a JSON report of specific type: simple,
|
``-J``, ``--json`` - Generate a JSON report of specific type: simple,
|
||||||
ndjson (one report per username). E.g. ``--json ndjson``
|
ndjson (one report per username). E.g. ``--json ndjson``
|
||||||
|
|
||||||
|
``-M``, ``--md`` - Generate a Markdown report (general report on all
|
||||||
|
usernames). See :ref:`markdown-report` below.
|
||||||
|
|
||||||
``-fo``, ``--folderoutput`` - Results will be saved to this folder,
|
``-fo``, ``--folderoutput`` - Results will be saved to this folder,
|
||||||
``results`` by default. Will be created if doesn’t exist.
|
``results`` by default. Will be created if doesn’t exist.
|
||||||
|
|
||||||
@@ -142,4 +145,35 @@ site main page URL to determine the site engine and methods to check
|
|||||||
account presence. After checking Maigret asks if you want to add the
|
account presence. After checking Maigret asks if you want to add the
|
||||||
site, answering y/Y will rewrite the local database.
|
site, answering y/Y will rewrite the local database.
|
||||||
|
|
||||||
|
.. _markdown-report:
|
||||||
|
|
||||||
|
Markdown report (LLM-friendly)
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
The ``--md`` / ``-M`` flag generates a Markdown report designed for both human reading and analysis by AI assistants (ChatGPT, Claude, etc.).
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
maigret username --md
|
||||||
|
|
||||||
|
The report includes:
|
||||||
|
|
||||||
|
- **Summary** with aggregated personal data (all fullnames, locations, bios found across accounts), country tags, website tags, first/last seen timestamps.
|
||||||
|
- **Per-account sections** with profile URL, site tags, and all extracted fields (username, bio, follower count, linked accounts, etc.).
|
||||||
|
- **Possible false positives** disclaimer explaining that accounts may belong to different people.
|
||||||
|
- **Ethical use** notice about applicable data protection laws.
|
||||||
|
|
||||||
|
**Using with AI tools:**
|
||||||
|
|
||||||
|
The Markdown format is optimized for LLM context windows. You can feed the report directly to an AI assistant for follow-up analysis:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
# Generate the report
|
||||||
|
maigret johndoe --md
|
||||||
|
|
||||||
|
# Feed it to an AI tool
|
||||||
|
cat reports/report_johndoe.md | llm "Analyze this OSINT report and summarize key findings"
|
||||||
|
|
||||||
|
The structured Markdown with per-site sections makes it easy for AI tools to extract relationships, cross-reference identities, and identify patterns across accounts.
|
||||||
|
|
||||||
|
|||||||
+27
-1
@@ -37,6 +37,7 @@ from .report import (
|
|||||||
get_plaintext_report,
|
get_plaintext_report,
|
||||||
sort_report_by_data_points,
|
sort_report_by_data_points,
|
||||||
save_graph_report,
|
save_graph_report,
|
||||||
|
save_markdown_report,
|
||||||
)
|
)
|
||||||
from .sites import MaigretDatabase
|
from .sites import MaigretDatabase
|
||||||
from .submit import Submitter
|
from .submit import Submitter
|
||||||
@@ -465,6 +466,14 @@ def setup_arguments_parser(settings: Settings):
|
|||||||
default=settings.pdf_report,
|
default=settings.pdf_report,
|
||||||
help="Generate a PDF report (general report on all usernames).",
|
help="Generate a PDF report (general report on all usernames).",
|
||||||
)
|
)
|
||||||
|
report_group.add_argument(
|
||||||
|
"-M",
|
||||||
|
"--md",
|
||||||
|
action="store_true",
|
||||||
|
dest="md",
|
||||||
|
default=settings.md_report,
|
||||||
|
help="Generate a Markdown report (general report on all usernames).",
|
||||||
|
)
|
||||||
report_group.add_argument(
|
report_group.add_argument(
|
||||||
"-G",
|
"-G",
|
||||||
"--graph",
|
"--graph",
|
||||||
@@ -803,7 +812,7 @@ async def main():
|
|||||||
|
|
||||||
# reporting for all the result
|
# reporting for all the result
|
||||||
if general_results:
|
if general_results:
|
||||||
if args.html or args.pdf:
|
if args.html or args.pdf or args.md:
|
||||||
query_notify.warning('Generating report info...')
|
query_notify.warning('Generating report info...')
|
||||||
report_context = generate_report_context(general_results)
|
report_context = generate_report_context(general_results)
|
||||||
# determine main username
|
# determine main username
|
||||||
@@ -823,6 +832,23 @@ async def main():
|
|||||||
save_pdf_report(filename, report_context)
|
save_pdf_report(filename, report_context)
|
||||||
query_notify.warning(f'PDF report on all usernames saved in {filename}')
|
query_notify.warning(f'PDF report on all usernames saved in {filename}')
|
||||||
|
|
||||||
|
if args.md:
|
||||||
|
username = username.replace('/', '_')
|
||||||
|
filename = report_filepath_tpl.format(username=username, postfix='.md')
|
||||||
|
run_flags = []
|
||||||
|
if args.tags:
|
||||||
|
run_flags.append(f"--tags {args.tags}")
|
||||||
|
if args.site_list:
|
||||||
|
run_flags.append(f"--site {','.join(args.site_list)}")
|
||||||
|
if args.all_sites:
|
||||||
|
run_flags.append("--all-sites")
|
||||||
|
run_info = {
|
||||||
|
"sites_count": sum(len(d) for _, _, d in general_results),
|
||||||
|
"flags": " ".join(run_flags) if run_flags else None,
|
||||||
|
}
|
||||||
|
save_markdown_report(filename, report_context, run_info=run_info)
|
||||||
|
query_notify.warning(f'Markdown report on all usernames saved in {filename}')
|
||||||
|
|
||||||
if args.graph:
|
if args.graph:
|
||||||
username = username.replace('/', '_')
|
username = username.replace('/', '_')
|
||||||
filename = report_filepath_tpl.format(
|
filename = report_filepath_tpl.format(
|
||||||
|
|||||||
@@ -257,6 +257,144 @@ def get_plaintext_report(context: dict) -> str:
|
|||||||
return output.strip()
|
return output.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _md_format_value(value) -> str:
|
||||||
|
"""Format a value for Markdown output, detecting links."""
|
||||||
|
if isinstance(value, list):
|
||||||
|
return ", ".join(str(v) for v in value)
|
||||||
|
s = str(value)
|
||||||
|
if s.startswith("http://") or s.startswith("https://"):
|
||||||
|
return f"[{s}]({s})"
|
||||||
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
def save_markdown_report(filename: str, context: dict, run_info: dict = None):
    """Write a Markdown report built from *context* to *filename*.

    The report consists of a title, a "Generated at" line, a summary
    (brief text, aggregated personal fields, country/website tags,
    first/last seen), one section per found account, and two closing
    notices (possible false positives, ethical use).

    :param filename: path of the file to write (UTF-8, overwritten).
    :param context: report context; the keys read here are ``username``,
        ``generated_at``, ``brief``, ``countries_tuple_list``,
        ``interests_tuple_list``, ``first_seen`` and ``results``.
        ``results`` is iterated as 3-tuples whose last element maps a site
        name to a per-site result dict (``found``, ``is_similar``,
        ``url_user``, ``status``, ``ids_data``).
    :param run_info: optional dict; ``sites_count`` and ``flags`` are
        appended to the "Generated at" line when present and truthy.
    """
    username = context.get("username", "unknown")
    generated_at = context.get("generated_at", "")
    brief = context.get("brief", "")
    countries = context.get("countries_tuple_list", [])
    interests = context.get("interests_tuple_list", [])
    first_seen = context.get("first_seen")
    results = context.get("results", [])

    # Map multiple source fields to unified output fields. Built once here:
    # the mapping does not depend on the account being processed.
    field_sources = {
        "fullname": ("fullname", "name"),
        "location": ("location", "country", "city", "country_code", "locale", "region"),
        "gender": ("gender",),
        "bio": ("bio", "about", "description"),
    }
    timestamp_fields = ("last_online", "latest_activity_at", "updated_at")

    # Collect ALL values for key fields across all accounts
    all_fields: Dict[str, list] = {}
    last_seen = None
    for _, _, data in results:
        for v in data.values():
            if not v.get("found") or v.get("is_similar"):
                continue
            # "ids_data" may be present but None; treat that like "no data"
            # instead of failing on None.get().
            ids_data = v.get("ids_data") or {}
            for out_field, source_keys in field_sources.items():
                for src in source_keys:
                    val = ids_data.get(src)
                    if val:
                        collected = all_fields.setdefault(out_field, [])
                        val_str = str(val)
                        # Keep first-seen order while dropping duplicates.
                        if val_str not in collected:
                            collected.append(val_str)
            # Track last_seen
            # NOTE(review): timestamps are compared as strings, which is only
            # chronological for uniformly formatted, sortable values
            # (e.g. ISO 8601) - confirm against the extractors.
            for ts_field in timestamp_fields:
                ts = ids_data.get(ts_field)
                if ts and (last_seen is None or str(ts) > str(last_seen)):
                    last_seen = ts

    lines = []
    lines.append(f"# Report by searching on username \"{username}\"\n")

    # Generated line with run info
    gen_line = f"Generated at {generated_at} by [Maigret](https://github.com/soxoj/maigret)"
    if run_info:
        parts = []
        if run_info.get("sites_count"):
            parts.append(f"{run_info['sites_count']} sites checked")
        if run_info.get("flags"):
            parts.append(f"flags: `{run_info['flags']}`")
        if parts:
            gen_line += f" ({', '.join(parts)})"
    lines.append(f"{gen_line}\n")

    # Summary
    lines.append("## Summary\n")
    lines.append(f"{brief}\n")

    if all_fields:
        lines.append("**Information extracted from accounts:**\n")
        for field, values in all_fields.items():
            title = CaseConverter.snake_to_title(field)
            lines.append(f"- {title}: {'; '.join(values)}")
        lines.append("")

    if countries:
        geo = ", ".join(f"{code} (x{count})" for code, count in countries)
        lines.append(f"**Country tags:** {geo}\n")

    if interests:
        tags = ", ".join(f"{tag} (x{count})" for tag, count in interests)
        lines.append(f"**Website tags:** {tags}\n")

    if first_seen:
        lines.append(f"**First seen:** {first_seen}")
    if last_seen:
        lines.append(f"**Last seen:** {last_seen}")
    if first_seen or last_seen:
        lines.append("")

    # Accounts found
    lines.append("## Accounts found\n")

    for _, _, data in results:
        for site_name, v in data.items():
            if not v.get("found") or v.get("is_similar"):
                continue

            url_user = v.get("url_user", "")
            lines.append(f"### {site_name}\n")
            lines.append(f"- **URL:** [{url_user}]({url_user})")

            # A missing/falsy status or empty tags both mean "no tags line".
            status = v.get("status")
            tags = (status.tags if status else None) or []
            if tags:
                lines.append(f"- **Tags:** {', '.join(tags)}")
            lines.append("")

            ids_data = v.get("ids_data") or {}
            if ids_data:
                for field, value in ids_data.items():
                    # Image fields are left out of the Markdown report.
                    if field == "image":
                        continue
                    title = CaseConverter.snake_to_title(field)
                    lines.append(f"- {title}: {_md_format_value(value)}")

                lines.append("")

    # Possible false positives
    lines.append("## Possible false positives\n")
    lines.append(
        f"This report was generated by searching for accounts matching the username `{username}`. "
        "Accounts listed above may belong to different people who happen to use the same "
        "or similar username. Results without extracted personal information could contain "
        "some false positive findings. Always verify findings before drawing conclusions.\n"
    )

    # Ethical use
    lines.append("## Ethical use\n")
    lines.append(
        "This report is a result of a technical collection of publicly available information "
        "from online accounts and does not constitute personal data processing. If you intend "
        "to use this data for personal data processing or collection purposes, ensure your use "
        "complies with applicable laws and regulations in your jurisdiction (such as GDPR, "
        "CCPA, and similar).\n"
    )

    with open(filename, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
REPORTS GENERATING
|
REPORTS GENERATING
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -101,7 +101,7 @@
|
|||||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||||
"x-guest-token": "2039637579922866279"
|
"x-guest-token": "2041186137171976270"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Bad guest token": "x-guest-token update required"
|
"Bad guest token": "x-guest-token update required"
|
||||||
@@ -294,7 +294,7 @@
|
|||||||
"method": "vimeo"
|
"method": "vimeo"
|
||||||
},
|
},
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzUxMjM0MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZDY4YjViMGMtYTE3OC00ZDdhLWIyM2QtMDg5Y2MwZjAwOGEyIn0.0bGwlqckn4J07em2-nEX10OfW1JAmi54QCrPtm8Qn6A"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzU0OTI1ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZjkwOGY0MmYtMTE2Zi00MDRkLWExOTgtOGUyOTE2MTFmZTQzIn0.Wt_z9qrjHofYPtUIDkbxrPX2S-glzmEowkR8m89O_Zg"
|
||||||
},
|
},
|
||||||
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
|
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
|
||||||
"checkType": "status_code",
|
"checkType": "status_code",
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
{
|
{
|
||||||
"version": 1,
|
"version": 1,
|
||||||
"updated_at": "2026-04-04T17:04:45Z",
|
"updated_at": "2026-04-06T16:20:33Z",
|
||||||
"sites_count": 3155,
|
"sites_count": 3155,
|
||||||
"min_maigret_version": "0.5.0",
|
"min_maigret_version": "0.5.0",
|
||||||
"data_sha256": "4b1c0c96e1595f6e83584a7a6e885647095cbfb7f23c938d7440f8a3408551b1",
|
"data_sha256": "da87fd6f32bd60efc25e35aa6aa7d329e490d4aa544ddb68539d490cd2157b56",
|
||||||
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
"data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json"
|
||||||
}
|
}
|
||||||
@@ -54,6 +54,7 @@
|
|||||||
"graph_report": false,
|
"graph_report": false,
|
||||||
"pdf_report": false,
|
"pdf_report": false,
|
||||||
"html_report": false,
|
"html_report": false,
|
||||||
|
"md_report": false,
|
||||||
"web_interface_port": 5000,
|
"web_interface_port": 5000,
|
||||||
"no_autoupdate": false,
|
"no_autoupdate": false,
|
||||||
"db_update_meta_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/db_meta.json",
|
"db_update_meta_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/db_meta.json",
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ class Settings:
|
|||||||
pdf_report: bool
|
pdf_report: bool
|
||||||
html_report: bool
|
html_report: bool
|
||||||
graph_report: bool
|
graph_report: bool
|
||||||
|
md_report: bool
|
||||||
web_interface_port: int
|
web_interface_port: int
|
||||||
no_autoupdate: bool
|
no_autoupdate: bool
|
||||||
db_update_meta_url: str
|
db_update_meta_url: str
|
||||||
|
|||||||
@@ -3159,7 +3159,7 @@ Rank data fetched from Majestic Million by domains.
|
|||||||
1.  [Tonometerbot (https://tonometerbot.com)](https://tonometerbot.com)*: top 100M, crypto*
|
1.  [Tonometerbot (https://tonometerbot.com)](https://tonometerbot.com)*: top 100M, crypto*
|
||||||
1.  [Spatial (https://www.spatial.io)](https://www.spatial.io)*: top 100M, crypto, gaming*
|
1.  [Spatial (https://www.spatial.io)](https://www.spatial.io)*: top 100M, crypto, gaming*
|
||||||
|
|
||||||
The list was updated at (2026-04-04)
|
The list was updated at (2026-04-06)
|
||||||
## Statistics
|
## Statistics
|
||||||
|
|
||||||
Enabled/total sites: 2538/3155 = 80.44%
|
Enabled/total sites: 2538/3155 = 80.44%
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
|||||||
'web': None,
|
'web': None,
|
||||||
'with_domains': False,
|
'with_domains': False,
|
||||||
'xmind': False,
|
'xmind': False,
|
||||||
|
'md': False,
|
||||||
'no_autoupdate': False,
|
'no_autoupdate': False,
|
||||||
'force_update': False,
|
'force_update': False,
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user