diff --git a/docs/source/command-line-options.rst b/docs/source/command-line-options.rst index 89ce798..619cfe3 100644 --- a/docs/source/command-line-options.rst +++ b/docs/source/command-line-options.rst @@ -106,6 +106,9 @@ username). ``-J``, ``--json`` - Generate a JSON report of specific type: simple, ndjson (one report per username). E.g. ``--json ndjson`` +``-M``, ``--md`` - Generate a Markdown report (general report on all +usernames). See :ref:`markdown-report` below. + ``-fo``, ``--folderoutput`` - Results will be saved to this folder, ``results`` by default. Will be created if doesn’t exist. @@ -142,4 +145,35 @@ site main page URL to determine the site engine and methods to check account presence. After checking Maigret asks if you want to add the site, answering y/Y will rewrite the local database. +.. _markdown-report: + +Markdown report (LLM-friendly) +------------------------------ + +The ``--md`` / ``-M`` flag generates a Markdown report designed for both human reading and analysis by AI assistants (ChatGPT, Claude, etc.). + +.. code-block:: console + + maigret username --md + +The report includes: + +- **Summary** with aggregated personal data (all fullnames, locations, bios found across accounts), country tags, website tags, first/last seen timestamps. +- **Per-account sections** with profile URL, site tags, and all extracted fields (username, bio, follower count, linked accounts, etc.). +- **Possible false positives** disclaimer explaining that accounts may belong to different people. +- **Ethical use** notice about applicable data protection laws. + +**Using with AI tools:** + +The Markdown format is optimized for LLM context windows. You can feed the report directly to an AI assistant for follow-up analysis: + +.. code-block:: console + + # Generate the report + maigret johndoe --md + + # Feed it to an AI tool + cat reports/report_johndoe.md | llm "Analyze this OSINT report and summarize key findings" + +The structured Markdown with per-site sections makes it easy for AI tools to extract relationships, cross-reference identities, and identify patterns across accounts. diff --git a/maigret/maigret.py b/maigret/maigret.py index 3a191bf..b1595dc 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -37,6 +37,7 @@ from .report import ( get_plaintext_report, sort_report_by_data_points, save_graph_report, + save_markdown_report, ) from .sites import MaigretDatabase from .submit import Submitter @@ -465,6 +466,14 @@ def setup_arguments_parser(settings: Settings): default=settings.pdf_report, help="Generate a PDF report (general report on all usernames).", ) + report_group.add_argument( + "-M", + "--md", + action="store_true", + dest="md", + default=settings.md_report, + help="Generate a Markdown report (general report on all usernames).", + ) report_group.add_argument( "-G", "--graph", @@ -803,7 +812,7 @@ async def main(): # reporting for all the result if general_results: - if args.html or args.pdf: + if args.html or args.pdf or args.md: query_notify.warning('Generating report info...') report_context = generate_report_context(general_results) # determine main username @@ -823,6 +832,23 @@ async def main(): save_pdf_report(filename, report_context) query_notify.warning(f'PDF report on all usernames saved in {filename}') + if args.md: + username = username.replace('/', '_') + filename = report_filepath_tpl.format(username=username, postfix='.md') + run_flags = [] + if args.tags: + run_flags.append(f"--tags {args.tags}") + if args.site_list: + run_flags.append(f"--site {','.join(args.site_list)}") + if args.all_sites: + run_flags.append("--all-sites") + run_info = { + "sites_count": sum(len(d) for _, _, d in general_results), + "flags": " ".join(run_flags) if run_flags else None, + } + save_markdown_report(filename, report_context, run_info=run_info) + query_notify.warning(f'Markdown report on all usernames saved in {filename}') + if args.graph: username = username.replace('/', '_') filename = report_filepath_tpl.format( diff --git a/maigret/report.py b/maigret/report.py index 9c13e3d..2b6648c 100644 --- a/maigret/report.py +++ b/maigret/report.py @@ -257,6 +257,144 @@ def get_plaintext_report(context: dict) -> str: return output.strip() +def _md_format_value(value) -> str: + """Format a value for Markdown output, detecting links.""" + if isinstance(value, list): + return ", ".join(str(v) for v in value) + s = str(value) + if s.startswith("http://") or s.startswith("https://"): + return f"[{s}]({s})" + return s + + +def save_markdown_report(filename: str, context: dict, run_info: dict = None): + username = context.get("username", "unknown") + generated_at = context.get("generated_at", "") + brief = context.get("brief", "") + countries = context.get("countries_tuple_list", []) + interests = context.get("interests_tuple_list", []) + first_seen = context.get("first_seen") + results = context.get("results", []) + + # Collect ALL values for key fields across all accounts + all_fields: Dict[str, list] = {} + last_seen = None + for _, _, data in results: + for _, v in data.items(): + if not v.get("found") or v.get("is_similar"): + continue + ids_data = v.get("ids_data", {}) + # Map multiple source fields to unified output fields + field_sources = { + "fullname": ("fullname", "name"), + "location": ("location", "country", "city", "country_code", "locale", "region"), + "gender": ("gender",), + "bio": ("bio", "about", "description"), + } + for out_field, source_keys in field_sources.items(): + for src in source_keys: + val = ids_data.get(src) + if val: + all_fields.setdefault(out_field, []) + val_str = str(val) + if val_str not in all_fields[out_field]: + all_fields[out_field].append(val_str) + # Track last_seen + for ts_field in ("last_online", "latest_activity_at", "updated_at"): + ts = ids_data.get(ts_field) + if ts and (last_seen is None or str(ts) > str(last_seen)): + last_seen = ts + + lines = [] + lines.append(f"# Report by searching on username \"{username}\"\n") + + # Generated line with run info + gen_line = f"Generated at {generated_at} by [Maigret](https://github.com/soxoj/maigret)" + if run_info: + parts = [] + if run_info.get("sites_count"): + parts.append(f"{run_info['sites_count']} sites checked") + if run_info.get("flags"): + parts.append(f"flags: `{run_info['flags']}`") + if parts: + gen_line += f" ({', '.join(parts)})" + lines.append(f"{gen_line}\n") + + # Summary + lines.append("## Summary\n") + lines.append(f"{brief}\n") + + if all_fields: + lines.append("**Information extracted from accounts:**\n") + for field, values in all_fields.items(): + title = CaseConverter.snake_to_title(field) + lines.append(f"- {title}: {'; '.join(values)}") + lines.append("") + + if countries: + geo = ", ".join(f"{code} (x{count})" for code, count in countries) + lines.append(f"**Country tags:** {geo}\n") + + if interests: + tags = ", ".join(f"{tag} (x{count})" for tag, count in interests) + lines.append(f"**Website tags:** {tags}\n") + + if first_seen: + lines.append(f"**First seen:** {first_seen}") + if last_seen: + lines.append(f"**Last seen:** {last_seen}") + if first_seen or last_seen: + lines.append("") + + # Accounts found + lines.append("## Accounts found\n") + + for u, id_type, data in results: + for site_name, v in data.items(): + if not v.get("found") or v.get("is_similar"): + continue + + lines.append(f"### {site_name}\n") + lines.append(f"- **URL:** [{v.get('url_user', '')}]({v.get('url_user', '')})") + + tags = v.get("status") and v["status"].tags or [] + if tags: + lines.append(f"- **Tags:** {', '.join(tags)}") + lines.append("") + + ids_data = v.get("ids_data", {}) + if ids_data: + for field, value in ids_data.items(): + if field == "image": + continue + title = CaseConverter.snake_to_title(field) + lines.append(f"- {title}: {_md_format_value(value)}") + + lines.append("") + + # Possible false positives + lines.append("## Possible false positives\n") + lines.append( + f"This report was generated by searching for accounts matching the username `{username}`. " + f"Accounts listed above may belong to different people who happen to use the same " + f"or similar username. Results without extracted personal information could contain " + f"some false positive findings. Always verify findings before drawing conclusions.\n" + ) + + # Ethical use + lines.append("## Ethical use\n") + lines.append( + "This report is a result of a technical collection of publicly available information " + "from online accounts and does not constitute personal data processing. If you intend " + "to use this data for personal data processing or collection purposes, ensure your use " + "complies with applicable laws and regulations in your jurisdiction (such as GDPR, " + "CCPA, and similar).\n" + ) + + with open(filename, "w", encoding="utf-8") as f: + f.write("\n".join(lines)) + + """ REPORTS GENERATING """ diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 1f39a9d..34a6fd9 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -101,7 +101,7 @@ "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", - "x-guest-token": "2039637579922866279" + "x-guest-token": "2041186137171976270" }, "errors": { "Bad guest token": "x-guest-token update required" @@ -294,7 +294,7 @@ "method": "vimeo" }, "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzUxMjM0MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZDY4YjViMGMtYTE3OC00ZDdhLWIyM2QtMDg5Y2MwZjAwOGEyIn0.0bGwlqckn4J07em2-nEX10OfW1JAmi54QCrPtm8Qn6A" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NzU0OTI1ODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiZjkwOGY0MmYtMTE2Zi00MDRkLWExOTgtOGUyOTE2MTFmZTQzIn0.Wt_z9qrjHofYPtUIDkbxrPX2S-glzmEowkR8m89O_Zg" }, "urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1", "checkType": "status_code", diff --git a/maigret/resources/db_meta.json b/maigret/resources/db_meta.json index def26cc..330fd1a 100644 --- a/maigret/resources/db_meta.json +++ b/maigret/resources/db_meta.json @@ -1,8 +1,8 @@ { "version": 1, - "updated_at": "2026-04-04T17:04:45Z", + "updated_at": "2026-04-06T16:20:33Z", "sites_count": 3155, "min_maigret_version": "0.5.0", - "data_sha256": "4b1c0c96e1595f6e83584a7a6e885647095cbfb7f23c938d7440f8a3408551b1", + "data_sha256": "da87fd6f32bd60efc25e35aa6aa7d329e490d4aa544ddb68539d490cd2157b56", "data_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/data.json" } \ No newline at end of file diff --git a/maigret/resources/settings.json b/maigret/resources/settings.json index d17ec6c..c28d19c 100644 --- a/maigret/resources/settings.json +++ b/maigret/resources/settings.json @@ -54,6 +54,7 @@ "graph_report": false, "pdf_report": false, "html_report": false, + "md_report": false, "web_interface_port": 5000, "no_autoupdate": false, "db_update_meta_url": "https://raw.githubusercontent.com/soxoj/maigret/main/maigret/resources/db_meta.json", diff --git a/maigret/settings.py b/maigret/settings.py index d37a7a3..5649161 100644 --- a/maigret/settings.py +++ b/maigret/settings.py @@ -42,6 +42,7 @@ class Settings: pdf_report: bool html_report: bool graph_report: bool + md_report: bool web_interface_port: int no_autoupdate: bool db_update_meta_url: str diff --git a/sites.md b/sites.md index cd45e11..7d359b8 100644 --- a/sites.md +++ b/sites.md @@ -3159,7 +3159,7 @@ Rank data fetched from Majestic Million by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://tonometerbot.com) [Tonometerbot (https://tonometerbot.com)](https://tonometerbot.com)*: top 100M, crypto* 1. ![](https://www.google.com/s2/favicons?domain=https://www.spatial.io) [Spatial (https://www.spatial.io)](https://www.spatial.io)*: top 100M, crypto, gaming* -The list was updated at (2026-04-04) +The list was updated at (2026-04-06) ## Statistics Enabled/total sites: 2538/3155 = 80.44% diff --git a/tests/test_cli.py b/tests/test_cli.py index 3d58936..e4f7d77 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -48,6 +48,7 @@ DEFAULT_ARGS: Dict[str, Any] = { 'web': None, 'with_domains': False, 'xmind': False, + 'md': False, 'no_autoupdate': False, 'force_update': False, }