diff --git a/.gitignore b/.gitignore index 3bf7f81..919b269 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ tests/.excluded_sites # MacOS Folder Metadata File .DS_Store +/reports/ diff --git a/maigret/maigret.py b/maigret/maigret.py index 20d6bcc..5c64abc 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -26,7 +26,7 @@ from socid_extractor import parse, extract from .notify import QueryNotifyPrint from .result import QueryResult, QueryStatus from .sites import MaigretDatabase, MaigretSite -from .report import save_csv_report, genxmindfile, save_html_report +from .report import save_csv_report, genxmindfile, save_html_pdf_report import xmind @@ -285,6 +285,8 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig return results_info + + async def maigret(username, site_dict, query_notify, logger, proxy=None, timeout=None, recursive_search=False, id_type='username', tags=None, debug=False, forced=False, @@ -705,6 +707,12 @@ async def main(): help="Generate an xmind 8 mindmap" ) + parser.add_argument("-P", "--pdf", + action="store_true", + dest="pdf", default=False, + help="Generate a pdf report" + ) + args = parser.parse_args() # Logging @@ -851,18 +859,15 @@ async def main(): # The usernames results should be stored in a targeted folder. # If the folder doesn't exist, create it first os.makedirs(args.folderoutput, exist_ok=True) - result_file = os.path.join(args.folderoutput, f"{username}.txt") - if args.xmind: - xmind_path = os.path.join(args.folderoutput, f"{username}.xmind") + result_path = os.path.join(args.folderoutput, f"{username}.") else: - result_file = f"{username}.txt" - if args.xmind: - xmind_path = f"{username}.xmind" + result_path = os.path.join("reports", f"{username}.") if args.xmind: - genxmindfile(xmind_path, username, results) + genxmindfile(result_path+"xmind", username, results) - with open(result_file, "w", encoding="utf-8") as file: + + with open(result_path+"txt", "w", encoding="utf-8") as file: exists_counter = 0 for website_name in results: dictionary = results[website_name] @@ -878,12 +883,20 @@ async def main(): exists_counter += 1 file.write(dictionary["url_user"] + "\n") file.write(f"Total Websites Username Detected On : {exists_counter}") + file.close() if args.csv: - save_csv_report(username, results) + save_csv_report(username, results, result_path+"csv") - if args.html: - save_html_report(general_results) + pathPDF = None + pathHTML = None + if args.html: + pathHTML = result_path+"html" + if args.pdf: + pathPDF = result_path+"pdf" + + if pathPDF or pathHTML: + save_html_pdf_report(general_results,pathHTML,pathPDF) def run(): @@ -894,6 +907,5 @@ def run(): print('Maigret is interrupted.') sys.exit(1) - if __name__ == "__main__": run() \ No newline at end of file diff --git a/maigret/report.py b/maigret/report.py index 58c697b..6211995 100644 --- a/maigret/report.py +++ b/maigret/report.py @@ -3,28 +3,61 @@ from datetime import datetime import logging import os import xmind +import io +from xhtml2pdf import pisa from jinja2 import Template + import pycountry from .result import QueryStatus from .utils import is_country_tag, CaseConverter, enrich_link_str - -def save_csv_report(username: str, results: dict): - with open(username + '.csv', 'w', newline='', encoding='utf-8') as csvfile: +def save_csv_report(username: str, results: dict, filename:str): + with open(filename, 'w', newline='', encoding='utf-8') as csvfile: save_csv_report_to_file(username, results, csvfile) +def retrive_timestamp(datestring:str): + first_seen_format = '%Y-%m-%d %H:%M:%S' + first_seen_formats = '%Y-%m-%dT%H:%M:%S' + try: + time = datetime.strptime(datestring, first_seen_format) + except: + try: + time = datetime.strptime(datestring, first_seen_formats) + except: + time = datetime.min + return time -def save_html_report(username_results: list): +def filterSupposedData(data): + ### interesting fields + allowed_fields = ['fullname', 'gender', 'location'] + filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0] + for k, v in data.items() + if k in allowed_fields} + return filtered_supposed_data + +def generate_template(pdf:bool): + # template generation + if(pdf): + template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)), + "resources/simple_report_pdf.tpl")).read() + else: + template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)), + "resources/simple_report.tpl")).read() + template = Template(template_text) + template.globals['title'] = CaseConverter.snake_to_title + template.globals['detect_link'] = enrich_link_str + return template + +def save_html_pdf_report(username_results: list, filename:str=None, filenamepdf:str=None): brief_text = [] usernames = {} extended_info_count = 0 tags = {} supposed_data = {} - allowed_fields = ['fullname', 'gender'] + first_seen = None - first_seen_format = '%Y-%m-%d %H:%M:%S' for username, id_type, results in username_results: found_accounts = 0 @@ -51,8 +84,8 @@ def save_html_report(username_results: list): if first_seen is None: first_seen = created_at else: - known_time = datetime.strptime(first_seen, first_seen_format) - new_time = datetime.strptime(created_at, first_seen_format) + known_time = retrive_timestamp(first_seen) + new_time = retrive_timestamp(created_at) if new_time < known_time: first_seen = created_at @@ -103,13 +136,7 @@ def save_html_report(username_results: list): brief_text.append(f'Extended info extracted from {extended_info_count} accounts.') - # template generation - template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)), - "resources/simple_report.tpl")).read() - template = Template(template_text) - template.globals['title'] = CaseConverter.snake_to_title - template.globals['detect_link'] = enrich_link_str brief = ' '.join(brief_text).strip() tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True) @@ -122,23 +149,49 @@ def save_html_report(username_results: list): countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items())) interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items())) - filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0] - for k, v in supposed_data.items() - if k in allowed_fields} + filtered_supposed_data = filterSupposedData(supposed_data) + + # save report in HTML + if(filename is not None): + template = generate_template(False) + filled_template = template.render(username=first_username, + brief=brief, + results=username_results, + first_seen=first_seen, + interests_tuple_list=tuple_sort(interests_list), + countries_tuple_list=tuple_sort(countries_lists), + supposed_data=filtered_supposed_data, + generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + ) + with open(filename, 'w') as f: + f.write(filled_template) + f.close() + # save report in PDF + if(filenamepdf is not None): + template = generate_template(True) + filled_template = template.render(username=first_username, + brief=brief, + results=username_results, + first_seen=first_seen, + interests_tuple_list=tuple_sort(interests_list), + countries_tuple_list=tuple_sort(countries_lists), + supposed_data=filtered_supposed_data, + generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + ) + csstext = "" + with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), + "resources/simple_report_pdf.css"), "r") as cssfile: + cssline = cssfile.readline() + csstext += cssline + while cssline: + cssline = cssfile.readline() + csstext += cssline + cssfile.close() + + pdffile = open(filenamepdf, "w+b") + pisa.pisaDocument(io.StringIO(filled_template), dest=pdffile, default_css=csstext) + pdffile.close() - filled_template = template.render(username=first_username, - brief=brief, - results=username_results, - first_seen=first_seen, - interests_tuple_list=tuple_sort(interests_list), - countries_tuple_list=tuple_sort(countries_lists), - supposed_data=filtered_supposed_data, - generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - ) - # save report - html_filename = f'report_{first_username}.html' - with open(html_filename, 'w') as f: - f.write(filled_template) def save_csv_report_to_file(username: str, results: dict, csvfile): print(results) @@ -160,20 +213,23 @@ def save_csv_report_to_file(username: str, results: dict, csvfile): results[site]['http_status'], ]) - +''' +XMIND 8 Functions +''' def genxmindfile(filename, username, results): print(f'Generating XMIND8 file for username {username}') if os.path.exists(filename): os.remove(filename) workbook = xmind.load(filename) sheet = workbook.getPrimarySheet() - design_sheet1(sheet, username, results) + design_sheet(sheet, username, results) xmind.save(workbook, path=filename) -def design_sheet1(sheet, username, results): +def design_sheet(sheet, username, results): ##all tag list alltags = {} + supposed_data = {} sheet.setTitle("%s Analysis"%(username)) root_topic1 = sheet.getRootTopic() @@ -198,7 +254,6 @@ def design_sheet1(sheet, username, results): alltags[tag] = tagsection category = None - userlink= None for tag in dictionary.get("status").tags: if tag.strip() == "": continue @@ -206,12 +261,37 @@ def design_sheet1(sheet, username, results): category = tag if category is None: - category = "undefined" userlink = undefinedsection.addSubTopic() + userlink.addLabel(dictionary.get("status").site_url_user) else: userlink = alltags[category].addSubTopic() - userlink.addLabel(dictionary.get("status").site_url_user) + userlink.addLabel(dictionary.get("status").site_url_user) + + if dictionary.get("status").ids_data: + for k, v in dictionary.get("status").ids_data.items(): + # suppose target data + if not isinstance(v, list): + currentsublabel = userlink.addSubTopic() + field = 'fullname' if k == 'name' else k + if not field in supposed_data: + supposed_data[field] = [] + supposed_data[field].append(v) + currentsublabel.setTitle("%s: %s" % (k, v)) + else: + for currentval in v: + currentsublabel = userlink.addSubTopic() + field = 'fullname' if k == 'name' else k + if not field in supposed_data: + supposed_data[field] = [] + supposed_data[field].append(currentval) + currentsublabel.setTitle("%s: %s" % (k, currentval)) + ### Add Supposed DATA + filterede_supposed_data = filterSupposedData(supposed_data) + if(len(filterede_supposed_data) >0): + undefinedsection = root_topic1.addSubTopic() + undefinedsection.setTitle("SUPPOSED DATA") + for k, v in filterede_supposed_data.items(): + currentsublabel = undefinedsection.addSubTopic() + currentsublabel.setTitle("%s: %s" % (k, v)) + - #for tag in dictionary.get("status").tags: - # if( tag != category ): - # sheet.createRelationship(userlink.getID(), alltags[tag].getID(),"other tag") diff --git a/maigret/resources/simple_report_pdf.css b/maigret/resources/simple_report_pdf.css new file mode 100644 index 0000000..9fda1b4 --- /dev/null +++ b/maigret/resources/simple_report_pdf.css @@ -0,0 +1,41 @@ +h2 { + font-size: 30px; + width: 100%; + display:block; +} +h3 { + font-size: 25px; + width: 100%; + display:block; +} +h4 { + font-size: 20px; + width: 100%; + display:block; +} +p { + margin: 0 0 5px; + display: block; +} + + +table { + margin-bottom: 10px; + width:100%; +} +th { + font-weight: bold; +} +th,td,caption { + padding: 4px 10px 4px 5px; +} +table tr:nth-child(even) td, +table tr.even td { + background-color: #e5ecf9; +} + +div { + border-bottom-color: #3e3e3e; + border-bottom-width: 1px; + border-bottom-style: solid; +} \ No newline at end of file diff --git a/maigret/resources/simple_report_pdf.tpl b/maigret/resources/simple_report_pdf.tpl new file mode 100644 index 0000000..aa58ae5 --- /dev/null +++ b/maigret/resources/simple_report_pdf.tpl @@ -0,0 +1,113 @@ +type="text/css" + + + + +{{ username }} -- Maigret username search report + +
+
+
+

+ Username search report for {{ username }} +

+ Generated at {{ generated_at }} +
+
+
+
+
+
+

Supposed personal data

+ {% for k, v in supposed_data.items() %} +

+ {{ k }}: {{ v }} +

+ {% endfor %} + {% if countries_tuple_list %} +

+ Geo: {% for k, v in countries_tuple_list %}{{ k }} ({{ v }}){{ ", " if not loop.last }}{% endfor %} +

+ {% endif %}{% if interests_tuple_list %} +

+ Interests: {% for k, v in interests_tuple_list %}{{ k }} ({{ v }}){{ ", " if not loop.last }}{% endfor %} +

+ {% endif %}{% if first_seen %} +

+ First seen: {{ first_seen }} +

+ {% endif %} +
+
+
+
+
+
+
+
+

Brief

+

+ {{ brief }} +

+
+
+
+
+ {% for u, t, data in results %} + {% for k, v in data.items() %} + {% if v.found and not v.is_similar %} + +
+
+
+
+ + + + + + +
+ Photo + +
+

+ {{ k }} +

+ {% if v.status.tags %} +
Tags: {{ v.status.tags | join(', ') }}
+ {% endif %} +

+ {{ v.url_user }} +

+
+
+ {% if v.ids_data %} +
+
+
+

Details

+ + + {% for k1, v1 in v.ids_data.items() %} + {% if k1 != 'image' %} + + + + + {% endif %} + + {% endfor %} + +
{{ title(k1) }}{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}
+
+ {% endif %} +
+
+
+ {% endif %} + {% endfor %} + {% endfor %} +
+ + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 060f64b..4dd59b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ aiohttp==3.7.3 aiohttp-socks==0.5.5 +arabic-reshaper==2.1.1 async-timeout==3.0.1 attrs==20.3.0 beautifulsoup4==4.9.3 @@ -7,16 +8,24 @@ bs4==0.0.1 certifi==2020.12.5 chardet==3.0.4 colorama==0.4.4 +future==0.18.2 +html5lib==1.1 idna==2.10 Jinja2==2.11.2 lxml==4.6.2 +MarkupSafe==1.1.1 mock==4.0.2 multidict==5.1.0 +Pillow==8.1.0 pycountry==20.7.3 +PyPDF2==1.26.0 PySocks==1.7.1 +python-bidi==0.4.2 python-socks==1.1.2 +reportlab==3.5.59 requests==2.25.1 requests-futures==1.0.0 +six==1.15.0 socid-extractor==0.0.2 soupsieve==2.1 stem==1.8.0 @@ -24,5 +33,7 @@ torrequest==0.1.0 tqdm==4.55.0 typing-extensions==3.7.4.3 urllib3==1.26.2 +webencodings==0.5.1 +xhtml2pdf==0.2.5 XMind==1.2.0 yarl==1.6.3 diff --git a/tests/test_report.py b/tests/test_report.py index 80dedac..1d4f0d1 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -5,7 +5,7 @@ import os import xmind -from maigret.report import save_csv_report_to_file, genxmindfile, save_html_report +from maigret.report import save_csv_report_to_file, genxmindfile, save_html_pdf_report from maigret.result import QueryResult, QueryStatus @@ -93,8 +93,7 @@ def test_html_report(): except: pass - save_html_report(TEST) - + save_html_pdf_report(TEST,filename=report_name,filenamepdf=None) assert os.path.exists(report_name) report_text = open(report_name).read() @@ -102,3 +101,13 @@ def test_html_report(): assert SUPPOSED_BRIEF in report_text assert SUPPOSED_GEO in report_text assert SUPPOSED_INTERESTS in report_text + +def test_pdf_report(): + report_name_pdf = 'report_alexaimephotographycars.pdf' + try: + os.remove(report_name_pdf) + except: + pass + + save_html_pdf_report(TEST,filename=None,filenamepdf=report_name_pdf) + assert os.path.exists(report_name_pdf)