diff --git a/maigret/__main__.py b/maigret/__main__.py index c24c5cf..527d456 100644 --- a/maigret/__main__.py +++ b/maigret/__main__.py @@ -5,8 +5,8 @@ Maigret entrypoint """ import asyncio -import maigret +import maigret if __name__ == "__main__": asyncio.run(maigret.main()) diff --git a/maigret/maigret.py b/maigret/maigret.py index aa4a4be..4f04300 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -2,6 +2,7 @@ Maigret main module """ +import aiohttp import asyncio import csv import http.cookiejar as cookielib @@ -10,26 +11,24 @@ import logging import os import platform import re +import requests import ssl import sys +import tqdm.asyncio +import xmind +from aiohttp_socks import ProxyConnector from argparse import ArgumentParser, RawDescriptionHelpFormatter from http.cookies import SimpleCookie - -import aiohttp -from aiohttp_socks import ProxyConnector -from python_socks import _errors as proxy_errors -import requests -import tqdm.asyncio from mock import Mock -from socid_extractor import parse, extract +from python_socks import _errors as proxy_errors +from socid_extractor import parse, extract, __version__ as socid_version from .activation import ParsingActivator from .notify import QueryNotifyPrint +from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \ + generate_report_context, save_txt_report from .result import QueryResult, QueryStatus from .sites import MaigretDatabase, MaigretSite -from .report import save_csv_report, genxmindfile, save_html_pdf_report - -import xmind __version__ = '0.1.10' @@ -517,7 +516,7 @@ def timeout_check(value): return timeout -async def site_self_check(site, logger, semaphore, db: MaigretDatabase, no_progressbar=False): +async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False): query_notify = Mock() changes = { 'disabled': False, @@ -579,13 +578,14 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, no_progr if changes['disabled'] != site.disabled: site.disabled = changes['disabled'] db.update_site(site) - action = 'Disabled' if not site.disabled else 'Enabled' - print(f'{action} site {site.name}...') + if not silent: + action = 'Disabled' if not site.disabled else 'Enabled' + print(f'{action} site {site.name}...') return changes -async def self_check(db: MaigretDatabase, site_data: dict, logger): +async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False): sem = asyncio.Semaphore(10) tasks = [] all_sites = site_data @@ -596,7 +596,7 @@ async def self_check(db: MaigretDatabase, site_data: dict, logger): disabled_old_count = disabled_count(all_sites.values()) for _, site in all_sites.items(): - check_coro = site_self_check(site, logger, sem, db) + check_coro = site_self_check(site, logger, sem, db, silent) future = asyncio.ensure_future(check_coro) tasks.append(future) @@ -612,13 +612,18 @@ async def self_check(db: MaigretDatabase, site_data: dict, logger): message = 'Enabled' total_disabled *= -1 - print(f'{message} {total_disabled} checked sites. Run with `--info` flag to get more information') + if not silent: + print(f'{message} {total_disabled} checked sites. Run with `--info` flag to get more information') async def main(): - version_string = f"%(prog)s {__version__}\n" + \ - f"{requests.__description__}: {requests.__version__}\n" + \ - f"Python: {platform.python_version()}" + version_string = '\n'.join([ + f'%(prog)s {__version__}', + f'Socid-extractor: {socid_version}', + f'Aiohttp: {aiohttp.__version__}', + f'Requests: {requests.__version__}', + f'Python: {platform.python_version()}', + ]) parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description=f"Maigret v{__version__}" @@ -627,7 +632,7 @@ async def main(): action="version", version=version_string, help="Display version information and dependencies." ) - parser.add_argument("--info", + parser.add_argument("--info", "-vv", action="store_true", dest="info", default=False, help="Display service information." ) @@ -635,21 +640,10 @@ async def main(): action="store_true", dest="verbose", default=False, help="Display extra information and metrics." ) - parser.add_argument("-d", "--debug", + parser.add_argument("-d", "--debug", "-vvv", action="store_true", dest="debug", default=False, help="Saving debugging information and sites responses in debug.txt." ) - parser.add_argument("--folderoutput", "-fo", dest="folderoutput", default="reports", - help="If using multiple usernames, the output of the results will be saved to this folder." - ) - parser.add_argument("--csv", - action="store_true", dest="csv", default=False, - help="Create Comma-Separated Values (CSV) File." - ) - parser.add_argument("--html", - action="store_true", dest="html", default=False, - help="Create HTML report file." - ) parser.add_argument("--site", action="append", metavar='SITE_NAME', dest="site_list", default=[], @@ -715,17 +709,31 @@ async def main(): dest="tags", default='', help="Specify tags of sites." ) - - parser.add_argument("-x","--xmind", + # reports options + parser.add_argument("--folderoutput", "-fo", dest="folderoutput", default="reports", + help="If using multiple usernames, the output of the results will be saved to this folder." + ) + parser.add_argument("-T", "--txt", + action="store_true", dest="txt", default=False, + help="Create a TXT report (one report per username)." + ) + parser.add_argument("-C", "--csv", + action="store_true", dest="csv", default=False, + help="Create a CSV report (one report per username)." + ) + parser.add_argument("-H", "--html", + action="store_true", dest="html", default=False, + help="Create an HTML report file (general report on all usernames)." + ) + parser.add_argument("-X","--xmind", action="store_true", dest="xmind", default=False, - help="Generate an xmind 8 mindmap" + help="Generate an XMind 8 mindmap report (one report per username)." ) - parser.add_argument("-P", "--pdf", action="store_true", dest="pdf", default=False, - help="Generate a pdf report" + help="Generate a PDF report (general report on all usernames)." ) args = parser.parse_args() @@ -802,6 +810,13 @@ async def main(): else: print('Updates will be applied only for current search session.') + # Make reports folder is not exists + os.makedirs(args.folderoutput, exist_ok=True) + report_path = args.folderoutput + + # Define one report filename template + report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}') + # Database consistency enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values()))) print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}') @@ -855,51 +870,54 @@ async def main(): logger=logger, forced=args.use_disabled_sites, ) + + username_result = (username, id_type, results) general_results.append((username, id_type, results)) - if args.folderoutput: - # The usernames results should be stored in a targeted folder. - # If the folder doesn't exist, create it first - os.makedirs(args.folderoutput, exist_ok=True) - result_path = os.path.join(args.folderoutput, f"{username}.") - else: - result_path = os.path.join("reports", f"{username}.") + # TODO: tests + for website_name in results: + dictionary = results[website_name] + # TODO: fix no site data issue + if not dictionary: + continue + new_usernames = dictionary.get('ids_usernames') + if new_usernames: + for u, utype in new_usernames.items(): + usernames[u] = utype + # reporting for a one username if args.xmind: - genxmindfile(result_path+"xmind", username, results) - - - with open(result_path+"txt", "w", encoding="utf-8") as file: - exists_counter = 0 - for website_name in results: - dictionary = results[website_name] - # TODO: fix no site data issue - if not dictionary: - continue - new_usernames = dictionary.get('ids_usernames') - if new_usernames: - for u, utype in new_usernames.items(): - usernames[u] = utype - - if dictionary.get("status").status == QueryStatus.CLAIMED: - exists_counter += 1 - file.write(dictionary["url_user"] + "\n") - file.write(f"Total Websites Username Detected On : {exists_counter}") - file.close() + filename = report_filepath_tpl.format(username=username, postfix='.xmind') + save_xmind_report(filename, username, results) + print(f'XMind report for {username} saved in {filename}') if args.csv: - save_csv_report(username, results, result_path+"csv") + filename = report_filepath_tpl.format(username=username, postfix='.csv') + save_csv_report(filename, username, results) + print(f'CSV report for {username} saved in {filename}') - pathPDF = None - pathHTML = None - if args.html: - pathHTML = result_path+"html" - if args.pdf: - pathPDF = result_path+"pdf" + if args.txt: + filename = report_filepath_tpl.format(username=username, postfix='.txt') + save_txt_report(filename, username, results) + print(f'TXT report for {username} saved in {filename}') - if pathPDF or pathHTML: - save_html_pdf_report(general_results,pathHTML,pathPDF) + # reporting for all the result + report_context = generate_report_context(general_results) + # determine main username + username = report_context['username'] + if args.html: + filename = report_filepath_tpl.format(username=username, postfix='.html') + save_html_report(filename, report_context) + print(f'HTML report on all usernames saved in {filename}') + + if args.pdf: + filename = report_filepath_tpl.format(username=username, postfix='.pdf') + save_pdf_report(filename, report_context) + print(f'PDF report on all usernames saved in {filename}') + + + # update database db.save_to_file(args.json_file) diff --git a/maigret/notify.py b/maigret/notify.py index 4dc87dc..c5374b9 100644 --- a/maigret/notify.py +++ b/maigret/notify.py @@ -4,8 +4,8 @@ This module defines the objects for notifying the caller about the results of queries. """ import sys - from colorama import Fore, Style, init + from .result import QueryStatus diff --git a/maigret/report.py b/maigret/report.py index 6211995..c847143 100644 --- a/maigret/report.py +++ b/maigret/report.py @@ -1,56 +1,83 @@ import csv -from datetime import datetime +import io import logging import os -import xmind -import io - -from xhtml2pdf import pisa -from jinja2 import Template - import pycountry +import xmind +from datetime import datetime +from jinja2 import Template +from xhtml2pdf import pisa +from dateutil.parser import parse as parse_datetime_str from .result import QueryStatus from .utils import is_country_tag, CaseConverter, enrich_link_str -def save_csv_report(username: str, results: dict, filename:str): - with open(filename, 'w', newline='', encoding='utf-8') as csvfile: - save_csv_report_to_file(username, results, csvfile) -def retrive_timestamp(datestring:str): - first_seen_format = '%Y-%m-%d %H:%M:%S' - first_seen_formats = '%Y-%m-%dT%H:%M:%S' - try: - time = datetime.strptime(datestring, first_seen_format) - except: - try: - time = datetime.strptime(datestring, first_seen_formats) - except: - time = datetime.min - return time - -def filterSupposedData(data): +''' +UTILS +''' +def filter_supposed_data(data): ### interesting fields - allowed_fields = ['fullname', 'gender', 'location'] + allowed_fields = ['fullname', 'gender', 'location', 'age'] filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0] for k, v in data.items() if k in allowed_fields} return filtered_supposed_data -def generate_template(pdf:bool): - # template generation - if(pdf): - template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)), - "resources/simple_report_pdf.tpl")).read() + +''' +REPORTS SAVING +''' +def save_csv_report(filename: str, username: str, results: dict): + with open(filename, 'w', newline='', encoding='utf-8') as f: + generate_csv_report(username, results, f) + + +def save_txt_report(filename: str, username: str, results: dict): + with open(filename, 'w', encoding='utf-8') as f: + generate_txt_report(username, results, f) + + +def save_html_report(filename: str, context: dict): + template, _ = generate_report_template(is_pdf=False) + filled_template = template.render(**context) + with open(filename, 'w') as f: + f.write(filled_template) + + +def save_pdf_report(filename: str, context: dict): + template, css = generate_report_template(is_pdf=True) + filled_template = template.render(**context) + with open(filename, 'w+b') as f: + pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css) + + +''' +REPORTS GENERATING +''' +def generate_report_template(is_pdf: bool): + """ + HTML/PDF template generation + """ + def get_resource_content(filename): + return open(os.path.join(maigret_path, 'resources', filename)).read() + + maigret_path = os.path.dirname(os.path.realpath(__file__)) + + if is_pdf: + template_content = get_resource_content('simple_report_pdf.tpl') + css_content = get_resource_content('simple_report_pdf.css') else: - template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)), - "resources/simple_report.tpl")).read() - template = Template(template_text) + template_content = get_resource_content('simple_report.tpl') + css_content = None + + template = Template(template_content) template.globals['title'] = CaseConverter.snake_to_title template.globals['detect_link'] = enrich_link_str - return template + return template, css_content -def save_html_pdf_report(username_results: list, filename:str=None, filenamepdf:str=None): + +def generate_report_context(username_results: list): brief_text = [] usernames = {} extended_info_count = 0 @@ -84,10 +111,13 @@ def save_html_pdf_report(username_results: list, filename:str=None, filenamepdf: if first_seen is None: first_seen = created_at else: - known_time = retrive_timestamp(first_seen) - new_time = retrive_timestamp(created_at) - if new_time < known_time: - first_seen = created_at + try: + known_time = parse_datetime_str(first_seen) + new_time = parse_datetime_str(created_at) + if new_time < known_time: + first_seen = created_at + except: + logging.debug('Problems with converting datetime %s/%s', first_seen, created_at) for k, v in status.ids_data.items(): # suppose target data @@ -149,52 +179,21 @@ def save_html_pdf_report(username_results: list, filename:str=None, filenamepdf: countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items())) interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items())) - filtered_supposed_data = filterSupposedData(supposed_data) + filtered_supposed_data = filter_supposed_data(supposed_data) - # save report in HTML - if(filename is not None): - template = generate_template(False) - filled_template = template.render(username=first_username, - brief=brief, - results=username_results, - first_seen=first_seen, - interests_tuple_list=tuple_sort(interests_list), - countries_tuple_list=tuple_sort(countries_lists), - supposed_data=filtered_supposed_data, - generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - ) - with open(filename, 'w') as f: - f.write(filled_template) - f.close() - # save report in PDF - if(filenamepdf is not None): - template = generate_template(True) - filled_template = template.render(username=first_username, - brief=brief, - results=username_results, - first_seen=first_seen, - interests_tuple_list=tuple_sort(interests_list), - countries_tuple_list=tuple_sort(countries_lists), - supposed_data=filtered_supposed_data, - generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - ) - csstext = "" - with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), - "resources/simple_report_pdf.css"), "r") as cssfile: - cssline = cssfile.readline() - csstext += cssline - while cssline: - cssline = cssfile.readline() - csstext += cssline - cssfile.close() - - pdffile = open(filenamepdf, "w+b") - pisa.pisaDocument(io.StringIO(filled_template), dest=pdffile, default_css=csstext) - pdffile.close() + return { + 'username': first_username, + 'brief': brief, + 'results': username_results, + 'first_seen': first_seen, + 'interests_tuple_list': tuple_sort(interests_list), + 'countries_tuple_list': tuple_sort(countries_lists), + 'supposed_data': filtered_supposed_data, + 'generated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + } -def save_csv_report_to_file(username: str, results: dict, csvfile): - print(results) +def generate_csv_report(username: str, results: dict, csvfile): writer = csv.writer(csvfile) writer.writerow(['username', 'name', @@ -213,11 +212,23 @@ def save_csv_report_to_file(username: str, results: dict, csvfile): results[site]['http_status'], ]) + +def generate_txt_report(username: str, results: dict, file): + exists_counter = 0 + for website_name in results: + dictionary = results[website_name] + # TODO: fix no site data issue + if not dictionary: + continue + if dictionary.get("status").status == QueryStatus.CLAIMED: + exists_counter += 1 + file.write(dictionary["url_user"] + "\n") + file.write(f'Total Websites Username Detected On : {exists_counter}') + ''' XMIND 8 Functions ''' -def genxmindfile(filename, username, results): - print(f'Generating XMIND8 file for username {username}') +def save_xmind_report(filename, username, results): if os.path.exists(filename): os.remove(filename) workbook = xmind.load(filename) @@ -286,7 +297,7 @@ def design_sheet(sheet, username, results): supposed_data[field].append(currentval) currentsublabel.setTitle("%s: %s" % (k, currentval)) ### Add Supposed DATA - filterede_supposed_data = filterSupposedData(supposed_data) + filterede_supposed_data = filter_supposed_data(supposed_data) if(len(filterede_supposed_data) >0): undefinedsection = root_topic1.addSubTopic() undefinedsection.setTitle("SUPPOSED DATA") diff --git a/maigret/resources/data.json b/maigret/resources/data.json index f30deda..1139f95 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -8901,12 +8901,14 @@ "usernameClaimed": "red", "usernameUnclaimed": "noonewouldeverusethis7" }, - "NameMC (Minecraft.net skins)": { + "NameMC": { "tags": [ "us" ], + "regexCheck": "^.{3,16}$", "checkType": "message", - "absenceStrs": "Profiles: 0 results", + "presenseStrs": "/profile/", + "absenceStrs": "
-
- |
-
@@ -79,29 +78,32 @@ {{ v.url_user }}
+
+ {% endif %}
+ + Details+
|
+
+
|
| {{ title(k1) }} | -{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %} | -
|---|