Added Support for:

- PDF
- Enhanced XMIND 8
- Refactoring some report functions
This commit is contained in:
Ping-uu
2021-01-09 21:50:28 +01:00
parent 66a3d8ee52
commit 4f65b753e6
5 changed files with 262 additions and 53 deletions
+25 -13
View File
@@ -26,7 +26,7 @@ from socid_extractor import parse, extract
from .notify import QueryNotifyPrint from .notify import QueryNotifyPrint
from .result import QueryResult, QueryStatus from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite from .sites import MaigretDatabase, MaigretSite
from .report import save_csv_report, genxmindfile, save_html_report from .report import save_csv_report, genxmindfile, save_html_pdf_report
import xmind import xmind
@@ -285,6 +285,8 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
return results_info return results_info
async def maigret(username, site_dict, query_notify, logger, async def maigret(username, site_dict, query_notify, logger,
proxy=None, timeout=None, recursive_search=False, proxy=None, timeout=None, recursive_search=False,
id_type='username', tags=None, debug=False, forced=False, id_type='username', tags=None, debug=False, forced=False,
@@ -705,6 +707,12 @@ async def main():
help="Generate an xmind 8 mindmap" help="Generate an xmind 8 mindmap"
) )
parser.add_argument("-P", "--pdf",
action="store_true",
dest="pdf", default=False,
help="Generate a pdf report"
)
args = parser.parse_args() args = parser.parse_args()
# Logging # Logging
@@ -851,18 +859,15 @@ async def main():
# The usernames results should be stored in a targeted folder. # The usernames results should be stored in a targeted folder.
# If the folder doesn't exist, create it first # If the folder doesn't exist, create it first
os.makedirs(args.folderoutput, exist_ok=True) os.makedirs(args.folderoutput, exist_ok=True)
result_file = os.path.join(args.folderoutput, f"{username}.txt") result_path = os.path.join(args.folderoutput, f"{username}.")
if args.xmind:
xmind_path = os.path.join(args.folderoutput, f"{username}.xmind")
else: else:
result_file = f"{username}.txt" result_path = os.path.join("reports", f"{username}.")
if args.xmind:
xmind_path = f"{username}.xmind"
if args.xmind: if args.xmind:
genxmindfile(xmind_path, username, results) genxmindfile(result_path+"xmind", username, results)
with open(result_file, "w", encoding="utf-8") as file:
with open(result_path+"txt", "w", encoding="utf-8") as file:
exists_counter = 0 exists_counter = 0
for website_name in results: for website_name in results:
dictionary = results[website_name] dictionary = results[website_name]
@@ -878,12 +883,20 @@ async def main():
exists_counter += 1 exists_counter += 1
file.write(dictionary["url_user"] + "\n") file.write(dictionary["url_user"] + "\n")
file.write(f"Total Websites Username Detected On : {exists_counter}") file.write(f"Total Websites Username Detected On : {exists_counter}")
file.close()
if args.csv: if args.csv:
save_csv_report(username, results) save_csv_report(username, results, result_path+"csv")
if args.html: pathPDF = None
save_html_report(general_results) pathHTML = None
if args.html:
pathHTML = result_path+"html"
if args.pdf:
pathPDF = result_path+"pdf"
if pathPDF or pathHTML:
save_html_pdf_report(general_results,pathHTML,pathPDF)
def run(): def run():
@@ -894,6 +907,5 @@ def run():
print('Maigret is interrupted.') print('Maigret is interrupted.')
sys.exit(1) sys.exit(1)
if __name__ == "__main__": if __name__ == "__main__":
run() run()
+106 -39
View File
@@ -5,26 +5,58 @@ import os
import xmind import xmind
from jinja2 import Template from jinja2 import Template
from weasyprint import HTML, CSS
import pycountry import pycountry
from .result import QueryStatus from .result import QueryStatus
from .utils import is_country_tag, CaseConverter, enrich_link_str from .utils import is_country_tag, CaseConverter, enrich_link_str
def save_csv_report(username: str, results: dict, filename:str):
def save_csv_report(username: str, results: dict): with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
with open(username + '.csv', 'w', newline='', encoding='utf-8') as csvfile:
save_csv_report_to_file(username, results, csvfile) save_csv_report_to_file(username, results, csvfile)
def retrive_timestamp(datestring: str):
    """Parse an account-creation timestamp into a ``datetime``.

    Two layouts are accepted: ``YYYY-MM-DD HH:MM:SS`` and the ISO-like
    ``YYYY-MM-DDTHH:MM:SS``.

    :param datestring: timestamp text to parse.
    :return: the parsed ``datetime``; ``datetime.min`` when the value matches
        neither layout (or is not a string), so callers can still compare
        the result against other timestamps.
    """
    known_formats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S')
    for date_format in known_formats:
        try:
            return datetime.strptime(datestring, date_format)
        except (TypeError, ValueError):
            # ValueError: text does not match this layout.
            # TypeError: value is not a string at all (e.g. None).
            # Anything else (KeyboardInterrupt, ...) must propagate.
            continue
    return datetime.min
def save_html_report(username_results: list): def filterSupposedData(data):
### interesting fields
allowed_fields = ['fullname', 'gender', 'location']
filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0]
for k, v in data.items()
if k in allowed_fields}
return filtered_supposed_data
def generate_template(pdf: bool):
    """Load the report template and register the helpers it calls.

    :param pdf: when true, load ``resources/simple_report_pdf.tpl``;
        otherwise load ``resources/simple_report.tpl``. Both paths are
        resolved relative to the directory containing this module.
    :return: a ``Template`` whose globals expose ``title``
        (``CaseConverter.snake_to_title``) and ``detect_link``
        (``enrich_link_str``).
    """
    template_name = "simple_report_pdf.tpl" if pdf else "simple_report.tpl"
    template_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "resources",
        template_name,
    )
    # Read through a context manager so the handle is closed promptly, and
    # pin the encoding instead of relying on the platform default.
    with open(template_path, encoding="utf-8") as template_file:
        template_text = template_file.read()

    template = Template(template_text)
    template.globals['title'] = CaseConverter.snake_to_title
    template.globals['detect_link'] = enrich_link_str
    return template
def save_html_pdf_report(username_results: list, filename:str=None, filenamepdf:str=None):
brief_text = [] brief_text = []
usernames = {} usernames = {}
extended_info_count = 0 extended_info_count = 0
tags = {} tags = {}
supposed_data = {} supposed_data = {}
allowed_fields = ['fullname', 'gender']
first_seen = None first_seen = None
first_seen_format = '%Y-%m-%d %H:%M:%S'
for username, id_type, results in username_results: for username, id_type, results in username_results:
found_accounts = 0 found_accounts = 0
@@ -51,8 +83,8 @@ def save_html_report(username_results: list):
if first_seen is None: if first_seen is None:
first_seen = created_at first_seen = created_at
else: else:
known_time = datetime.strptime(first_seen, first_seen_format) known_time = retrive_timestamp(first_seen)
new_time = datetime.strptime(created_at, first_seen_format) new_time = retrive_timestamp(created_at)
if new_time < known_time: if new_time < known_time:
first_seen = created_at first_seen = created_at
@@ -103,13 +135,7 @@ def save_html_report(username_results: list):
brief_text.append(f'Extended info extracted from {extended_info_count} accounts.') brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
# template generation
template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/simple_report.tpl")).read()
template = Template(template_text)
template.globals['title'] = CaseConverter.snake_to_title
template.globals['detect_link'] = enrich_link_str
brief = ' '.join(brief_text).strip() brief = ' '.join(brief_text).strip()
tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True) tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
@@ -122,23 +148,37 @@ def save_html_report(username_results: list):
countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items())) countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items()))
interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items())) interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items()))
filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0] filtered_supposed_data = filterSupposedData(supposed_data)
for k, v in supposed_data.items()
if k in allowed_fields}
filled_template = template.render(username=first_username, # save report in HTML
brief=brief, if(filename is not None):
results=username_results, template = generate_template(False)
first_seen=first_seen, filled_template = template.render(username=first_username,
interests_tuple_list=tuple_sort(interests_list), brief=brief,
countries_tuple_list=tuple_sort(countries_lists), results=username_results,
supposed_data=filtered_supposed_data, first_seen=first_seen,
generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), interests_tuple_list=tuple_sort(interests_list),
) countries_tuple_list=tuple_sort(countries_lists),
# save report supposed_data=filtered_supposed_data,
html_filename = f'report_{first_username}.html' generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
with open(html_filename, 'w') as f: )
f.write(filled_template) with open(filename, 'w') as f:
f.write(filled_template)
f.close()
# save report in PDF
if(filenamepdf is not None):
template = generate_template(True)
filled_template = template.render(username=first_username,
brief=brief,
results=username_results,
first_seen=first_seen,
interests_tuple_list=tuple_sort(interests_list),
countries_tuple_list=tuple_sort(countries_lists),
supposed_data=filtered_supposed_data,
generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)
HTML(string=filled_template).write_pdf(filenamepdf, stylesheets=[CSS(filename=os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/simple_report_pdf.css"))])
def save_csv_report_to_file(username: str, results: dict, csvfile): def save_csv_report_to_file(username: str, results: dict, csvfile):
print(results) print(results)
@@ -160,20 +200,23 @@ def save_csv_report_to_file(username: str, results: dict, csvfile):
results[site]['http_status'], results[site]['http_status'],
]) ])
'''
XMIND 8 Functions
'''
def genxmindfile(filename, username, results): def genxmindfile(filename, username, results):
print(f'Generating XMIND8 file for username {username}') print(f'Generating XMIND8 file for username {username}')
if os.path.exists(filename): if os.path.exists(filename):
os.remove(filename) os.remove(filename)
workbook = xmind.load(filename) workbook = xmind.load(filename)
sheet = workbook.getPrimarySheet() sheet = workbook.getPrimarySheet()
design_sheet1(sheet, username, results) design_sheet(sheet, username, results)
xmind.save(workbook, path=filename) xmind.save(workbook, path=filename)
def design_sheet1(sheet, username, results): def design_sheet(sheet, username, results):
##all tag list ##all tag list
alltags = {} alltags = {}
supposed_data = {}
sheet.setTitle("%s Analysis"%(username)) sheet.setTitle("%s Analysis"%(username))
root_topic1 = sheet.getRootTopic() root_topic1 = sheet.getRootTopic()
@@ -198,7 +241,6 @@ def design_sheet1(sheet, username, results):
alltags[tag] = tagsection alltags[tag] = tagsection
category = None category = None
userlink= None
for tag in dictionary.get("status").tags: for tag in dictionary.get("status").tags:
if tag.strip() == "": if tag.strip() == "":
continue continue
@@ -206,12 +248,37 @@ def design_sheet1(sheet, username, results):
category = tag category = tag
if category is None: if category is None:
category = "undefined"
userlink = undefinedsection.addSubTopic() userlink = undefinedsection.addSubTopic()
userlink.addLabel(dictionary.get("status").site_url_user)
else: else:
userlink = alltags[category].addSubTopic() userlink = alltags[category].addSubTopic()
userlink.addLabel(dictionary.get("status").site_url_user) userlink.addLabel(dictionary.get("status").site_url_user)
if dictionary.get("status").ids_data:
for k, v in dictionary.get("status").ids_data.items():
# suppose target data
if not isinstance(v, list):
currentsublabel = userlink.addSubTopic()
field = 'fullname' if k == 'name' else k
if not field in supposed_data:
supposed_data[field] = []
supposed_data[field].append(v)
currentsublabel.setTitle("%s: %s" % (k, v))
else:
for currentval in v:
currentsublabel = userlink.addSubTopic()
field = 'fullname' if k == 'name' else k
if not field in supposed_data:
supposed_data[field] = []
supposed_data[field].append(currentval)
currentsublabel.setTitle("%s: %s" % (k, currentval))
### Add Supposed DATA
filterede_supposed_data = filterSupposedData(supposed_data)
if(len(filterede_supposed_data) >0):
undefinedsection = root_topic1.addSubTopic()
undefinedsection.setTitle("SUPPOSED DATA")
for k, v in filterede_supposed_data.items():
currentsublabel = undefinedsection.addSubTopic()
currentsublabel.setTitle("%s: %s" % (k, v))
#for tag in dictionary.get("status").tags:
# if( tag != category ):
# sheet.createRelationship(userlink.getID(), alltags[tag].getID(),"other tag")
+106
View File
@@ -0,0 +1,106 @@
<html>
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no" />
<title>{{ username }} -- Maigret username search report</title>
<style>
.table td, .table th {
padding: .4rem;
}
@media print {
.pagebreak { page-break-before: always; }
}
</style>
</head>
<body>
<div class="container">
<div class="row-mb">
<div class="col-12 card-body" style="padding-bottom: 0.5rem; width:100%">
<h4 class="mb-0">
Username search report for {{ username }}
</h4>
<small>Generated at {{ generated_at }}</small>
</div>
</div>
<div class="">
<div class="">
<div class="">
<div class="">
<h5>Supposed personal data</h5>
{% for k, v in supposed_data.items() %}
<span>
{{ k }}: {{ v }}
</span>
{% endfor %}
{% if countries_tuple_list %}
<span>
Geo: {% for k, v in countries_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
</span>
{% endif %}{% if interests_tuple_list %}
<span>
Interests: {% for k, v in interests_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
</span>
{% endif %}{% if first_seen %}
<span>
First seen: {{ first_seen }}
</span>
{% endif %}
</div>
</div>
</div>
</div>
<div class="">
<div class="">
<div class="">
<div class="">
<h5>Brief</h5>
<span>
{{ brief }}
</span>
</div>
</div>
</div>
</div>
{% for u, t, data in results %}
{% for k, v in data.items() %}
{% if v.found and not v.is_similar %}
<div class="">
<div class="">
<div class="">
<img class="" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<div class="" style="padding-top: 0;">
<h3 class="">
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
</h3>
{% if v.status.tags %}
<div class="mb-1 text-muted">Tags: {{ v.status.tags | join(', ') }}</div>
{% endif %}
<p class="card-text">
<a href="{{ v.url_user }}" target="_blank">{{ v.url_user }}</a>
</p>
{% if v.ids_data %}
<table class="table table-striped">
<tbody>
{% for k1, v1 in v.ids_data.items() %}
{% if k1 != 'image' %}
<tr>
<th>{{ title(k1) }}</th>
<td>{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}
</td>
</tr>
{% endif %}
{% endfor %}
</tbody>
</table>
{% endif %}
</div>
</div>
</div>
</div>
{% endif %}
{% endfor %}
{% endfor %}
</div>
</body>
</html>
+25 -1
View File
@@ -2,27 +2,51 @@ aiohttp==3.7.3
aiohttp-socks==0.5.5 aiohttp-socks==0.5.5
async-timeout==3.0.1 async-timeout==3.0.1
attrs==20.3.0 attrs==20.3.0
backports.functools-lru-cache==1.6.1
beautifulsoup4==4.9.3 beautifulsoup4==4.9.3
bs4==0.0.1 bs4==0.0.1
cairocffi==1.2.0
CairoSVG==2.5.1
certifi==2020.12.5 certifi==2020.12.5
cffi==1.14.4
chardet==3.0.4 chardet==3.0.4
colorama==0.4.4 colorama==0.4.4
cssselect2==0.4.1
defusedxml==0.6.0
html5lib==1.1
idna==2.10 idna==2.10
importlib-metadata==3.3.0
Jinja2==2.11.2 Jinja2==2.11.2
lxml==4.6.2 lxml==4.6.2
Markdown==3.3.3
MarkupSafe==1.1.1
mock==4.0.2 mock==4.0.2
multidict==5.1.0 multidict==5.1.0
numpy==1.19.5
pandas==1.2.0
pdf-reports==0.3.2
Pillow==8.1.0
pycountry==20.7.3 pycountry==20.7.3
pycparser==2.20
Pyphen==0.10.0
pypugjs==5.9.8
PySocks==1.7.1 PySocks==1.7.1
python-dateutil==2.8.1
python-socks==1.1.2 python-socks==1.1.2
pytz==2020.5
requests==2.25.1 requests==2.25.1
requests-futures==1.0.0 requests-futures==1.0.0
six==1.15.0
socid-extractor==0.0.2 socid-extractor==0.0.2
soupsieve==2.1 soupsieve==2.1
stem==1.8.0 stem==1.8.0
tinycss2==1.1.0
torrequest==0.1.0 torrequest==0.1.0
tqdm==4.55.0 tqdm==4.55.1
typing-extensions==3.7.4.3 typing-extensions==3.7.4.3
urllib3==1.26.2 urllib3==1.26.2
WeasyPrint==52.2
webencodings==0.5.1
XMind==1.2.0 XMind==1.2.0
yarl==1.6.3 yarl==1.6.3
zipp==3.4.0