Merge pull request #19 from Ping-uu/main

Pdf report + Xmind 8 Fixes + save path Fixes
This commit is contained in:
soxoj
2021-01-11 16:57:47 +03:00
committed by GitHub
7 changed files with 322 additions and 55 deletions
+1
View File
@@ -27,3 +27,4 @@ tests/.excluded_sites
# MacOS Folder Metadata File # MacOS Folder Metadata File
.DS_Store .DS_Store
/reports/
+25 -13
View File
@@ -26,7 +26,7 @@ from socid_extractor import parse, extract
from .notify import QueryNotifyPrint from .notify import QueryNotifyPrint
from .result import QueryResult, QueryStatus from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite from .sites import MaigretDatabase, MaigretSite
from .report import save_csv_report, genxmindfile, save_html_report from .report import save_csv_report, genxmindfile, save_html_pdf_report
import xmind import xmind
@@ -285,6 +285,8 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
return results_info return results_info
async def maigret(username, site_dict, query_notify, logger, async def maigret(username, site_dict, query_notify, logger,
proxy=None, timeout=None, recursive_search=False, proxy=None, timeout=None, recursive_search=False,
id_type='username', tags=None, debug=False, forced=False, id_type='username', tags=None, debug=False, forced=False,
@@ -705,6 +707,12 @@ async def main():
help="Generate an xmind 8 mindmap" help="Generate an xmind 8 mindmap"
) )
parser.add_argument("-P", "--pdf",
action="store_true",
dest="pdf", default=False,
help="Generate a pdf report"
)
args = parser.parse_args() args = parser.parse_args()
# Logging # Logging
@@ -851,18 +859,15 @@ async def main():
# The usernames results should be stored in a targeted folder. # The usernames results should be stored in a targeted folder.
# If the folder doesn't exist, create it first # If the folder doesn't exist, create it first
os.makedirs(args.folderoutput, exist_ok=True) os.makedirs(args.folderoutput, exist_ok=True)
result_file = os.path.join(args.folderoutput, f"{username}.txt") result_path = os.path.join(args.folderoutput, f"{username}.")
if args.xmind:
xmind_path = os.path.join(args.folderoutput, f"{username}.xmind")
else: else:
result_file = f"{username}.txt" result_path = os.path.join("reports", f"{username}.")
if args.xmind:
xmind_path = f"{username}.xmind"
if args.xmind: if args.xmind:
genxmindfile(xmind_path, username, results) genxmindfile(result_path+"xmind", username, results)
with open(result_file, "w", encoding="utf-8") as file:
with open(result_path+"txt", "w", encoding="utf-8") as file:
exists_counter = 0 exists_counter = 0
for website_name in results: for website_name in results:
dictionary = results[website_name] dictionary = results[website_name]
@@ -878,12 +883,20 @@ async def main():
exists_counter += 1 exists_counter += 1
file.write(dictionary["url_user"] + "\n") file.write(dictionary["url_user"] + "\n")
file.write(f"Total Websites Username Detected On : {exists_counter}") file.write(f"Total Websites Username Detected On : {exists_counter}")
file.close()
if args.csv: if args.csv:
save_csv_report(username, results) save_csv_report(username, results, result_path+"csv")
if args.html: pathPDF = None
save_html_report(general_results) pathHTML = None
if args.html:
pathHTML = result_path+"html"
if args.pdf:
pathPDF = result_path+"pdf"
if pathPDF or pathHTML:
save_html_pdf_report(general_results,pathHTML,pathPDF)
def run(): def run():
@@ -894,6 +907,5 @@ def run():
print('Maigret is interrupted.') print('Maigret is interrupted.')
sys.exit(1) sys.exit(1)
if __name__ == "__main__": if __name__ == "__main__":
run() run()
+119 -39
View File
@@ -3,28 +3,61 @@ from datetime import datetime
import logging import logging
import os import os
import xmind import xmind
import io
from xhtml2pdf import pisa
from jinja2 import Template from jinja2 import Template
import pycountry import pycountry
from .result import QueryStatus from .result import QueryStatus
from .utils import is_country_tag, CaseConverter, enrich_link_str from .utils import is_country_tag, CaseConverter, enrich_link_str
def save_csv_report(username: str, results: dict, filename: str):
    """Write the CSV report for ``username`` into the file at ``filename``.

    The target is opened for writing as UTF-8 with ``newline=''`` and the
    open handle is passed to ``save_csv_report_to_file``, which emits the
    rows.
    """
    open_options = {'newline': '', 'encoding': 'utf-8'}
    with open(filename, 'w', **open_options) as report_file:
        save_csv_report_to_file(username, results, report_file)
def retrive_timestamp(datestring: str):
    """Parse a date string into a ``datetime``.

    Two layouts are tried in order: ``YYYY-MM-DD HH:MM:SS`` and
    ``YYYY-MM-DDTHH:MM:SS``.

    Args:
        datestring: Text to parse.

    Returns:
        The parsed ``datetime``, or ``datetime.min`` when the value matches
        neither layout or is not a string.
    """
    known_formats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S')
    for date_format in known_formats:
        try:
            return datetime.strptime(datestring, date_format)
        except (ValueError, TypeError):
            # ValueError: the text does not match this layout.
            # TypeError: the input is not a string (e.g. None).
            # Anything else (KeyboardInterrupt, ...) must propagate.
            continue
    # NOTE(review): datetime.min sorts before every real date, so callers
    # looking for the earliest timestamp will rank unparseable values
    # first -- confirm this is intended.
    return datetime.min
def filterSupposedData(data):
    """Reduce collected personal data to the fields worth reporting.

    Args:
        data: Mapping of field name to a sequence of collected values.

    Returns:
        A dict containing only the ``fullname``, ``gender`` and ``location``
        entries of ``data``. Each key is converted with
        ``CaseConverter.snake_to_title`` and each value is the first
        collected value for that field. Fields whose value sequence is
        empty are left out instead of raising ``IndexError``.
    """
    allowed_fields = ('fullname', 'gender', 'location')
    return {
        CaseConverter.snake_to_title(field): values[0]
        for field, values in data.items()
        # An empty sequence has no first element to report.
        if field in allowed_fields and values
    }
def generate_template(pdf: bool):
    """Load the report template and register its helper globals.

    Args:
        pdf: When true, load ``resources/simple_report_pdf.tpl``; otherwise
            load ``resources/simple_report.tpl``. Both paths are resolved
            relative to the directory of this module.

    Returns:
        A ``jinja2.Template`` with ``title`` bound to
        ``CaseConverter.snake_to_title`` and ``detect_link`` bound to
        ``enrich_link_str`` in its globals.
    """
    if pdf:
        template_name = "resources/simple_report_pdf.tpl"
    else:
        template_name = "resources/simple_report.tpl"
    module_dir = os.path.dirname(os.path.realpath(__file__))

    # Context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(os.path.join(module_dir, template_name)) as template_file:
        template = Template(template_file.read())

    template.globals['title'] = CaseConverter.snake_to_title
    template.globals['detect_link'] = enrich_link_str
    return template
def save_html_pdf_report(username_results: list, filename:str=None, filenamepdf:str=None):
brief_text = [] brief_text = []
usernames = {} usernames = {}
extended_info_count = 0 extended_info_count = 0
tags = {} tags = {}
supposed_data = {} supposed_data = {}
allowed_fields = ['fullname', 'gender']
first_seen = None first_seen = None
first_seen_format = '%Y-%m-%d %H:%M:%S'
for username, id_type, results in username_results: for username, id_type, results in username_results:
found_accounts = 0 found_accounts = 0
@@ -51,8 +84,8 @@ def save_html_report(username_results: list):
if first_seen is None: if first_seen is None:
first_seen = created_at first_seen = created_at
else: else:
known_time = datetime.strptime(first_seen, first_seen_format) known_time = retrive_timestamp(first_seen)
new_time = datetime.strptime(created_at, first_seen_format) new_time = retrive_timestamp(created_at)
if new_time < known_time: if new_time < known_time:
first_seen = created_at first_seen = created_at
@@ -103,13 +136,7 @@ def save_html_report(username_results: list):
brief_text.append(f'Extended info extracted from {extended_info_count} accounts.') brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
# template generation
template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/simple_report.tpl")).read()
template = Template(template_text)
template.globals['title'] = CaseConverter.snake_to_title
template.globals['detect_link'] = enrich_link_str
brief = ' '.join(brief_text).strip() brief = ' '.join(brief_text).strip()
tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True) tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
@@ -122,23 +149,49 @@ def save_html_report(username_results: list):
countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items())) countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items()))
interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items())) interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items()))
filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0] filtered_supposed_data = filterSupposedData(supposed_data)
for k, v in supposed_data.items()
if k in allowed_fields} # save report in HTML
if(filename is not None):
template = generate_template(False)
filled_template = template.render(username=first_username,
brief=brief,
results=username_results,
first_seen=first_seen,
interests_tuple_list=tuple_sort(interests_list),
countries_tuple_list=tuple_sort(countries_lists),
supposed_data=filtered_supposed_data,
generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)
with open(filename, 'w') as f:
f.write(filled_template)
f.close()
# save report in PDF
if(filenamepdf is not None):
template = generate_template(True)
filled_template = template.render(username=first_username,
brief=brief,
results=username_results,
first_seen=first_seen,
interests_tuple_list=tuple_sort(interests_list),
countries_tuple_list=tuple_sort(countries_lists),
supposed_data=filtered_supposed_data,
generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)
csstext = ""
with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/simple_report_pdf.css"), "r") as cssfile:
cssline = cssfile.readline()
csstext += cssline
while cssline:
cssline = cssfile.readline()
csstext += cssline
cssfile.close()
pdffile = open(filenamepdf, "w+b")
pisa.pisaDocument(io.StringIO(filled_template), dest=pdffile, default_css=csstext)
pdffile.close()
filled_template = template.render(username=first_username,
brief=brief,
results=username_results,
first_seen=first_seen,
interests_tuple_list=tuple_sort(interests_list),
countries_tuple_list=tuple_sort(countries_lists),
supposed_data=filtered_supposed_data,
generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)
# save report
html_filename = f'report_{first_username}.html'
with open(html_filename, 'w') as f:
f.write(filled_template)
def save_csv_report_to_file(username: str, results: dict, csvfile): def save_csv_report_to_file(username: str, results: dict, csvfile):
print(results) print(results)
@@ -160,20 +213,23 @@ def save_csv_report_to_file(username: str, results: dict, csvfile):
results[site]['http_status'], results[site]['http_status'],
]) ])
'''
XMIND 8 Functions
'''
def genxmindfile(filename, username, results):
    """Create the XMind 8 mind map for ``username`` at ``filename``.

    A file already present at ``filename`` is deleted first. The workbook is
    then loaded from that path, its primary sheet is filled in by
    ``design_sheet`` and the workbook is saved back to the same path.
    """
    print(f'Generating XMIND8 file for username {username}')

    # NOTE(review): presumably removed so xmind.load starts from an empty
    # workbook rather than a previous run's content -- confirm.
    stale_file_present = os.path.exists(filename)
    if stale_file_present:
        os.remove(filename)

    mindmap = xmind.load(filename)
    primary_sheet = mindmap.getPrimarySheet()
    design_sheet(primary_sheet, username, results)
    xmind.save(mindmap, path=filename)
def design_sheet1(sheet, username, results): def design_sheet(sheet, username, results):
##all tag list ##all tag list
alltags = {} alltags = {}
supposed_data = {}
sheet.setTitle("%s Analysis"%(username)) sheet.setTitle("%s Analysis"%(username))
root_topic1 = sheet.getRootTopic() root_topic1 = sheet.getRootTopic()
@@ -198,7 +254,6 @@ def design_sheet1(sheet, username, results):
alltags[tag] = tagsection alltags[tag] = tagsection
category = None category = None
userlink= None
for tag in dictionary.get("status").tags: for tag in dictionary.get("status").tags:
if tag.strip() == "": if tag.strip() == "":
continue continue
@@ -206,12 +261,37 @@ def design_sheet1(sheet, username, results):
category = tag category = tag
if category is None: if category is None:
category = "undefined"
userlink = undefinedsection.addSubTopic() userlink = undefinedsection.addSubTopic()
userlink.addLabel(dictionary.get("status").site_url_user)
else: else:
userlink = alltags[category].addSubTopic() userlink = alltags[category].addSubTopic()
userlink.addLabel(dictionary.get("status").site_url_user) userlink.addLabel(dictionary.get("status").site_url_user)
if dictionary.get("status").ids_data:
for k, v in dictionary.get("status").ids_data.items():
# suppose target data
if not isinstance(v, list):
currentsublabel = userlink.addSubTopic()
field = 'fullname' if k == 'name' else k
if not field in supposed_data:
supposed_data[field] = []
supposed_data[field].append(v)
currentsublabel.setTitle("%s: %s" % (k, v))
else:
for currentval in v:
currentsublabel = userlink.addSubTopic()
field = 'fullname' if k == 'name' else k
if not field in supposed_data:
supposed_data[field] = []
supposed_data[field].append(currentval)
currentsublabel.setTitle("%s: %s" % (k, currentval))
### Add Supposed DATA
filterede_supposed_data = filterSupposedData(supposed_data)
if(len(filterede_supposed_data) >0):
undefinedsection = root_topic1.addSubTopic()
undefinedsection.setTitle("SUPPOSED DATA")
for k, v in filterede_supposed_data.items():
currentsublabel = undefinedsection.addSubTopic()
currentsublabel.setTitle("%s: %s" % (k, v))
#for tag in dictionary.get("status").tags:
# if( tag != category ):
# sheet.createRelationship(userlink.getID(), alltags[tag].getID(),"other tag")
+41
View File
@@ -0,0 +1,41 @@
/* Stylesheet for a report document: headings, paragraphs, tables and
   section separators.
   NOTE(review): presumably resources/simple_report_pdf.css, the file that
   save_html_pdf_report reads and passes to pisa as default_css -- confirm. */

/* Headings: full-width block elements, shrinking from h2 to h4. */
h2 {
font-size: 30px;
width: 100%;
display:block;
}
h3 {
font-size: 25px;
width: 100%;
display:block;
}
h4 {
font-size: 20px;
width: 100%;
display:block;
}
/* Paragraphs: only a small bottom margin. */
p {
margin: 0 0 5px;
display: block;
}
/* Tables span the full width with a gap below them. */
table {
margin-bottom: 10px;
width:100%;
}
th {
font-weight: bold;
}
/* Shared cell padding (top right bottom left). */
th,td,caption {
padding: 4px 10px 4px 5px;
}
/* Zebra striping: light blue background on even rows, selected either by
   position or by an explicit "even" class. */
table tr:nth-child(even) td,
table tr.even td {
background-color: #e5ecf9;
}
/* Every div gets a 1px solid dark-gray rule along its bottom edge. */
div {
border-bottom-color: #3e3e3e;
border-bottom-width: 1px;
border-bottom-style: solid;
}
+113
View File
@@ -0,0 +1,113 @@
{#
  Report template (Jinja2).

  Context variables read below:
    username, generated_at      -- page title and heading
    supposed_data               -- mapping rendered as "key: value" lines
    countries_tuple_list,
    interests_tuple_list        -- iterables of (name, count) pairs
    first_seen, brief           -- summary text
    results                     -- iterable of (u, t, data) triples; data maps a
                                   site name to a result entry
  Template globals called below: title(), detect_link().
#}
<html>
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no" />
  <title>{{ username }} -- Maigret username search report</title>
</head>
<body>
<div class="container">
  {# Report header #}
  <div class="row-mb">
    <div class="col-12 card-body" style="padding-bottom: 0.5rem; width:100%">
      <h2 class="mb-0">
        Username search report for {{ username }}
      </h2>
      <small>Generated at {{ generated_at }}</small>
    </div>
  </div>
  {# Aggregated personal data #}
  <div>
    <div>
      <div>
        <div>
          <h3>Supposed personal data</h3>
          {% for k, v in supposed_data.items() %}
          <p>
            {{ k }}: {{ v }}
          </p>
          {% endfor %}
          {% if countries_tuple_list %}
          <p>
            Geo: {% for k, v in countries_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
          </p>
          {% endif %}{% if interests_tuple_list %}
          <p>
            Interests: {% for k, v in interests_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
          </p>
          {% endif %}{% if first_seen %}
          <p>
            First seen: {{ first_seen }}
          </p>
          {% endif %}
        </div>
      </div>
    </div>
  </div>
  {# Short textual summary #}
  <div>
    <div>
      <div>
        <div>
          <h3>Brief</h3>
          <p>
            {{ brief }}
          </p>
        </div>
      </div>
    </div>
  </div>
  {# One box per entry that is found and not flagged as similar #}
  {% for u, t, data in results %}
  {% for k, v in data.items() %}
  {% if v.found and not v.is_similar %}
  <split></split>
  <br/>
  <div class="sitebox" style="margin-top: 20px;" >
    <div>
      <div>
        <table>
          <tr>
            <td style="width:201px;" >
              <img alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
            </td>
            <td style="width:10px;" ></td>
            <td valign="top">
              <div class="textbox" style="padding-top: 10px;" >
                <h3>
                  <a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
                </h3>
                {% if v.status.tags %}
                <div class="mb-1 text-muted">Tags: {{ v.status.tags | join(', ') }}</div>
                {% endif %}
                <p class="card-text">
                  <a href="{{ v.url_user }}" target="_blank">{{ v.url_user }}</a>
                </p>
              </div>
            </td>
          </tr>
        </table>
        {% if v.ids_data %}
        <div style="clear:both;"></div>
        <div style="width:100%">
          <br/>
          <h4>Details</h4>
          <table class="table table-striped" style="margin-top:5px;">
            <tbody>
              {# The image is already shown above, so it is skipped here #}
              {% for k1, v1 in v.ids_data.items() %}
              {% if k1 != 'image' %}
              <tr>
                <th style="width:100px;">{{ title(k1) }}</th>
                <td>{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}</td>
              </tr>
              {% endif %}
              {% endfor %}
            </tbody>
          </table>
        </div>
        {% endif %}
      </div>
    </div>
  </div>
  {% endif %}
  {% endfor %}
  {% endfor %}
</div>
</body>
</html>
+11
View File
@@ -1,5 +1,6 @@
aiohttp==3.7.3 aiohttp==3.7.3
aiohttp-socks==0.5.5 aiohttp-socks==0.5.5
arabic-reshaper==2.1.1
async-timeout==3.0.1 async-timeout==3.0.1
attrs==20.3.0 attrs==20.3.0
beautifulsoup4==4.9.3 beautifulsoup4==4.9.3
@@ -7,16 +8,24 @@ bs4==0.0.1
certifi==2020.12.5 certifi==2020.12.5
chardet==3.0.4 chardet==3.0.4
colorama==0.4.4 colorama==0.4.4
future==0.18.2
html5lib==1.1
idna==2.10 idna==2.10
Jinja2==2.11.2 Jinja2==2.11.2
lxml==4.6.2 lxml==4.6.2
MarkupSafe==1.1.1
mock==4.0.2 mock==4.0.2
multidict==5.1.0 multidict==5.1.0
Pillow==8.1.0
pycountry==20.7.3 pycountry==20.7.3
PyPDF2==1.26.0
PySocks==1.7.1 PySocks==1.7.1
python-bidi==0.4.2
python-socks==1.1.2 python-socks==1.1.2
reportlab==3.5.59
requests==2.25.1 requests==2.25.1
requests-futures==1.0.0 requests-futures==1.0.0
six==1.15.0
socid-extractor==0.0.2 socid-extractor==0.0.2
soupsieve==2.1 soupsieve==2.1
stem==1.8.0 stem==1.8.0
@@ -24,5 +33,7 @@ torrequest==0.1.0
tqdm==4.55.0 tqdm==4.55.0
typing-extensions==3.7.4.3 typing-extensions==3.7.4.3
urllib3==1.26.2 urllib3==1.26.2
webencodings==0.5.1
xhtml2pdf==0.2.5
XMind==1.2.0 XMind==1.2.0
yarl==1.6.3 yarl==1.6.3
+12 -3
View File
@@ -5,7 +5,7 @@ import os
import xmind import xmind
from maigret.report import save_csv_report_to_file, genxmindfile, save_html_report from maigret.report import save_csv_report_to_file, genxmindfile, save_html_pdf_report
from maigret.result import QueryResult, QueryStatus from maigret.result import QueryResult, QueryStatus
@@ -93,8 +93,7 @@ def test_html_report():
except: except:
pass pass
save_html_report(TEST) save_html_pdf_report(TEST,filename=report_name,filenamepdf=None)
assert os.path.exists(report_name) assert os.path.exists(report_name)
report_text = open(report_name).read() report_text = open(report_name).read()
@@ -102,3 +101,13 @@ def test_html_report():
assert SUPPOSED_BRIEF in report_text assert SUPPOSED_BRIEF in report_text
assert SUPPOSED_GEO in report_text assert SUPPOSED_GEO in report_text
assert SUPPOSED_INTERESTS in report_text assert SUPPOSED_INTERESTS in report_text
def test_pdf_report():
    """Requesting only a PDF from save_html_pdf_report creates that file."""
    report_name_pdf = 'report_alexaimephotographycars.pdf'

    # Start from a clean slate. A missing file is the only expected failure;
    # any other error (e.g. permissions) should surface instead of letting a
    # stale report satisfy the assertion below.
    try:
        os.remove(report_name_pdf)
    except FileNotFoundError:
        pass

    save_html_pdf_report(TEST, filename=None, filenamepdf=report_name_pdf)

    assert os.path.exists(report_name_pdf)