Merge pull request #19 from Ping-uu/main

Pdf report + Xmind 8 Fixes + save path Fixes
This commit is contained in:
soxoj
2021-01-11 16:57:47 +03:00
committed by GitHub
7 changed files with 322 additions and 55 deletions
+1
View File
@@ -27,3 +27,4 @@ tests/.excluded_sites
# MacOS Folder Metadata File
.DS_Store
/reports/
+25 -13
View File
@@ -26,7 +26,7 @@ from socid_extractor import parse, extract
from .notify import QueryNotifyPrint
from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite
from .report import save_csv_report, genxmindfile, save_html_report
from .report import save_csv_report, genxmindfile, save_html_pdf_report
import xmind
@@ -285,6 +285,8 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
return results_info
async def maigret(username, site_dict, query_notify, logger,
proxy=None, timeout=None, recursive_search=False,
id_type='username', tags=None, debug=False, forced=False,
@@ -705,6 +707,12 @@ async def main():
help="Generate an xmind 8 mindmap"
)
parser.add_argument("-P", "--pdf",
action="store_true",
dest="pdf", default=False,
help="Generate a pdf report"
)
args = parser.parse_args()
# Logging
@@ -851,18 +859,15 @@ async def main():
# The usernames results should be stored in a targeted folder.
# If the folder doesn't exist, create it first
os.makedirs(args.folderoutput, exist_ok=True)
result_file = os.path.join(args.folderoutput, f"{username}.txt")
if args.xmind:
xmind_path = os.path.join(args.folderoutput, f"{username}.xmind")
result_path = os.path.join(args.folderoutput, f"{username}.")
else:
result_file = f"{username}.txt"
if args.xmind:
xmind_path = f"{username}.xmind"
result_path = os.path.join("reports", f"{username}.")
if args.xmind:
genxmindfile(xmind_path, username, results)
genxmindfile(result_path+"xmind", username, results)
with open(result_file, "w", encoding="utf-8") as file:
with open(result_path+"txt", "w", encoding="utf-8") as file:
exists_counter = 0
for website_name in results:
dictionary = results[website_name]
@@ -878,12 +883,20 @@ async def main():
exists_counter += 1
file.write(dictionary["url_user"] + "\n")
file.write(f"Total Websites Username Detected On : {exists_counter}")
file.close()
if args.csv:
save_csv_report(username, results)
save_csv_report(username, results, result_path+"csv")
if args.html:
save_html_report(general_results)
pathPDF = None
pathHTML = None
if args.html:
pathHTML = result_path+"html"
if args.pdf:
pathPDF = result_path+"pdf"
if pathPDF or pathHTML:
save_html_pdf_report(general_results,pathHTML,pathPDF)
def run():
@@ -894,6 +907,5 @@ def run():
print('Maigret is interrupted.')
sys.exit(1)
if __name__ == "__main__":
run()
+119 -39
View File
@@ -3,28 +3,61 @@ from datetime import datetime
import logging
import os
import xmind
import io
from xhtml2pdf import pisa
from jinja2 import Template
import pycountry
from .result import QueryStatus
from .utils import is_country_tag, CaseConverter, enrich_link_str
def save_csv_report(username: str, results: dict):
with open(username + '.csv', 'w', newline='', encoding='utf-8') as csvfile:
def save_csv_report(username: str, results: dict, filename:str):
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
save_csv_report_to_file(username, results, csvfile)
def retrive_timestamp(datestring: str):
    """Parse a timestamp string into a ``datetime``.

    Two layouts are tried in order: ``YYYY-MM-DD HH:MM:SS`` and
    ``YYYY-MM-DDTHH:MM:SS``.

    Args:
        datestring: Timestamp text to parse.

    Returns:
        The parsed ``datetime``. If the value matches neither layout (or is
        not a string at all) ``datetime.min`` is returned, so the result is
        always comparable; note that such a value therefore compares as
        earlier than any real date.
    """
    known_formats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S')
    for fmt in known_formats:
        try:
            return datetime.strptime(datestring, fmt)
        except (ValueError, TypeError):
            # ValueError: the text does not match this layout.
            # TypeError: the input is not a string (e.g. None).
            # Anything else (KeyboardInterrupt, SystemExit, ...) propagates.
            continue
    return datetime.min
def save_html_report(username_results: list):
def filterSupposedData(data):
    """Keep only the interesting fields of the collected "supposed" data.

    Only the ``fullname``, ``gender`` and ``location`` keys are retained.
    Each retained key is converted with ``CaseConverter.snake_to_title`` and
    mapped to the first element of its value.

    Args:
        data: Mapping of field name to an indexable collection of values.

    Returns:
        A new dict of converted field name -> first value.
    """
    wanted = ['fullname', 'gender', 'location']
    picked = {}
    for field, values in data.items():
        if field not in wanted:
            continue
        label = CaseConverter.snake_to_title(field)
        picked[label] = values[0]
    return picked
def generate_template(pdf: bool):
    """Load a report template and return it as a Jinja2 ``Template``.

    Args:
        pdf: When true, ``resources/simple_report_pdf.tpl`` is loaded;
            otherwise ``resources/simple_report.tpl``. Both paths are
            resolved relative to the directory containing this module.

    Returns:
        A ``Template`` whose globals expose ``title``
        (``CaseConverter.snake_to_title``) and ``detect_link``
        (``enrich_link_str``).
    """
    template_name = ("resources/simple_report_pdf.tpl" if pdf
                     else "resources/simple_report.tpl")
    template_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 template_name)

    # Read through a context manager so the file handle is always closed
    # instead of being left for the garbage collector.
    with open(template_path) as template_file:
        template_text = template_file.read()

    template = Template(template_text)
    template.globals['title'] = CaseConverter.snake_to_title
    template.globals['detect_link'] = enrich_link_str
    return template
def save_html_pdf_report(username_results: list, filename:str=None, filenamepdf:str=None):
brief_text = []
usernames = {}
extended_info_count = 0
tags = {}
supposed_data = {}
allowed_fields = ['fullname', 'gender']
first_seen = None
first_seen_format = '%Y-%m-%d %H:%M:%S'
for username, id_type, results in username_results:
found_accounts = 0
@@ -51,8 +84,8 @@ def save_html_report(username_results: list):
if first_seen is None:
first_seen = created_at
else:
known_time = datetime.strptime(first_seen, first_seen_format)
new_time = datetime.strptime(created_at, first_seen_format)
known_time = retrive_timestamp(first_seen)
new_time = retrive_timestamp(created_at)
if new_time < known_time:
first_seen = created_at
@@ -103,13 +136,7 @@ def save_html_report(username_results: list):
brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
# template generation
template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/simple_report.tpl")).read()
template = Template(template_text)
template.globals['title'] = CaseConverter.snake_to_title
template.globals['detect_link'] = enrich_link_str
brief = ' '.join(brief_text).strip()
tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
@@ -122,23 +149,49 @@ def save_html_report(username_results: list):
countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items()))
interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items()))
filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0]
for k, v in supposed_data.items()
if k in allowed_fields}
filtered_supposed_data = filterSupposedData(supposed_data)
# save report in HTML
if(filename is not None):
template = generate_template(False)
filled_template = template.render(username=first_username,
brief=brief,
results=username_results,
first_seen=first_seen,
interests_tuple_list=tuple_sort(interests_list),
countries_tuple_list=tuple_sort(countries_lists),
supposed_data=filtered_supposed_data,
generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)
with open(filename, 'w') as f:
f.write(filled_template)
f.close()
# save report in PDF
if(filenamepdf is not None):
template = generate_template(True)
filled_template = template.render(username=first_username,
brief=brief,
results=username_results,
first_seen=first_seen,
interests_tuple_list=tuple_sort(interests_list),
countries_tuple_list=tuple_sort(countries_lists),
supposed_data=filtered_supposed_data,
generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)
csstext = ""
with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/simple_report_pdf.css"), "r") as cssfile:
cssline = cssfile.readline()
csstext += cssline
while cssline:
cssline = cssfile.readline()
csstext += cssline
cssfile.close()
pdffile = open(filenamepdf, "w+b")
pisa.pisaDocument(io.StringIO(filled_template), dest=pdffile, default_css=csstext)
pdffile.close()
filled_template = template.render(username=first_username,
brief=brief,
results=username_results,
first_seen=first_seen,
interests_tuple_list=tuple_sort(interests_list),
countries_tuple_list=tuple_sort(countries_lists),
supposed_data=filtered_supposed_data,
generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)
# save report
html_filename = f'report_{first_username}.html'
with open(html_filename, 'w') as f:
f.write(filled_template)
def save_csv_report_to_file(username: str, results: dict, csvfile):
print(results)
@@ -160,20 +213,23 @@ def save_csv_report_to_file(username: str, results: dict, csvfile):
results[site]['http_status'],
])
'''
XMIND 8 Functions
'''
def genxmindfile(filename, username, results):
print(f'Generating XMIND8 file for username {username}')
if os.path.exists(filename):
os.remove(filename)
workbook = xmind.load(filename)
sheet = workbook.getPrimarySheet()
design_sheet1(sheet, username, results)
design_sheet(sheet, username, results)
xmind.save(workbook, path=filename)
def design_sheet1(sheet, username, results):
def design_sheet(sheet, username, results):
##all tag list
alltags = {}
supposed_data = {}
sheet.setTitle("%s Analysis"%(username))
root_topic1 = sheet.getRootTopic()
@@ -198,7 +254,6 @@ def design_sheet1(sheet, username, results):
alltags[tag] = tagsection
category = None
userlink= None
for tag in dictionary.get("status").tags:
if tag.strip() == "":
continue
@@ -206,12 +261,37 @@ def design_sheet1(sheet, username, results):
category = tag
if category is None:
category = "undefined"
userlink = undefinedsection.addSubTopic()
userlink.addLabel(dictionary.get("status").site_url_user)
else:
userlink = alltags[category].addSubTopic()
userlink.addLabel(dictionary.get("status").site_url_user)
userlink.addLabel(dictionary.get("status").site_url_user)
if dictionary.get("status").ids_data:
for k, v in dictionary.get("status").ids_data.items():
# suppose target data
if not isinstance(v, list):
currentsublabel = userlink.addSubTopic()
field = 'fullname' if k == 'name' else k
if not field in supposed_data:
supposed_data[field] = []
supposed_data[field].append(v)
currentsublabel.setTitle("%s: %s" % (k, v))
else:
for currentval in v:
currentsublabel = userlink.addSubTopic()
field = 'fullname' if k == 'name' else k
if not field in supposed_data:
supposed_data[field] = []
supposed_data[field].append(currentval)
currentsublabel.setTitle("%s: %s" % (k, currentval))
### Add Supposed DATA
filterede_supposed_data = filterSupposedData(supposed_data)
if(len(filterede_supposed_data) >0):
undefinedsection = root_topic1.addSubTopic()
undefinedsection.setTitle("SUPPOSED DATA")
for k, v in filterede_supposed_data.items():
currentsublabel = undefinedsection.addSubTopic()
currentsublabel.setTitle("%s: %s" % (k, v))
#for tag in dictionary.get("status").tags:
# if( tag != category ):
# sheet.createRelationship(userlink.getID(), alltags[tag].getID(),"other tag")
+41
View File
@@ -0,0 +1,41 @@
/*
 * Stylesheet for the generated report.
 * NOTE(review): presumably this is resources/simple_report_pdf.css, the file
 * the report code reads and hands to xhtml2pdf as its default CSS -- confirm
 * the file name against the commit.
 */

/* Headings: full-width block elements with decreasing font sizes. */
h2 {
font-size: 30px;
width: 100%;
display:block;
}
h3 {
font-size: 25px;
width: 100%;
display:block;
}
h4 {
font-size: 20px;
width: 100%;
display:block;
}
/* Paragraphs: no top/side margin, 5px gap below. */
p {
margin: 0 0 5px;
display: block;
}
/* Tables span the full width and leave a 10px gap below. */
table {
margin-bottom: 10px;
width:100%;
}
th {
font-weight: bold;
}
/* Cell padding: top 4px, right 10px, bottom 4px, left 5px. */
th,td,caption {
padding: 4px 10px 4px 5px;
}
/* Zebra striping: cells of even rows (matched by :nth-child(even) or by an
   explicit "even" class on the row) get a light blue background. */
table tr:nth-child(even) td,
table tr.even td {
background-color: #e5ecf9;
}
/* Every div draws a 1px solid dark-grey line along its bottom edge. */
div {
border-bottom-color: #3e3e3e;
border-bottom-width: 1px;
border-bottom-style: solid;
}
+113
View File
@@ -0,0 +1,113 @@
<html>
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no" />
<title>{{ username }} -- Maigret username search report</title>
</head>
<body>
<div class="container">
<div class="row-mb">
<div class="col-12 card-body" style="padding-bottom: 0.5rem; width:100%">
<h2 class="mb-0">
Username search report for {{ username }}
</h2>
<small>Generated at {{ generated_at }}</small>
</div>
</div>
<div>
<div>
<div>
<div>
<h3>Supposed personal data</h3>
{% for k, v in supposed_data.items() %}
<p>
{{ k }}: {{ v }}
</p>
{% endfor %}
{% if countries_tuple_list %}
<p>
Geo: {% for k, v in countries_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
</p>
{% endif %}{% if interests_tuple_list %}
<p>
Interests: {% for k, v in interests_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
</p>
{% endif %}{% if first_seen %}
<p>
First seen: {{ first_seen }}
</p>
{% endif %}
</div>
</div>
</div>
</div>
<div>
<div>
<div>
<div>
<h3>Brief</h3>
<p>
{{ brief }}
</p>
</div>
</div>
</div>
</div>
{% for u, t, data in results %}
{% for k, v in data.items() %}
{% if v.found and not v.is_similar %}
<split></split>
<br/>
<div class="sitebox" style="margin-top: 20px;" >
<div>
<div>
<table>
<tr>
<td style="width:201px;" >
<img alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
</td>
<td style="width:10px;" ></td>
<td valign="top">
<div class="textbox" style="padding-top: 10px;" >
<h3>
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
</h3>
{% if v.status.tags %}
<div class="mb-1 text-muted">Tags: {{ v.status.tags | join(', ') }}</div>
{% endif %}
<p class="card-text">
<a href="{{ v.url_user }}" target="_blank">{{ v.url_user }}</a>
</p>
</div>
</td>
</tr>
</table>
{% if v.ids_data %}
<div style="clear:both;"></div>
<div style="width:100%">
<br/>
<h4>Details</h4>
<table class="table table-striped" style="margin-top:5px;">
<tbody>
{% for k1, v1 in v.ids_data.items() %}
{% if k1 != 'image' %}
<tr>
<th style="width:100px;">{{ title(k1) }}</th>
<td>{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}</td>
</tr>
{% endif %}
{% endfor %}
</tbody>
</table>
</div>
{% endif %}
</div>
</div>
</div>
{% endif %}
{% endfor %}
{% endfor %}
</div>
</body>
</html>
+11
View File
@@ -1,5 +1,6 @@
aiohttp==3.7.3
aiohttp-socks==0.5.5
arabic-reshaper==2.1.1
async-timeout==3.0.1
attrs==20.3.0
beautifulsoup4==4.9.3
@@ -7,16 +8,24 @@ bs4==0.0.1
certifi==2020.12.5
chardet==3.0.4
colorama==0.4.4
future==0.18.2
html5lib==1.1
idna==2.10
Jinja2==2.11.2
lxml==4.6.2
MarkupSafe==1.1.1
mock==4.0.2
multidict==5.1.0
Pillow==8.1.0
pycountry==20.7.3
PyPDF2==1.26.0
PySocks==1.7.1
python-bidi==0.4.2
python-socks==1.1.2
reportlab==3.5.59
requests==2.25.1
requests-futures==1.0.0
six==1.15.0
socid-extractor==0.0.2
soupsieve==2.1
stem==1.8.0
@@ -24,5 +33,7 @@ torrequest==0.1.0
tqdm==4.55.0
typing-extensions==3.7.4.3
urllib3==1.26.2
webencodings==0.5.1
xhtml2pdf==0.2.5
XMind==1.2.0
yarl==1.6.3
+12 -3
View File
@@ -5,7 +5,7 @@ import os
import xmind
from maigret.report import save_csv_report_to_file, genxmindfile, save_html_report
from maigret.report import save_csv_report_to_file, genxmindfile, save_html_pdf_report
from maigret.result import QueryResult, QueryStatus
@@ -93,8 +93,7 @@ def test_html_report():
except:
pass
save_html_report(TEST)
save_html_pdf_report(TEST,filename=report_name,filenamepdf=None)
assert os.path.exists(report_name)
report_text = open(report_name).read()
@@ -102,3 +101,13 @@ def test_html_report():
assert SUPPOSED_BRIEF in report_text
assert SUPPOSED_GEO in report_text
assert SUPPOSED_INTERESTS in report_text
def test_pdf_report():
    """Check that a PDF-only report run creates the PDF file."""
    report_name_pdf = 'report_alexaimephotographycars.pdf'

    # Remove any artifact left by a previous run so the existence check
    # below reflects this call. Only "file is not there" is expected here;
    # any other failure should surface rather than be silently swallowed.
    try:
        os.remove(report_name_pdf)
    except FileNotFoundError:
        pass

    save_html_pdf_report(TEST, filename=None, filenamepdf=report_name_pdf)

    assert os.path.exists(report_name_pdf)