HTML reports draft, 500 sites scanning by default

This commit is contained in:
Soxoj
2021-01-07 23:52:29 +03:00
parent 5c8b65d033
commit e4765d1ed9
11 changed files with 544 additions and 65 deletions
+17 -2
View File
@@ -26,7 +26,7 @@ from socid_extractor import parse, extract
from .notify import QueryNotifyPrint from .notify import QueryNotifyPrint
from .result import QueryResult, QueryStatus from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite from .sites import MaigretDatabase, MaigretSite
from .report import save_csv_report, genxmindfile from .report import save_csv_report, genxmindfile, save_html_report
import xmind import xmind
@@ -629,6 +629,10 @@ async def main():
action="store_true", dest="csv", default=False, action="store_true", dest="csv", default=False,
help="Create Comma-Separated Values (CSV) File." help="Create Comma-Separated Values (CSV) File."
) )
parser.add_argument("--html",
action="store_true", dest="html", default=False,
help="Create HTML report file."
)
parser.add_argument("--site", parser.add_argument("--site",
action="append", metavar='SITE_NAME', action="append", metavar='SITE_NAME',
dest="site_list", default=None, dest="site_list", default=None,
@@ -649,6 +653,10 @@ async def main():
"A longer timeout will be more likely to get results from slow sites." "A longer timeout will be more likely to get results from slow sites."
"On the other hand, this may cause a long delay to gather all results." "On the other hand, this may cause a long delay to gather all results."
) )
parser.add_argument("--top-sites",
action="store", default=500,
help="Count of sites for checking ranked by Alexa Top (default: 500)."
)
parser.add_argument("--print-not-found", parser.add_argument("--print-not-found",
action="store_true", dest="print_not_found", default=False, action="store_true", dest="print_not_found", default=False,
help="Print sites where the username was not found." help="Print sites where the username was not found."
@@ -757,7 +765,8 @@ async def main():
# Create object with all information about sites we are aware of. # Create object with all information about sites we are aware of.
try: try:
site_data_all = MaigretDatabase().load_from_file(args.json_file).sites_dict db = MaigretDatabase().load_from_file(args.json_file)
site_data_all = db.ranked_sites_dict(top=args.top_sites)
except Exception as error: except Exception as error:
print(f"ERROR: {error}") print(f"ERROR: {error}")
sys.exit(1) sys.exit(1)
@@ -805,6 +814,8 @@ async def main():
already_checked = set() already_checked = set()
general_results = []
while usernames: while usernames:
username, id_type = list(usernames.items())[0] username, id_type = list(usernames.items())[0]
del usernames[username] del usernames[username]
@@ -834,6 +845,7 @@ async def main():
logger=logger, logger=logger,
forced=args.use_disabled_sites, forced=args.use_disabled_sites,
) )
general_results.append((username, id_type, results))
if args.folderoutput: if args.folderoutput:
# The usernames results should be stored in a targeted folder. # The usernames results should be stored in a targeted folder.
@@ -870,6 +882,9 @@ async def main():
if args.csv: if args.csv:
save_csv_report(username, results) save_csv_report(username, results)
if args.html:
save_html_report(general_results)
def run(): def run():
try: try:
+215
View File
@@ -0,0 +1,215 @@
import csv
from datetime import datetime
import logging
import os
import xmind
from jinja2 import Template
import pycountry
from .result import QueryStatus
from .utils import is_country_tag, CaseConverter, enrich_link_str
def save_csv_report(username: str, results: dict):
    """Write a CSV report for `username` into `<username>.csv` in the CWD."""
    report_path = username + '.csv'
    with open(report_path, 'w', newline='', encoding='utf-8') as out:
        save_csv_report_to_file(username, results, out)
def save_html_report(username_results: list):
    """Render an HTML report for one or more username searches.

    Args:
        username_results: list of (username, id_type, results) tuples, where
            `results` maps site name -> result dict produced by the checker.

    Side effects:
        Writes 'report_<first username>.html' to the current directory.
        Raises IndexError if `username_results` is empty (caller must pass
        at least one search result).
    """
    brief_text = []
    usernames = {}
    extended_info_count = 0
    tags = {}
    supposed_data = {}
    # only these extracted fields are shown in the "supposed personal data" card
    allowed_fields = ['fullname', 'gender']
    first_seen = None
    first_seen_format = '%Y-%m-%d %H:%M:%S'

    for username, id_type, results in username_results:
        found_accounts = 0
        new_ids = []
        usernames[username] = {'type': id_type}

        for website_name in results:
            dictionary = results[website_name]
            # TODO: fix no site data issue
            if not dictionary:
                continue

            status = dictionary.get('status')
            if status.ids_data:
                dictionary['ids_data'] = status.ids_data
                extended_info_count += 1

                # detect first seen: keep the earliest 'created_at' timestamp
                created_at = status.ids_data.get('created_at')
                if created_at:
                    if first_seen is None:
                        first_seen = created_at
                    else:
                        known_time = datetime.strptime(first_seen, first_seen_format)
                        new_time = datetime.strptime(created_at, first_seen_format)
                        if new_time < known_time:
                            first_seen = created_at

                for k, v in status.ids_data.items():
                    # suppose target data
                    field = 'fullname' if k == 'name' else k
                    if field not in supposed_data:
                        supposed_data[field] = []
                    supposed_data[field].append(v)
                    # suppose country
                    if k in ['country', 'locale']:
                        try:
                            if is_country_tag(k):
                                tag = pycountry.countries.get(alpha_2=v).alpha_2.lower()
                            else:
                                tag = pycountry.countries.search_fuzzy(v)[0].alpha_2.lower()
                            # TODO: move countries to another struct
                            tags[tag] = tags.get(tag, 0) + 1
                        except Exception:
                            # fuzzy country lookup is best-effort only
                            logging.debug('pycountry exception', exc_info=True)

            new_usernames = dictionary.get('ids_usernames')
            if new_usernames:
                for u, utype in new_usernames.items():
                    if u not in usernames:
                        new_ids.append((u, utype))
                        usernames[u] = {'type': utype}

            if status.status == QueryStatus.CLAIMED:
                found_accounts += 1
                dictionary['found'] = True
            else:
                continue

            if not dictionary.get('is_similar'):
                # ignore non-exact search results when counting tags
                if status.tags:
                    for t in status.tags:
                        tags[t] = tags.get(t, 0) + 1

        brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')

        if new_ids:
            ids_list = []
            for u, t in new_ids:
                ids_list.append(f'{u} ({t})' if t != 'username' else u)
            brief_text.append('Found target\'s other IDs: ' + ', '.join(ids_list) + '.')

    brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')

    # template generation: the template is shipped next to this module
    template_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "resources/simple_report.tpl")
    with open(template_path) as template_file:
        template_text = template_file.read()
    template = Template(template_text)
    template.globals['title'] = CaseConverter.snake_to_title
    template.globals['detect_link'] = enrich_link_str

    brief = ' '.join(brief_text).strip()
    tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)

    if 'global' in tags:
        # remove tag 'global' useless for country detection
        del tags['global']

    first_username = username_results[0][0]
    countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items()))
    interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items()))

    # keep only the first supposed value per allowed field, titled for display
    filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0]
                              for k, v in supposed_data.items()
                              if k in allowed_fields}

    filled_template = template.render(username=first_username,
                                      brief=brief,
                                      results=username_results,
                                      first_seen=first_seen,
                                      interests_tuple_list=tuple_sort(interests_list),
                                      countries_tuple_list=tuple_sort(countries_lists),
                                      supposed_data=filtered_supposed_data,
                                      generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                                      )

    # save report
    html_filename = f'report_{first_username}.html'
    with open(html_filename, 'w') as f:
        f.write(filled_template)
def save_csv_report_to_file(username: str, results: dict, csvfile):
    """Write search results as CSV rows into an already-open file object.

    Args:
        username: the username the search was run for (repeated in each row).
        results: maps site name -> result dict with 'url_main', 'url_user',
            'status' (object with a `.status` attribute) and 'http_status'.
        csvfile: writable text file object (opened with newline='').
    """
    # NOTE: removed stray debug `print(results)` left over from development
    writer = csv.writer(csvfile)
    writer.writerow(['username',
                     'name',
                     'url_main',
                     'url_user',
                     'exists',
                     'http_status'
                     ]
                    )
    for site in results:
        writer.writerow([username,
                         site,
                         results[site]['url_main'],
                         results[site]['url_user'],
                         str(results[site]['status'].status),
                         results[site]['http_status'],
                         ])
def genxmindfile(filename, username, results):
    """Generate an XMIND8 mind-map file for the given username's results."""
    print(f'Generating XMIND8 file for username {username}')
    # xmind.load() opens an existing workbook if present, so start fresh
    if os.path.exists(filename):
        os.remove(filename)
    workbook = xmind.load(filename)
    design_sheet1(workbook.getPrimarySheet(), username, results)
    xmind.save(workbook, path=filename)
def design_sheet1(sheet, username, results):
    """Fill the primary XMIND sheet with the username's claimed accounts.

    The root topic is the username. One sub-topic is created per non-country
    tag seen on claimed accounts, plus a fallback "Undefined" section; each
    claimed account's user URL is attached as a label under its category.
    """
    ##all tag list
    # maps tag name -> its sub-topic on the sheet
    alltags = {}
    sheet.setTitle("%s Analysis"%(username))
    root_topic1 = sheet.getRootTopic()
    root_topic1.setTitle("%s"%(username))
    # fallback section for accounts that have no non-country tag
    undefinedsection = root_topic1.addSubTopic()
    undefinedsection.setTitle("Undefined")
    alltags["undefined"] = undefinedsection
    for website_name in results:
        dictionary = results[website_name]
        if dictionary.get("status").status == QueryStatus.CLAIMED:
            ## first time this entry is found: create a sub-topic for every
            ## new non-country tag (country tags never get their own section)
            for tag in dictionary.get("status").tags:
                if tag.strip() == "":
                    continue
                if tag not in alltags.keys():
                    if not is_country_tag(tag):
                        tagsection = root_topic1.addSubTopic()
                        tagsection.setTitle(tag)
                        alltags[tag] = tagsection
            category = None
            userlink= None
            # the LAST non-country tag wins as the account's category
            for tag in dictionary.get("status").tags:
                if tag.strip() == "":
                    continue
                if not is_country_tag(tag):
                    category = tag
            if category is None:
                category = "undefined"
                userlink = undefinedsection.addSubTopic()
            else:
                userlink = alltags[category].addSubTopic()
            userlink.addLabel(dictionary.get("status").site_url_user)
            #for tag in dictionary.get("status").tags:
            #    if( tag != category ):
            #        sheet.createRelationship(userlink.getID(), alltags[tag].getID(),"other tag")
+29 -14
View File
@@ -307,8 +307,9 @@
}, },
"500px": { "500px": {
"tags": [ "tags": [
"images", "photos",
"in" "in",
"global"
], ],
"errors": { "errors": {
"INTERNAL_SERVER_ERROR": "Site error", "INTERNAL_SERVER_ERROR": "Site error",
@@ -3221,6 +3222,7 @@
"tags": [ "tags": [
"global", "global",
"images", "images",
"photos",
"us" "us"
], ],
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
@@ -3979,8 +3981,11 @@
}, },
"EyeEm": { "EyeEm": {
"tags": [ "tags": [
"de",
"in", "in",
"sd" "sd",
"global",
"photos"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": "Not Found (404) | EyeEm", "absenceStrs": "Not Found (404) | EyeEm",
@@ -6551,8 +6556,8 @@
}, },
"Instagram": { "Instagram": {
"tags": [ "tags": [
"social", "photos",
"us" "global"
], ],
"errors": { "errors": {
"Login \u2022 Instagram": "Login required" "Login \u2022 Instagram": "Login required"
@@ -8018,7 +8023,9 @@
"news", "news",
"us" "us"
], ],
"checkType": "status_code", "checkType": "message",
"absenceStrs": [":{\"__typename\":\"NotFound\"},\"viewer\""],
"presenseStrs": ["userPostCounts"],
"alexaRank": 76, "alexaRank": 76,
"url": "https://medium.com/@{username}", "url": "https://medium.com/@{username}",
"urlMain": "https://medium.com/", "urlMain": "https://medium.com/",
@@ -9835,9 +9842,9 @@
}, },
"Picuki": { "Picuki": {
"tags": [ "tags": [
"photos",
"global", "global",
"jp", "instagram"
"us"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
@@ -9899,7 +9906,8 @@
}, },
"Pinterest": { "Pinterest": {
"tags": [ "tags": [
"social", "images",
"photos",
"us" "us"
], ],
"checkType": "status_code", "checkType": "status_code",
@@ -10858,6 +10866,7 @@
}, },
"Reddit": { "Reddit": {
"tags": [ "tags": [
"social",
"news", "news",
"us" "us"
], ],
@@ -13392,6 +13401,7 @@
}, },
"Tumblr": { "Tumblr": {
"tags": [ "tags": [
"blogs",
"global", "global",
"us" "us"
], ],
@@ -13433,11 +13443,14 @@
"us" "us"
], ],
"headers": { "headers": {
"User-Agent": "Mozilla" "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1347256342462009351"
}, },
"urlProbe": "https://mobile.twitter.com/{username}", "urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D",
"checkType": "message", "checkType": "message",
"absenceStrs": "Sorry, that page doesn't exist", "absenceStrs": "Not found",
"alexaRank": 55, "alexaRank": 55,
"url": "https://twitter.com/{username}", "url": "https://twitter.com/{username}",
"urlMain": "https://www.twitter.com/", "urlMain": "https://www.twitter.com/",
@@ -13604,9 +13617,9 @@
}, },
"VK": { "VK": {
"tags": [ "tags": [
"global",
"ru", "ru",
"social" "social",
"global"
], ],
"checkType": "response_url", "checkType": "response_url",
"alexaRank": 23, "alexaRank": 23,
@@ -14107,6 +14120,8 @@
}, },
"We Heart It": { "We Heart It": {
"tags": [ "tags": [
"photos",
"us",
"in" "in"
], ],
"checkType": "message", "checkType": "message",
+109
View File
@@ -0,0 +1,109 @@
<!-- Maigret HTML report template (Jinja2). Expected context: username,
     generated_at, supposed_data, countries_tuple_list, interests_tuple_list,
     first_seen, brief, results = list of (username, id_type, site_results).
     Fixes: head now encloses all metadata (viewport/title/link/style were
     after </head>), stray </p> after the ids_data table removed, body is
     closed before </html>. -->
<html>
<head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no" />
    <title>{{ username }} -- Maigret username search report</title>
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
    <style>
        .table td, .table th {
            padding: .4rem;
        }
        @media print {
            .pagebreak { page-break-before: always; }
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="row-mb">
            <div class="col-12 card-body" style="padding-bottom: 0.5rem;">
                <h4 class="mb-0">
                    <a class="blog-header-logo text-dark" href="#">Username search report for {{ username }}</a>
                </h4>
                <small class="text-muted">Generated at {{ generated_at }}</small>
            </div>
        </div>
        <div class="row-mb">
            <div class="col-md">
                <div class="card flex-md-row mb-4 box-shadow h-md-250">
                    <div class="card-body d-flex flex-column align-items-start">
                        <h5>Supposed personal data</h5>
                        {% for k, v in supposed_data.items() %}
                        <span>
                            {{ k }}: {{ v }}
                        </span>
                        {% endfor %}
                        {% if countries_tuple_list %}
                        <span>
                            Geo: {% for k, v in countries_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
                        </span>
                        {% endif %}{% if interests_tuple_list %}
                        <span>
                            Interests: {% for k, v in interests_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
                        </span>
                        {% endif %}{% if first_seen %}
                        <span>
                            First seen: {{ first_seen }}
                        </span>
                        {% endif %}
                    </div>
                </div>
            </div>
        </div>
        <div class="row-mb">
            <div class="col-md">
                <div class="card flex-md-row mb-4 box-shadow h-md-250">
                    <div class="card-body d-flex flex-column align-items-start">
                        <h5>Brief</h5>
                        <span>
                            {{ brief }}
                        </span>
                    </div>
                </div>
            </div>
        </div>
        {# one card per claimed, exactly-matching account #}
        {% for u, t, data in results %}
        {% for k, v in data.items() %}
        {% if v.found and not v.is_similar %}
        <div class="row-mb">
            <div class="col-md">
                <div class="card flex-md-row mb-4 box-shadow h-md-250">
                    <img class="card-img-right flex-auto d-none d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
                    <div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
                        <h3 class="mb-0" style="padding-top: 1rem;">
                            <a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
                        </h3>
                        {% if v.status.tags %}
                        <div class="mb-1 text-muted">Tags: {{ v.status.tags | join(', ') }}</div>
                        {% endif %}
                        <p class="card-text">
                            <a href="{{ v.url_user }}" target="_blank">{{ v.url_user }}</a>
                        </p>
                        {% if v.ids_data %}
                        <table class="table table-striped">
                            <tbody>
                                {% for k1, v1 in v.ids_data.items() %}
                                {% if k1 != 'image' %}
                                <tr>
                                    <th>{{ title(k1) }}</th>
                                    <td>{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}
                                    </td>
                                </tr>
                                {% endif %}
                                {% endfor %}
                            </tbody>
                        </table>
                        {% endif %}
                    </div>
                </div>
            </div>
        </div>
        {% endif %}
        {% endfor %}
        {% endfor %}
    </div>
    <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
    <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
</body>
</html>
+2 -8
View File
@@ -34,7 +34,7 @@ class QueryResult():
""" """
def __init__(self, username, site_name, site_url_user, status, ids_data=None, def __init__(self, username, site_name, site_url_user, status, ids_data=None,
query_time=None, context=None, tags=None): query_time=None, context=None, tags=[]):
"""Create Query Result Object. """Create Query Result Object.
Contains information about a specific method of detecting usernames on Contains information about a specific method of detecting usernames on
@@ -72,14 +72,8 @@ class QueryResult():
self.query_time = query_time self.query_time = query_time
self.context = context self.context = context
self.ids_data = ids_data self.ids_data = ids_data
self.tags = tags
self.tags = ""
if (tags is not None):
TAGstring = "".join(['%s,' % tags for tags in tags])
TAGstring = TAGstring[:-1]
self.tags = TAGstring
return
def __str__(self): def __str__(self):
"""Convert Object To String. """Convert Object To String.
+15 -35
View File
@@ -13,6 +13,7 @@ from .utils import CaseConverter
class MaigretEngine: class MaigretEngine:
def __init__(self, name, data): def __init__(self, name, data):
self.name = name self.name = name
self.site = {}
self.__dict__.update(data) self.__dict__.update(data)
@property @property
@@ -127,6 +128,15 @@ class MaigretDatabase:
def sites_dict(self): def sites_dict(self):
return {site.name: site for site in self._sites} return {site.name: site for site in self._sites}
def ranked_sites_dict(self, reverse=False, top=sys.maxsize, tags=[]):
if not tags:
filtered_list = self.sites
else:
filtered_list = [s for s in self.sites if set(s.tags).intersection(set(tags)) or s.engine in tags]
sorted_list = sorted(filtered_list, key=lambda x: x.alexa_rank, reverse=reverse)[:top]
return {site.name: site for site in sorted_list}
@property @property
def engines(self): def engines(self):
return self._engines return self._engines
@@ -145,12 +155,12 @@ class MaigretDatabase:
return self return self
def save_to_file(self, filename: str) -> MaigretDatabase: def save_to_file(self, filename: str) -> MaigretDatabase:
json_data = { db_data = {
'sites': {site.name: site.strip_engine_data().json for site in self._sites}, 'sites': {site.name: site.strip_engine_data().json for site in self._sites},
'engines': {engine.name: engine.json for engine in self._engines}, 'engines': {engine.name: engine.json for engine in self._engines},
} }
json_data = json.dumps(json_data, indent=4) json_data = json.dumps(db_data, indent=4)
with open(filename, 'w') as f: with open(filename, 'w') as f:
f.write(json_data) f.write(json_data)
@@ -160,8 +170,8 @@ class MaigretDatabase:
def load_from_json(self, json_data: dict) -> MaigretDatabase: def load_from_json(self, json_data: dict) -> MaigretDatabase:
# Add all of site information from the json file to internal site list. # Add all of site information from the json file to internal site list.
site_data = json_data.get("sites") site_data = json_data.get("sites", {})
engines_data = json_data.get("engines") engines_data = json_data.get("engines", {})
for engine_name in engines_data: for engine_name in engines_data:
self._engines.append(MaigretEngine(engine_name, engines_data[engine_name])) self._engines.append(MaigretEngine(engine_name, engines_data[engine_name]))
@@ -198,7 +208,7 @@ class MaigretDatabase:
is_url_valid = url.startswith('http://') or url.startswith('https://') is_url_valid = url.startswith('http://') or url.startswith('https://')
if not is_url_valid: if not is_url_valid:
return False raise FileNotFoundError(f"Invalid data file URL '{url}'.")
try: try:
response = requests.get(url=url) response = requests.get(url=url)
@@ -238,33 +248,3 @@ class MaigretDatabase:
) )
return self.load_from_json(data) return self.load_from_json(data)
def site_name_list(self, popularity_rank=False):
"""Get Site Name List.
Keyword Arguments:
self -- This object.
popularity_rank -- Boolean indicating if list should be sorted
by popularity rank.
Default value is False.
NOTE: List is sorted in ascending
alphabetical order is popularity rank
is not requested.
Return Value:
List of strings containing names of sites.
"""
if popularity_rank:
# Sort in ascending popularity rank order.
site_rank_name = \
sorted([(site.popularity_rank, site.name) for site in self],
key=operator.itemgetter(0)
)
site_names = [name for _, name in site_rank_name]
else:
# Sort in ascending alphabetical order.
site_names = sorted([site.name for site in self], key=str.lower)
return site_names
+17 -4
View File
@@ -3,16 +3,29 @@ import re
class CaseConverter: class CaseConverter:
@staticmethod @staticmethod
def camel_to_snake(camelcased_string: str): def camel_to_snake(camelcased_string: str) -> str:
return re.sub(r'(?<!^)(?=[A-Z])', '_', camelcased_string).lower() return re.sub(r'(?<!^)(?=[A-Z])', '_', camelcased_string).lower()
@staticmethod @staticmethod
def snake_to_camel(snakecased_string: str): def snake_to_camel(snakecased_string: str) -> str:
formatted = ''.join(word.title() for word in snakecased_string.split('_')) formatted = ''.join(word.title() for word in snakecased_string.split('_'))
result = formatted[0].lower() + formatted[1:] result = formatted[0].lower() + formatted[1:]
return result return result
@staticmethod
def snake_to_title(snakecased_string: str) -> str:
words = snakecased_string.split('_')
words[0] = words[0].title()
return ' '.join(words)
def is_country_tag(tag):
def is_country_tag(tag: str) -> bool:
"""detect if tag represent a country""" """detect if tag represent a country"""
return bool(re.match("^([a-z]){2}$", tag)) return bool(re.match("^([a-zA-Z]){2}$", tag)) or tag == 'global'
def enrich_link_str(link: str) -> str:
link = link.strip()
if link.startswith('www.') or (link.startswith('http') and '//' in link):
return f'<a class="auto-link" href="{link}">{link}</a>'
return link
+2
View File
@@ -8,9 +8,11 @@ certifi==2020.12.5
chardet==3.0.4 chardet==3.0.4
colorama==0.4.4 colorama==0.4.4
idna==2.10 idna==2.10
Jinja2==2.11.2
lxml==4.6.2 lxml==4.6.2
mock==4.0.2 mock==4.0.2
multidict==5.1.0 multidict==5.1.0
pycountry==20.7.3
PySocks==1.7.1 PySocks==1.7.1
python-socks==1.1.2 python-socks==1.1.2
requests==2.25.1 requests==2.25.1
+104
View File
@@ -0,0 +1,104 @@
"""Maigret reports test functions"""
from io import StringIO
import copy
import os
import xmind
from maigret.report import save_csv_report_to_file, genxmindfile, save_html_report
from maigret.result import QueryResult, QueryStatus
EXAMPLE_RESULTS = {
'GitHub': {
'username': 'test',
'parsing_enabled': True,
'url_main': 'https://www.github.com/',
'url_user': 'https://www.github.com/test',
'status': QueryResult('test',
'GitHub',
'https://www.github.com/test',
QueryStatus.CLAIMED,
tags=['test_tag']),
'http_status': 200,
'is_similar': False,
'rank': 78
}
}
GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
GOOD_500PX_RESULT.ids_data = {"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==", "legacy_id": "26403415", "username": "alexaimephotographycars", "name": "Alex Aim\u00e9", "website": "www.flickr.com/photos/alexaimephotography/", "facebook_link": " www.instagram.com/street.reality.photography/", "instagram_username": "alexaimephotography", "twitter_username": "Alexaimephotogr"}
GOOD_REDDIT_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_REDDIT_RESULT.tags = ['news', 'us']
GOOD_REDDIT_RESULT.ids_data = {"reddit_id": "t5_1nytpy", "reddit_username": "alexaimephotography", "fullname": "alexaimephotography", "image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e", "is_employee": "False", "is_nsfw": "False", "is_mod": "True", "is_following": "True", "has_user_profile": "True", "hide_from_robots": "False", "created_at": "2019-07-10 12:20:03", "total_karma": "53959", "post_karma": "52738"}
GOOD_IG_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_IG_RESULT.tags = ['photo', 'global']
GOOD_IG_RESULT.ids_data = {"instagram_username": "alexaimephotography", "fullname": "Alexaimephotography", "id": "6828488620", "image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F", "bio": "Photographer \nChild of fine street arts", "external_url": "https://www.flickr.com/photos/alexaimephotography2020/"}
GOOD_TWITTER_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_TWITTER_RESULT.tags = ['social', 'us']
TEST = [('alexaimephotographycars', 'username', {'500px': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotographycars', 'ids_usernames': {'alexaimephotographycars': 'username', 'alexaimephotography': 'username', 'Alexaimephotogr': 'username'}, 'status': GOOD_500PX_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}}), ('alexaimephotography', 'username', {'500px': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotography', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_REDDIT_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotography', 'status': BAD_RESULT, 'http_status': 400, 
'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_IG_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 29}}), ('Alexaimephotogr', 'username', {'500px': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/Alexaimephotogr', 'status': GOOD_TWITTER_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/Alexaimephotogr', 'status':BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}})]
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
def test_save_csv_report_to_file():
    """CSV report should contain the header row plus one row per site."""
    output = StringIO()
    save_csv_report_to_file('test', EXAMPLE_RESULTS, output)
    output.seek(0)
    expected = [
        'username,name,url_main,url_user,exists,http_status\r\n',
        'test,GitHub,https://www.github.com/,https://www.github.com/test,Claimed,200\r\n',
    ]
    assert output.readlines() == expected
def test_save_xmind_report():
    """XMIND report should group claimed accounts under their tag topics."""
    path = 'test_report.xmind'
    genxmindfile(path, 'test', EXAMPLE_RESULTS)

    # load the generated workbook back and inspect the primary sheet
    sheet_data = xmind.load(path).getPrimarySheet().getData()
    assert sheet_data['title'] == 'test Analysis'

    root = sheet_data['topic']
    assert root['title'] == 'test'
    topics = root['topics']
    assert len(topics) == 2
    assert topics[0]['title'] == 'Undefined'
    assert topics[1]['title'] == 'test_tag'
    assert len(topics[1]['topics']) == 1
    assert topics[1]['topics'][0]['label'] == 'https://www.github.com/test'
def test_html_report():
    """HTML report should be created and contain brief, geo and interests."""
    report_name = 'report_alexaimephotographycars.html'
    # remove a leftover report from a previous run, if any
    # (explicit existence check instead of a bare `except: pass`)
    if os.path.exists(report_name):
        os.remove(report_name)

    save_html_report(TEST)

    assert os.path.exists(report_name)
    with open(report_name) as report_file:
        report_text = report_file.read()
    assert SUPPOSED_BRIEF in report_text
    assert SUPPOSED_GEO in report_text
    assert SUPPOSED_INTERESTS in report_text
+20 -1
View File
@@ -1,5 +1,5 @@
"""Maigret Database test functions""" """Maigret Database test functions"""
from maigret.sites import MaigretDatabase from maigret.sites import MaigretDatabase, MaigretSite
EXAMPLE_DB = { EXAMPLE_DB = {
@@ -99,3 +99,22 @@ def test_saving_site_error():
assert amperka.strip_engine_data().errors == {'error1': 'text1'} assert amperka.strip_engine_data().errors == {'error1': 'text1'}
assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'} assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}
def test_ranked_sites_dict():
db = MaigretDatabase()
db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))
db.update_site(MaigretSite('1', {'alexaRank': 2, 'tags': ['forum']}))
db.update_site(MaigretSite('2', {'alexaRank': 10, 'tags': ['ru', 'forum']}))
# sorting
assert list(db.ranked_sites_dict().keys()) == ['1', '2', '3']
assert list(db.ranked_sites_dict(top=2).keys()) == ['1', '2']
assert list(db.ranked_sites_dict(reverse=True, top=2).keys()) == ['3', '2']
# filtering by tags
assert list(db.ranked_sites_dict(tags=['ru'], top=2).keys()) == ['2']
assert list(db.ranked_sites_dict(tags=['forum']).keys()) == ['1', '2']
# filtering by engine
assert list(db.ranked_sites_dict(tags=['ucoz']).keys()) == ['3']
+14 -1
View File
@@ -1,5 +1,5 @@
"""Maigret utils test functions""" """Maigret utils test functions"""
from maigret.utils import CaseConverter, is_country_tag from maigret.utils import CaseConverter, is_country_tag, enrich_link_str
def test_case_convert_camel_to_snake(): def test_case_convert_camel_to_snake():
@@ -14,8 +14,21 @@ def test_case_convert_snake_to_camel():
assert b == 'camelCasedString' assert b == 'camelCasedString'
def test_case_convert_snake_to_title():
a = 'camel_cased_string'
b = CaseConverter.snake_to_title(a)
assert b == 'Camel cased string'
def test_is_country_tag(): def test_is_country_tag():
assert is_country_tag('ru') == True assert is_country_tag('ru') == True
assert is_country_tag('FR') == True
assert is_country_tag('a1') == False assert is_country_tag('a1') == False
assert is_country_tag('dating') == False assert is_country_tag('dating') == False
assert is_country_tag('global') == True
def test_enrich_link_str():
assert enrich_link_str('test') == 'test'
assert enrich_link_str(' www.flickr.com/photos/alexaimephotography/') == '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'