diff --git a/maigret/maigret.py b/maigret/maigret.py
index b0abe3b..025701a 100755
--- a/maigret/maigret.py
+++ b/maigret/maigret.py
@@ -26,7 +26,7 @@ from socid_extractor import parse, extract
from .notify import QueryNotifyPrint
from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite
-from .report import save_csv_report, genxmindfile
+from .report import save_csv_report, genxmindfile, save_html_report
import xmind
@@ -629,6 +629,10 @@ async def main():
action="store_true", dest="csv", default=False,
help="Create Comma-Separated Values (CSV) File."
)
+ parser.add_argument("--html",
+ action="store_true", dest="html", default=False,
+ help="Create HTML report file."
+ )
parser.add_argument("--site",
action="append", metavar='SITE_NAME',
dest="site_list", default=None,
@@ -649,6 +653,10 @@ async def main():
"A longer timeout will be more likely to get results from slow sites."
"On the other hand, this may cause a long delay to gather all results."
)
+ parser.add_argument("--top-sites",
+ action="store", default=500, type=int,
+ help="Count of sites for checking ranked by Alexa Top (default: 500)."
+ )
parser.add_argument("--print-not-found",
action="store_true", dest="print_not_found", default=False,
help="Print sites where the username was not found."
@@ -757,7 +765,8 @@ async def main():
# Create object with all information about sites we are aware of.
try:
- site_data_all = MaigretDatabase().load_from_file(args.json_file).sites_dict
+ db = MaigretDatabase().load_from_file(args.json_file)
+ site_data_all = db.ranked_sites_dict(top=args.top_sites)
except Exception as error:
print(f"ERROR: {error}")
sys.exit(1)
@@ -805,6 +814,8 @@ async def main():
already_checked = set()
+ general_results = []
+
while usernames:
username, id_type = list(usernames.items())[0]
del usernames[username]
@@ -834,6 +845,7 @@ async def main():
logger=logger,
forced=args.use_disabled_sites,
)
+ general_results.append((username, id_type, results))
if args.folderoutput:
# The usernames results should be stored in a targeted folder.
@@ -870,6 +882,9 @@ async def main():
if args.csv:
save_csv_report(username, results)
+ if args.html:
+ save_html_report(general_results)
+
def run():
try:
diff --git a/maigret/report.py b/maigret/report.py
new file mode 100644
index 0000000..958df81
--- /dev/null
+++ b/maigret/report.py
@@ -0,0 +1,215 @@
+import csv
+from datetime import datetime
+import logging
+import os
+import xmind
+
+from jinja2 import Template
+import pycountry
+
+from .result import QueryStatus
+from .utils import is_country_tag, CaseConverter, enrich_link_str
+
+
+def save_csv_report(username: str, results: dict):
+ with open(username + '.csv', 'w', newline='', encoding='utf-8') as csvfile:
+ save_csv_report_to_file(username, results, csvfile)
+
+
+def save_html_report(username_results: list):
+ brief_text = []
+ usernames = {}
+ extended_info_count = 0
+ tags = {}
+ supposed_data = {}
+ allowed_fields = ['fullname', 'gender']
+ first_seen = None
+ first_seen_format = '%Y-%m-%d %H:%M:%S'
+
+ for username, id_type, results in username_results:
+ found_accounts = 0
+ new_ids = []
+ usernames[username] = {'type': id_type}
+
+ for website_name in results:
+ dictionary = results[website_name]
+ # TODO: fix no site data issue
+ if not dictionary:
+ continue
+
+ status = dictionary.get('status')
+ if status.ids_data:
+ dictionary['ids_data'] = status.ids_data
+ extended_info_count += 1
+
+ # detect first seen
+ created_at = status.ids_data.get('created_at')
+ if created_at:
+ if first_seen is None:
+ first_seen = created_at
+ else:
+ known_time = datetime.strptime(first_seen, first_seen_format)
+ new_time = datetime.strptime(created_at, first_seen_format)
+ if new_time < known_time:
+ first_seen = created_at
+
+ for k, v in status.ids_data.items():
+ # suppose target data
+ field = 'fullname' if k == 'name' else k
+ if field not in supposed_data:
+ supposed_data[field] = []
+ supposed_data[field].append(v)
+ # suppose country
+ if k in ['country', 'locale']:
+ try:
+ if is_country_tag(k):
+ tag = pycountry.countries.get(alpha_2=v).alpha_2.lower()
+ else:
+ tag = pycountry.countries.search_fuzzy(v)[0].alpha_2.lower()
+ # TODO: move countries to another struct
+ tags[tag] = tags.get(tag, 0) + 1
+ except Exception as e:
+ logging.debug('pycountry exception', exc_info=True)
+
+ new_usernames = dictionary.get('ids_usernames')
+ if new_usernames:
+ for u, utype in new_usernames.items():
+ if u not in usernames:
+ new_ids.append((u, utype))
+ usernames[u] = {'type': utype}
+
+ if status.status == QueryStatus.CLAIMED:
+ found_accounts += 1
+ dictionary['found'] = True
+ else:
+ continue
+
+ if not dictionary.get('is_similar'):
+ # ignore non-exact search results
+ if status.tags:
+ for t in status.tags:
+ tags[t] = tags.get(t, 0) + 1
+
+
+ brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')
+
+ if new_ids:
+ ids_list = []
+ for u, t in new_ids:
+ ids_list.append(f'{u} ({t})' if t != 'username' else u)
+ brief_text.append(f'Found target\'s other IDs: ' + ', '.join(ids_list) + '.')
+
+ brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
+
+ # template generation
+ template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+ "resources/simple_report.tpl"), encoding='utf-8').read()
+ template = Template(template_text)
+
+ template.globals['title'] = CaseConverter.snake_to_title
+ template.globals['detect_link'] = enrich_link_str
+
+ brief = ' '.join(brief_text).strip()
+ tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
+
+ if 'global' in tags:
+ # remove tag 'global' useless for country detection
+ del tags['global']
+
+ first_username = username_results[0][0]
+ countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items()))
+ interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items()))
+
+ filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0]
+ for k, v in supposed_data.items()
+ if k in allowed_fields}
+
+ filled_template = template.render(username=first_username,
+ brief=brief,
+ results=username_results,
+ first_seen=first_seen,
+ interests_tuple_list=tuple_sort(interests_list),
+ countries_tuple_list=tuple_sort(countries_lists),
+ supposed_data=filtered_supposed_data,
+ generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+ )
+ # save report
+ html_filename = f'report_{first_username}.html'
+ with open(html_filename, 'w', encoding='utf-8') as f:
+ f.write(filled_template)
+
+def save_csv_report_to_file(username: str, results: dict, csvfile):
+ # results: mapping of site name -> check info dict (url_main, url_user, status, http_status)
+ writer = csv.writer(csvfile)
+ writer.writerow(['username',
+ 'name',
+ 'url_main',
+ 'url_user',
+ 'exists',
+ 'http_status'
+ ]
+ )
+ for site in results:
+ writer.writerow([username,
+ site,
+ results[site]['url_main'],
+ results[site]['url_user'],
+ str(results[site]['status'].status),
+ results[site]['http_status'],
+ ])
+
+
+def genxmindfile(filename, username, results):
+ print(f'Generating XMIND8 file for username {username}')
+ if os.path.exists(filename):
+ os.remove(filename)
+ workbook = xmind.load(filename)
+ sheet = workbook.getPrimarySheet()
+ design_sheet1(sheet, username, results)
+ xmind.save(workbook, path=filename)
+
+
+def design_sheet1(sheet, username, results):
+ ##all tag list
+ alltags = {}
+
+ sheet.setTitle("%s Analysis"%(username))
+ root_topic1 = sheet.getRootTopic()
+ root_topic1.setTitle("%s"%(username))
+
+ undefinedsection = root_topic1.addSubTopic()
+ undefinedsection.setTitle("Undefined")
+ alltags["undefined"] = undefinedsection
+
+ for website_name in results:
+ dictionary = results[website_name]
+
+ if dictionary.get("status").status == QueryStatus.CLAIMED:
+ ## firsttime I found that entry
+ for tag in dictionary.get("status").tags:
+ if tag.strip() == "":
+ continue
+ if tag not in alltags.keys():
+ if not is_country_tag(tag):
+ tagsection = root_topic1.addSubTopic()
+ tagsection.setTitle(tag)
+ alltags[tag] = tagsection
+
+ category = None
+ userlink= None
+ for tag in dictionary.get("status").tags:
+ if tag.strip() == "":
+ continue
+ if not is_country_tag(tag):
+ category = tag
+
+ if category is None:
+ category = "undefined"
+ userlink = undefinedsection.addSubTopic()
+ else:
+ userlink = alltags[category].addSubTopic()
+ userlink.addLabel(dictionary.get("status").site_url_user)
+
+ #for tag in dictionary.get("status").tags:
+ # if( tag != category ):
+ # sheet.createRelationship(userlink.getID(), alltags[tag].getID(),"other tag")
diff --git a/maigret/resources/data.json b/maigret/resources/data.json
index 46054ee..7153628 100644
--- a/maigret/resources/data.json
+++ b/maigret/resources/data.json
@@ -307,8 +307,9 @@
},
"500px": {
"tags": [
- "images",
- "in"
+ "photos",
+ "in",
+ "global"
],
"errors": {
"INTERNAL_SERVER_ERROR": "Site error",
@@ -3221,6 +3222,7 @@
"tags": [
"global",
"images",
+ "photos",
"us"
],
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
@@ -3979,8 +3981,11 @@
},
"EyeEm": {
"tags": [
+ "de",
"in",
- "sd"
+ "sd",
+ "global",
+ "photos"
],
"checkType": "message",
"absenceStrs": "Not Found (404) | EyeEm",
@@ -6551,8 +6556,8 @@
},
"Instagram": {
"tags": [
- "social",
- "us"
+ "photos",
+ "global"
],
"errors": {
"Login \u2022 Instagram": "Login required"
@@ -8018,7 +8023,9 @@
"news",
"us"
],
- "checkType": "status_code",
+ "checkType": "message",
+ "absenceStrs": [":{\"__typename\":\"NotFound\"},\"viewer\""],
+ "presenseStrs": ["userPostCounts"],
"alexaRank": 76,
"url": "https://medium.com/@{username}",
"urlMain": "https://medium.com/",
@@ -9835,9 +9842,9 @@
},
"Picuki": {
"tags": [
+ "photos",
"global",
- "jp",
- "us"
+ "instagram"
],
"checkType": "message",
"absenceStrs": [
@@ -9899,7 +9906,8 @@
},
"Pinterest": {
"tags": [
- "social",
+ "images",
+ "photos",
"us"
],
"checkType": "status_code",
@@ -10858,6 +10866,7 @@
},
"Reddit": {
"tags": [
+ "social",
"news",
"us"
],
@@ -13392,6 +13401,7 @@
},
"Tumblr": {
"tags": [
+ "blogs",
"global",
"us"
],
@@ -13433,11 +13443,14 @@
"us"
],
"headers": {
- "User-Agent": "Mozilla"
+ "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
+ "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
+ "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+ "x-guest-token": "1347256342462009351"
},
- "urlProbe": "https://mobile.twitter.com/{username}",
+ "urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D",
"checkType": "message",
- "absenceStrs": "Sorry, that page doesn't exist",
+ "absenceStrs": "Not found",
"alexaRank": 55,
"url": "https://twitter.com/{username}",
"urlMain": "https://www.twitter.com/",
@@ -13604,9 +13617,9 @@
},
"VK": {
"tags": [
- "global",
"ru",
- "social"
+ "social",
+ "global"
],
"checkType": "response_url",
"alexaRank": 23,
@@ -14107,6 +14120,8 @@
},
"We Heart It": {
"tags": [
+ "photos",
+ "us",
"in"
],
"checkType": "message",
diff --git a/maigret/resources/simple_report.tpl b/maigret/resources/simple_report.tpl
new file mode 100644
index 0000000..0908808
--- /dev/null
+++ b/maigret/resources/simple_report.tpl
@@ -0,0 +1,109 @@
+
+
+
+
+
+{{ username }} -- Maigret username search report
+
+
+
+
+
+
+
+
+
+ Generated at {{ generated_at }}
+
+
+
+
+
+
+
Supposed personal data
+ {% for k, v in supposed_data.items() %}
+
+ {{ k }}: {{ v }}
+
+ {% endfor %}
+ {% if countries_tuple_list %}
+
+ Geo: {% for k, v in countries_tuple_list %}{{ k }} ({{ v }}) {{ ", " if not loop.last }}{% endfor %}
+
+ {% endif %}{% if interests_tuple_list %}
+
+ Interests: {% for k, v in interests_tuple_list %}{{ k }} ({{ v }}) {{ ", " if not loop.last }}{% endfor %}
+
+ {% endif %}{% if first_seen %}
+
+ First seen: {{ first_seen }}
+
+ {% endif %}
+
+
+
+
+
+
+
+
+
Brief
+
+ {{ brief }}
+
+
+
+
+
+ {% for u, t, data in results %}
+ {% for k, v in data.items() %}
+ {% if v.found and not v.is_similar %}
+
+
+
+
+
+
+ {% if v.status.tags %}
+
Tags: {{ v.status.tags | join(', ') }}
+ {% endif %}
+
+ {{ v.url_user }}
+
+ {% if v.ids_data %}
+
+
+ {% for k1, v1 in v.ids_data.items() %}
+ {% if k1 != 'image' %}
+
+ {{ title(k1) }}
+ {% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}
+
+
+ {% endif %}
+ {% endfor %}
+
+
+ {% endif %}
+
+
+
+
+
+ {% endif %}
+ {% endfor %}
+ {% endfor %}
+
+
+
+
+
\ No newline at end of file
diff --git a/maigret/result.py b/maigret/result.py
index 80ade5c..bdfade7 100644
--- a/maigret/result.py
+++ b/maigret/result.py
@@ -34,7 +34,7 @@ class QueryResult():
"""
def __init__(self, username, site_name, site_url_user, status, ids_data=None,
- query_time=None, context=None, tags=None):
+ query_time=None, context=None, tags=None):
"""Create Query Result Object.
Contains information about a specific method of detecting usernames on
@@ -72,14 +72,8 @@ class QueryResult():
self.query_time = query_time
self.context = context
self.ids_data = ids_data
+ self.tags = tags if tags is not None else []
- self.tags = ""
- if (tags is not None):
- TAGstring = "".join(['%s,' % tags for tags in tags])
- TAGstring = TAGstring[:-1]
- self.tags = TAGstring
-
- return
def __str__(self):
"""Convert Object To String.
diff --git a/maigret/sites.py b/maigret/sites.py
index 1498770..edc68e1 100644
--- a/maigret/sites.py
+++ b/maigret/sites.py
@@ -13,6 +13,7 @@ from .utils import CaseConverter
class MaigretEngine:
def __init__(self, name, data):
self.name = name
+ self.site = {}
self.__dict__.update(data)
@property
@@ -127,6 +128,15 @@ class MaigretDatabase:
def sites_dict(self):
return {site.name: site for site in self._sites}
+ def ranked_sites_dict(self, reverse=False, top=sys.maxsize, tags=None):
+ if not tags:
+ filtered_list = self.sites
+ else:
+ filtered_list = [s for s in self.sites if set(s.tags).intersection(set(tags)) or s.engine in tags]
+
+ sorted_list = sorted(filtered_list, key=lambda x: x.alexa_rank, reverse=reverse)[:top]
+ return {site.name: site for site in sorted_list}
+
@property
def engines(self):
return self._engines
@@ -145,12 +155,12 @@ class MaigretDatabase:
return self
def save_to_file(self, filename: str) -> MaigretDatabase:
- json_data = {
+ db_data = {
'sites': {site.name: site.strip_engine_data().json for site in self._sites},
'engines': {engine.name: engine.json for engine in self._engines},
}
- json_data = json.dumps(json_data, indent=4)
+ json_data = json.dumps(db_data, indent=4)
with open(filename, 'w') as f:
f.write(json_data)
@@ -160,8 +170,8 @@ class MaigretDatabase:
def load_from_json(self, json_data: dict) -> MaigretDatabase:
# Add all of site information from the json file to internal site list.
- site_data = json_data.get("sites")
- engines_data = json_data.get("engines")
+ site_data = json_data.get("sites", {})
+ engines_data = json_data.get("engines", {})
for engine_name in engines_data:
self._engines.append(MaigretEngine(engine_name, engines_data[engine_name]))
@@ -198,7 +208,7 @@ class MaigretDatabase:
is_url_valid = url.startswith('http://') or url.startswith('https://')
if not is_url_valid:
- return False
+ raise FileNotFoundError(f"Invalid data file URL '{url}'.")
try:
response = requests.get(url=url)
@@ -238,33 +248,3 @@ class MaigretDatabase:
)
return self.load_from_json(data)
-
-
- def site_name_list(self, popularity_rank=False):
- """Get Site Name List.
-
- Keyword Arguments:
- self -- This object.
- popularity_rank -- Boolean indicating if list should be sorted
- by popularity rank.
- Default value is False.
- NOTE: List is sorted in ascending
- alphabetical order is popularity rank
- is not requested.
-
- Return Value:
- List of strings containing names of sites.
- """
-
- if popularity_rank:
- # Sort in ascending popularity rank order.
- site_rank_name = \
- sorted([(site.popularity_rank, site.name) for site in self],
- key=operator.itemgetter(0)
- )
- site_names = [name for _, name in site_rank_name]
- else:
- # Sort in ascending alphabetical order.
- site_names = sorted([site.name for site in self], key=str.lower)
-
- return site_names
diff --git a/maigret/utils.py b/maigret/utils.py
index a9f0d39..851d3db 100644
--- a/maigret/utils.py
+++ b/maigret/utils.py
@@ -3,16 +3,29 @@ import re
class CaseConverter:
@staticmethod
- def camel_to_snake(camelcased_string: str):
+ def camel_to_snake(camelcased_string: str) -> str:
return re.sub(r'(? str:
formatted = ''.join(word.title() for word in snakecased_string.split('_'))
result = formatted[0].lower() + formatted[1:]
return result
+ @staticmethod
+ def snake_to_title(snakecased_string: str) -> str:
+ words = snakecased_string.split('_')
+ words[0] = words[0].title()
+ return ' '.join(words)
-def is_country_tag(tag):
+
+def is_country_tag(tag: str) -> bool:
"""detect if tag represent a country"""
- return bool(re.match("^([a-z]){2}$", tag))
+ return bool(re.match("^([a-zA-Z]){2}$", tag)) or tag == 'global'
+
+
+def enrich_link_str(link: str) -> str:
+ link = link.strip()
+ if link.startswith('www.') or (link.startswith('http') and '//' in link):
+ return f'{link} '
+ return link
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 364188a..060f64b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,9 +8,11 @@ certifi==2020.12.5
chardet==3.0.4
colorama==0.4.4
idna==2.10
+Jinja2==2.11.2
lxml==4.6.2
mock==4.0.2
multidict==5.1.0
+pycountry==20.7.3
PySocks==1.7.1
python-socks==1.1.2
requests==2.25.1
diff --git a/tests/test_report.py b/tests/test_report.py
new file mode 100644
index 0000000..80dedac
--- /dev/null
+++ b/tests/test_report.py
@@ -0,0 +1,104 @@
+"""Maigret reports test functions"""
+from io import StringIO
+import copy
+import os
+
+import xmind
+
+from maigret.report import save_csv_report_to_file, genxmindfile, save_html_report
+from maigret.result import QueryResult, QueryStatus
+
+
+EXAMPLE_RESULTS = {
+ 'GitHub': {
+ 'username': 'test',
+ 'parsing_enabled': True,
+ 'url_main': 'https://www.github.com/',
+ 'url_user': 'https://www.github.com/test',
+ 'status': QueryResult('test',
+ 'GitHub',
+ 'https://www.github.com/test',
+ QueryStatus.CLAIMED,
+ tags=['test_tag']),
+ 'http_status': 200,
+ 'is_similar': False,
+ 'rank': 78
+ }
+}
+
+
+GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
+BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
+
+GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
+GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
+GOOD_500PX_RESULT.ids_data = {"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==", "legacy_id": "26403415", "username": "alexaimephotographycars", "name": "Alex Aim\u00e9", "website": "www.flickr.com/photos/alexaimephotography/", "facebook_link": " www.instagram.com/street.reality.photography/", "instagram_username": "alexaimephotography", "twitter_username": "Alexaimephotogr"}
+
+GOOD_REDDIT_RESULT = copy.deepcopy(GOOD_RESULT)
+GOOD_REDDIT_RESULT.tags = ['news', 'us']
+GOOD_REDDIT_RESULT.ids_data = {"reddit_id": "t5_1nytpy", "reddit_username": "alexaimephotography", "fullname": "alexaimephotography", "image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e", "is_employee": "False", "is_nsfw": "False", "is_mod": "True", "is_following": "True", "has_user_profile": "True", "hide_from_robots": "False", "created_at": "2019-07-10 12:20:03", "total_karma": "53959", "post_karma": "52738"}
+
+GOOD_IG_RESULT = copy.deepcopy(GOOD_RESULT)
+GOOD_IG_RESULT.tags = ['photo', 'global']
+GOOD_IG_RESULT.ids_data = {"instagram_username": "alexaimephotography", "fullname": "Alexaimephotography", "id": "6828488620", "image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F", "bio": "Photographer \nChild of fine street arts", "external_url": "https://www.flickr.com/photos/alexaimephotography2020/"}
+
+GOOD_TWITTER_RESULT = copy.deepcopy(GOOD_RESULT)
+GOOD_TWITTER_RESULT.tags = ['social', 'us']
+
+
+TEST = [('alexaimephotographycars', 'username', {'500px': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotographycars', 'ids_usernames': {'alexaimephotographycars': 'username', 'alexaimephotography': 'username', 'Alexaimephotogr': 'username'}, 'status': GOOD_500PX_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}}), ('alexaimephotography', 'username', {'500px': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotography', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_REDDIT_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotography', 'status': BAD_RESULT, 'http_status': 400, 
'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_IG_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 29}}), ('Alexaimephotogr', 'username', {'500px': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/Alexaimephotogr', 'status': GOOD_TWITTER_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/Alexaimephotogr', 'status':BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}})]
+
+
+SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
+
+SUPPOSED_INTERESTS = "Interests: photo (2) , news (1) , social (1) "
+
+SUPPOSED_GEO = "Geo: us (3) "
+
+
+def test_save_csv_report_to_file():
+ csvfile = StringIO()
+ save_csv_report_to_file('test', EXAMPLE_RESULTS, csvfile)
+
+ csvfile.seek(0)
+ data = csvfile.readlines()
+
+ assert data == [
+ 'username,name,url_main,url_user,exists,http_status\r\n',
+ 'test,GitHub,https://www.github.com/,https://www.github.com/test,Claimed,200\r\n',
+ ]
+
+
+def test_save_xmind_report():
+ filename = 'test_report.xmind'
+ genxmindfile(filename, 'test', EXAMPLE_RESULTS)
+
+ workbook = xmind.load(filename)
+ sheet = workbook.getPrimarySheet()
+ data = sheet.getData()
+
+ assert data['title'] == 'test Analysis'
+ assert data['topic']['title'] == 'test'
+ assert len(data['topic']['topics']) == 2
+ assert data['topic']['topics'][0]['title'] == 'Undefined'
+ assert data['topic']['topics'][1]['title'] == 'test_tag'
+ assert len(data['topic']['topics'][1]['topics']) == 1
+ assert data['topic']['topics'][1]['topics'][0]['label'] == 'https://www.github.com/test'
+
+
+def test_html_report():
+ report_name = 'report_alexaimephotographycars.html'
+ try:
+ os.remove(report_name)
+ except FileNotFoundError:
+ pass
+
+ save_html_report(TEST)
+
+ assert os.path.exists(report_name)
+
+ report_text = open(report_name).read()
+
+ assert SUPPOSED_BRIEF in report_text
+ assert SUPPOSED_GEO in report_text
+ assert SUPPOSED_INTERESTS in report_text
diff --git a/tests/test_sites.py b/tests/test_sites.py
index 13302d9..7a362cc 100644
--- a/tests/test_sites.py
+++ b/tests/test_sites.py
@@ -1,5 +1,5 @@
"""Maigret Database test functions"""
-from maigret.sites import MaigretDatabase
+from maigret.sites import MaigretDatabase, MaigretSite
EXAMPLE_DB = {
@@ -99,3 +99,22 @@ def test_saving_site_error():
assert amperka.strip_engine_data().errors == {'error1': 'text1'}
assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}
+
+
+def test_ranked_sites_dict():
+ db = MaigretDatabase()
+ db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))
+ db.update_site(MaigretSite('1', {'alexaRank': 2, 'tags': ['forum']}))
+ db.update_site(MaigretSite('2', {'alexaRank': 10, 'tags': ['ru', 'forum']}))
+
+ # sorting
+ assert list(db.ranked_sites_dict().keys()) == ['1', '2', '3']
+ assert list(db.ranked_sites_dict(top=2).keys()) == ['1', '2']
+ assert list(db.ranked_sites_dict(reverse=True, top=2).keys()) == ['3', '2']
+
+ # filtering by tags
+ assert list(db.ranked_sites_dict(tags=['ru'], top=2).keys()) == ['2']
+ assert list(db.ranked_sites_dict(tags=['forum']).keys()) == ['1', '2']
+
+ # filtering by engine
+ assert list(db.ranked_sites_dict(tags=['ucoz']).keys()) == ['3']
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b92b6ea..18b9825 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,5 +1,5 @@
"""Maigret utils test functions"""
-from maigret.utils import CaseConverter, is_country_tag
+from maigret.utils import CaseConverter, is_country_tag, enrich_link_str
def test_case_convert_camel_to_snake():
@@ -14,8 +14,21 @@ def test_case_convert_snake_to_camel():
assert b == 'camelCasedString'
+def test_case_convert_snake_to_title():
+ a = 'camel_cased_string'
+ b = CaseConverter.snake_to_title(a)
+
+ assert b == 'Camel cased string'
+
def test_is_country_tag():
assert is_country_tag('ru') == True
+ assert is_country_tag('FR') == True
assert is_country_tag('a1') == False
assert is_country_tag('dating') == False
+
+ assert is_country_tag('global') == True
+
+def test_enrich_link_str():
+ assert enrich_link_str('test') == 'test'
+ assert enrich_link_str(' www.flickr.com/photos/alexaimephotography/') == 'www.flickr.com/photos/alexaimephotography/ '