diff --git a/maigret/maigret.py b/maigret/maigret.py index b0abe3b..025701a 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -26,7 +26,7 @@ from socid_extractor import parse, extract from .notify import QueryNotifyPrint from .result import QueryResult, QueryStatus from .sites import MaigretDatabase, MaigretSite -from .report import save_csv_report, genxmindfile +from .report import save_csv_report, genxmindfile, save_html_report import xmind @@ -629,6 +629,10 @@ async def main(): action="store_true", dest="csv", default=False, help="Create Comma-Separated Values (CSV) File." ) + parser.add_argument("--html", + action="store_true", dest="html", default=False, + help="Create HTML report file." + ) parser.add_argument("--site", action="append", metavar='SITE_NAME', dest="site_list", default=None, @@ -649,6 +653,10 @@ async def main(): "A longer timeout will be more likely to get results from slow sites." "On the other hand, this may cause a long delay to gather all results." ) + parser.add_argument("--top-sites", + action="store", default=500, + help="Count of sites for checking ranked by Alexa Top (default: 500)." + ) parser.add_argument("--print-not-found", action="store_true", dest="print_not_found", default=False, help="Print sites where the username was not found." @@ -757,7 +765,8 @@ async def main(): # Create object with all information about sites we are aware of. try: - site_data_all = MaigretDatabase().load_from_file(args.json_file).sites_dict + db = MaigretDatabase().load_from_file(args.json_file) + site_data_all = db.ranked_sites_dict(top=args.top_sites) except Exception as error: print(f"ERROR: {error}") sys.exit(1) @@ -805,6 +814,8 @@ async def main(): already_checked = set() + general_results = [] + while usernames: username, id_type = list(usernames.items())[0] del usernames[username] @@ -834,6 +845,7 @@ async def main(): logger=logger, forced=args.use_disabled_sites, ) + general_results.append((username, id_type, results)) if args.folderoutput: # The usernames results should be stored in a targeted folder. @@ -870,6 +882,9 @@ async def main(): if args.csv: save_csv_report(username, results) + if args.html: + save_html_report(general_results) + def run(): try: diff --git a/maigret/report.py b/maigret/report.py new file mode 100644 index 0000000..958df81 --- /dev/null +++ b/maigret/report.py @@ -0,0 +1,215 @@ +import csv +from datetime import datetime +import logging +import os +import xmind + +from jinja2 import Template +import pycountry + +from .result import QueryStatus +from .utils import is_country_tag, CaseConverter, enrich_link_str + + +def save_csv_report(username: str, results: dict): + with open(username + '.csv', 'w', newline='', encoding='utf-8') as csvfile: + save_csv_report_to_file(username, results, csvfile) + + +def save_html_report(username_results: list): + brief_text = [] + usernames = {} + extended_info_count = 0 + tags = {} + supposed_data = {} + allowed_fields = ['fullname', 'gender'] + first_seen = None + first_seen_format = '%Y-%m-%d %H:%M:%S' + + for username, id_type, results in username_results: + found_accounts = 0 + new_ids = [] + usernames[username] = {'type': id_type} + + for website_name in results: + dictionary = results[website_name] + # TODO: fix no site data issue + if not dictionary: + continue + + status = dictionary.get('status') + if status.ids_data: + dictionary['ids_data'] = status.ids_data + extended_info_count += 1 + + # detect first seen + created_at = status.ids_data.get('created_at') + if created_at: + if first_seen is None: + first_seen = created_at + else: + known_time = datetime.strptime(first_seen, first_seen_format) + new_time = datetime.strptime(created_at, first_seen_format) + if new_time < known_time: + first_seen = created_at + + for k, v in status.ids_data.items(): + # suppose target data + field = 'fullname' if k == 'name' else k + if not field in supposed_data: + supposed_data[field] = [] + supposed_data[field].append(v) + # suppose country + if k in ['country', 'locale']: + try: + if is_country_tag(k): + tag = pycountry.countries.get(alpha_2=v).alpha_2.lower() + else: + tag = pycountry.countries.search_fuzzy(v)[0].alpha_2.lower() + # TODO: move countries to another struct + tags[tag] = tags.get(tag, 0) + 1 + except Exception as e: + logging.debug('pycountry exception', exc_info=True) + + new_usernames = dictionary.get('ids_usernames') + if new_usernames: + for u, utype in new_usernames.items(): + if not u in usernames: + new_ids.append((u, utype)) + usernames[u] = {'type': utype} + + if status.status == QueryStatus.CLAIMED: + found_accounts += 1 + dictionary['found'] = True + else: + continue + + if not dictionary.get('is_similar'): + # ignore non-exact search results + if status.tags: + for t in status.tags: + tags[t] = tags.get(t, 0) + 1 + + + brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.') + + if new_ids: + ids_list = [] + for u, t in new_ids: + ids_list.append(f'{u} ({t})' if t != 'username' else u) + brief_text.append(f'Found target\'s other IDs: ' + ', '.join(ids_list) + '.') + + brief_text.append(f'Extended info extracted from {extended_info_count} accounts.') + + # template generation + template_text = open(os.path.join(os.path.dirname(os.path.realpath(__file__)), + "resources/simple_report.tpl")).read() + template = Template(template_text) + + template.globals['title'] = CaseConverter.snake_to_title + template.globals['detect_link'] = enrich_link_str + + brief = ' '.join(brief_text).strip() + tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True) + + if 'global' in tags: + # remove tag 'global' useless for country detection + del tags['global'] + + first_username = username_results[0][0] + countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items())) + interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items())) + + filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0] + for k, v in supposed_data.items() + if k in allowed_fields} + + filled_template = template.render(username=first_username, + brief=brief, + results=username_results, + first_seen=first_seen, + interests_tuple_list=tuple_sort(interests_list), + countries_tuple_list=tuple_sort(countries_lists), + supposed_data=filtered_supposed_data, + generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + ) + # save report + html_filename = f'report_{first_username}.html' + with open(html_filename, 'w') as f: + f.write(filled_template) + +def save_csv_report_to_file(username: str, results: dict, csvfile): + print(results) + writer = csv.writer(csvfile) + writer.writerow(['username', + 'name', + 'url_main', + 'url_user', + 'exists', + 'http_status' + ] + ) + for site in results: + writer.writerow([username, + site, + results[site]['url_main'], + results[site]['url_user'], + str(results[site]['status'].status), + results[site]['http_status'], + ]) + + +def genxmindfile(filename, username, results): + print(f'Generating XMIND8 file for username {username}') + if os.path.exists(filename): + os.remove(filename) + workbook = xmind.load(filename) + sheet = workbook.getPrimarySheet() + design_sheet1(sheet, username, results) + xmind.save(workbook, path=filename) + + +def design_sheet1(sheet, username, results): + ##all tag list + alltags = {} + + sheet.setTitle("%s Analysis"%(username)) + root_topic1 = sheet.getRootTopic() + root_topic1.setTitle("%s"%(username)) + + undefinedsection = root_topic1.addSubTopic() + undefinedsection.setTitle("Undefined") + alltags["undefined"] = undefinedsection + + for website_name in results: + dictionary = results[website_name] + + if dictionary.get("status").status == QueryStatus.CLAIMED: + ## firsttime I found that entry + for tag in dictionary.get("status").tags: + if tag.strip() == "": + continue + if tag not in alltags.keys(): + if not is_country_tag(tag): + tagsection = root_topic1.addSubTopic() + tagsection.setTitle(tag) + alltags[tag] = tagsection + + category = None + userlink= None + for tag in dictionary.get("status").tags: + if tag.strip() == "": + continue + if not is_country_tag(tag): + category = tag + + if category is None: + category = "undefined" + userlink = undefinedsection.addSubTopic() + else: + userlink = alltags[category].addSubTopic() + userlink.addLabel(dictionary.get("status").site_url_user) + + #for tag in dictionary.get("status").tags: + # if( tag != category ): + # sheet.createRelationship(userlink.getID(), alltags[tag].getID(),"other tag") diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 46054ee..7153628 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -307,8 +307,9 @@ }, "500px": { "tags": [ - "images", - "in" + "photos", + "in", + "global" ], "errors": { "INTERNAL_SERVER_ERROR": "Site error", @@ -3221,6 +3222,7 @@ "tags": [ "global", "images", + "photos", "us" ], "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", @@ -3979,8 +3981,11 @@ }, "EyeEm": { "tags": [ + "de", "in", - "sd" + "sd", + "global", + "photos" ], "checkType": "message", "absenceStrs": "Not Found (404) | EyeEm", @@ -6551,8 +6556,8 @@ }, "Instagram": { "tags": [ - "social", - "us" + "photos", + "global" ], "errors": { "Login \u2022 Instagram": "Login required" @@ -8018,7 +8023,9 @@ "news", "us" ], - "checkType": "status_code", + "checkType": "message", + "absenceStrs": [":{\"__typename\":\"NotFound\"},\"viewer\""], + "presenseStrs": ["userPostCounts"], "alexaRank": 76, "url": "https://medium.com/@{username}", "urlMain": "https://medium.com/", @@ -9835,9 +9842,9 @@ }, "Picuki": { "tags": [ + "photos", "global", - "jp", - "us" + "instagram" ], "checkType": "message", "absenceStrs": [ @@ -9899,7 +9906,8 @@ }, "Pinterest": { "tags": [ - "social", + "images", + "photos", "us" ], "checkType": "status_code", @@ -10858,6 +10866,7 @@ }, "Reddit": { "tags": [ + "social", "news", "us" ], @@ -13392,6 +13401,7 @@ }, "Tumblr": { "tags": [ + "blogs", "global", "us" ], @@ -13433,11 +13443,14 @@ "us" ], "headers": { - "User-Agent": "Mozilla" + "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", + "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", + "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", + "x-guest-token": "1347256342462009351" }, - "urlProbe": "https://mobile.twitter.com/{username}", + "urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D", "checkType": "message", - "absenceStrs": "Sorry, that page doesn't exist", + "absenceStrs": "Not found", "alexaRank": 55, "url": "https://twitter.com/{username}", "urlMain": "https://www.twitter.com/", @@ -13604,9 +13617,9 @@ }, "VK": { "tags": [ - "global", "ru", - "social" + "social", + "global" ], "checkType": "response_url", "alexaRank": 23, @@ -14107,6 +14120,8 @@ }, "We Heart It": { "tags": [ + "photos", + "us", "in" ], "checkType": "message", diff --git a/maigret/resources/simple_report.tpl b/maigret/resources/simple_report.tpl new file mode 100644 index 0000000..0908808 --- /dev/null +++ b/maigret/resources/simple_report.tpl @@ -0,0 +1,109 @@ + + + + + +{{ username }} -- Maigret username search report + + + +
+
+
+

+ +

+ Generated at {{ generated_at }} +
+
+
+
+
+
+
Supposed personal data
+ {% for k, v in supposed_data.items() %} + + {{ k }}: {{ v }} + + {% endfor %} + {% if countries_tuple_list %} + + Geo: {% for k, v in countries_tuple_list %}{{ k }} ({{ v }}){{ ", " if not loop.last }}{% endfor %} + + {% endif %}{% if interests_tuple_list %} + + Interests: {% for k, v in interests_tuple_list %}{{ k }} ({{ v }}){{ ", " if not loop.last }}{% endfor %} + + {% endif %}{% if first_seen %} + + First seen: {{ first_seen }} + + {% endif %} +
+
+
+
+
+
+
+
+
Brief
+ + {{ brief }} + +
+
+
+
+ {% for u, t, data in results %} + {% for k, v in data.items() %} + {% if v.found and not v.is_similar %} +
+
+
+ Photo +
+

+ {{ k }} +

+ {% if v.status.tags %} +
Tags: {{ v.status.tags | join(', ') }}
+ {% endif %} +

+ {{ v.url_user }} +

+ {% if v.ids_data %} + + + {% for k1, v1 in v.ids_data.items() %} + {% if k1 != 'image' %} + + + + + {% endif %} + {% endfor %} + +
{{ title(k1) }}{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %} +
+ {% endif %} +

+
+
+
+
+ {% endif %} + {% endfor %} + {% endfor %} +
+ + + + \ No newline at end of file diff --git a/maigret/result.py b/maigret/result.py index 80ade5c..bdfade7 100644 --- a/maigret/result.py +++ b/maigret/result.py @@ -34,7 +34,7 @@ class QueryResult(): """ def __init__(self, username, site_name, site_url_user, status, ids_data=None, - query_time=None, context=None, tags=None): + query_time=None, context=None, tags=[]): """Create Query Result Object. Contains information about a specific method of detecting usernames on @@ -72,14 +72,8 @@ class QueryResult(): self.query_time = query_time self.context = context self.ids_data = ids_data + self.tags = tags - self.tags = "" - if (tags is not None): - TAGstring = "".join(['%s,' % tags for tags in tags]) - TAGstring = TAGstring[:-1] - self.tags = TAGstring - - return def __str__(self): """Convert Object To String. diff --git a/maigret/sites.py b/maigret/sites.py index 1498770..edc68e1 100644 --- a/maigret/sites.py +++ b/maigret/sites.py @@ -13,6 +13,7 @@ from .utils import CaseConverter class MaigretEngine: def __init__(self, name, data): self.name = name + self.site = {} self.__dict__.update(data) @property @@ -127,6 +128,15 @@ class MaigretDatabase: def sites_dict(self): return {site.name: site for site in self._sites} + def ranked_sites_dict(self, reverse=False, top=sys.maxsize, tags=[]): + if not tags: + filtered_list = self.sites + else: + filtered_list = [s for s in self.sites if set(s.tags).intersection(set(tags)) or s.engine in tags] + + sorted_list = sorted(filtered_list, key=lambda x: x.alexa_rank, reverse=reverse)[:top] + return {site.name: site for site in sorted_list} + @property def engines(self): return self._engines @@ -145,12 +155,12 @@ class MaigretDatabase: return self def save_to_file(self, filename: str) -> MaigretDatabase: - json_data = { + db_data = { 'sites': {site.name: site.strip_engine_data().json for site in self._sites}, 'engines': {engine.name: engine.json for engine in self._engines}, } - json_data = json.dumps(json_data, indent=4) + json_data = json.dumps(db_data, indent=4) with open(filename, 'w') as f: f.write(json_data) @@ -160,8 +170,8 @@ class MaigretDatabase: def load_from_json(self, json_data: dict) -> MaigretDatabase: # Add all of site information from the json file to internal site list. - site_data = json_data.get("sites") - engines_data = json_data.get("engines") + site_data = json_data.get("sites", {}) + engines_data = json_data.get("engines", {}) for engine_name in engines_data: self._engines.append(MaigretEngine(engine_name, engines_data[engine_name])) @@ -198,7 +208,7 @@ class MaigretDatabase: is_url_valid = url.startswith('http://') or url.startswith('https://') if not is_url_valid: - return False + raise FileNotFoundError(f"Invalid data file URL '{url}'.") try: response = requests.get(url=url) @@ -238,33 +248,3 @@ class MaigretDatabase: ) return self.load_from_json(data) - - - def site_name_list(self, popularity_rank=False): - """Get Site Name List. - - Keyword Arguments: - self -- This object. - popularity_rank -- Boolean indicating if list should be sorted - by popularity rank. - Default value is False. - NOTE: List is sorted in ascending - alphabetical order is popularity rank - is not requested. - - Return Value: - List of strings containing names of sites. - """ - - if popularity_rank: - # Sort in ascending popularity rank order. - site_rank_name = \ - sorted([(site.popularity_rank, site.name) for site in self], - key=operator.itemgetter(0) - ) - site_names = [name for _, name in site_rank_name] - else: - # Sort in ascending alphabetical order. - site_names = sorted([site.name for site in self], key=str.lower) - - return site_names diff --git a/maigret/utils.py b/maigret/utils.py index a9f0d39..851d3db 100644 --- a/maigret/utils.py +++ b/maigret/utils.py @@ -3,16 +3,29 @@ import re class CaseConverter: @staticmethod - def camel_to_snake(camelcased_string: str): + def camel_to_snake(camelcased_string: str) -> str: return re.sub(r'(? str: formatted = ''.join(word.title() for word in snakecased_string.split('_')) result = formatted[0].lower() + formatted[1:] return result + @staticmethod + def snake_to_title(snakecased_string: str) -> str: + words = snakecased_string.split('_') + words[0] = words[0].title() + return ' '.join(words) -def is_country_tag(tag): + +def is_country_tag(tag: str) -> bool: """detect if tag represent a country""" - return bool(re.match("^([a-z]){2}$", tag)) + return bool(re.match("^([a-zA-Z]){2}$", tag)) or tag == 'global' + + +def enrich_link_str(link: str) -> str: + link = link.strip() + if link.startswith('www.') or (link.startswith('http') and '//' in link): + return f'{link}' + return link \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 364188a..060f64b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,9 +8,11 @@ certifi==2020.12.5 chardet==3.0.4 colorama==0.4.4 idna==2.10 +Jinja2==2.11.2 lxml==4.6.2 mock==4.0.2 multidict==5.1.0 +pycountry==20.7.3 PySocks==1.7.1 python-socks==1.1.2 requests==2.25.1 diff --git a/tests/test_report.py b/tests/test_report.py new file mode 100644 index 0000000..80dedac --- /dev/null +++ b/tests/test_report.py @@ -0,0 +1,104 @@ +"""Maigret reports test functions""" +from io import StringIO +import copy +import os + +import xmind + +from maigret.report import save_csv_report_to_file, genxmindfile, save_html_report +from maigret.result import QueryResult, QueryStatus + + +EXAMPLE_RESULTS = { + 'GitHub': { + 'username': 'test', + 'parsing_enabled': True, + 'url_main': 'https://www.github.com/', + 'url_user': 'https://www.github.com/test', + 'status': QueryResult('test', + 'GitHub', + 'https://www.github.com/test', + QueryStatus.CLAIMED, + tags=['test_tag']), + 'http_status': 200, + 'is_similar': False, + 'rank': 78 + } +} + + +GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED) +BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE) + +GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT) +GOOD_500PX_RESULT.tags = ['photo', 'us', 'global'] +GOOD_500PX_RESULT.ids_data = {"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==", "legacy_id": "26403415", "username": "alexaimephotographycars", "name": "Alex Aim\u00e9", "website": "www.flickr.com/photos/alexaimephotography/", "facebook_link": " www.instagram.com/street.reality.photography/", "instagram_username": "alexaimephotography", "twitter_username": "Alexaimephotogr"} + +GOOD_REDDIT_RESULT = copy.deepcopy(GOOD_RESULT) +GOOD_REDDIT_RESULT.tags = ['news', 'us'] +GOOD_REDDIT_RESULT.ids_data = {"reddit_id": "t5_1nytpy", "reddit_username": "alexaimephotography", "fullname": "alexaimephotography", "image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e", "is_employee": "False", "is_nsfw": "False", "is_mod": "True", "is_following": "True", "has_user_profile": "True", "hide_from_robots": "False", "created_at": "2019-07-10 12:20:03", "total_karma": "53959", "post_karma": "52738"} + +GOOD_IG_RESULT = copy.deepcopy(GOOD_RESULT) +GOOD_IG_RESULT.tags = ['photo', 'global'] +GOOD_IG_RESULT.ids_data = {"instagram_username": "alexaimephotography", "fullname": "Alexaimephotography", "id": "6828488620", "image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F", "bio": "Photographer \nChild of fine street arts", "external_url": "https://www.flickr.com/photos/alexaimephotography2020/"} + +GOOD_TWITTER_RESULT = copy.deepcopy(GOOD_RESULT) +GOOD_TWITTER_RESULT.tags = ['social', 'us'] + + +TEST = [('alexaimephotographycars', 'username', {'500px': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotographycars', 'ids_usernames': {'alexaimephotographycars': 'username', 'alexaimephotography': 'username', 'Alexaimephotogr': 'username'}, 'status': GOOD_500PX_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}}), ('alexaimephotography', 'username', {'500px': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotography', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_REDDIT_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotography', 'status': BAD_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_IG_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 29}}), ('Alexaimephotogr', 'username', {'500px': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/Alexaimephotogr', 'status': GOOD_TWITTER_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/Alexaimephotogr', 'status':BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}})] + + +SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts.""" + +SUPPOSED_INTERESTS = "Interests: photo (2), news (1), social (1)" + +SUPPOSED_GEO = "Geo: us (3)" + + +def test_save_csv_report_to_file(): + csvfile = StringIO() + save_csv_report_to_file('test', EXAMPLE_RESULTS, csvfile) + + csvfile.seek(0) + data = csvfile.readlines() + + assert data == [ + 'username,name,url_main,url_user,exists,http_status\r\n', + 'test,GitHub,https://www.github.com/,https://www.github.com/test,Claimed,200\r\n', + ] + + +def test_save_xmind_report(): + filename = 'test_report.xmind' + genxmindfile(filename, 'test', EXAMPLE_RESULTS) + + workbook = xmind.load(filename) + sheet = workbook.getPrimarySheet() + data = sheet.getData() + + assert data['title'] == 'test Analysis' + assert data['topic']['title'] == 'test' + assert len(data['topic']['topics']) == 2 + assert data['topic']['topics'][0]['title'] == 'Undefined' + assert data['topic']['topics'][1]['title'] == 'test_tag' + assert len(data['topic']['topics'][1]['topics']) == 1 + assert data['topic']['topics'][1]['topics'][0]['label'] == 'https://www.github.com/test' + + +def test_html_report(): + report_name = 'report_alexaimephotographycars.html' + try: + os.remove(report_name) + except: + pass + + save_html_report(TEST) + + assert os.path.exists(report_name) + + report_text = open(report_name).read() + + assert SUPPOSED_BRIEF in report_text + assert SUPPOSED_GEO in report_text + assert SUPPOSED_INTERESTS in report_text diff --git a/tests/test_sites.py b/tests/test_sites.py index 13302d9..7a362cc 100644 --- a/tests/test_sites.py +++ b/tests/test_sites.py @@ -1,5 +1,5 @@ """Maigret Database test functions""" -from maigret.sites import MaigretDatabase +from maigret.sites import MaigretDatabase, MaigretSite EXAMPLE_DB = { @@ -99,3 +99,22 @@ def test_saving_site_error(): assert amperka.strip_engine_data().errors == {'error1': 'text1'} assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'} + + +def test_ranked_sites_dict(): + db = MaigretDatabase() + db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'})) + db.update_site(MaigretSite('1', {'alexaRank': 2, 'tags': ['forum']})) + db.update_site(MaigretSite('2', {'alexaRank': 10, 'tags': ['ru', 'forum']})) + + # sorting + assert list(db.ranked_sites_dict().keys()) == ['1', '2', '3'] + assert list(db.ranked_sites_dict(top=2).keys()) == ['1', '2'] + assert list(db.ranked_sites_dict(reverse=True, top=2).keys()) == ['3', '2'] + + # filtering by tags + assert list(db.ranked_sites_dict(tags=['ru'], top=2).keys()) == ['2'] + assert list(db.ranked_sites_dict(tags=['forum']).keys()) == ['1', '2'] + + # filtering by engine + assert list(db.ranked_sites_dict(tags=['ucoz']).keys()) == ['3'] diff --git a/tests/test_utils.py b/tests/test_utils.py index b92b6ea..18b9825 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,5 @@ """Maigret utils test functions""" -from maigret.utils import CaseConverter, is_country_tag +from maigret.utils import CaseConverter, is_country_tag, enrich_link_str def test_case_convert_camel_to_snake(): @@ -14,8 +14,21 @@ def test_case_convert_snake_to_camel(): assert b == 'camelCasedString' +def test_case_convert_snake_to_title(): + a = 'camel_cased_string' + b = CaseConverter.snake_to_title(a) + + assert b == 'Camel cased string' + def test_is_country_tag(): assert is_country_tag('ru') == True + assert is_country_tag('FR') == True assert is_country_tag('a1') == False assert is_country_tag('dating') == False + + assert is_country_tag('global') == True + +def test_enrich_link_str(): + assert enrich_link_str('test') == 'test' + assert enrich_link_str(' www.flickr.com/photos/alexaimephotography/') == 'www.flickr.com/photos/alexaimephotography/'