HTML reports draft, 500 sites scanning by default

2026-05-06 14:08:59 +00:00 · 2021-01-07 23:52:29 +03:00
parent 5c8b65d033
commit e4765d1ed9
11 changed files with 544 additions and 65 deletions
@@ -26,7 +26,7 @@ from socid_extractor import parse, extract
 from .notify import QueryNotifyPrint
 from .result import QueryResult, QueryStatus
 from .sites import MaigretDatabase, MaigretSite
-from .report import save_csv_report, genxmindfile
+from .report import save_csv_report, genxmindfile, save_html_report

 import xmind

@@ -629,6 +629,10 @@ async def main():
                        action="store_true", dest="csv", default=False,
                        help="Create Comma-Separated Values (CSV) File."
                        )
+    parser.add_argument("--html",
+                        action="store_true", dest="html", default=False,
+                        help="Create HTML report file."
+                        )
    parser.add_argument("--site",
                        action="append", metavar='SITE_NAME',
                        dest="site_list", default=None,
@@ -649,6 +653,10 @@ async def main():
                             "A longer timeout will be more likely to get results from slow sites."
                             "On the other hand, this may cause a long delay to gather all results."
                        )
+    # type=int is required: the value is used as a slice bound when the
+    # ranked site list is truncated, and argparse would otherwise hand over
+    # a string for any value given on the command line.
+    parser.add_argument("--top-sites",
+                        action="store", type=int, default=500,
+                        help="Count of sites for checking ranked by Alexa Top (default: 500)."
+                        )
    parser.add_argument("--print-not-found",
                        action="store_true", dest="print_not_found", default=False,
                        help="Print sites where the username was not found."
@@ -757,7 +765,8 @@ async def main():

    # Create object with all information about sites we are aware of.
    try:
-        site_data_all = MaigretDatabase().load_from_file(args.json_file).sites_dict
+        db = MaigretDatabase().load_from_file(args.json_file)
+        site_data_all = db.ranked_sites_dict(top=args.top_sites)
    except Exception as error:
        print(f"ERROR:  {error}")
        sys.exit(1)
@@ -805,6 +814,8 @@ async def main():

    already_checked = set()

+    general_results = []
+
    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]
@@ -834,6 +845,7 @@ async def main():
                                logger=logger,
                                forced=args.use_disabled_sites,
                                )
+        general_results.append((username, id_type, results))

        if args.folderoutput:
            # The usernames results should be stored in a targeted folder.
@@ -870,6 +882,9 @@ async def main():
        if args.csv:
            save_csv_report(username, results)

+    if args.html:
+        save_html_report(general_results)
+

 def run():
    try:
@@ -0,0 +1,215 @@
+import csv
+from datetime import datetime
+import logging
+import os
+import xmind
+
+from jinja2 import Template
+import pycountry
+
+from .result import QueryStatus
+from .utils import is_country_tag, CaseConverter, enrich_link_str
+
+
+def save_csv_report(username: str, results: dict):
+    """Write the results for ``username`` to ``<username>.csv`` in the current directory."""
+    report_path = username + '.csv'
+    # newline='' leaves line-ending handling to the csv writer.
+    with open(report_path, mode='w', newline='', encoding='utf-8') as report_file:
+        save_csv_report_to_file(username, results, report_file)
+
+
+def save_html_report(username_results: list):
+    """Render a single HTML report covering every searched identifier.
+
+    Args:
+        username_results: list of ``(username, id_type, results)`` tuples,
+            ``results`` mapping a site name to that site's result dict.
+            Result dicts are annotated in place with the ``'ids_data'`` and
+            ``'found'`` keys that the template reads.
+
+    The report is written to ``report_<first username>.html`` in the current
+    working directory; nothing is written for an empty ``username_results``.
+    """
+    if not username_results:
+        # No search was performed, so there is no first username to name the file after.
+        return
+
+    brief_text = []
+    usernames = {}
+    extended_info_count = 0
+    tags = {}
+    supposed_data = {}
+    allowed_fields = ['fullname', 'gender']
+    first_seen = None
+    first_seen_time = None
+    first_seen_format = '%Y-%m-%d %H:%M:%S'
+
+    for username, id_type, results in username_results:
+        found_accounts = 0
+        new_ids = []
+        usernames[username] = {'type': id_type}
+
+        for website_name in results:
+            dictionary = results[website_name]
+            # TODO: fix no site data issue
+            if not dictionary:
+                continue
+
+            status = dictionary.get('status')
+            if status is None:
+                # A result without a status object carries nothing to report.
+                continue
+
+            if status.ids_data:
+                dictionary['ids_data'] = status.ids_data
+                extended_info_count += 1
+
+                # detect first seen: keep the earliest parseable creation time
+                created_at = status.ids_data.get('created_at')
+                if created_at:
+                    try:
+                        created_time = datetime.strptime(created_at, first_seen_format)
+                    except (TypeError, ValueError):
+                        # A site-specific date format must not abort the whole report.
+                        logging.debug('unsupported created_at value %r', created_at)
+                    else:
+                        if first_seen_time is None or created_time < first_seen_time:
+                            first_seen, first_seen_time = created_at, created_time
+
+                for k, v in status.ids_data.items():
+                    # suppose target data
+                    field = 'fullname' if k == 'name' else k
+                    supposed_data.setdefault(field, []).append(v)
+                    # suppose country
+                    if k in ['country', 'locale']:
+                        try:
+                            # Test the value, not the key: the key is never a two-letter code.
+                            if is_country_tag(v):
+                                tag = pycountry.countries.get(alpha_2=v.upper()).alpha_2.lower()
+                            else:
+                                tag = pycountry.countries.search_fuzzy(v)[0].alpha_2.lower()
+                            # TODO: move countries to another struct
+                            tags[tag] = tags.get(tag, 0) + 1
+                        except Exception:
+                            # Best effort: an unrecognised country must not abort the report.
+                            logging.debug('pycountry exception', exc_info=True)
+
+            new_usernames = dictionary.get('ids_usernames')
+            if new_usernames:
+                for u, utype in new_usernames.items():
+                    if u not in usernames:
+                        new_ids.append((u, utype))
+                        usernames[u] = {'type': utype}
+
+            if status.status != QueryStatus.CLAIMED:
+                continue
+
+            found_accounts += 1
+            dictionary['found'] = True
+
+            # ignore non-exact search results
+            if not dictionary.get('is_similar') and status.tags:
+                for t in status.tags:
+                    tags[t] = tags.get(t, 0) + 1
+
+        brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')
+
+        if new_ids:
+            ids_list = [f'{u} ({t})' if t != 'username' else u for u, t in new_ids]
+            brief_text.append('Found target\'s other IDs: ' + ', '.join(ids_list) + '.')
+
+    brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
+
+    # template generation
+    template_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                 "resources/simple_report.tpl")
+    with open(template_path, encoding='utf-8') as template_file:
+        template_text = template_file.read()
+    # NOTE(review): Template() does not autoescape, so scraped profile values
+    # reach the HTML unescaped. Switching to an autoescaping jinja2 Environment
+    # would also require detect_link to return markup-safe strings.
+    template = Template(template_text)
+
+    template.globals['title'] = CaseConverter.snake_to_title
+    template.globals['detect_link'] = enrich_link_str
+
+    brief = ' '.join(brief_text).strip()
+
+    def by_count(pairs):
+        # Most frequent tag first.
+        return sorted(pairs, key=lambda pair: pair[1], reverse=True)
+
+    # remove tag 'global' useless for country detection
+    tags.pop('global', None)
+
+    first_username = username_results[0][0]
+    countries_lists = [item for item in tags.items() if is_country_tag(item[0])]
+    interests_list = [item for item in tags.items() if not is_country_tag(item[0])]
+
+    # Only the first value collected for each allowed field is shown.
+    filtered_supposed_data = {CaseConverter.snake_to_title(k): v[0]
+                              for k, v in supposed_data.items()
+                              if k in allowed_fields}
+
+    filled_template = template.render(username=first_username,
+                                      brief=brief,
+                                      results=username_results,
+                                      first_seen=first_seen,
+                                      interests_tuple_list=by_count(interests_list),
+                                      countries_tuple_list=by_count(countries_lists),
+                                      supposed_data=filtered_supposed_data,
+                                      generated_at=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+                                      )
+    # save report; the template declares charset=utf-8, so write it as such
+    html_filename = f'report_{first_username}.html'
+    with open(html_filename, 'w', encoding='utf-8') as f:
+        f.write(filled_template)
+
+def save_csv_report_to_file(username: str, results: dict, csvfile):
+    """Write a header row and one CSV row per checked site to ``csvfile``.
+
+    Args:
+        username: identifier the search was run for; repeated in every row.
+        results: mapping of site name to a result dict providing the
+            ``'url_main'``, ``'url_user'``, ``'status'`` (an object with a
+            ``status`` attribute) and ``'http_status'`` keys.
+        csvfile: writable text file object handed to ``csv.writer``.
+
+    Raises:
+        KeyError: if a result dict lacks one of the keys listed above.
+    """
+    writer = csv.writer(csvfile)
+    writer.writerow(['username',
+                     'name',
+                     'url_main',
+                     'url_user',
+                     'exists',
+                     'http_status'
+                     ]
+                    )
+    for site, site_result in results.items():
+        writer.writerow([username,
+                         site,
+                         site_result['url_main'],
+                         site_result['url_user'],
+                         str(site_result['status'].status),
+                         site_result['http_status'],
+                        ])
+
+
+def genxmindfile(filename, username, results):
+    """Build an XMind workbook for ``username`` and save it at ``filename``.
+
+    An existing file at ``filename`` is deleted before the workbook is loaded.
+    """
+    print(f'Generating XMIND8 file for username {username}')
+    # Presumably removed so that xmind.load() starts from an empty workbook
+    # rather than an earlier report -- confirm against the xmind library.
+    if os.path.exists(filename):
+        os.remove(filename)
+    book = xmind.load(filename)
+    design_sheet1(book.getPrimarySheet(), username, results)
+    xmind.save(book, path=filename)
+
+
+def design_sheet1(sheet, username, results):
+    """Populate ``sheet`` with the claimed accounts of ``username``, grouped by tag.
+
+    Every non-empty, non-country tag of a claimed account gets its own section
+    under the root topic. The account itself is attached to the section of its
+    last such tag, or to the "Undefined" section when it has none.
+    """
+    sheet.setTitle("%s Analysis" % username)
+    root = sheet.getRootTopic()
+    root.setTitle("%s" % username)
+
+    # Catch-all section for accounts that carry no topical tag.
+    fallback_section = root.addSubTopic()
+    fallback_section.setTitle("Undefined")
+    sections = {"undefined": fallback_section}
+
+    for site_result in results.values():
+        status = site_result.get("status")
+        if status.status != QueryStatus.CLAIMED:
+            continue
+
+        # Tags that describe a topic: blank and country tags are ignored.
+        topical_tags = [t for t in status.tags
+                        if t.strip() != "" and not is_country_tag(t)]
+
+        # Create a section the first time a tag is encountered.
+        for t in topical_tags:
+            if t not in sections:
+                section = root.addSubTopic()
+                section.setTitle(t)
+                sections[t] = section
+
+        # The last topical tag decides where the account is filed.
+        if topical_tags:
+            account = sections[topical_tags[-1]].addSubTopic()
+        else:
+            account = fallback_section.addSubTopic()
+        account.addLabel(status.site_url_user)
@@ -307,8 +307,9 @@
        },
        "500px": {
            "tags": [
-                "images",
-                "in"
+                "photos",
+                "in",
+                "global"
            ],
            "errors": {
                "INTERNAL_SERVER_ERROR": "Site error",
@@ -3221,6 +3222,7 @@
            "tags": [
                "global",
                "images",
+                "photos",
                "us"
            ],
            "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
@@ -3979,8 +3981,11 @@
        },
        "EyeEm": {
            "tags": [
+                "de",
                "in",
-                "sd"
+                "sd",
+                "global",
+                "photos"
            ],
            "checkType": "message",
            "absenceStrs": "Not Found (404) | EyeEm",
@@ -6551,8 +6556,8 @@
        },
        "Instagram": {
            "tags": [
-                "social",
-                "us"
+                "photos",
+                "global"
            ],
            "errors": {
                "Login \u2022 Instagram": "Login required"
@@ -8018,7 +8023,9 @@
                "news",
                "us"
            ],
-            "checkType": "status_code",
+            "checkType": "message",
+            "absenceStrs": [":{\"__typename\":\"NotFound\"},\"viewer\""],
+            "presenseStrs": ["userPostCounts"],
            "alexaRank": 76,
            "url": "https://medium.com/@{username}",
            "urlMain": "https://medium.com/",
@@ -9835,9 +9842,9 @@
        },
        "Picuki": {
            "tags": [
+                "photos",
                "global",
-                "jp",
-                "us"
+                "instagram"
            ],
            "checkType": "message",
            "absenceStrs": [
@@ -9899,7 +9906,8 @@
        },
        "Pinterest": {
            "tags": [
-                "social",
+                "images",
+                "photos",
                "us"
            ],
            "checkType": "status_code",
@@ -10858,6 +10866,7 @@
        },
        "Reddit": {
            "tags": [
+                "social",
                "news",
                "us"
            ],
@@ -13392,6 +13401,7 @@
        },
        "Tumblr": {
            "tags": [
+                "blogs",
                "global",
                "us"
            ],
@@ -13433,11 +13443,14 @@
                "us"
            ],
            "headers": {
-                "User-Agent": "Mozilla"
+                "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
+                "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+                "x-guest-token": "1347256342462009351"
            },
-            "urlProbe": "https://mobile.twitter.com/{username}",
+            "urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D",
            "checkType": "message",
-            "absenceStrs": "Sorry, that page doesn't exist",
+            "absenceStrs": "Not found",
            "alexaRank": 55,
            "url": "https://twitter.com/{username}",
            "urlMain": "https://www.twitter.com/",
@@ -13604,9 +13617,9 @@
        },
        "VK": {
            "tags": [
-                "global",
                "ru",
-                "social"
+                "social",
+                "global"
            ],
            "checkType": "response_url",
            "alexaRank": 23,
@@ -14107,6 +14120,8 @@
        },
        "We Heart It": {
            "tags": [
+                "photos",
+                "us",
                "in"
            ],
            "checkType": "message",
@@ -0,0 +1,109 @@
+<html>
+<head>
+    <meta charset="utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no" />
+<title>{{ username }} -- Maigret username search report</title>
+<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
+<style>
+    .table td, .table th {
+        padding: .4rem;
+    }
+    @media print {
+        .pagebreak { page-break-before: always; }
+    }
+</style>
+</head>
+<body>
+    <div class="container">
+        <div class="row-mb">
+            <div class="col-12 card-body" style="padding-bottom: 0.5rem;">
+                <h4 class="mb-0">
+                    <a class="blog-header-logo text-dark" href="#">Username search report for {{ username }}</a>
+                </h4>
+                <small class="text-muted">Generated at {{ generated_at }}</small>
+            </div>
+        </div>
+        <div class="row-mb">
+            <div class="col-md">
+                <div class="card flex-md-row mb-4 box-shadow h-md-250">
+                    <div class="card-body d-flex flex-column align-items-start">
+                        <h5>Supposed personal data</h5>
+                        {% for k, v in supposed_data.items() %}
+                        <span>
+                            {{ k }}: {{ v }}
+                        </span>
+                        {% endfor %}
+                        {% if countries_tuple_list %}
+                        <span>
+                            Geo: {% for k, v in countries_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
+                        </span>
+                        {% endif %}{% if interests_tuple_list %}
+                        <span>
+                            Interests: {% for k, v in interests_tuple_list %}{{ k }} <span class="text-muted">({{ v }})</span>{{ ", " if not loop.last }}{% endfor %}
+                        </span>
+                        {% endif %}{% if first_seen %}
+                        <span>
+                            First seen: {{ first_seen }}
+                        </span>
+                        {% endif %}
+                    </div>
+                </div>
+            </div>
+        </div>
+        <div class="row-mb">
+            <div class="col-md">
+                <div class="card flex-md-row mb-4 box-shadow h-md-250">
+                    <div class="card-body d-flex flex-column align-items-start">
+                        <h5>Brief</h5>
+                        <span>
+                            {{ brief }}  
+                        </span>
+                    </div>
+                </div>
+            </div>
+        </div>
+        {% for u, t, data in results %}
+            {% for k, v in data.items() %}
+                {% if v.found and not v.is_similar %}
+        <div class="row-mb">
+            <div class="col-md">
+                <div class="card flex-md-row mb-4 box-shadow h-md-250">
+                    <img class="card-img-right flex-auto d-none d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
+                    <div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
+                    <h3 class="mb-0" style="padding-top: 1rem;">
+                        <a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
+                    </h3>
+                    {% if v.status.tags %}
+                        <div class="mb-1 text-muted">Tags: {{ v.status.tags | join(', ') }}</div>
+                    {% endif %}
+                    <p class="card-text">
+                        <a href="{{ v.url_user }}" target="_blank">{{ v.url_user }}</a>
+                    </p>
+                    {% if v.ids_data %}
+                    <table class="table table-striped">
+                        <tbody>
+                        {% for k1, v1 in v.ids_data.items() %}
+                            {% if k1 != 'image' %}
+                            <tr>
+                                <th>{{ title(k1) }}</th>
+                                <td>{% if v1 is iterable and (v1 is not string and v1 is not mapping) %}{{ v1 | join(', ') }}{% else %}{{ detect_link(v1) }}{% endif %}
+                                </td>
+                            </tr>
+                            {% endif %}
+                        {% endfor %}
+                        </tbody>
+                    </table>
+                    {% endif %}
+                </div>
+                </div>
+            </div>
+        </div>
+                {% endif %}
+            {% endfor %}
+        {% endfor %}
+    </div>
+    <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
+    <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
+</body>
+</html>
@@ -34,7 +34,7 @@ class QueryResult():
    """

    def __init__(self, username, site_name, site_url_user, status, ids_data=None,
-                 query_time=None, context=None, tags=None):
+                 query_time=None, context=None, tags=[]):
        """Create Query Result Object.

        Contains information about a specific method of detecting usernames on
@@ -72,14 +72,8 @@ class QueryResult():
        self.query_time = query_time
        self.context = context
        self.ids_data = ids_data
+        self.tags = tags

-        self.tags = ""
-        if (tags is not None):
-            TAGstring = "".join(['%s,' % tags for tags in tags])
-            TAGstring = TAGstring[:-1]
-            self.tags = TAGstring
-
-        return

    def __str__(self):
        """Convert Object To String.
@@ -13,6 +13,7 @@ from .utils import CaseConverter
 class MaigretEngine:
    def __init__(self, name, data):
        self.name = name
+        self.site = {}
        self.__dict__.update(data)

    @property
@@ -127,6 +128,15 @@ class MaigretDatabase:
    def sites_dict(self):
        return {site.name: site for site in self._sites}

+    def ranked_sites_dict(self, reverse=False, top=sys.maxsize, tags=None):
+        """Return a ``{site name: site}`` dict of the best-ranked sites.
+
+        Args:
+            reverse: sort by descending instead of ascending ``alexa_rank``.
+            top: maximum number of sites to keep after sorting.
+            tags: optional collection of tags; when given, a site is kept only
+                if it shares at least one tag with it or its ``engine`` is
+                listed in it. ``None`` or an empty collection keeps every site.
+
+        Returns:
+            dict ordered by rank, mapping ``site.name`` to the site object.
+        """
+        if not tags:
+            filtered_list = self.sites
+        else:
+            # Build the lookup set once instead of once per site.
+            wanted_tags = set(tags)
+            filtered_list = [s for s in self.sites
+                             if wanted_tags.intersection(s.tags) or s.engine in tags]
+
+        sorted_list = sorted(filtered_list, key=lambda x: x.alexa_rank, reverse=reverse)[:top]
+        return {site.name: site for site in sorted_list}
+
    @property
    def engines(self):
        return self._engines
@@ -145,12 +155,12 @@ class MaigretDatabase:
        return self

    def save_to_file(self, filename: str) -> MaigretDatabase:
-        json_data = {
+        db_data = {
            'sites': {site.name: site.strip_engine_data().json for site in self._sites},
            'engines': {engine.name: engine.json for engine in self._engines},
        }

-        json_data = json.dumps(json_data, indent=4)
+        json_data = json.dumps(db_data, indent=4)

        with open(filename, 'w') as f:
            f.write(json_data)
@@ -160,8 +170,8 @@ class MaigretDatabase:

    def load_from_json(self, json_data: dict) -> MaigretDatabase:
        # Add all of site information from the json file to internal site list.
-        site_data = json_data.get("sites")
-        engines_data = json_data.get("engines")
+        site_data = json_data.get("sites", {})
+        engines_data = json_data.get("engines", {})

        for engine_name in engines_data:
            self._engines.append(MaigretEngine(engine_name, engines_data[engine_name]))
@@ -198,7 +208,7 @@ class MaigretDatabase:
        is_url_valid = url.startswith('http://') or url.startswith('https://')

        if not is_url_valid:
-            return False
+            raise FileNotFoundError(f"Invalid data file URL '{url}'.")

        try:
            response = requests.get(url=url)
@@ -238,33 +248,3 @@ class MaigretDatabase:
                                    )

        return self.load_from_json(data)
-
-
-    def site_name_list(self, popularity_rank=False):
-        """Get Site Name List.
-
-        Keyword Arguments:
-        self                   -- This object.
-        popularity_rank        -- Boolean indicating if list should be sorted
-                                  by popularity rank.
-                                  Default value is False.
-                                  NOTE:  List is sorted in ascending
-                                         alphabetical order is popularity rank
-                                         is not requested.
-
-        Return Value:
-        List of strings containing names of sites.
-        """
-
-        if popularity_rank:
-            # Sort in ascending popularity rank order.
-            site_rank_name = \
-                sorted([(site.popularity_rank, site.name) for site in self],
-                       key=operator.itemgetter(0)
-                       )
-            site_names = [name for _, name in site_rank_name]
-        else:
-            # Sort in ascending alphabetical order.
-            site_names = sorted([site.name for site in self], key=str.lower)
-
-        return site_names
@@ -3,16 +3,29 @@ import re

 class CaseConverter:
    @staticmethod
-    def camel_to_snake(camelcased_string: str):
+    def camel_to_snake(camelcased_string: str) -> str:
        return re.sub(r'(?<!^)(?=[A-Z])', '_', camelcased_string).lower()

    @staticmethod
-    def snake_to_camel(snakecased_string: str):
+    def snake_to_camel(snakecased_string: str) -> str:
        formatted = ''.join(word.title() for word in snakecased_string.split('_'))
        result = formatted[0].lower() + formatted[1:]
        return result

+    @staticmethod
+    def snake_to_title(snakecased_string: str) -> str:
+        """Turn ``snake_case`` into a spaced title, e.g. ``created_at`` -> ``Created at``.
+
+        Only the first word is title-cased; the remaining words are kept as they are.
+        """
+        head, *tail = snakecased_string.split('_')
+        return ' '.join([head.title(), *tail])

-def is_country_tag(tag):
+
+def is_country_tag(tag: str) -> bool:
    """detect if tag represent a country"""
-    return bool(re.match("^([a-z]){2}$", tag))
+    return bool(re.match("^([a-zA-Z]){2}$", tag)) or tag == 'global'
+
+
+def enrich_link_str(link: str) -> str:
+    """Wrap a URL-looking string in an HTML anchor; return anything else as is.
+
+    A value counts as a link when, after stripping surrounding whitespace, it
+    starts with ``www.`` or starts with ``http`` and contains ``//``.
+
+    Args:
+        link: value to inspect. Non-string values are returned unchanged,
+            because the report template also passes numbers and mappings.
+
+    Returns:
+        The anchor markup for links, otherwise the (stripped) input value.
+    """
+    from html import escape
+
+    if not isinstance(link, str):
+        return link
+
+    link = link.strip()
+    if link.startswith('www.') or (link.startswith('http') and '//' in link):
+        # The value comes from scraped profiles: escape it so quotes or angle
+        # brackets cannot break out of the attribute or the element.
+        # NOTE(review): a bare "www." href is resolved by browsers relative to
+        # the report file -- consider prefixing a scheme.
+        safe_link = escape(link, quote=True)
+        return f'<a class="auto-link" href="{safe_link}">{safe_link}</a>'
+    return link
@@ -8,9 +8,11 @@ certifi==2020.12.5
 chardet==3.0.4
 colorama==0.4.4
 idna==2.10
+Jinja2==2.11.2
 lxml==4.6.2
 mock==4.0.2
 multidict==5.1.0
+pycountry==20.7.3
 PySocks==1.7.1
 python-socks==1.1.2
 requests==2.25.1
@@ -0,0 +1,104 @@
+"""Maigret reports test functions"""
+from io import StringIO
+import copy
+import os
+
+import xmind
+
+from maigret.report import save_csv_report_to_file, genxmindfile, save_html_report
+from maigret.result import QueryResult, QueryStatus
+
+
+# Results for a single site ('GitHub'), keyed by site name; passed to the CSV
+# and XMind report writers in the tests below.
+EXAMPLE_RESULTS = {
+    'GitHub': {
+        'username': 'test',
+        'parsing_enabled': True,
+        'url_main': 'https://www.github.com/',
+        'url_user': 'https://www.github.com/test',
+        'status': QueryResult('test',
+                              'GitHub',
+                              'https://www.github.com/test',
+                              QueryStatus.CLAIMED,
+                              tags=['test_tag']),
+        'http_status': 200,
+        'is_similar': False,
+        'rank': 78
+    }
+}
+
+
+# Bare result templates built with empty strings for the three leading
+# QueryResult arguments: GOOD_RESULT has CLAIMED status, BAD_RESULT has AVAILABLE.
+GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
+BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
+
+GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
+GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
+GOOD_500PX_RESULT.ids_data = {"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==", "legacy_id": "26403415", "username": "alexaimephotographycars", "name": "Alex Aim\u00e9", "website": "www.flickr.com/photos/alexaimephotography/", "facebook_link": " www.instagram.com/street.reality.photography/", "instagram_username": "alexaimephotography", "twitter_username": "Alexaimephotogr"}
+
+GOOD_REDDIT_RESULT = copy.deepcopy(GOOD_RESULT)
+GOOD_REDDIT_RESULT.tags = ['news', 'us']
+GOOD_REDDIT_RESULT.ids_data = {"reddit_id": "t5_1nytpy", "reddit_username": "alexaimephotography", "fullname": "alexaimephotography", "image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e", "is_employee": "False", "is_nsfw": "False", "is_mod": "True", "is_following": "True", "has_user_profile": "True", "hide_from_robots": "False", "created_at": "2019-07-10 12:20:03", "total_karma": "53959", "post_karma": "52738"}
+
+GOOD_IG_RESULT = copy.deepcopy(GOOD_RESULT)
+GOOD_IG_RESULT.tags = ['photo', 'global']
+GOOD_IG_RESULT.ids_data = {"instagram_username": "alexaimephotography", "fullname": "Alexaimephotography", "id": "6828488620", "image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F", "bio": "Photographer \nChild of fine street arts", "external_url": "https://www.flickr.com/photos/alexaimephotography2020/"}
+
+GOOD_TWITTER_RESULT = copy.deepcopy(GOOD_RESULT)
+GOOD_TWITTER_RESULT.tags = ['social', 'us']
+
+
+TEST = [('alexaimephotographycars', 'username', {'500px': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotographycars', 'ids_usernames': {'alexaimephotographycars': 'username', 'alexaimephotography': 'username', 'Alexaimephotogr': 'username'}, 'status': GOOD_500PX_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}}), ('alexaimephotography', 'username', {'500px': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotography', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_REDDIT_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotography', 'status': BAD_RESULT, 'http_status': 400, 
'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_IG_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 29}}), ('Alexaimephotogr', 'username', {'500px': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/Alexaimephotogr', 'status': GOOD_TWITTER_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/Alexaimephotogr', 'status':BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}})]
+
+
+SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
+
+SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
+
+SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
+
+
+def test_save_csv_report_to_file():
+    """Write EXAMPLE_RESULTS as CSV into an in-memory buffer and compare the emitted lines."""
+    expected_lines = [
+        'username,name,url_main,url_user,exists,http_status\r\n',
+        'test,GitHub,https://www.github.com/,https://www.github.com/test,Claimed,200\r\n',
+    ]
+
+    buffer = StringIO()
+    save_csv_report_to_file('test', EXAMPLE_RESULTS, buffer)
+
+    # Rewind so the written content can be read back line by line.
+    buffer.seek(0)
+    assert buffer.readlines() == expected_lines
+
+
+def test_save_xmind_report():
+    """Generate an XMind report from EXAMPLE_RESULTS and check the topic tree read back from it."""
+    report_path = 'test_report.xmind'
+    genxmindfile(report_path, 'test', EXAMPLE_RESULTS)
+
+    primary_sheet = xmind.load(report_path).getPrimarySheet()
+    sheet_data = primary_sheet.getData()
+
+    assert sheet_data['title'] == 'test Analysis'
+
+    root_topic = sheet_data['topic']
+    assert root_topic['title'] == 'test'
+
+    branches = root_topic['topics']
+    assert len(branches) == 2
+    assert branches[0]['title'] == 'Undefined'
+    assert branches[1]['title'] == 'test_tag'
+    assert len(branches[1]['topics']) == 1
+    assert branches[1]['topics'][0]['label'] == 'https://www.github.com/test'
+
+
+def test_html_report():
+    """Call save_html_report on TEST and check the expected report file and its summary fragments."""
+    report_name = 'report_alexaimephotographycars.html'
+    # Drop a stale report from an earlier run; only a missing file is expected
+    # here, so other errors are allowed to surface.
+    try:
+        os.remove(report_name)
+    except FileNotFoundError:
+        pass
+
+    save_html_report(TEST)
+
+    assert os.path.exists(report_name)
+
+    # Read inside a context manager so the handle is closed right after reading.
+    with open(report_name) as report_file:
+        report_text = report_file.read()
+
+    assert SUPPOSED_BRIEF in report_text
+    assert SUPPOSED_GEO in report_text
+    assert SUPPOSED_INTERESTS in report_text
@@ -1,5 +1,5 @@
 """Maigret Database test functions"""
-from maigret.sites import MaigretDatabase
+from maigret.sites import MaigretDatabase, MaigretSite


 EXAMPLE_DB = {
@@ -99,3 +99,22 @@ def test_saving_site_error():

    assert amperka.strip_engine_data().errors == {'error1': 'text1'}
    assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}
+
+
+def test_ranked_sites_dict():
+    """Check the ordering, top-N cut and tag filtering of MaigretDatabase.ranked_sites_dict."""
+    db = MaigretDatabase()
+    site_specs = (
+        ('3', {'alexaRank': 1000, 'engine': 'ucoz'}),
+        ('1', {'alexaRank': 2, 'tags': ['forum']}),
+        ('2', {'alexaRank': 10, 'tags': ['ru', 'forum']}),
+    )
+    for site_name, site_info in site_specs:
+        db.update_site(MaigretSite(site_name, site_info))
+
+    def ranked_names(**options):
+        # Site names in the order ranked_sites_dict returns them.
+        return list(db.ranked_sites_dict(**options).keys())
+
+    # sorting
+    assert ranked_names() == ['1', '2', '3']
+    assert ranked_names(top=2) == ['1', '2']
+    assert ranked_names(reverse=True, top=2) == ['3', '2']
+
+    # filtering by tags
+    assert ranked_names(tags=['ru'], top=2) == ['2']
+    assert ranked_names(tags=['forum']) == ['1', '2']
+
+    # filtering by engine
+    assert ranked_names(tags=['ucoz']) == ['3']
@@ -1,5 +1,5 @@
 """Maigret utils test functions"""
-from maigret.utils import CaseConverter, is_country_tag
+from maigret.utils import CaseConverter, is_country_tag, enrich_link_str


 def test_case_convert_camel_to_snake():
@@ -14,8 +14,21 @@ def test_case_convert_snake_to_camel():

 	assert b == 'camelCasedString'

+def test_case_convert_snake_to_title():
+	"""Only the first word is title-cased; underscores become spaces."""
+	source = 'camel_cased_string'
+
+	assert CaseConverter.snake_to_title(source) == 'Camel cased string'
+
 def test_is_country_tag():
+	"""Two-letter tags in either case and 'global' are accepted; 'a1' and 'dating' are not."""
 	assert is_country_tag('ru') == True
+	assert is_country_tag('FR') == True

 	assert is_country_tag('a1') == False
 	assert is_country_tag('dating') == False
+
+	assert is_country_tag('global') == True
+
+def test_enrich_link_str():
+	"""Plain text passes through; a 'www.' link is stripped and wrapped in an anchor."""
+	flickr = 'www.flickr.com/photos/alexaimephotography/'
+	anchor = f'<a class="auto-link" href="{flickr}">{flickr}</a>'
+
+	assert enrich_link_str('test') == 'test'
+	assert enrich_link_str(' ' + flickr) == anchor