mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
.git/
|
||||
.vscode/
|
||||
screenshot/
|
||||
tests/
|
||||
*.txt
|
||||
!/requirements.txt
|
||||
venv/
|
||||
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
# Virtual Environment
|
||||
venv/
|
||||
|
||||
# Editor Configurations
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
|
||||
# Pip
|
||||
src/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
*.ipynb
|
||||
|
||||
# Output files, except requirements.txt
|
||||
*.txt
|
||||
!requirements.txt
|
||||
|
||||
# Comma-Separated Values (CSV) Reports
|
||||
*.csv
|
||||
|
||||
# Excluded sites list
|
||||
tests/.excluded_sites
|
||||
|
||||
# MacOS Folder Metadata File
|
||||
.DS_Store
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
FROM python:3.7-alpine as build
|
||||
WORKDIR /wheels
|
||||
RUN apk add --no-cache \
|
||||
g++ \
|
||||
gcc \
|
||||
git \
|
||||
libxml2 \
|
||||
libxml2-dev \
|
||||
libxslt-dev \
|
||||
linux-headers
|
||||
COPY requirements.txt /opt/maigret/
|
||||
RUN pip3 wheel -r /opt/maigret/requirements.txt
|
||||
|
||||
|
||||
FROM python:3.7-alpine
|
||||
WORKDIR /opt/maigret
|
||||
ARG VCS_REF
|
||||
ARG VCS_URL="https://github.com/soxoj/maigret"
|
||||
LABEL org.label-schema.vcs-ref=$VCS_REF \
|
||||
org.label-schema.vcs-url=$VCS_URL
|
||||
COPY --from=build /wheels /wheels
|
||||
COPY . /opt/maigret/
|
||||
RUN pip3 install -r requirements.txt -f /wheels \
|
||||
&& rm -rf /wheels \
|
||||
&& rm -rf /root/.cache/pip/*
|
||||
|
||||
ENTRYPOINT ["python", "maigret.py"]
|
||||
@@ -0,0 +1,45 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Soxoj
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Sherlock Project
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,54 @@
|
||||
# Maigret
|
||||
|
||||
<p align="center">
|
||||
<img src="static/maigret.png" />
|
||||
</p>
|
||||
|
||||
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
||||
|
||||
## About
|
||||
|
||||
The purpose of Maigret is to **collect a dossier on a person by username only**, checking for accounts on a huge number of sites.
|
||||
|
||||
This is a [sherlock](https://github.com/sherlock-project/) fork with cool features under heavy development.
|
||||
*Don't forget to regularly update source code from repo*.
|
||||
|
||||
More than 1300 sites are currently supported ([full list](/sites.md)).
|
||||
|
||||
## Main features
|
||||
|
||||
* Profile pages parsing, [extracting](https://github.com/soxoj/socid_extractor) personal info, links to other profiles, etc.
|
||||
* Recursive search by new usernames found
|
||||
* Search by tags (site categories, countries)
|
||||
* Censorship and captcha detection
|
||||
* Very few false positives
|
||||
|
||||
## Installation
|
||||
|
||||
**NOTE**: Python 3.7 or higher and pip are required.
|
||||
|
||||
**Python 3.8 is recommended.**
|
||||
|
||||
```bash
|
||||
# clone the repo and change directory
|
||||
$ git clone https://github.com/soxoj/maigret && cd maigret
|
||||
|
||||
# install the requirements
|
||||
$ python3 -m pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Demo with page parsing and recursive username search
|
||||
|
||||
```bash
|
||||
python3 maigret alexaimephotographycars
|
||||
```
|
||||
|
||||

|
||||
|
||||
[Full output](./static/recursive_search.md)
|
||||
|
||||
## License
|
||||
|
||||
MIT © [Maigret](https://github.com/soxoj/maigret)<br/>
|
||||
MIT © [Sherlock Project](https://github.com/sherlock-project/)<br/>
|
||||
Original Creator of Sherlock Project - [Siddharth Dushantha](https://github.com/sdushantha)
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Sherlock Module
|
||||
|
||||
This module contains the main logic to search for usernames at social
|
||||
networks.
|
||||
"""
|
||||
@@ -0,0 +1,15 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
"""
|
||||
Maigret (Sherlock fork): Find Usernames Across Social Networks Module
|
||||
|
||||
This module contains the main logic to search for usernames at social
|
||||
networks.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import maigret
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(maigret.main())
|
||||
Executable
+867
@@ -0,0 +1,867 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
"""
|
||||
Maigret main module
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import csv
|
||||
import http.cookiejar as cookielib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
from http.cookies import SimpleCookie
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
from mock import Mock
|
||||
from notify import QueryNotifyPrint
|
||||
from result import QueryResult, QueryStatus
|
||||
from sites import SitesInformation
|
||||
from socid_extractor import parse, extract
|
||||
|
||||
module_name = "Maigret OSINT tool"
|
||||
__version__ = "0.1.0"
|
||||
|
||||
supported_recursive_search_ids = (
|
||||
'yandex_public_id',
|
||||
'gaia_id',
|
||||
'vk_id',
|
||||
'ok_id',
|
||||
'wikimapia_uid',
|
||||
)
|
||||
|
||||
common_errors = {
|
||||
'<title>Attention Required! | Cloudflare</title>': 'Cloudflare captcha',
|
||||
'<title>Доступ ограничен</title>': 'Rostelecom censorship',
|
||||
'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha',
|
||||
'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection',
|
||||
'404</h1><p class="error-card__description">Мы не нашли страницу': 'MegaFon 404 page',
|
||||
}
|
||||
|
||||
unsupported_characters = '#'
|
||||
|
||||
cookies_file = 'cookies.txt'
|
||||
|
||||
|
||||
async def get_response(request_future, error_type, social_network, logger):
|
||||
html_text = None
|
||||
status_code = 0
|
||||
|
||||
error_text = "General Unknown Error"
|
||||
expection_text = None
|
||||
|
||||
try:
|
||||
response = await request_future
|
||||
|
||||
status_code = response.status
|
||||
response_content = await response.content.read()
|
||||
charset = response.charset or 'utf-8'
|
||||
decoded_content = response_content.decode(charset, 'ignore')
|
||||
html_text = decoded_content
|
||||
|
||||
if status_code > 0:
|
||||
error_text = None
|
||||
|
||||
logger.debug(html_text)
|
||||
|
||||
except asyncio.TimeoutError as errt:
|
||||
error_text = "Timeout Error"
|
||||
expection_text = str(errt)
|
||||
except (ssl.SSLCertVerificationError, ssl.SSLError) as err:
|
||||
error_text = "SSL Error"
|
||||
expection_text = str(err)
|
||||
except aiohttp.client_exceptions.ClientConnectorError as err:
|
||||
error_text = "Error Connecting"
|
||||
expection_text = str(err)
|
||||
except aiohttp.http_exceptions.BadHttpMessage as err:
|
||||
error_text = "HTTP Error"
|
||||
expection_text = str(err)
|
||||
except Exception as err:
|
||||
logger.warning(f'Unhandled error while requesting {social_network}: {err}')
|
||||
logger.debug(err, exc_info=True)
|
||||
error_text = "Some Error"
|
||||
expection_text = str(err)
|
||||
|
||||
# TODO: return only needed information
|
||||
return html_text, status_code, error_text, expection_text
|
||||
|
||||
|
||||
async def update_site_data_from_response(site, site_data, site_info, semaphore, logger):
|
||||
async with semaphore:
|
||||
future = site_info.get('request_future')
|
||||
if not future:
|
||||
# ignore: search by incompatible id type
|
||||
return
|
||||
|
||||
error_type = site_info['errorType']
|
||||
site_data[site]['resp'] = await get_response(request_future=future,
|
||||
error_type=error_type,
|
||||
social_network=site,
|
||||
logger=logger)
|
||||
|
||||
|
||||
# TODO: move info separate module
|
||||
def detect_error_page(html_text, status_code, fail_flags, ignore_403):
|
||||
# Detect service restrictions such as a country restriction
|
||||
for flag, msg in fail_flags.items():
|
||||
if flag in html_text:
|
||||
return 'Some site error', msg
|
||||
|
||||
# Detect common restrictions such as provider censorship and bot protection
|
||||
for flag, msg in common_errors.items():
|
||||
if flag in html_text:
|
||||
return 'Error', msg
|
||||
|
||||
# Detect common site errors
|
||||
if status_code == 403 and not ignore_403:
|
||||
return 'Access denied', 'Access denied, use proxy/vpn'
|
||||
elif status_code >= 500:
|
||||
return f'Error {status_code}', f'Site error {status_code}'
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
async def maigret(username, site_data, query_notify, logger,
|
||||
proxy=None, timeout=None, recursive_search=False,
|
||||
id_type='username', tags=None, debug=False, forced=False,
|
||||
max_connections=100):
|
||||
"""Main search func
|
||||
|
||||
Checks for existence of username on various social media sites.
|
||||
|
||||
Keyword Arguments:
|
||||
username -- String indicating username that report
|
||||
should be created against.
|
||||
site_data -- Dictionary containing all of the site data.
|
||||
query_notify -- Object with base type of QueryNotify().
|
||||
This will be used to notify the caller about
|
||||
query results.
|
||||
proxy -- String indicating the proxy URL
|
||||
timeout -- Time in seconds to wait before timing out request.
|
||||
Default is no timeout.
|
||||
recursive_search -- Search for other usernames in website pages & recursive search by them.
|
||||
|
||||
Return Value:
|
||||
Dictionary containing results from report. Key of dictionary is the name
|
||||
of the social network site, and the value is another dictionary with
|
||||
the following keys:
|
||||
url_main: URL of main site.
|
||||
url_user: URL of user on site (if account exists).
|
||||
status: QueryResult() object indicating results of test for
|
||||
account existence.
|
||||
http_status: HTTP status code of query which checked for existence on
|
||||
site.
|
||||
response_text: Text that came back from request. May be None if
|
||||
there was an HTTP error when checking for existence.
|
||||
"""
|
||||
|
||||
# Notify caller that we are starting the query.
|
||||
if tags is None:
|
||||
tags = set()
|
||||
query_notify.start(username, id_type)
|
||||
|
||||
# TODO: connector
|
||||
connector = aiohttp.TCPConnector(ssl=False)
|
||||
session = aiohttp.ClientSession(connector=connector)
|
||||
|
||||
# Results from analysis of all sites
|
||||
results_total = {}
|
||||
|
||||
# First create futures for all requests. This allows for the requests to run in parallel
|
||||
for social_network, net_info in site_data.items():
|
||||
if net_info.get('type', 'username') != id_type:
|
||||
continue
|
||||
|
||||
site_tags = set(net_info.get('tags', []))
|
||||
if tags:
|
||||
if not set(tags).intersection(site_tags):
|
||||
continue
|
||||
|
||||
if 'disabled' in net_info and net_info['disabled'] and not forced:
|
||||
continue
|
||||
|
||||
# Results from analysis of this specific site
|
||||
results_site = {}
|
||||
|
||||
# Record URL of main site
|
||||
results_site['url_main'] = net_info.get("urlMain")
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
|
||||
}
|
||||
|
||||
if "headers" in net_info:
|
||||
# Override/append any extra headers required by a given site.
|
||||
headers.update(net_info["headers"])
|
||||
|
||||
# URL of user on site (if it exists)
|
||||
url = net_info.get('url').format(username)
|
||||
|
||||
# Don't make request if username is invalid for the site
|
||||
regex_check = net_info.get("regexCheck")
|
||||
if regex_check and re.search(regex_check, username) is None:
|
||||
# No need to do the check at the site: this user name is not allowed.
|
||||
results_site['status'] = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.ILLEGAL)
|
||||
results_site["url_user"] = ""
|
||||
results_site['http_status'] = ""
|
||||
results_site['response_text'] = ""
|
||||
query_notify.update(results_site['status'])
|
||||
else:
|
||||
# URL of user on site (if it exists)
|
||||
results_site["url_user"] = url
|
||||
url_probe = net_info.get("urlProbe")
|
||||
if url_probe is None:
|
||||
# Probe URL is normal one seen by people out on the web.
|
||||
url_probe = url
|
||||
else:
|
||||
# There is a special URL for probing existence separate
|
||||
# from where the user profile normally can be found.
|
||||
url_probe = url_probe.format(username)
|
||||
|
||||
if net_info["errorType"] == 'status_code' and net_info.get("request_head_only", True):
|
||||
# In most cases when we are detecting by status code,
|
||||
# it is not necessary to get the entire body: we can
|
||||
# detect fine with just the HEAD response.
|
||||
request_method = session.head
|
||||
else:
|
||||
# Either this detect method needs the content associated
|
||||
# with the GET response, or this specific website will
|
||||
# not respond properly unless we request the whole page.
|
||||
request_method = session.get
|
||||
|
||||
if net_info["errorType"] == "response_url":
|
||||
# Site forwards request to a different URL if username not
|
||||
# found. Disallow the redirect so we can capture the
|
||||
# http status from the original URL request.
|
||||
allow_redirects = False
|
||||
else:
|
||||
# Allow whatever redirect that the site wants to do.
|
||||
# The final result of the request will be what is available.
|
||||
allow_redirects = True
|
||||
|
||||
# TODO: cookies using
|
||||
def parse_cookies(cookies_str):
|
||||
cookies = SimpleCookie()
|
||||
cookies.load(cookies_str)
|
||||
return {key: morsel.value for key, morsel in cookies.items()}
|
||||
|
||||
if os.path.exists(cookies_file):
|
||||
cookies_obj = cookielib.MozillaCookieJar(cookies_file)
|
||||
cookies_obj.load(ignore_discard=True, ignore_expires=True)
|
||||
else:
|
||||
cookies_obj = []
|
||||
|
||||
# This future starts running the request in a new thread, doesn't block the main thread
|
||||
if proxy is not None:
|
||||
proxies = {"http": proxy, "https": proxy}
|
||||
future = request_method(url=url_probe, headers=headers,
|
||||
proxies=proxies,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
)
|
||||
else:
|
||||
future = request_method(url=url_probe, headers=headers,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
# Store future in data for access later
|
||||
net_info["request_future"] = future
|
||||
|
||||
# Add this site's results into final dictionary with all of the other results.
|
||||
results_total[social_network] = results_site
|
||||
|
||||
# TODO: move into top-level function
|
||||
|
||||
sem = asyncio.Semaphore(max_connections)
|
||||
|
||||
tasks = []
|
||||
for social_network, net_info in site_data.items():
|
||||
future = asyncio.ensure_future(update_site_data_from_response(social_network, site_data, net_info, sem, logger))
|
||||
tasks.append(future)
|
||||
await asyncio.gather(*tasks)
|
||||
await session.close()
|
||||
|
||||
# TODO: split to separate functions
|
||||
for social_network, net_info in site_data.items():
|
||||
|
||||
# Retrieve results again
|
||||
results_site = results_total.get(social_network)
|
||||
if not results_site:
|
||||
continue
|
||||
|
||||
# Retrieve other site information again
|
||||
url = results_site.get("url_user")
|
||||
logger.debug(url)
|
||||
|
||||
status = results_site.get("status")
|
||||
if status is not None:
|
||||
# We have already determined the user doesn't exist here
|
||||
continue
|
||||
|
||||
# Get the expected error type
|
||||
error_type = net_info["errorType"]
|
||||
|
||||
# Get the failure messages and comments
|
||||
failure_errors = net_info.get("errors", {})
|
||||
|
||||
# TODO: refactor
|
||||
resp = net_info.get('resp')
|
||||
if not resp:
|
||||
logger.error(f'No response for {social_network}')
|
||||
continue
|
||||
|
||||
html_text, status_code, error_text, expection_text = resp
|
||||
|
||||
# TODO: add elapsed request time counting
|
||||
response_time = None
|
||||
|
||||
if debug:
|
||||
with open('debug.txt', 'a') as f:
|
||||
status = status_code or 'No response'
|
||||
f.write(f'url: {url}\nerror: {str(error_text)}\nr: {status}\n')
|
||||
if html_text:
|
||||
f.write(f'code: {status}\nresponse: {str(html_text)}\n')
|
||||
|
||||
if status_code and not error_text:
|
||||
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
|
||||
'ignore_403' in net_info)
|
||||
|
||||
# presense flags
|
||||
# True by default
|
||||
presense_flags = net_info.get("presenseStrs", [])
|
||||
is_presense_detected = html_text and all(
|
||||
[(presense_flag in html_text) for presense_flag in presense_flags]) or not presense_flags
|
||||
|
||||
if error_text is not None:
|
||||
logger.debug(error_text)
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.UNKNOWN,
|
||||
query_time=response_time,
|
||||
context=error_text)
|
||||
elif error_type == "message":
|
||||
absence_flags = net_info.get("errorMsg")
|
||||
is_absence_flags_list = isinstance(absence_flags, list)
|
||||
absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
|
||||
# Checks if the error message is in the HTML
|
||||
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
|
||||
if not is_absence_detected and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time)
|
||||
elif error_type == "status_code":
|
||||
# Checks if the status code of the response is 2XX
|
||||
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time)
|
||||
elif error_type == "response_url":
|
||||
# For this detection method, we have turned off the redirect.
|
||||
# So, there is no need to check the response URL: it will always
|
||||
# match the request. Instead, we will ensure that the response
|
||||
# code indicates that the request was successful (i.e. no 404, or
|
||||
# forward to some odd redirect).
|
||||
if 200 <= status_code < 300 and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time)
|
||||
else:
|
||||
# It should be impossible to ever get here...
|
||||
raise ValueError(f"Unknown Error Type '{error_type}' for "
|
||||
f"site '{social_network}'")
|
||||
|
||||
extracted_ids_data = {}
|
||||
|
||||
if recursive_search and result.status == QueryStatus.CLAIMED:
|
||||
try:
|
||||
extracted_ids_data = extract(html_text)
|
||||
except Exception as e:
|
||||
logger.warning(f'Error while parsing {social_network}: {e}', exc_info=True)
|
||||
|
||||
if extracted_ids_data:
|
||||
new_usernames = {}
|
||||
for k, v in extracted_ids_data.items():
|
||||
if 'username' in k:
|
||||
new_usernames[v] = 'username'
|
||||
if k in supported_recursive_search_ids:
|
||||
new_usernames[v] = k
|
||||
|
||||
results_site['ids_usernames'] = new_usernames
|
||||
result.ids_data = extracted_ids_data
|
||||
|
||||
is_similar = net_info.get('similarSearch', False)
|
||||
# Notify caller about results of query.
|
||||
query_notify.update(result, is_similar)
|
||||
|
||||
# Save status of request
|
||||
results_site['status'] = result
|
||||
|
||||
# Save results from request
|
||||
results_site['http_status'] = status_code
|
||||
results_site['is_similar'] = is_similar
|
||||
# results_site['response_text'] = html_text
|
||||
results_site['rank'] = net_info.get('rank', 0)
|
||||
|
||||
# Add this site's results into final dictionary with all of the other results.
|
||||
results_total[social_network] = results_site
|
||||
|
||||
# Notify caller that all queries are finished.
|
||||
query_notify.finish()
|
||||
|
||||
return results_total
|
||||
|
||||
|
||||
def timeout_check(value):
|
||||
"""Check Timeout Argument.
|
||||
|
||||
Checks timeout for validity.
|
||||
|
||||
Keyword Arguments:
|
||||
value -- Time in seconds to wait before timing out request.
|
||||
|
||||
Return Value:
|
||||
Floating point number representing the time (in seconds) that should be
|
||||
used for the timeout.
|
||||
|
||||
NOTE: Will raise an exception if the timeout in invalid.
|
||||
"""
|
||||
from argparse import ArgumentTypeError
|
||||
|
||||
try:
|
||||
timeout = float(value)
|
||||
except ValueError:
|
||||
raise ArgumentTypeError(f"Timeout '{value}' must be a number.")
|
||||
if timeout <= 0:
|
||||
raise ArgumentTypeError(f"Timeout '{value}' must be greater than 0.0s.")
|
||||
return timeout
|
||||
|
||||
|
||||
async def site_self_check(site_name, site_data, logger):
|
||||
query_notify = Mock()
|
||||
changes = {
|
||||
'disabled': False,
|
||||
}
|
||||
|
||||
check_data = [
|
||||
(site_data['username_claimed'], QueryStatus.CLAIMED),
|
||||
(site_data['username_unclaimed'], QueryStatus.AVAILABLE),
|
||||
]
|
||||
|
||||
logger.info(f'Checking {site_name}...')
|
||||
|
||||
for username, status in check_data:
|
||||
results = await maigret(
|
||||
username,
|
||||
{site_name: site_data},
|
||||
query_notify,
|
||||
logger,
|
||||
timeout=30,
|
||||
forced=True,
|
||||
)
|
||||
# don't disable entries with other ids types
|
||||
if site_name not in results:
|
||||
logger.info(results)
|
||||
changes['disabled'] = True
|
||||
continue
|
||||
site_status = results[site_name]['status'].status
|
||||
if site_status != status:
|
||||
if site_status == QueryStatus.UNKNOWN:
|
||||
msg = site_data.get('errorMsg')
|
||||
etype = site_data.get('errorType')
|
||||
logger.info(f'Error while searching {username} in {site_name}: {msg}, type {etype}')
|
||||
# don't disable in case of available username
|
||||
if status == QueryStatus.CLAIMED:
|
||||
changes['disabled'] = True
|
||||
elif status == QueryStatus.CLAIMED:
|
||||
logger.info(f'Not found `{username}` in {site_name}, must be claimed')
|
||||
changes['disabled'] = True
|
||||
else:
|
||||
logger.info(f'Found `{username}` in {site_name}, must be available')
|
||||
changes['disabled'] = True
|
||||
|
||||
logger.info(f'Site {site_name} is okay')
|
||||
return changes
|
||||
|
||||
|
||||
async def self_check(json_file, logger):
|
||||
sites = SitesInformation(json_file)
|
||||
all_sites = {}
|
||||
|
||||
def disabled_count(data):
|
||||
return len(list(filter(lambda x: x.get('disabled', False), data)))
|
||||
|
||||
async def update_site_data(site_name, site_data, all_sites, logger):
|
||||
updates = await site_self_check(site_name, dict(site_data), logger)
|
||||
all_sites[site_name].update(updates)
|
||||
|
||||
for site in sites:
|
||||
all_sites[site.name] = site.information
|
||||
|
||||
disabled_old_count = disabled_count(all_sites.values())
|
||||
|
||||
tasks = []
|
||||
for site_name, site_data in all_sites.items():
|
||||
future = asyncio.ensure_future(update_site_data(site_name, site_data, all_sites, logger))
|
||||
tasks.append(future)
|
||||
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
disabled_new_count = disabled_count(all_sites.values())
|
||||
total_disabled = disabled_new_count - disabled_old_count
|
||||
if total_disabled > 0:
|
||||
message = 'Disabled'
|
||||
else:
|
||||
message = 'Enabled'
|
||||
total_disabled *= -1
|
||||
print(f'{message} {total_disabled} checked sites. Run with `--info` flag to get more information')
|
||||
|
||||
with open(json_file, 'w') as f:
|
||||
json.dump(all_sites, f, indent=4)
|
||||
|
||||
|
||||
async def main():
    """Command-line entry point.

    Parses arguments, configures logging, builds the site database
    (optionally self-checking it), then runs maigret() for every requested
    username — including usernames discovered recursively from result
    pages — and writes plain-text and optional CSV reports.
    """
    version_string = f"%(prog)s {__version__}\n" + \
                     f"{requests.__description__}: {requests.__version__}\n" + \
                     f"Python: {platform.python_version()}"

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=f"{module_name} (Version {__version__})"
                            )
    parser.add_argument("--version",
                        action="version", version=version_string,
                        help="Display version information and dependencies."
                        )
    parser.add_argument("--info",
                        action="store_true", dest="info", default=False,
                        help="Display service information."
                        )
    parser.add_argument("--verbose", "-v",
                        action="store_true", dest="verbose", default=False,
                        help="Display extra information and metrics."
                        )
    parser.add_argument("-d", "--debug",
                        action="store_true", dest="debug", default=False,
                        help="Saving debugging information and sites responses in debug.txt."
                        )
    parser.add_argument("--rank", "-r",
                        action="store_true", dest="rank", default=False,
                        help="Present websites ordered by their Alexa.com global rank in popularity.")
    parser.add_argument("--folderoutput", "-fo", dest="folderoutput",
                        help="If using multiple usernames, the output of the results will be saved to this folder."
                        )
    parser.add_argument("--output", "-o", dest="output",
                        help="If using single username, the output of the result will be saved to this file."
                        )
    parser.add_argument("--csv",
                        action="store_true", dest="csv", default=False,
                        help="Create Comma-Separated Values (CSV) File."
                        )
    parser.add_argument("--site",
                        action="append", metavar='SITE_NAME',
                        dest="site_list", default=None,
                        help="Limit analysis to just the listed sites (use several times to specify more than one)"
                        )
    parser.add_argument("--proxy", "-p", metavar='PROXY_URL',
                        action="store", dest="proxy", default=None,
                        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
                        )
    parser.add_argument("--json", "-j", metavar="JSON_FILE",
                        dest="json_file", default=None,
                        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument("--timeout",
                        action="store", metavar='TIMEOUT',
                        dest="timeout", type=timeout_check, default=10,
                        help="Time (in seconds) to wait for response to requests."
                             "Default timeout of 10.0s."
                             "A longer timeout will be more likely to get results from slow sites."
                             "On the other hand, this may cause a long delay to gather all results."
                        )
    parser.add_argument("--print-not-found",
                        action="store_true", dest="print_not_found", default=False,
                        help="Print sites where the username was not found."
                        )
    parser.add_argument("--print-errors",
                        action="store_true", dest="print_check_errors", default=False,
                        help="Print errors messages: connection, captcha, site country ban, etc."
                        )
    parser.add_argument("--no-color",
                        action="store_true", dest="no_color", default=False,
                        help="Don't color terminal output"
                        )
    # NOTE(review): "bowser" typo in the user-facing help string below.
    parser.add_argument("--browse", "-b",
                        action="store_true", dest="browse", default=False,
                        help="Browse to all results on default bowser."
                        )
    parser.add_argument("--no-recursion",
                        action="store_true", dest="disable_recursive_search", default=False,
                        help="Disable parsing pages for other usernames and recursive search by them."
                        )
    parser.add_argument("--self-check",
                        action="store_true", default=False,
                        help="Do self check for sites and database and disable non-working ones."
                        )
    parser.add_argument("--use-disabled-sites",
                        action="store_true", default=False,
                        help="Use disabled sites to search (may cause many false positives)."
                        )
    parser.add_argument("--parse",
                        dest="parse_url", default='',
                        help="Parse page by URL and extract username and IDs to use for search."
                        )
    parser.add_argument("username",
                        nargs='+', metavar='USERNAMES',
                        action="store",
                        help="One or more usernames to check with social networks."
                        )
    parser.add_argument("--tags",
                        dest="tags", default='',
                        help="Specify tags of sites."
                        )
    args = parser.parse_args()

    # Logging
    log_level = logging.ERROR
    logging.basicConfig(
        format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=logging.ERROR
    )

    # Verbosity flags escalate the level: --debug > --info > --verbose.
    if args.debug:
        log_level = logging.DEBUG
    elif args.info:
        log_level = logging.INFO
    elif args.verbose:
        log_level = logging.WARNING

    logger = logging.getLogger('maigret')
    logger.setLevel(log_level)

    # Usernames initial list: maps username -> id type; '-' placeholders
    # are dropped.
    usernames = {
        u: 'username'
        for u in args.username
        if u not in ['-']
    }

    recursive_search_enabled = not args.disable_recursive_search

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    # Seed the search with usernames/ids extracted from a given page.
    if args.parse_url:
        page, _ = parse(args.parse_url, cookies_str='')
        info = extract(page)
        text = 'Extracted ID data from webpage: ' + ', '.join([f'{a}: {b}' for a, b in info.items()])
        print(text)
        for k, v in info.items():
            if 'username' in k:
                usernames[v] = 'username'
            if k in supported_recursive_search_ids:
                usernames[v] = k

    if args.tags:
        args.tags = set(str(args.tags).split(','))

    # Default to the bundled database when no --json file is given.
    if args.json_file is None:
        args.json_file = \
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         "resources/data.json"
                         )

    # Database self-checking
    if args.self_check:
        print('Maigret sites database self-checking...')
        await self_check(args.json_file, logger)

    # Create object with all information about sites we are aware of.
    try:
        sites = SitesInformation(args.json_file)
    except Exception as error:
        print(f"ERROR: {error}")
        sys.exit(1)

    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {}
    for site in sites:
        site_data_all[site.name] = site.information

    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.

        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
            # NOTE(review): this tests whether *any* site has matched so far,
            # not whether *this* name matched — once one requested site is
            # found, later unknown names are silently accepted. Verify intent.
            if not site_data:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(
                f"Error: Desired sites not found: {', '.join(site_missing)}.")
            sys.exit(1)

    if args.rank:
        # Sort data by rank
        site_dataCpy = dict(site_data)
        ranked_sites = sorted(site_data, key=lambda k: ("rank" not in k, site_data[k].get("rank", sys.maxsize)))
        site_data = {}
        for site in ranked_sites:
            site_data[site] = site_dataCpy.get(site)

    # Database consistency
    enabled_count = len(list(filter(lambda x: not x.get('disabled', False), site_data.values())))
    print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(result=None,
                                    verbose=args.verbose,
                                    print_found_only=not args.print_not_found,
                                    skip_check_errors=not args.print_check_errors,
                                    color=not args.no_color)

    already_checked = set()

    # Process the username queue; recursive search may append new entries
    # while we iterate, so pop one item per pass.
    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]

        if username.lower() in already_checked:
            continue
        else:
            already_checked.add(username.lower())

        # check for characters do not supported by sites generally
        found_unsupported_chars = set(unsupported_characters).intersection(set(username))

        if found_unsupported_chars:
            pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
            print(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
            continue

        # NOTE(review): debug=args.verbose passes the --verbose flag, not
        # --debug — confirm this is intentional.
        results = await maigret(username,
                                site_data,
                                query_notify,
                                proxy=args.proxy,
                                timeout=args.timeout,
                                recursive_search=recursive_search_enabled,
                                id_type=id_type,
                                tags=args.tags,
                                debug=args.verbose,
                                logger=logger,
                                forced=args.use_disabled_sites,
                                )

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The usernames results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        with open(result_file, "w", encoding="utf-8") as file:
            exists_counter = 0
            for website_name in results:
                dictionary = results[website_name]

                # Feed usernames discovered on result pages back into the
                # search queue for recursive checking.
                new_usernames = dictionary.get('ids_usernames')
                if new_usernames:
                    for u, utype in new_usernames.items():
                        usernames[u] = utype

                if dictionary.get("status").status == QueryStatus.CLAIMED:
                    exists_counter += 1
                    file.write(dictionary["url_user"] + "\n")
            file.write(f"Total Websites Username Detected On : {exists_counter}")

        if args.csv:
            with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow(['username',
                                 'name',
                                 'url_main',
                                 'url_user',
                                 'exists',
                                 'http_status',
                                 'response_time_s'
                                 ]
                                )
                for site in results:
                    response_time_s = results[site]['status'].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow([username,
                                     site,
                                     results[site]['url_main'],
                                     results[site]['url_user'],
                                     str(results[site]['status'].status),
                                     results[site]['http_status'],
                                     response_time_s
                                     ]
                                    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: run the asynchronous main() and exit with a non-zero
    # status when the user interrupts the search with Ctrl+C.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print('Maigret is interrupted.')
        sys.exit(1)
|
||||
@@ -0,0 +1,283 @@
|
||||
"""Sherlock Notify Module
|
||||
|
||||
This module defines the objects for notifying the caller about the
|
||||
results of queries.
|
||||
"""
|
||||
from colorama import Fore, Style, init
|
||||
from result import QueryStatus
|
||||
|
||||
|
||||
class QueryNotify():
    """Base interface for reporting query results.

    Defines the notification hooks invoked around a series of queries.
    Subclasses override these hooks to implement concrete behavior
    (e.g. printing to the terminal); the base implementations only
    remember the last result.
    """

    def __init__(self, result=None):
        """Initialize the notifier.

        Keyword Arguments:
        result                 -- Optional QueryResult() to seed the
                                  notifier with. Default is None.
        """
        self.result = result

    def start(self, message=None, id_type='username'):
        """Hook called once before any queries run.

        Keyword Arguments:
        message                -- Optional object giving context for the
                                  start of the query series.
        id_type                -- Kind of identifier being checked.
                                  Default is 'username'.

        No-op in the base class; subclasses may override.
        """

    def update(self, result):
        """Hook called with each query result.

        Keyword Arguments:
        result                 -- QueryResult() for the query just finished.

        The base class simply remembers the most recent result.
        """
        self.result = result

    def finish(self, message=None):
        """Hook called once after all queries have completed.

        Keyword Arguments:
        message                -- Optional object giving context for the
                                  end of the query series.

        No-op in the base class; subclasses may override.
        """

    def __str__(self):
        """Return the string form of the most recent result."""
        return str(self.result)
|
||||
|
||||
|
||||
class QueryNotifyPrint(QueryNotify):
    """Query Notify Print Object.

    Query notify class that prints results to the terminal, optionally
    with ANSI colors via colorama.
    """

    def __init__(self, result=None, verbose=False, print_found_only=False,
                 skip_check_errors=False, color=True):
        """Create Query Notify Print Object.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        verbose                -- Boolean indicating whether to give verbose output.
        print_found_only       -- Boolean indicating whether to only print found sites.
        skip_check_errors      -- Boolean indicating whether to suppress
                                  error-status results.
        color                  -- Boolean indicating whether to color terminal output

        Return Value:
        Nothing.
        """

        # Colorama module's initialization.
        init(autoreset=True)

        super().__init__(result)
        self.verbose = verbose
        self.print_found_only = print_found_only
        self.skip_check_errors = skip_check_errors
        self.color = color

    def start(self, message, id_type):
        """Notify Start.

        Will print the title to the standard output.

        Keyword Arguments:
        self                   -- This object.
        message                -- String containing username that the series
                                  of queries are about.
        id_type                -- Kind of identifier being checked
                                  (e.g. 'username').

        Return Value:
        Nothing.
        """

        title = f"Checking {id_type}"
        if self.color:
            print(Style.BRIGHT + Fore.GREEN + "[" +
                  Fore.YELLOW + "*" +
                  Fore.GREEN + f"] {title}" +
                  Fore.WHITE + f" {message}" +
                  Fore.GREEN + " on:")
        else:
            print(f"[*] {title} {message} on:")

    def get_additional_data_text(self, items, prepend=''):
        """Render extracted ids/usernames as a box-drawing tree.

        Keyword Arguments:
        items                  -- Iterable of plain values or
                                  (field_name, field_value) tuples; a value
                                  that looks like a stringified list is
                                  parsed and rendered recursively.
        prepend                -- Indentation prefix for nested levels.

        Return Value:
        Multi-line string, each item on its own line.
        """
        # Local import: only needed for safely parsing nested list literals.
        import ast

        text = ''
        for num, item in enumerate(items):
            # Last item closes the tree branch; earlier items continue it.
            box_symbol = '┣╸' if num != len(items) - 1 else '┗╸'

            if isinstance(item, tuple):
                field_name, field_value = item
                if field_value.startswith('[\''):
                    is_last_item = num == len(items) - 1
                    prepend_symbols = ' ' * 3 if is_last_item else ' ┃ '
                    # SECURITY FIX: this value originates from scraped page
                    # content; eval() here allowed arbitrary code execution.
                    # ast.literal_eval() parses the list literal safely.
                    field_value = self.get_additional_data_text(
                        ast.literal_eval(field_value), prepend_symbols)
                text += f'\n{prepend}{box_symbol}{field_name}: {field_value}'
            else:
                text += f'\n{prepend}{box_symbol} {item}'

        return text

    def update(self, result, is_similar=False):
        """Notify Update.

        Will print the query result to the standard output.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        is_similar             -- Boolean: result only resembles the target
                                  username (printed with '?' instead of '+').

        Return Value:
        Nothing.
        """
        self.result = result

        if not self.result.ids_data:
            ids_data_text = ""
        else:
            ids_data_text = self.get_additional_data_text(self.result.ids_data.items(), ' ')

        def make_colored_terminal_notify(status, text, status_color, text_color, appendix):
            # One-line ANSI-colored notification.
            text = [
                f'{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]' +
                f'{text_color} {text}: {Style.RESET_ALL}' +
                f'{appendix}'
            ]
            return ''.join(text)

        def make_simple_terminal_notify(status, text, appendix):
            # Plain-text notification for --no-color mode.
            return f'[{status}] {text}: {appendix}'

        def make_terminal_notify(is_colored=True, *args):
            if is_colored:
                return make_colored_terminal_notify(*args)
            else:
                return make_simple_terminal_notify(*args)

        notify = None

        # Output to the terminal is desired.
        if result.status == QueryStatus.CLAIMED:
            color = Fore.BLUE if is_similar else Fore.GREEN
            status = '?' if is_similar else '+'
            notify = make_terminal_notify(
                self.color,
                status, result.site_name,
                color, color,
                result.site_url_user + ids_data_text
            )
        elif result.status == QueryStatus.AVAILABLE:
            if not self.print_found_only:
                notify = make_terminal_notify(
                    self.color,
                    '-', result.site_name,
                    Fore.RED, Fore.YELLOW,
                    'Not found!' + ids_data_text
                )
        elif result.status == QueryStatus.UNKNOWN:
            if not self.skip_check_errors:
                notify = make_terminal_notify(
                    self.color,
                    '?', result.site_name,
                    Fore.RED, Fore.RED,
                    self.result.context + ids_data_text
                )
        elif result.status == QueryStatus.ILLEGAL:
            if not self.print_found_only:
                text = 'Illegal Username Format For This Site!'
                notify = make_terminal_notify(
                    self.color,
                    '-', result.site_name,
                    Fore.RED, Fore.YELLOW,
                    text + ids_data_text
                )
        else:
            # It should be impossible to ever get here...
            raise ValueError(f"Unknown Query Status '{str(result.status)}' for "
                             f"site '{self.result.site_name}'")

        if notify:
            print(notify)

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nicely formatted string to get information about this object.
        """
        return str(self.result)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,93 @@
|
||||
"""Sherlock Result Module
|
||||
|
||||
This module defines various objects for recording the results of queries.
|
||||
"""
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class QueryStatus(Enum):
    """Outcome of a username query against a single site."""

    CLAIMED = "Claimed"      # username detected on the site
    AVAILABLE = "Available"  # username not detected
    UNKNOWN = "Unknown"      # error occurred while trying to detect username
    ILLEGAL = "Illegal"      # username not allowable for this site

    def __str__(self):
        """Return the human-readable status value."""
        return self.value
|
||||
|
||||
|
||||
class QueryResult():
    """Result of a query about one username on one site.

    Plain record holding the identity of the query (username, site, user
    URL), its QueryStatus, and optional timing/context/extracted-id data.
    """

    def __init__(self, username, site_name, site_url_user, status, ids_data=None,
                 query_time=None, context=None):
        """Create a query result record.

        Keyword Arguments:
        username               -- Username the query was about.
        site_name              -- Name of the site queried.
        site_url_user          -- URL the username would have on the site
                                  (whether or not it actually exists).
        status                 -- QueryStatus() describing the outcome.
        ids_data               -- Other usernames / inner ids extracted from
                                  the page, if any. Default None.
        query_time             -- Seconds the query took. Default None.
        context                -- Extra context string (e.g. error type).
                                  Default None.

        Return Value:
        Nothing.
        """
        self.username = username
        self.site_name = site_name
        self.site_url_user = site_url_user
        self.status = status
        self.query_time = query_time
        self.context = context
        self.ids_data = ids_data

    def __str__(self):
        """Return the status string, with ' (context)' appended when set."""
        base = str(self.status)
        return base if self.context is None else f"{base} ({self.context})"
|
||||
@@ -0,0 +1,246 @@
|
||||
"""Sherlock Sites Information Module
|
||||
|
||||
This module supports storing information about web sites.
|
||||
This is the raw data that will be used to search for usernames.
|
||||
"""
|
||||
import json
|
||||
import operator
|
||||
import sys
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class SiteInformation():
    """Static description of a single supported web site.

    Bundles the site's name, home URL, username-URL template (with a
    "{}" placeholder for the username), popularity rank, known
    claimed/unclaimed test usernames, and the raw detection dictionary.
    """

    def __init__(self, name, url_home, url_username_format, popularity_rank,
                 username_claimed, username_unclaimed,
                 information):
        """Initialize the site description.

        Keyword Arguments:
        name                   -- String which identifies the site.
        url_home               -- URL of the site's home page.
        url_username_format    -- URL template containing "{}" where the
                                  username is substituted, e.g.
                                  "https://somesite.com/users/{}".
        popularity_rank        -- Integer popularity (smaller = more
                                  popular); None or 0 means unknown.
        username_claimed       -- Username known to exist on the site.
        username_unclaimed     -- Username known not to exist on the site.
        information            -- Raw dictionary with everything known about
                                  the site, including detection settings
                                  consumed later by the checker.

        Return Value:
        Nothing.
        """
        self.name = name
        self.url_home = url_home
        self.url_username_format = url_username_format

        # Unknown popularity sorts the site to the bottom of ranked lists.
        if popularity_rank in (None, 0):
            popularity_rank = sys.maxsize
        self.popularity_rank = popularity_rank

        self.username_claimed = username_claimed
        self.username_unclaimed = username_unclaimed
        self.information = information

    def __str__(self):
        """Return "<name> (<home URL>)"."""
        return f"{self.name} ({self.url_home})"
|
||||
|
||||
|
||||
class SitesInformation():
    def __init__(self, data_file_path=None):
        """Create Sites Information Object.

        Loads the site database from a local JSON file or an http(s) URL
        and builds a SiteInformation() object per site.

        Keyword Arguments:
        self                   -- This object.
        data_file_path         -- String which indicates path to data file.
                                  The file name must end in ".json".

                                  There are 3 possible formats:
                                  * Absolute File Format
                                    For example, "c:/stuff/data.json".
                                  * Relative File Format
                                    The current working directory is used
                                    as the context.
                                    For example, "data.json".
                                  * URL Format
                                    For example,
                                    "https://example.com/data.json", or
                                    "http://example.com/data.json".

                                  An exception will be thrown if the path
                                  to the data file is not in the expected
                                  format, or if there was any problem loading
                                  the file.

        Return Value:
        Nothing.
        """

        # NOTE(review): data_file_path=None raises TypeError on the slice
        # below, despite the parameter default suggesting a fallback —
        # confirm all callers always pass an explicit path.
        # Ensure that specified data file has correct extension.
        if ".json" != data_file_path[-5:].lower():
            raise FileNotFoundError(f"Incorrect JSON file extension for "
                                    f"data file '{data_file_path}'."
                                    )

        if (("http://" == data_file_path[:7].lower()) or
            ("https://" == data_file_path[:8].lower())
           ):
            # Reference is to a URL.
            try:
                response = requests.get(url=data_file_path)
            except Exception as error:
                raise FileNotFoundError(f"Problem while attempting to access "
                                        f"data file URL '{data_file_path}': "
                                        f"{str(error)}"
                                        )
            if response.status_code == 200:
                try:
                    site_data = response.json()
                except Exception as error:
                    raise ValueError(f"Problem parsing json contents at "
                                     f"'{data_file_path}': {str(error)}."
                                     )
            else:
                raise FileNotFoundError(f"Bad response while accessing "
                                        f"data file URL '{data_file_path}'."
                                        )
            # NOTE(review): in this URL branch engines_data is never
            # assigned, so a site entry containing 'engine' below would
            # raise NameError. Also, the URL branch treats the whole
            # document as site data, while the file branch expects
            # "sites"/"engines" keys — confirm the two formats.
        else:
            # Reference is to a file.
            try:
                with open(data_file_path, "r", encoding="utf-8") as file:
                    try:
                        data = json.load(file)
                        site_data = data.get("sites")
                        engines_data = data.get("engines")
                    except Exception as error:
                        raise ValueError(f"Problem parsing json contents at "
                                         f"'{data_file_path}': {str(error)}."
                                         )
            except FileNotFoundError as error:
                raise FileNotFoundError(f"Problem while attempting to access "
                                        f"data file '{data_file_path}'."
                                        )

        self.sites = {}

        # Add all of site information from the json file to internal site list.
        for site_name in site_data:
            try:
                site = site_data[site_name]
                # If popularity unknown, make site be at bottom of list.
                popularity_rank = site.get("rank", sys.maxsize)

                if 'engine' in site:
                    # Merge shared engine defaults into the site record so
                    # each entry is self-sufficient for detection.
                    engine_data = engines_data[site['engine']]['site']
                    site.update(engine_data)

                self.sites[site_name] = \
                    SiteInformation(site_name,
                                    site["urlMain"],
                                    site["url"],
                                    popularity_rank,
                                    site["username_claimed"],
                                    site["username_unclaimed"],
                                    site
                                    )
            except KeyError as error:
                raise ValueError(f"Problem parsing json contents at "
                                 f"'{data_file_path}': "
                                 f"Missing attribute {str(error)}."
                                 )

        return

    def site_name_list(self, popularity_rank=False):
        """Get Site Name List.

        Keyword Arguments:
        self                   -- This object.
        popularity_rank        -- Boolean indicating if list should be sorted
                                  by popularity rank.
                                  Default value is False.
                                  NOTE: List is sorted in ascending
                                  alphabetical order if popularity rank
                                  is not requested.

        Return Value:
        List of strings containing names of sites.
        """

        if popularity_rank:
            # Sort in ascending popularity rank order.
            site_rank_name = \
                sorted([(site.popularity_rank, site.name) for site in self],
                       key=operator.itemgetter(0)
                       )
            site_names = [name for _, name in site_rank_name]
        else:
            # Sort in ascending alphabetical order.
            site_names = sorted([site.name for site in self], key=str.lower)

        return site_names

    def __iter__(self):
        """Yield each SiteInformation() object in insertion order."""

        for site_name in self.sites:
            yield self.sites[site_name]

    def __len__(self):
        """Return the number of sites in the collection."""
        return len(self.sites)
|
||||
@@ -0,0 +1,4 @@
|
||||
"""Sherlock Tests
|
||||
|
||||
This package contains various submodules used to run tests.
|
||||
"""
|
||||
@@ -0,0 +1,297 @@
|
||||
"""Sherlock Tests
|
||||
|
||||
This module contains various tests.
|
||||
"""
|
||||
from tests.base import SherlockBaseTest
|
||||
import unittest
|
||||
|
||||
|
||||
class SherlockDetectTests(SherlockBaseTest):
    """Tests exercising each username-detection mechanism on a known site.

    Each mechanism ("message", "status_code", "response_url") is checked
    twice: once with a username known to exist on the probe site and once
    with a username known not to exist.
    """

    def _run_detection_case(self, site, expected_error_type, exist_check):
        """Drive a single detection test against one site.

        Keyword Arguments:
        self                -- This object.
        site                -- Name of the site to test against.
        expected_error_type -- Detection mechanism the site is expected to
                               use ("message", "status_code", or
                               "response_url").
        exist_check         -- True to check the claimed username,
                               False to check the unclaimed one.

        Return Value:
        N/A.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        site_data = self.site_data_all[site]

        # Ensure that the site's detection method has not changed.
        self.assertEqual(expected_error_type, site_data["errorType"])

        username_key = "username_claimed" if exist_check else "username_unclaimed"
        self.username_check([site_data[username_key]],
                            [site],
                            exist_check=exist_check
                            )

    def test_detect_true_via_message(self):
        """Username exists and is detected via an error message."""
        self._run_detection_case('Instructables', "message", True)

    def test_detect_false_via_message(self):
        """Username does not exist and is detected via an error message."""
        self._run_detection_case('Instructables', "message", False)

    def test_detect_true_via_status_code(self):
        """Username exists and is detected via the HTTP status code."""
        self._run_detection_case('Facebook', "status_code", True)

    def test_detect_false_via_status_code(self):
        """Username does not exist and is detected via the HTTP status code."""
        self._run_detection_case('Facebook', "status_code", False)

    def test_detect_true_via_response_url(self):
        """Username exists and is detected via the response URL."""
        self._run_detection_case('Quora', "response_url", True)

    def test_detect_false_via_response_url(self):
        """Username does not exist and is detected via the response URL."""
        self._run_detection_case('Quora', "response_url", False)
|
||||
|
||||
|
||||
class SherlockSiteCoverageTests(SherlockBaseTest):
    """Coverage tests: run every site that uses a given detection algorithm.

    Each pair of tests sweeps all sites sharing one detection mechanism,
    first for a known-missing username, then for a known-existing one.
    A final test verifies that no site lacks test data entirely.
    """

    def test_coverage_false_via_response_url(self):
        """All "response URL" sites report a non-existent username as absent."""
        self.detect_type_check("response_url", exist_check=False)

    def test_coverage_true_via_response_url(self):
        """All "response URL" sites report an existing username as present."""
        self.detect_type_check("response_url", exist_check=True)

    def test_coverage_false_via_status(self):
        """All "HTTP status" sites report a non-existent username as absent."""
        self.detect_type_check("status_code", exist_check=False)

    def test_coverage_true_via_status(self):
        """All "HTTP status" sites report an existing username as present."""
        self.detect_type_check("status_code", exist_check=True)

    def test_coverage_false_via_message(self):
        """All "error message" sites report a non-existent username as absent."""
        self.detect_type_check("message", exist_check=False)

    def test_coverage_true_via_message(self):
        """All "error message" sites report an existing username as present."""
        self.detect_type_check("message", exist_check=True)

    def test_coverage_total(self):
        """Every known site has the test data needed for full coverage."""
        self.coverage_total_check()
|
||||
@@ -0,0 +1,228 @@
|
||||
"""Sherlock Base Tests
|
||||
|
||||
This module contains various utilities for running tests.
|
||||
"""
|
||||
import os
|
||||
import os.path
|
||||
import unittest
|
||||
import maigret
|
||||
from result import QueryStatus
|
||||
from result import QueryResult
|
||||
from notify import QueryNotify
|
||||
from sites import SitesInformation
|
||||
import warnings
|
||||
|
||||
|
||||
class SherlockBaseTest(unittest.TestCase):
    """Common fixture and helpers shared by the Sherlock/Maigret tests."""

    def setUp(self):
        """Sherlock Base Test Setup.

        Does common setup tasks for base Sherlock tests: loads site data,
        the optional excluded-sites list, and default query options.

        Keyword Arguments:
        self -- This object.

        Return Value:
        N/A.
        """
        # This ignores the ResourceWarning from an unclosed SSLSocket.
        # TODO: Figure out how to fix the code so this is not needed.
        warnings.simplefilter("ignore", ResourceWarning)

        # Create object with all information about sites we are aware of.
        sites = SitesInformation()

        # Create original dictionary from SitesInformation() object.
        # Eventually, the rest of the code will be updated to use the new
        # object directly, but this will glue the two pieces together.
        self.site_data_all = {site.name: site.information for site in sites}

        # Load excluded sites list, if any.
        excluded_sites_path = os.path.join(
            os.path.dirname(os.path.realpath(maigret.__file__)),
            "tests/.excluded_sites"
        )
        try:
            with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file:
                self.excluded_sites = excluded_sites_file.read().splitlines()
        except FileNotFoundError:
            # No exclusion list present: test every site.
            self.excluded_sites = []

        # Create notify object for query results.
        self.query_notify = QueryNotify()

        # Default query options used by username_check().
        self.tor = False
        self.unique_tor = False
        self.timeout = None
        # Skip (rather than fail) sites that return an error status.
        self.skip_error_sites = True

        return

    def site_data_filter(self, site_list):
        """Filter Site Data.

        Keyword Arguments:
        self       -- This object.
        site_list  -- List of strings corresponding to sites which
                      should be filtered.

        Return Value:
        Dictionary containing sub-set of site data specified by 'site_list'.
        """
        # Create new dictionary that has filtered site data based on input.
        # Note that any site specified which is not understood will generate
        # an error (surfaced per-site via subTest).
        site_data = {}
        for site in site_list:
            with self.subTest(f"Checking test vector Site '{site}' "
                              f"exists in total site data."
                              ):
                site_data[site] = self.site_data_all[site]

        return site_data

    def username_check(self, username_list, site_list, exist_check=True):
        """Username Exist Check.

        Keyword Arguments:
        self           -- This object.
        username_list  -- List of strings corresponding to usernames
                          which should exist on *all* of the sites.
        site_list      -- List of strings corresponding to sites which
                          should be filtered.
        exist_check    -- Boolean which indicates if this should be
                          a check for Username existence,
                          or non-existence.

        Return Value:
        N/A.
        Will trigger an assert if Username does not have the expected
        existence state.
        """
        # Filter all site data down to just what is needed for this test.
        site_data = self.site_data_filter(site_list)

        if exist_check:
            check_type_text = "claimed"
            exist_result_desired = QueryStatus.CLAIMED
        else:
            check_type_text = "available"
            exist_result_desired = QueryStatus.AVAILABLE

        for username in username_list:
            results = maigret.sherlock(username,
                                       site_data,
                                       self.query_notify,
                                       tor=self.tor,
                                       unique_tor=self.unique_tor,
                                       timeout=self.timeout
                                       )
            for site, result in results.items():
                with self.subTest(f"Checking Username '{username}' "
                                  f"{check_type_text} on Site '{site}'"
                                  ):
                    if (self.skip_error_sites
                            and result['status'].status == QueryStatus.UNKNOWN):
                        # Some error connecting to site.
                        self.skipTest(f"Skipping Username '{username}' "
                                      f"{check_type_text} on Site '{site}': "
                                      f"Site returned error status."
                                      )

                    self.assertEqual(exist_result_desired,
                                     result['status'].status)

        return

    def detect_type_check(self, detect_type, exist_check=True):
        """Username Exist Check.

        Keyword Arguments:
        self         -- This object.
        detect_type  -- String corresponding to detection algorithm
                        which is desired to be tested.
                        Note that only sites which have documented
                        usernames which exist and do not exist
                        will be tested.
        exist_check  -- Boolean which indicates if this should be
                        a check for Username existence,
                        or non-existence.

        Return Value:
        N/A.
        Runs tests on all sites using the indicated detection algorithm
        and which also has test vectors specified.
        Will trigger an assert if Username does not have the expected
        existence state.
        """
        # Dictionary of sites that should be tested for having a username.
        # This will allow us to test sites with a common username in parallel.
        sites_by_username = {}

        for site, site_data in self.site_data_all.items():
            if (
                 (site in self.excluded_sites) or
                 (site_data["errorType"] != detect_type) or
                 (site_data.get("username_claimed") is None) or
                 (site_data.get("username_unclaimed") is None)
               ):
                # This is either not a site we are interested in, or the
                # site does not contain the required information to do
                # the tests.
                continue

            # We should run a test on this site.
            # Figure out which type of username to query for.
            if exist_check:
                username = site_data.get("username_claimed")
            else:
                username = site_data.get("username_unclaimed")

            # Group sites by the username that will be queried so that
            # sites sharing a username can be tested in one pass.
            sites_by_username.setdefault(username, []).append(site)

        # Check on the username availability against all of the sites.
        for username, site_list in sites_by_username.items():
            self.username_check([username],
                                site_list,
                                exist_check=exist_check
                                )

        return

    def coverage_total_check(self):
        """Total Coverage Check.

        Keyword Arguments:
        self -- This object.

        Return Value:
        N/A.
        Counts up all Sites with full test data available.
        Will trigger an assert if any Site does not have test coverage.
        """
        site_no_tests_list = []

        for site, site_data in self.site_data_all.items():
            if (
                 (site_data.get("username_claimed") is None) or
                 (site_data.get("username_unclaimed") is None)
               ):
                # Test information not available on this site.
                site_no_tests_list.append(site)

        # The joined list doubles as the failure message: any site name
        # that appears here lacks test vectors.
        self.assertEqual("", ", ".join(site_no_tests_list))

        return
|
||||
@@ -0,0 +1,14 @@
|
||||
beautifulsoup4>=4.8.0
|
||||
bs4>=0.0.1
|
||||
certifi>=2019.6.16
|
||||
colorama>=0.4.1
|
||||
lxml>=4.4.0
|
||||
PySocks>=1.7.0
|
||||
requests>=2.22.0
|
||||
requests-futures>=1.0.0
|
||||
soupsieve>=1.9.2
|
||||
stem>=1.8.0
|
||||
torrequest>=0.1.0
|
||||
git+https://github.com/soxoj/socid_extractor
|
||||
aiohttp==3.5.4
|
||||
mock==4.0.2
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
@@ -0,0 +1,90 @@
|
||||
## Demo with page parsing and recursive username search
|
||||
|
||||
```bash
|
||||
python3 maigret.py --ids --print-found --skip-errors alexaimephotographycars
|
||||
[*] Checking username alexaimephotographycars on:
|
||||
[+] 500px: https://500px.com/p/alexaimephotographycars
|
||||
┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
|
||||
┣╸legacy_id: 26403415
|
||||
┣╸username: alexaimephotographycars
|
||||
┣╸name: Alex Aimé
|
||||
┣╸website: www.flickr.com/photos/alexaimephotography/
|
||||
┣╸facebook_link: www.instagram.com/street.reality.photography/
|
||||
┣╸instagram_username: alexaimephotography
|
||||
┗╸twitter_username: Alexaimephotogr
|
||||
[*] Checking username alexaimephotography on:
|
||||
[+] DeviantART: https://alexaimephotography.deviantart.com
|
||||
┣╸country: France
|
||||
┣╸registered_for_seconds: 55040868
|
||||
┣╸gender: male
|
||||
┣╸username: Alexaimephotography
|
||||
┣╸twitter_username: alexaimephotogr
|
||||
┣╸website: www.instagram.com/alexaimephotography/
|
||||
┗╸links:
|
||||
┗╸ https://www.instagram.com/alexaimephotography/
|
||||
[+] EyeEm: https://www.eyeem.com/u/alexaimephotography
|
||||
┣╸eyeem_id: 21974802
|
||||
┣╸eyeem_username: alexaimephotography
|
||||
┣╸fullname: Alex
|
||||
┣╸followers: 10
|
||||
┣╸friends: 2
|
||||
┣╸liked_photos: 37
|
||||
┣╸photos: 10
|
||||
┗╸facebook_uid: 1534915183474093
|
||||
[+] Facebook: https://www.facebook.com/alexaimephotography
|
||||
[+] Gramho: https://gramho.com/explore-hashtag/alexaimephotography
|
||||
[+] Instagram: https://www.instagram.com/alexaimephotography
|
||||
┣╸username: alexaimephotography
|
||||
┣╸full_name: Alexaimephotography
|
||||
┣╸id: 6828488620
|
||||
┣╸biography: 🇮🇹 🇲🇫 🇩🇪
|
||||
Amateur photographer
|
||||
Follow me @street.reality.photography
|
||||
Sony A7ii
|
||||
┗╸external_url: https://www.flickr.com/photos/alexaimephotography2020/
|
||||
[+] Picuki: https://www.picuki.com/profile/alexaimephotography
|
||||
[+] Pinterest: https://www.pinterest.com/alexaimephotography/
|
||||
┣╸pinterest_username: alexaimephotography
|
||||
┣╸fullname: alexaimephotography
|
||||
┣╸image: https://s.pinimg.com/images/user/default_280.png
|
||||
┣╸board_count: 3
|
||||
┣╸pin_count: 4
|
||||
┣╸country: FR
|
||||
┣╸follower_count: 0
|
||||
┣╸following_count: 1
|
||||
┣╸is_website_verified: False
|
||||
┣╸is_indexed: True
|
||||
┣╸is_verified_merchant: False
|
||||
┗╸locale: fr
|
||||
[+] Reddit: https://www.reddit.com/user/alexaimephotography
|
||||
┣╸reddit_id: t5_1nytpy
|
||||
┣╸reddit_username: alexaimephotography
|
||||
┣╸display_name: alexaimephotography
|
||||
┣╸is_employee: False
|
||||
┣╸is_nsfw: False
|
||||
┣╸is_mod: True
|
||||
┣╸is_following: True
|
||||
┣╸has_user_profile: True
|
||||
┣╸hide_from_robots: False
|
||||
┣╸created_utc: 1562750403
|
||||
┣╸total_karma: 43075
|
||||
┗╸post_karma: 42574
|
||||
[+] Tumblr: https://alexaimephotography.tumblr.com/
|
||||
[+] VK: https://vk.com/alexaimephotography
|
||||
[+] Vimeo: https://vimeo.com/alexaimephotography
|
||||
┣╸uid: 75857717
|
||||
┣╸name: AlexAimePhotography
|
||||
┣╸username: alexaimephotography
|
||||
┣╸location: France
|
||||
┣╸created_at: 2017-12-06 06:49:28
|
||||
┣╸is_staff: False
|
||||
┗╸links:
|
||||
┣╸ https://500px.com/alexaimephotography
|
||||
┣╸ https://www.flickr.com/photos/photoambiance/
|
||||
┣╸ https://www.instagram.com/alexaimephotography/
|
||||
┣╸ https://www.youtube.com/channel/UC4NiYV3Yqih2WHcwKg4uPuQ
|
||||
┗╸ https://flii.by/alexaimephotography/
|
||||
[+] We Heart It: https://weheartit.com/alexaimephotography
|
||||
[*] Checking username Alexaimephotogr on:
|
||||
[+] Twitter: https://twitter.com/Alexaimephotogr
|
||||
```
|
||||
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 44 KiB |
Executable
+126
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Maigret: Supported Site Listing with Alexa ranking and country tags
|
||||
This module generates the listing of supported sites in file `SITES.md`
|
||||
and pretty prints file with sites data.
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import requests
|
||||
import logging
|
||||
import threading
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
# Alexa-rank bucket labels used by get_step_rank(): a raw rank is rounded
# up to the nearest key and displayed as the mapped label.
RANKS = {str(i): str(i) for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 100, 500]}
RANKS.update({
    '1000': '1K',
    '5000': '5K',
    '10000': '10K',
    '100000': '100K',
    # Fixed bucket labels: the original mapping mislabelled 10,000,000 as
    # '1M' and 50,000,000 as '10M', and had no 1,000,000 bucket at all.
    '1000000': '1M',
    '10000000': '10M',
    '50000000': '50M',
})
|
||||
|
||||
def get_rank(domain_to_query, dest, print_errors=True):
    """Fetch the Alexa rank for a domain and store it into *dest* in place.

    Keyword Arguments:
    domain_to_query -- Domain/URL whose Alexa rank should be looked up.
    dest            -- Site-data dictionary updated in place: 'rank' is
                       set, the Alexa country code (if any) is merged into
                       'tags', and non-username sites get 'disabled' reset.
    print_errors    -- If True, log and print lookup failures.

    Return Value:
    N/A. All results are written into *dest*.
    """
    # Retrieve ranking data via the Alexa API.
    url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
    # Bounded timeout so one hung lookup cannot stall its worker thread
    # forever (this function is run from many threads in parallel).
    xml_data = requests.get(url, timeout=30).text
    root = ET.fromstring(xml_data)

    try:
        # Get ranking for this site; missing REACH node raises and is
        # reported below.
        dest['rank'] = int(root.find('.//REACH').attrib['RANK'])
        country = root.find('.//COUNTRY')
        if country is not None and country.attrib:
            country_code = country.attrib['CODE']
            tags = set(dest.get('tags', []))
            if country_code:
                tags.add(country_code.lower())
            dest['tags'] = sorted(tags)
        if 'type' in dest and dest['type'] != 'username':
            dest['disabled'] = False
    except Exception as e:
        if print_errors:
            logging.error(e)
            # We did not find the rank for some reason.
            print(f"Error retrieving rank information for '{domain_to_query}'")
            print(f" Returned XML is |{xml_data}|")

    return
|
||||
|
||||
|
||||
def get_step_rank(rank):
    """Map a raw Alexa rank onto the nearest human-readable bucket label.

    A rank of 0 (no data available) maps to the largest bucket so the
    site sorts to the bottom of the listing; otherwise the smallest
    bucket boundary that is >= the rank is used.
    """
    step_values = sorted(map(int, RANKS.keys()))

    if rank == 0:
        # Unranked site: use the biggest (worst) bucket.
        chosen_step = step_values[-1]
    else:
        # Smallest bucket boundary not below the actual rank.
        chosen_step = [step for step in step_values if step >= rank][0]

    return RANKS[str(chosen_step)]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("--base", "-b", metavar="BASE_FILE",
                        dest="base_file", default="maigret/resources/data.json",
                        help="JSON file with sites data to update.")

    args = parser.parse_args()

    # (site name, main URL, worker thread) triples for the rank lookups.
    pool = []

    with open(args.base_file, "r", encoding="utf-8") as data_file:
        sites_info = json.load(data_file)
        data = sites_info['sites']
        engines = sites_info['engines']

    with open("sites.md", "w") as site_file:
        data_length = len(data)
        site_file.write(f"""
## List of supported sites: total {data_length}\n
Rank data fetched from Alexa by domains.

""")

        # Fire off one lookup thread per site; get_rank() mutates the
        # site's own dictionary in place.
        for social_network in data:
            url_main = data.get(social_network).get("urlMain")
            data.get(social_network)["rank"] = 0
            th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network)))
            pool.append((social_network, url_main, th))
            th.start()

        # Wait for every lookup, reporting progress on one console line.
        for index, (social_network, url_main, th) in enumerate(pool, start=1):
            th.join()
            sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
            sys.stdout.flush()

        # Sort by rank ascending, then move unranked (rank == 0) sites to
        # the end. A stable partition is used instead of the original
        # "rotate while the head is zero" loop, which would spin forever
        # if every Alexa lookup failed (all ranks 0).
        sites_full_list = sorted(
            ((site, site_data['rank']) for site, site_data in data.items()),
            key=lambda entry: entry[1]
        )
        ranked = [entry for entry in sites_full_list if entry[1] != 0]
        unranked = [entry for entry in sites_full_list if entry[1] == 0]
        sites_full_list = ranked + unranked

        for site, rank in sites_full_list:
            url_main = data[site]['urlMain']
            valid_rank = get_step_rank(rank)
            all_tags = data[site].get('tags', [])
            tags = ', ' + ', '.join(all_tags) if all_tags else ''
            note = ''
            if data[site].get('disabled'):
                note = ', search is disabled'
            site_file.write(f'1. [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')

        site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')

    # Persist the (possibly updated) site data back, sorted for stable diffs.
    sorted_json_data = json.dumps({'sites': data, 'engines': engines}, indent=2, sort_keys=True)

    with open(args.base_file, "w") as data_file:
        data_file.write(sorted_json_data)

    print("\nFinished updating supported site listing!")
|
||||
Reference in New Issue
Block a user