Compare commits


106 Commits

Author SHA1 Message Date
soxoj b345512489 Merge pull request #110 from soxoj/0.1.19
Bump to 0.1.19
2021-04-14 23:16:30 +03:00
Soxoj 786cb59145 Bump to 0.1.19 2021-04-14 23:14:33 +03:00
soxoj 481baddec6 Merge pull request #109 from soxoj/fp-fixes
Some false positive fixes
2021-04-12 23:18:47 +03:00
Soxoj ecb3d76581 Some false positive fixes 2021-04-12 23:16:26 +03:00
soxoj 8a8fab5bed Merge pull request #108 from soxoj/async-tasks-timeout
Added asyncio tasks with timeouts, non-blocking work with queue
2021-04-12 23:01:59 +03:00
Soxoj 2fee65fe4e Added asyncio tasks with timeouts, non-blocking work with queue 2021-04-11 17:56:27 +03:00
soxoj dabba859f3 Merge pull request #107 from soxoj/main-module-bugfix
Fixed maigret-as-a-module start
2021-04-06 00:36:45 +03:00
Soxoj 74d4d40abd Fixed maigret-as-a-module start 2021-04-06 00:33:39 +03:00
soxoj d6f6d78d3f Merge pull request #104 from soxoj/ascii-tree-bugfix
Fixed ascii tree bug
2021-04-02 09:08:14 +03:00
Soxoj 1b61c5085e Fixed ascii tree bug 2021-04-02 09:03:22 +03:00
soxoj 01e20518c1 Merge pull request #100 from soxoj/fp-fixes
Fixed some false positives
2021-03-31 23:20:18 +03:00
Soxoj 8477385289 Fixed some false positives 2021-03-31 23:17:47 +03:00
soxoj 491dd8f166 Merge pull request #99 from soxoj/no-progressbar-option
Added `--no-progressbar` flag
2021-03-30 19:47:42 +03:00
Soxoj c64b7a1c85 Added --no-progressbar flag 2021-03-30 19:44:01 +03:00
soxoj 03511a7a8f Merge pull request #97 from soxoj/wizard
Some API improvements
2021-03-30 01:16:12 +03:00
Soxoj 7f1a0fae03 Some API improvements 2021-03-30 01:14:46 +03:00
soxoj b0de174df2 Merge pull request #96 from soxoj/wizard
Added search wizard script as an API usage example
2021-03-30 01:11:12 +03:00
Soxoj b5db3f0035 Added search wizard script as an API usage example 2021-03-30 01:09:06 +03:00
soxoj 53d698bb7b Merge pull request #95 from soxoj/socid-bump
Updated socid_extractor version
2021-03-30 00:37:02 +03:00
soxoj 23fff42ca7 Merge pull request #94 from soxoj/dependabot/pip/lxml-4.6.3
Bump lxml from 4.6.2 to 4.6.3
2021-03-30 00:34:13 +03:00
Soxoj 51d9e6f5f6 Bump to v0.1.17 2021-03-30 00:33:51 +03:00
Soxoj 640c04f20b Updated socid_extractor version 2021-03-30 00:31:40 +03:00
dependabot[bot] 69f78e331b Bump lxml from 4.6.2 to 4.6.3
Bumps [lxml](https://github.com/lxml/lxml) from 4.6.2 to 4.6.3.
- [Release notes](https://github.com/lxml/lxml/releases)
- [Changelog](https://github.com/lxml/lxml/blob/master/CHANGES.txt)
- [Commits](https://github.com/lxml/lxml/compare/lxml-4.6.2...lxml-4.6.3)

Signed-off-by: dependabot[bot] <support@github.com>
2021-03-29 21:25:19 +00:00
soxoj 69c315b00e Merge pull request #93 from soxoj/docs-requirements
Documentation and API improving
2021-03-30 00:24:49 +03:00
Soxoj b755628a1d Documentation and API improving 2021-03-30 00:19:17 +03:00
soxoj 7490a412db Merge pull request #92 from soxoj/ignore403-bugfix
Fixed bug with ignore403 for engine-based sites
2021-03-28 17:40:35 +03:00
Soxoj 2741680d4a Fixed bug with ignore403 for engine-based sites 2021-03-28 17:37:18 +03:00
soxoj e5fc221ce2 Merge pull request #91 from soxoj/async-3.6.9-fix
Fix of 3.6.9 asyncio create_task error
2021-03-24 21:43:11 +03:00
Soxoj a044e3dd79 Fix of 3.6.9 asyncio create_task error 2021-03-24 21:37:56 +03:00
soxoj 6da4ff1e7b Merge pull request #89 from soxoj/v0.1.16
Bump to 0.1.16
2021-03-21 18:58:48 +03:00
Soxoj eb2442401d Bump to 0.1.16 2021-03-21 18:50:13 +03:00
soxoj d23d24eeca Merge pull request #88 from soxoj/parsing-mode-improve
Improving "parse" mode for extracting usernames and other info for a …
2021-03-21 18:41:17 +03:00
Soxoj a2ddb15f09 Improving "parse" mode for extracting usernames and other info for a further search 2021-03-21 18:34:57 +03:00
soxoj e90e85d2a9 Merge pull request #85 from soxoj/submit-improving
Improved submit mode, several sites added
2021-03-21 14:04:09 +03:00
Soxoj 2bb01f7019 Improved submit mode, several sites added 2021-03-21 13:59:59 +03:00
soxoj b586a4cd06 Merge pull request #84 from soxoj/ucoz-support
Added support of uID.me and uCoz sites
2021-03-20 23:26:35 +03:00
Soxoj 28733282ab CI reruns 2021-03-20 23:24:55 +03:00
Soxoj 0a7a7ad70d Added support of uID.me and uCoz sites 2021-03-20 23:21:53 +03:00
soxoj c895f6b418 Merge pull request #82 from soxoj/dependabot/pip/jinja2-2.11.3
Bump jinja2 from 2.11.2 to 2.11.3
2021-03-20 20:59:35 +03:00
soxoj a6286a0286 Merge pull request #83 from soxoj/executors-update
Created async requests executors, some sites fixes
2021-03-20 20:59:22 +03:00
Soxoj 314eb25d1f Created async requests executors, some sites fixes 2021-03-20 20:57:07 +03:00
dependabot[bot] fbbc8b49f3 Bump jinja2 from 2.11.2 to 2.11.3
Bumps [jinja2](https://github.com/pallets/jinja) from 2.11.2 to 2.11.3.
- [Release notes](https://github.com/pallets/jinja/releases)
- [Changelog](https://github.com/pallets/jinja/blob/master/CHANGES.rst)
- [Commits](https://github.com/pallets/jinja/compare/2.11.2...2.11.3)

Signed-off-by: dependabot[bot] <support@github.com>
2021-03-20 05:47:45 +00:00
soxoj faa03b62e5 Merge pull request #81 from soxoj/dependabot/pip/pillow-8.1.1
Bump pillow from 8.1.0 to 8.1.1
2021-03-19 21:04:50 +03:00
dependabot[bot] d676f7bb94 Bump pillow from 8.1.0 to 8.1.1
Bumps [pillow](https://github.com/python-pillow/Pillow) from 8.1.0 to 8.1.1.
- [Release notes](https://github.com/python-pillow/Pillow/releases)
- [Changelog](https://github.com/python-pillow/Pillow/blob/master/CHANGES.rst)
- [Commits](https://github.com/python-pillow/Pillow/compare/8.1.0...8.1.1)

Signed-off-by: dependabot[bot] <support@github.com>
2021-03-19 15:57:58 +00:00
soxoj d4757aab78 Merge pull request #80 from soxoj/reformatting
Reformat code, some sites added
2021-03-19 01:52:54 +03:00
Soxoj 908176be85 Reformat code, some sites added 2021-03-19 01:48:20 +03:00
soxoj 940f408da3 Merge pull request #79 from soxoj/new-sites-submit
Added new sites through auto submit, some fixes
2021-03-18 23:35:19 +03:00
Soxoj 8c700b9810 Added new sites through auto submit, some fixes 2021-03-18 23:21:33 +03:00
soxoj f9c9af5f41 Merge pull request #78 from soxoj/docker-update-readme
Update README.md
2021-03-16 23:39:33 +03:00
soxoj 57a9a82102 Update README.md 2021-03-16 23:38:58 +03:00
soxoj 9bbca995e9 Merge pull request #77 from vincenttjia/main
Fix Dockerfile
2021-03-16 23:34:17 +03:00
Vincent Tjianattan 39b713497d Fix scipy build dependencies
Fix scipy build dependencies by changing the image from python:3.7-alpine to python:3.7
2021-03-17 00:42:35 +07:00
soxoj 6a84875775 Merge pull request #76 from soxoj/new-sites
Several sites added, Disqus improved, tags fixes
2021-03-15 23:58:09 +03:00
soxoj 84f7d93478 Merge branch 'main' into new-sites 2021-03-15 23:52:52 +03:00
Soxoj 17870ef5c8 Several sites added, Disqus improved, tags fixes 2021-03-15 23:45:20 +03:00
soxoj d3cd5e45a1 Merge pull request #75 from soxoj/collab-badge
Collab link added
2021-03-15 02:52:52 +03:00
soxoj 9a3f2f0aa7 Update README.md 2021-03-15 02:50:54 +03:00
soxoj 4b7d344b41 Merge pull request #73 from soxoj/cloud-based-run
Update README.md
2021-03-15 00:28:19 +03:00
soxoj ac9cfe7885 Update README.md 2021-03-15 00:26:29 +03:00
soxoj 6058a4b70c Fixed repl.it 2021-03-15 00:15:16 +03:00
soxoj 3aa225bda4 Update README.md 2021-03-15 00:13:29 +03:00
soxoj c6661e22ff Merge pull request #72 from soxoj/v0.1.15
Bump to 0.1.15
2021-03-14 20:15:12 +03:00
Soxoj fdb68b5e80 Bump to 0.1.15 2021-03-14 20:11:32 +03:00
soxoj 9fe6b99239 Merge pull request #71 from soxoj/html-report-img-fix
Fixed HTML report images hiding for small screens + some minor fixes
2021-03-14 17:31:12 +03:00
Soxoj b9d303fde3 Fixed HTML report images hiding for small screens + some minor fixes 2021-03-14 16:15:31 +03:00
soxoj d29e88d96f Merge pull request #70 from soxoj/extracting-flag
Added separate `no-extracing` flag to rule page parsing
2021-03-14 13:22:29 +03:00
Soxoj 731a8e01f9 Added separate no-extracing flag to rule page parsing 2021-03-14 13:03:29 +03:00
soxoj cf7acfd8c8 Merge pull request #69 from soxoj/tiktok-fix
TikTok fixes
2021-03-13 00:02:25 +03:00
soxoj 9e6bd05acc Merge pull request #68 from soxoj/ssl-error-catching
Fixed catching of python-specific exception
2021-03-13 00:00:45 +03:00
Soxoj 6ea1dc33f7 TikTok fixes 2021-03-12 23:58:46 +03:00
Soxoj d5bc92d26a Fixed catching of python-specific exception 2021-03-12 23:34:59 +03:00
soxoj f7263c9b3c Merge pull request #67 from soxoj/fp-fixes
Some false positives fixes
2021-03-12 23:31:54 +03:00
Soxoj e6f82a8ba3 Some false positives fixes 2021-03-12 22:53:53 +03:00
soxoj ba7a38092c Merge pull request #65 from soxoj/dependabot/pip/aiohttp-3.7.4
Bump aiohttp from 3.7.3 to 3.7.4
2021-02-26 22:06:04 +03:00
dependabot[bot] 92a1677213 Bump aiohttp from 3.7.3 to 3.7.4
Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.7.3 to 3.7.4.
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.7.3...v3.7.4)

Signed-off-by: dependabot[bot] <support@github.com>
2021-02-26 03:07:44 +00:00
soxoj 9bbc5e61a7 Merge pull request #64 from soxoj/version-update
Bump version to 0.1.14
2021-02-25 22:47:31 +03:00
Soxoj da3e3f6719 Bump version to 0.1.14 2021-02-25 22:45:48 +03:00
soxoj d28221462a Merge pull request #63 from soxoj/updates
Updates
2021-02-25 22:35:47 +03:00
Soxoj 5baccbae0c Updates 2021-02-25 22:34:07 +03:00
soxoj 65de06dc13 Merge pull request #62 from soxoj/socid-dep-update
Update socid-extractor version
2021-02-18 23:13:32 +03:00
Soxoj dd71bc19c0 Fix requirements again 2021-02-18 23:11:51 +03:00
Soxoj 0625867f2a Fix requirements conflict 2021-02-18 23:09:40 +03:00
Soxoj ac7ff47fad Update socid-extractor version 2021-02-18 23:06:28 +03:00
soxoj 0449142745 Merge pull request #61 from soxoj/steam-updates
Spotify added, Steam ID processing updated
2021-02-18 20:57:38 +03:00
Soxoj 1a77bc7472 Spotify added, Steam ID processing updated 2021-02-18 20:53:26 +03:00
soxoj 8391d7317d Merge pull request #59 from soxoj/small-updates
Tags updates
2021-02-18 00:48:17 +03:00
soxoj 8bf789633e Merge pull request #58 from Matrix86/main
Fix for docker
2021-02-18 00:37:38 +03:00
Soxoj 2714ff8fff Tags updates 2021-02-18 00:35:59 +03:00
Gianluca b7c02456e7 fix: docker build returns an error on the pillow compilation 2021-02-17 11:51:33 +01:00
soxoj 15af5e14f2 Merge pull request #57 from soxoj/small-updates
Added `--ignore-ids` option, some sites updates
2021-02-17 03:02:04 +03:00
Soxoj f24ad4abfe Added --ignore-ids option, some sites updates 2021-02-17 02:58:57 +03:00
soxoj 2e3eceed81 Merge pull request #56 from soxoj/stats-discourse
Added stats flag, added Discourse engine
2021-02-15 23:21:59 +03:00
Soxoj 9bc3615afc Added stats flag, added Discourse engine 2021-02-15 23:15:09 +03:00
soxoj a9543e8303 Merge pull request #55 from soxoj/username-extraction
Improved extraction of usernames from links in personal data
2021-02-15 01:59:36 +03:00
Soxoj 31df4eb44d Fixed deepcopy problem for 3.6 2021-02-15 01:58:14 +03:00
Soxoj 89c33e5409 Removed pattern typing for compatibility 2021-02-15 01:52:53 +03:00
Soxoj c0956a0e23 Improved extraction of usernames from links in personal data 2021-02-15 01:36:10 +03:00
soxoj bb4c5dc67a Merge pull request #54 from soxoj/sites-update
Added several sites, updated sites list
2021-02-13 23:26:37 +03:00
Soxoj c16fc7c002 Added several sites, updated sites list 2021-02-13 23:24:53 +03:00
soxoj 53f72edaff Merge pull request #53 from soxoj/json-reports-submit-improvements
Added JSON reports
2021-02-13 01:10:55 +03:00
Soxoj 631de7b346 Added reports of JSON format (simple, njdson); improved submit logic; added several sites 2021-02-13 01:06:05 +03:00
soxoj 7676c053f9 Merge pull request #51 from soxoj/submit-mode
Experimental site submit mode
2021-02-09 00:45:41 +03:00
Soxoj 90135d4676 Experimental site submit mode 2021-02-09 00:43:59 +03:00
soxoj 4f9dace1de Merge pull request #50 from soxoj/favicons
Favicons added to sites list
2021-02-07 00:55:00 +03:00
Soxoj cdec320062 Ordered list format fixed 2021-02-07 00:53:05 +03:00
Soxoj 10426c07aa Favicons added to sites list 2021-02-07 00:43:42 +03:00
31 changed files with 6947 additions and 5367 deletions
+3 -3
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
python-version: [3.6.9, 3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
@@ -26,8 +26,8 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest
python -m pip install flake8 pytest pytest-rerunfailures
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest
run: |
pytest
pytest --reruns 3 --reruns-delay 5
+36
@@ -2,6 +2,42 @@
## [Unreleased]
## [0.1.19] - 2021-04-14
* added `--no-progressbar` option
* fixed ascii tree bug
* fixed `python -m maigret` run
* fixed requests freeze with timeout async tasks
## [0.1.18] - 2021-03-30
* some API improvements
## [0.1.17] - 2021-03-30
* simplified maigret search API
* improved documentation
* fixed 403 response code ignoring bug
## [0.1.16] - 2021-03-21
* improved URL parsing mode
* improved sites submit mode
* added uID.me uguid support
* improved requests processing
## [0.1.15] - 2021-03-14
* improved HTML reports
* fixed python-3.6-specific error
* false positives fixes
## [0.1.14] - 2021-02-25
* added JSON export formats
* improved tags markup
* implemented username detection in userinfo links
* added DB stats CLI option
* added site submit logic and CLI option
* added Spotify parsing activation
* main logic refactoring
* fixed Dockerfile
* fixed requirements
## [0.1.13] - 2021-02-06
* improved sites list filtering
* pretty console messages
+6 -5
@@ -1,20 +1,21 @@
FROM python:3.7-alpine
FROM python:3.7
LABEL maintainer="Soxoj <soxoj@protonmail.com>"
WORKDIR /app
ADD requirements.txt .
RUN pip install --upgrade pip \
&& apk add --update --virtual .build-dependencies \
build-base \
RUN pip install --upgrade pip
RUN apt update -y
RUN apt install -y\
gcc \
musl-dev \
libxml2 \
libxml2-dev \
libxslt-dev \
&& YARL_NO_EXTENSIONS=1 python3 -m pip install maigret \
&& apk del .build-dependencies \
&& rm -rf /var/cache/apk/* \
/tmp/* \
/var/tmp/*
+31 -10
@@ -33,20 +33,43 @@ Currently supported more than 2000 sites ([full list](./sites.md)), by default s
**Python 3.8 is recommended.**
### Package installing
```bash
# install from pypi
$ pip3 install maigret
pip3 install maigret
# or clone and install manually
$ git clone https://github.com/soxoj/maigret && cd maigret
$ pip3 install .
git clone https://github.com/soxoj/maigret && cd maigret
pip3 install .
```
### Cloning a repository
```bash
git clone https://github.com/soxoj/maigret && cd maigret
```
You can use a free virtual machine; the repo will be automatically cloned:
[![Open in Cloud Shell](https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md) [![Run on Repl.it](https://user-images.githubusercontent.com/27065646/92304596-bf719b00-ef7f-11ea-987f-2c1f3c323088.png)](https://repl.it/github/soxoj/maigret)
<a href="https://colab.research.google.com/gist//soxoj/879b51bc3b2f8b695abb054090645000/maigret.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="40"></a>
```bash
pip3 install -r requirements.txt
```
## Using examples
```bash
maigret user
# for a cloned repo
./maigret.py user
# for a package
maigret user
```
Features:
```bash
# make HTML and PDF reports
maigret user --html --pdf
@@ -63,19 +86,17 @@ Run `maigret --help` to get arguments description. Also options are documented i
With Docker:
```
docker build -t maigret .
# manual build
docker build -t maigret . && docker run maigret user
docker run maigret user
# official image
docker run soxoj/maigret:latest user
```
## Demo with page parsing and recursive username search
[PDF report](./static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
```bash
maigret alexaimephotographycars
```
![animation of recursive search](./static/recursive_search.svg)
![HTML report screenshot](./static/report_alexaimephotography_html_screenshot.png)
+9 -11
@@ -1,15 +1,13 @@
# HTTP Cookie File downloaded with cookies.txt by Genuinous @genuinous
# This file can be used by wget, curl, aria2c and other standard compliant tools.
# Usage Examples:
# 1) wget -x --load-cookies cookies.txt "https://xss.is/search/"
# 2) curl --cookie cookies.txt "https://xss.is/search/"
# 3) aria2c --load-cookies cookies.txt "https://xss.is/search/"
# 1) wget -x --load-cookies cookies.txt "https://pixabay.com/users/blue-156711/"
# 2) curl --cookie cookies.txt "https://pixabay.com/users/blue-156711/"
# 3) aria2c --load-cookies cookies.txt "https://pixabay.com/users/blue-156711/"
#
xss.is FALSE / TRUE 0 xf_csrf PMnZNsr42HETwYEr
xss.is FALSE / TRUE 0 xf_from_search google
xss.is FALSE / TRUE 1642709308 xf_user 215268%2CZNKB_-64Wk-BOpsdtLYy-1UxfS5zGpxWaiEGUhmX
xss.is FALSE / TRUE 0 xf_session sGdxJtP_sKV0LCG8vUQbr6cL670_EFWM
.xss.is TRUE / FALSE 0 muchacho_cache [&quot;00fbb0f2772c9596b0483d6864563cce&quot;]
.xss.is TRUE / FALSE 0 muchacho_png [&quot;00fbb0f2772c9596b0483d6864563cce&quot;]
.xss.is TRUE / FALSE 0 muchacho_etag [&quot;00fbb0f2772c9596b0483d6864563cce&quot;]
.xss.is TRUE / FALSE 1924905600 2e66e4dd94a7a237d0d1b4d50f01e179_evc [&quot;00fbb0f2772c9596b0483d6864563cce&quot;]
.pixabay.com TRUE / TRUE 1618356838 __cfduid d56929cd50d11474f421b849df5758a881615764837
.pixabay.com TRUE / TRUE 1615766638 __cf_bm ea8f7c565b44d749f65500f0e45176cebccaeb09-1615764837-1800-AYJIXh2boDJ6HPf44JI9fnteWABHOVvkxiSccACP9EiS1E58UDTGhViXtqjFfVE0QRj1WowP4ss2DzCs+pW+qUc=
pixabay.com FALSE / FALSE 0 anonymous_user_id c1e4ee09-5674-4252-aa94-8c47b1ea80ab
pixabay.com FALSE / FALSE 1647214439 csrftoken vfetTSvIul7gBlURt6s985JNM18GCdEwN5MWMKqX4yI73xoPgEj42dbNefjGx5fr
pixabay.com FALSE / FALSE 1647300839 client_width 1680
pixabay.com FALSE / FALSE 748111764839 is_human 1
+1 -1
@@ -1,4 +1,4 @@
#! /usr/bin/env python3
#!/usr/bin/env python3
import asyncio
import sys
+4
@@ -1 +1,5 @@
"""Maigret"""
from .checking import maigret as search
from .sites import MaigretEngine, MaigretSite, MaigretDatabase
from .notify import QueryNotifyPrint as Notifier
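For context, a minimal sketch (not part of the diff) of how these new package-level exports could be used from user code. It assumes a `sites` dict of `{site_name: MaigretSite}` has already been built from the Maigret database (the loading helpers are not shown here) and that `Notifier()` accepts default constructor arguments; both are assumptions.

```python
# Hedged usage sketch of the exports above; `sites` is assumed to be a
# dict of {site_name: MaigretSite} prepared elsewhere (not shown in this diff).
import asyncio
import logging

from maigret import search, Notifier  # search is maigret.checking.maigret re-exported

async def lookup(username, sites):
    logger = logging.getLogger('maigret')
    notifier = Notifier()  # assumption: default constructor arguments are acceptable
    results = await search(username, sites, logger,
                           query_notify=notifier,
                           timeout=10,
                           no_progressbar=True)
    for site_name, info in results.items():
        print(site_name, info['status'])  # info['status'] is a QueryResult

# asyncio.run(lookup('soxoj', sites))
```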
+2 -2
@@ -6,7 +6,7 @@ Maigret entrypoint
import asyncio
import maigret
from .maigret import main
if __name__ == "__main__":
asyncio.run(maigret.main())
asyncio.run(main())
+11 -4
@@ -1,11 +1,9 @@
import aiohttp
from aiohttp import CookieJar
import asyncio
import json
from http.cookiejar import MozillaCookieJar
from http.cookies import Morsel
import requests
from aiohttp import CookieJar
class ParsingActivator:
@staticmethod
@@ -27,6 +25,15 @@ class ParsingActivator:
jwt_token = r.json()['jwt']
site.headers['Authorization'] = 'jwt ' + jwt_token
@staticmethod
def spotify(site, logger, cookies={}):
headers = dict(site.headers)
if 'Authorization' in headers:
del headers['Authorization']
r = requests.get(site.activation['url'])
bearer_token = r.json()['accessToken']
site.headers['authorization'] = f'Bearer {bearer_token}'
@staticmethod
def xssis(site, logger, cookies={}):
if not cookies:
+740
@@ -0,0 +1,740 @@
import asyncio
import logging
from mock import Mock
import re
import ssl
import sys
import tqdm
import time
from typing import Callable, Any, Iterable, Tuple
import aiohttp
import tqdm.asyncio
from aiohttp_socks import ProxyConnector
from mock import Mock
from python_socks import _errors as proxy_errors
from socid_extractor import extract
from .activation import ParsingActivator, import_aiohttp_cookies
from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite
supported_recursive_search_ids = (
'yandex_public_id',
'gaia_id',
'vk_id',
'ok_id',
'wikimapia_uid',
'steam_id',
'uidme_uguid',
)
common_errors = {
'<title>Attention Required! | Cloudflare</title>': 'Cloudflare captcha',
'Please stand by, while we are checking your browser': 'Cloudflare captcha',
'<title>Доступ ограничен</title>': 'Rostelecom censorship',
'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha',
'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection',
'404</h1><p class="error-card__description">Мы&nbsp;не&nbsp;нашли страницу': 'MegaFon 404 page',
'Доступ к информационному ресурсу ограничен на основании Федерального закона': 'MGTS censorship',
'Incapsula incident ID': 'Incapsula antibot protection',
}
unsupported_characters = '#'
QueryDraft = Tuple[Callable, Any, Any]
QueriesDraft = Iterable[QueryDraft]
def create_task_func():
if sys.version_info.minor > 6:
create_asyncio_task = asyncio.create_task
else:
loop = asyncio.get_event_loop()
create_asyncio_task = loop.create_task
return create_asyncio_task
class AsyncExecutor:
def __init__(self, *args, **kwargs):
self.logger = kwargs['logger']
async def run(self, tasks: QueriesDraft):
start_time = time.time()
results = await self._run(tasks)
self.execution_time = time.time() - start_time
self.logger.debug(f'Spent time: {self.execution_time}')
return results
async def _run(self, tasks: QueriesDraft):
await asyncio.sleep(0)
class AsyncioSimpleExecutor(AsyncExecutor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
async def _run(self, tasks: QueriesDraft):
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
return await asyncio.gather(*futures)
class AsyncioProgressbarExecutor(AsyncExecutor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
async def _run(self, tasks: QueriesDraft):
futures = [f(*args, **kwargs) for f, args, kwargs in tasks]
results = []
for f in tqdm.asyncio.tqdm.as_completed(futures):
results.append(await f)
return results
class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1))
async def _run(self, tasks: QueriesDraft):
async def _wrap_query(q: QueryDraft):
async with self.semaphore:
f, args, kwargs = q
return await f(*args, **kwargs)
async def semaphore_gather(tasks: QueriesDraft):
coros = [_wrap_query(q) for q in tasks]
results = []
for f in tqdm.asyncio.tqdm.as_completed(coros):
results.append(await f)
return results
return await semaphore_gather(tasks)
class AsyncioProgressbarQueueExecutor(AsyncExecutor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.workers_count = kwargs.get('in_parallel', 10)
self.progress_func = kwargs.get('progress_func', tqdm.tqdm)
self.queue = asyncio.Queue(self.workers_count)
self.timeout = kwargs.get('timeout')
async def worker(self):
while True:
try:
f, args, kwargs = self.queue.get_nowait()
except asyncio.QueueEmpty:
return
query_future = f(*args, **kwargs)
query_task = create_task_func()(query_future)
try:
result = await asyncio.wait_for(query_task, timeout=self.timeout)
except asyncio.TimeoutError:
result = None
self.results.append(result)
self.progress.update(1)
self.queue.task_done()
async def _run(self, queries: QueriesDraft):
self.results = []
queries_list = list(queries)
min_workers = min(len(queries_list), self.workers_count)
workers = [create_task_func()(self.worker())
for _ in range(min_workers)]
self.progress = self.progress_func(total=len(queries_list))
for t in queries_list:
await self.queue.put(t)
await self.queue.join()
for w in workers:
w.cancel()
self.progress.close()
return self.results
async def get_response(request_future, site_name, logger):
html_text = None
status_code = 0
error_text = "General Unknown Error"
expection_text = None
try:
response = await request_future
status_code = response.status
response_content = await response.content.read()
charset = response.charset or 'utf-8'
decoded_content = response_content.decode(charset, 'ignore')
html_text = decoded_content
if status_code > 0:
error_text = None
logger.debug(html_text)
except asyncio.TimeoutError as errt:
error_text = "Timeout Error"
expection_text = str(errt)
except aiohttp.client_exceptions.ClientConnectorError as err:
error_text = "Error Connecting"
expection_text = str(err)
except aiohttp.http_exceptions.BadHttpMessage as err:
error_text = "HTTP Error"
expection_text = str(err)
except proxy_errors.ProxyError as err:
error_text = "Proxy Error"
expection_text = str(err)
except Exception as err:
# python-specific exceptions
if sys.version_info.minor > 6:
if isinstance(err, ssl.SSLCertVerificationError) or isinstance(err, ssl.SSLError):
error_text = "SSL Error"
expection_text = str(err)
else:
logger.warning(f'Unhandled error while requesting {site_name}: {err}')
logger.debug(err, exc_info=True)
error_text = "Some Error"
expection_text = str(err)
# TODO: return only needed information
return html_text, status_code, error_text, expection_text
async def update_site_dict_from_response(sitename, site_dict, results_info, logger, query_notify):
site_obj = site_dict[sitename]
future = site_obj.request_future
if not future:
# ignore: search by incompatible id type
return
response = await get_response(request_future=future,
site_name=sitename,
logger=logger)
return sitename, process_site_result(response, query_notify, logger, results_info, site_obj)
# TODO: move to separate class
def detect_error_page(html_text, status_code, fail_flags, ignore_403):
# Detect service restrictions such as a country restriction
for flag, msg in fail_flags.items():
if flag in html_text:
return 'Some site error', msg
# Detect common restrictions such as provider censorship and bot protection
for flag, msg in common_errors.items():
if flag in html_text:
return 'Error', msg
# Detect common site errors
if status_code == 403 and not ignore_403:
return 'Access denied', 'Access denied, use proxy/vpn'
elif status_code >= 500:
return f'Error {status_code}', f'Site error {status_code}'
return None, None
def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
if not response:
return results_info
fulltags = site.tags
# Retrieve other site information again
username = results_info['username']
is_parsing_enabled = results_info['parsing_enabled']
url = results_info.get("url_user")
logger.debug(url)
status = results_info.get("status")
if status is not None:
# We have already determined the user doesn't exist here
return results_info
# Get the expected check type
check_type = site.check_type
# Get the failure messages and comments
failure_errors = site.errors
# TODO: refactor
if not response:
logger.error(f'No response for {site.name}')
return results_info
html_text, status_code, error_text, expection_text = response
site_error_text = '?'
# TODO: add elapsed request time counting
response_time = None
if logger.level == logging.DEBUG:
with open('debug.txt', 'a') as f:
status = status_code or 'No response'
f.write(f'url: {url}\nerror: {str(error_text)}\nr: {status}\n')
if html_text:
f.write(f'code: {status}\nresponse: {str(html_text)}\n')
if status_code and not error_text:
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
site.ignore403)
if site.activation and html_text:
is_need_activation = any([s for s in site.activation['marks'] if s in html_text])
if is_need_activation:
method = site.activation['method']
try:
activate_fun = getattr(ParsingActivator(), method)
# TODO: async call
activate_fun(site, logger)
except AttributeError:
logger.warning(f'Activation method {method} for site {site.name} not found!')
except Exception as e:
logger.warning(f'Failed activation {method} for site {site.name}: {e}')
# presense flags
# True by default
presense_flags = site.presense_strs
is_presense_detected = False
if html_text:
if not presense_flags:
is_presense_detected = True
site.stats['presense_flag'] = None
else:
for presense_flag in presense_flags:
if presense_flag in html_text:
is_presense_detected = True
site.stats['presense_flag'] = presense_flag
logger.info(presense_flag)
break
if error_text is not None:
logger.debug(error_text)
result = QueryResult(username,
site.name,
url,
QueryStatus.UNKNOWN,
query_time=response_time,
context=f'{error_text}: {site_error_text}', tags=fulltags)
elif check_type == "message":
absence_flags = site.absence_strs
is_absence_flags_list = isinstance(absence_flags, list)
absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
# Checks if the error message is in the HTML
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
if not is_absence_detected and is_presense_detected:
result = QueryResult(username,
site.name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
site.name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
elif check_type == "status_code":
# Checks if the status code of the response is 2XX
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
result = QueryResult(username,
site.name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
site.name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
elif check_type == "response_url":
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= status_code < 300 and is_presense_detected:
result = QueryResult(username,
site.name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
site.name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
else:
# It should be impossible to ever get here...
raise ValueError(f"Unknown check type '{check_type}' for "
f"site '{site.name}'")
extracted_ids_data = {}
if is_parsing_enabled and result.status == QueryStatus.CLAIMED:
try:
extracted_ids_data = extract(html_text)
except Exception as e:
logger.warning(f'Error while parsing {site.name}: {e}', exc_info=True)
if extracted_ids_data:
new_usernames = {}
for k, v in extracted_ids_data.items():
if 'username' in k:
new_usernames[v] = 'username'
if k in supported_recursive_search_ids:
new_usernames[v] = k
results_info['ids_usernames'] = new_usernames
results_info['ids_links'] = eval(extracted_ids_data.get('links', '[]'))
result.ids_data = extracted_ids_data
# Notify caller about results of query.
query_notify.update(result, site.similar_search)
# Save status of request
results_info['status'] = result
# Save results from request
results_info['http_status'] = status_code
results_info['is_similar'] = site.similar_search
# results_site['response_text'] = html_text
results_info['rank'] = site.alexa_rank
return results_info
async def maigret(username, site_dict, logger, query_notify=None,
proxy=None, timeout=None, is_parsing_enabled=False,
id_type='username', debug=False, forced=False,
max_connections=100, no_progressbar=False,
cookies=None):
"""Main search func
Checks for existence of username on certain sites.
Keyword Arguments:
username -- Username string will be used for search.
site_dict -- Dictionary containing sites data.
query_notify -- Object with base type of QueryNotify().
This will be used to notify the caller about
query results.
logger -- Standard Python logger object.
timeout -- Time in seconds to wait before timing out request.
Default is no timeout.
is_parsing_enabled -- Extract additional info from account pages.
id_type -- Type of username to search.
Default is 'username', see all supported here:
https://github.com/soxoj/maigret/wiki/Supported-identifier-types
max_connections -- Maximum number of concurrent connections allowed.
Default is 100.
no_progressbar -- Disable the ASCII progressbar during the scan.
cookies -- Filename of a cookie jar file to use for each request.
Return Value:
Dictionary containing results from report. Key of dictionary is the name
of the social network site, and the value is another dictionary with
the following keys:
url_main: URL of main site.
url_user: URL of user on site (if account exists).
status: QueryResult() object indicating results of test for
account existence.
http_status: HTTP status code of query which checked for existence on
site.
response_text: Text that came back from request. May be None if
there was an HTTP error when checking for existence.
"""
# Notify caller that we are starting the query.
if not query_notify:
query_notify = Mock()
query_notify.start(username, id_type)
# TODO: connector
connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
# connector = aiohttp.TCPConnector(ssl=False)
connector.verify_ssl = False
cookie_jar = None
if cookies:
logger.debug(f'Using cookies jar file {cookies}')
cookie_jar = await import_aiohttp_cookies(cookies)
session = aiohttp.ClientSession(connector=connector, trust_env=True, cookie_jar=cookie_jar)
if logger.level == logging.DEBUG:
future = session.get(url='https://icanhazip.com')
ip, status, error, expection = await get_response(future, None, logger)
if ip:
logger.debug(f'My IP is: {ip.strip()}')
else:
logger.debug(f'IP requesting {error}: {expection}')
# Results from analysis of all sites
results_total = {}
# First create futures for all requests. This allows for the requests to run in parallel
for site_name, site in site_dict.items():
if site.type != id_type:
continue
if site.disabled and not forced:
logger.debug(f'Site {site.name} is disabled, skipping...')
continue
# Results from analysis of this specific site
results_site = {}
# Record URL of main site and username
results_site['username'] = username
results_site['parsing_enabled'] = is_parsing_enabled
results_site['url_main'] = site.url_main
results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
}
headers.update(site.headers)
if 'url' not in site.__dict__:
logger.error('No URL for site %s', site.name)
# URL of user on site (if it exists)
url = site.url.format(
urlMain=site.url_main,
urlSubpath=site.url_subpath,
username=username
)
# workaround to prevent slash errors
url = re.sub('(?<!:)/+', '/', url)
# Don't make request if username is invalid for the site
if site.regex_check and re.search(site.regex_check, username) is None:
# No need to do the check at the site: this user name is not allowed.
results_site['status'] = QueryResult(username,
site_name,
url,
QueryStatus.ILLEGAL)
results_site["url_user"] = ""
results_site['http_status'] = ""
results_site['response_text'] = ""
query_notify.update(results_site['status'])
else:
# URL of user on site (if it exists)
results_site["url_user"] = url
url_probe = site.url_probe
if url_probe is None:
# Probe URL is normal one seen by people out on the web.
url_probe = url
else:
# There is a special URL for probing existence separate
# from where the user profile normally can be found.
url_probe = url_probe.format(
urlMain=site.url_main,
urlSubpath=site.url_subpath,
username=username,
)
for k, v in site.get_params.items():
url_probe += f'&{k}={v}'
if site.check_type == 'status_code' and site.request_head_only:
# In most cases when we are detecting by status code,
# it is not necessary to get the entire body: we can
# detect fine with just the HEAD response.
request_method = session.head
else:
# Either this detect method needs the content associated
# with the GET response, or this specific website will
# not respond properly unless we request the whole page.
request_method = session.get
if site.check_type == "response_url":
# Site forwards request to a different URL if username not
# found. Disallow the redirect so we can capture the
# http status from the original URL request.
allow_redirects = False
else:
# Allow whatever redirect that the site wants to do.
# The final result of the request will be what is available.
allow_redirects = True
future = request_method(url=url_probe, headers=headers,
allow_redirects=allow_redirects,
timeout=timeout,
)
# Store future in data for access later
# TODO: move to separate obj
site.request_future = future
# Add this site's results into final dictionary with all of the other results.
results_total[site_name] = results_site
coroutines = []
for sitename, result_obj in results_total.items():
coroutines.append((update_site_dict_from_response, [sitename, site_dict, result_obj, logger, query_notify], {}))
if no_progressbar:
executor = AsyncioSimpleExecutor(logger=logger)
else:
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=max_connections, timeout=timeout+0.5)
results = await executor.run(coroutines)
await session.close()
# Notify caller that all queries are finished.
query_notify.finish()
data = {}
for result in results:
# TODO: still can be empty
if result:
try:
data[result[0]] = result[1]
except Exception as e:
logger.error(e, exc_info=True)
logger.info(result)
return data
def timeout_check(value):
"""Check Timeout Argument.
Checks timeout for validity.
Keyword Arguments:
value -- Time in seconds to wait before timing out request.
Return Value:
Floating point number representing the time (in seconds) that should be
used for the timeout.
NOTE: Will raise an exception if the timeout is invalid.
"""
from argparse import ArgumentTypeError
try:
timeout = float(value)
except ValueError:
raise ArgumentTypeError(f"Timeout '{value}' must be a number.")
if timeout <= 0:
raise ArgumentTypeError(f"Timeout '{value}' must be greater than 0.0s.")
return timeout
async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
changes = {
'disabled': False,
}
try:
check_data = [
(site.username_claimed, QueryStatus.CLAIMED),
(site.username_unclaimed, QueryStatus.AVAILABLE),
]
except Exception as e:
logger.error(e)
logger.error(site.__dict__)
check_data = []
logger.info(f'Checking {site.name}...')
for username, status in check_data:
async with semaphore:
results_dict = await maigret(
username=username,
site_dict={site.name: site},
logger=logger,
timeout=30,
id_type=site.type,
forced=True,
no_progressbar=True,
)
# don't disable entries with other ids types
# TODO: make normal checking
if site.name not in results_dict:
logger.info(results_dict)
changes['disabled'] = True
continue
result = results_dict[site.name]['status']
site_status = result.status
if site_status != status:
if site_status == QueryStatus.UNKNOWN:
msgs = site.absence_strs
etype = site.check_type
logger.warning(
f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
# don't disable in case of available username
if status == QueryStatus.CLAIMED:
changes['disabled'] = True
elif status == QueryStatus.CLAIMED:
logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
logger.info(results_dict[site.name])
changes['disabled'] = True
else:
logger.warning(f'Found `{username}` in {site.name}, must be available')
logger.info(results_dict[site.name])
changes['disabled'] = True
logger.info(f'Site {site.name} checking is finished')
if changes['disabled'] != site.disabled:
site.disabled = changes['disabled']
db.update_site(site)
if not silent:
action = 'Disabled' if site.disabled else 'Enabled'
print(f'{action} site {site.name}...')
return changes
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
max_connections=10) -> bool:
sem = asyncio.Semaphore(max_connections)
tasks = []
all_sites = site_data
def disabled_count(lst):
return len(list(filter(lambda x: x.disabled, lst)))
disabled_old_count = disabled_count(all_sites.values())
for _, site in all_sites.items():
check_coro = site_self_check(site, logger, sem, db, silent)
future = asyncio.ensure_future(check_coro)
tasks.append(future)
for f in tqdm.asyncio.tqdm.as_completed(tasks):
await f
disabled_new_count = disabled_count(all_sites.values())
total_disabled = disabled_new_count - disabled_old_count
if total_disabled >= 0:
message = 'Disabled'
else:
message = 'Enabled'
total_disabled *= -1
if not silent:
print(
f'{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. Run with `--info` flag to get more information')
return total_disabled != 0
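A brief illustrative sketch (not part of the diff) of the task format consumed by the executors defined at the top of this new module: each task is a `(func, args, kwargs)` tuple matching the `QueryDraft` alias, and tasks that exceed the timeout come back as `None`. The `probe` coroutine and the numbers below are made-up placeholders.

```python
# Hedged sketch of driving the queue-based executor shown above.
import asyncio
import logging

from maigret.checking import AsyncioProgressbarQueueExecutor

async def probe(n, delay=0.05):
    await asyncio.sleep(delay)  # stand-in for a real HTTP check coroutine
    return n * n

async def demo():
    logger = logging.getLogger('demo')
    # QueryDraft-style tuples: (callable, positional args, keyword args)
    tasks = [(probe, [n], {'delay': 0.05}) for n in range(20)]
    executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=5, timeout=1)
    results = await executor.run(tasks)  # results arrive in completion order
    print(results)

asyncio.run(demo())
```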
+109 -630
@@ -1,616 +1,27 @@
"""
Maigret main module
"""
import aiohttp
import asyncio
import logging
import os
import platform
import re
import ssl
import sys
import platform
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import aiohttp
import requests
import tqdm.asyncio
from aiohttp_socks import ProxyConnector
from mock import Mock
from python_socks import _errors as proxy_errors
from socid_extractor import parse, extract, __version__ as socid_version
from socid_extractor import extract, parse, __version__ as socid_version
from .activation import ParsingActivator, import_aiohttp_cookies
from .checking import timeout_check, supported_recursive_search_ids, self_check, unsupported_characters, maigret
from .notify import QueryNotifyPrint
from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
generate_report_context, save_txt_report
from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite
generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, check_supported_json_format, \
save_json_report
from .sites import MaigretDatabase
from .submit import submit_dialog
from .utils import get_dict_ascii_tree
__version__ = '0.1.13'
supported_recursive_search_ids = (
'yandex_public_id',
'gaia_id',
'vk_id',
'ok_id',
'wikimapia_uid',
)
common_errors = {
'<title>Attention Required! | Cloudflare</title>': 'Cloudflare captcha',
'Please stand by, while we are checking your browser': 'Cloudflare captcha',
'<title>Доступ ограничен</title>': 'Rostelecom censorship',
'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha',
'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection',
'404</h1><p class="error-card__description">Мы&nbsp;не&nbsp;нашли страницу': 'MegaFon 404 page',
'Доступ к информационному ресурсу ограничен на основании Федерального закона': 'MGTS censorship',
'Incapsula incident ID': 'Incapsula antibot protection',
}
unsupported_characters = '#'
async def get_response(request_future, site_name, logger):
html_text = None
status_code = 0
error_text = "General Unknown Error"
expection_text = None
try:
response = await request_future
status_code = response.status
response_content = await response.content.read()
charset = response.charset or 'utf-8'
decoded_content = response_content.decode(charset, 'ignore')
html_text = decoded_content
if status_code > 0:
error_text = None
logger.debug(html_text)
except asyncio.TimeoutError as errt:
error_text = "Timeout Error"
expection_text = str(errt)
except (ssl.SSLCertVerificationError, ssl.SSLError) as err:
error_text = "SSL Error"
expection_text = str(err)
except aiohttp.client_exceptions.ClientConnectorError as err:
error_text = "Error Connecting"
expection_text = str(err)
except aiohttp.http_exceptions.BadHttpMessage as err:
error_text = "HTTP Error"
expection_text = str(err)
except proxy_errors.ProxyError as err:
error_text = "Proxy Error"
expection_text = str(err)
except Exception as err:
logger.warning(f'Unhandled error while requesting {site_name}: {err}')
logger.debug(err, exc_info=True)
error_text = "Some Error"
expection_text = str(err)
# TODO: return only needed information
return html_text, status_code, error_text, expection_text
async def update_site_dict_from_response(sitename, site_dict, results_info, semaphore, logger, query_notify):
async with semaphore:
site_obj = site_dict[sitename]
future = site_obj.request_future
if not future:
# ignore: search by incompatible id type
return
response = await get_response(request_future=future,
site_name=sitename,
logger=logger)
site_dict[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj)
# TODO: move info separate module
def detect_error_page(html_text, status_code, fail_flags, ignore_403):
# Detect service restrictions such as a country restriction
for flag, msg in fail_flags.items():
if flag in html_text:
return 'Some site error', msg
# Detect common restrictions such as provider censorship and bot protection
for flag, msg in common_errors.items():
if flag in html_text:
return 'Error', msg
# Detect common site errors
if status_code == 403 and not ignore_403:
return 'Access denied', 'Access denied, use proxy/vpn'
elif status_code >= 500:
return f'Error {status_code}', f'Site error {status_code}'
return None, None
def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
if not response:
return results_info
fulltags = site.tags
# Retrieve other site information again
username = results_info['username']
is_parsing_enabled = results_info['parsing_enabled']
url = results_info.get("url_user")
logger.debug(url)
status = results_info.get("status")
if status is not None:
# We have already determined the user doesn't exist here
return results_info
# Get the expected check type
check_type = site.check_type
# Get the failure messages and comments
failure_errors = site.errors
# TODO: refactor
if not response:
logger.error(f'No response for {site.name}')
return results_info
html_text, status_code, error_text, expection_text = response
site_error_text = '?'
# TODO: add elapsed request time counting
response_time = None
if logger.level == logging.DEBUG:
with open('debug.txt', 'a') as f:
status = status_code or 'No response'
f.write(f'url: {url}\nerror: {str(error_text)}\nr: {status}\n')
if html_text:
f.write(f'code: {status}\nresponse: {str(html_text)}\n')
if status_code and not error_text:
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
site.ignore_403)
if site.activation and html_text:
is_need_activation = any([s for s in site.activation['marks'] if s in html_text])
if is_need_activation:
method = site.activation['method']
try:
activate_fun = getattr(ParsingActivator(), method)
# TODO: async call
activate_fun(site, logger)
except AttributeError:
logger.warning(f'Activation method {method} for site {site.name} not found!')
# presense flags
# True by default
presense_flags = site.presense_strs
is_presense_detected = False
if html_text:
if not presense_flags:
is_presense_detected = True
site.stats['presense_flag'] = None
else:
for presense_flag in presense_flags:
if presense_flag in html_text:
is_presense_detected = True
site.stats['presense_flag'] = presense_flag
logger.info(presense_flag)
break
if error_text is not None:
logger.debug(error_text)
result = QueryResult(username,
site.name,
url,
QueryStatus.UNKNOWN,
query_time=response_time,
context=f'{error_text}: {site_error_text}', tags=fulltags)
elif check_type == "message":
absence_flags = site.absence_strs
is_absence_flags_list = isinstance(absence_flags, list)
absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
# Checks if the error message is in the HTML
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
if not is_absence_detected and is_presense_detected:
result = QueryResult(username,
site.name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
site.name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
elif check_type == "status_code":
# Checks if the status code of the response is 2XX
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
result = QueryResult(username,
site.name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
site.name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
elif check_type == "response_url":
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= status_code < 300 and is_presense_detected:
result = QueryResult(username,
site.name,
url,
QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags)
else:
result = QueryResult(username,
site.name,
url,
QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags)
else:
# It should be impossible to ever get here...
raise ValueError(f"Unknown check type '{check_type}' for "
f"site '{site.name}'")
extracted_ids_data = {}
if is_parsing_enabled and result.status == QueryStatus.CLAIMED:
try:
extracted_ids_data = extract(html_text)
except Exception as e:
logger.warning(f'Error while parsing {site.name}: {e}', exc_info=True)
if extracted_ids_data:
new_usernames = {}
for k, v in extracted_ids_data.items():
if 'username' in k:
new_usernames[v] = 'username'
if k in supported_recursive_search_ids:
new_usernames[v] = k
results_info['ids_usernames'] = new_usernames
result.ids_data = extracted_ids_data
# Notify caller about results of query.
query_notify.update(result, site.similar_search)
# Save status of request
results_info['status'] = result
# Save results from request
results_info['http_status'] = status_code
results_info['is_similar'] = site.similar_search
# results_site['response_text'] = html_text
results_info['rank'] = site.alexa_rank
return results_info
async def maigret(username, site_dict, query_notify, logger,
proxy=None, timeout=None, recursive_search=False,
id_type='username', debug=False, forced=False,
max_connections=100, no_progressbar=False,
cookies=None):
"""Main search func
Checks for existence of username on various social media sites.
Keyword Arguments:
username -- String indicating username that report
should be created against.
site_dict -- Dictionary containing all of the site data.
query_notify -- Object with base type of QueryNotify().
This will be used to notify the caller about
query results.
proxy -- String indicating the proxy URL
timeout -- Time in seconds to wait before timing out request.
Default is no timeout.
recursive_search -- Search for other usernames in website pages & recursive search by them.
Return Value:
Dictionary containing results from report. Key of dictionary is the name
of the social network site, and the value is another dictionary with
the following keys:
url_main: URL of main site.
url_user: URL of user on site (if account exists).
status: QueryResult() object indicating results of test for
account existence.
http_status: HTTP status code of query which checked for existence on
site.
response_text: Text that came back from request. May be None if
there was an HTTP error when checking for existence.
"""
# Notify caller that we are starting the query.
query_notify.start(username, id_type)
# TODO: connector
connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
# connector = aiohttp.TCPConnector(ssl=False)
connector.verify_ssl=False
cookie_jar = None
if cookies:
cookie_jar = await import_aiohttp_cookies(cookies)
session = aiohttp.ClientSession(connector=connector, trust_env=True, cookie_jar=cookie_jar)
if logger.level == logging.DEBUG:
future = session.get(url='https://icanhazip.com')
ip, status, error, expection = await get_response(future, None, logger)
if ip:
logger.debug(f'My IP is: {ip.strip()}')
else:
logger.debug(f'IP requesting {error}: {expection}')
# Results from analysis of all sites
results_total = {}
# First create futures for all requests. This allows for the requests to run in parallel
for site_name, site in site_dict.items():
if site.type != id_type:
continue
if site.disabled and not forced:
logger.debug(f'Site {site.name} is disabled, skipping...')
continue
# Results from analysis of this specific site
results_site = {}
# Record URL of main site and username
results_site['username'] = username
results_site['parsing_enabled'] = recursive_search
results_site['url_main'] = site.url_main
results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
}
headers.update(site.headers)
if not 'url' in site.__dict__:
logger.error('No URL for site %s', site.name)
# URL of user on site (if it exists)
url = site.url.format(
urlMain=site.url_main,
urlSubpath=site.url_subpath,
username=username
)
# workaround to prevent slash errors
url = re.sub('(?<!:)/+', '/', url)
# Don't make request if username is invalid for the site
if site.regex_check and re.search(site.regex_check, username) is None:
# No need to do the check at the site: this user name is not allowed.
results_site['status'] = QueryResult(username,
site_name,
url,
QueryStatus.ILLEGAL)
results_site["url_user"] = ""
results_site['http_status'] = ""
results_site['response_text'] = ""
query_notify.update(results_site['status'])
else:
# URL of user on site (if it exists)
results_site["url_user"] = url
url_probe = site.url_probe
if url_probe is None:
# Probe URL is normal one seen by people out on the web.
url_probe = url
else:
# There is a special URL for probing existence separate
# from where the user profile normally can be found.
url_probe = url_probe.format(
urlMain=site.url_main,
urlSubpath=site.url_subpath,
username=username,
)
for k, v in site.get_params.items():
url_probe += f'&{k}={v}'
if site.check_type == 'status_code' and site.request_head_only:
# In most cases when we are detecting by status code,
# it is not necessary to get the entire body: we can
# detect fine with just the HEAD response.
request_method = session.head
else:
# Either this detect method needs the content associated
# with the GET response, or this specific website will
# not respond properly unless we request the whole page.
request_method = session.get
if site.check_type == "response_url":
# Site forwards request to a different URL if username not
# found. Disallow the redirect so we can capture the
# http status from the original URL request.
allow_redirects = False
else:
# Allow whatever redirect that the site wants to do.
# The final result of the request will be what is available.
allow_redirects = True
future = request_method(url=url_probe, headers=headers,
allow_redirects=allow_redirects,
timeout=timeout,
)
# Store future in data for access later
# TODO: move to separate obj
site.request_future = future
# Add this site's results into final dictionary with all of the other results.
results_total[site_name] = results_site
# TODO: move into top-level function
sem = asyncio.Semaphore(max_connections)
tasks = []
for sitename, result_obj in results_total.items():
update_site_coro = update_site_dict_from_response(sitename, site_dict, result_obj, sem, logger, query_notify)
future = asyncio.ensure_future(update_site_coro)
tasks.append(future)
if no_progressbar:
await asyncio.gather(*tasks)
else:
for f in tqdm.asyncio.tqdm.as_completed(tasks):
await f
await session.close()
# Notify caller that all queries are finished.
query_notify.finish()
return results_total
def timeout_check(value):
"""Check Timeout Argument.
Checks timeout for validity.
Keyword Arguments:
value -- Time in seconds to wait before timing out request.
Return Value:
Floating point number representing the time (in seconds) that should be
used for the timeout.
NOTE: Will raise an exception if the timeout in invalid.
"""
from argparse import ArgumentTypeError
try:
timeout = float(value)
except ValueError:
raise ArgumentTypeError(f"Timeout '{value}' must be a number.")
if timeout <= 0:
raise ArgumentTypeError(f"Timeout '{value}' must be greater than 0.0s.")
return timeout
async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
query_notify = Mock()
changes = {
'disabled': False,
}
try:
check_data = [
(site.username_claimed, QueryStatus.CLAIMED),
(site.username_unclaimed, QueryStatus.AVAILABLE),
]
except:
print(site.__dict__)
logger.info(f'Checking {site.name}...')
for username, status in check_data:
async with semaphore:
results_dict = await maigret(
username,
{site.name: site},
query_notify,
logger,
timeout=30,
id_type=site.type,
forced=True,
no_progressbar=True,
)
# don't disable entries with other ids types
# TODO: make normal checking
if site.name not in results_dict:
logger.info(results_dict)
changes['disabled'] = True
continue
result = results_dict[site.name]['status']
site_status = result.status
if site_status != status:
if site_status == QueryStatus.UNKNOWN:
msgs = site.absence_strs
etype = site.check_type
logger.warning(f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
# don't disable in case of available username
if status == QueryStatus.CLAIMED:
changes['disabled'] = True
elif status == QueryStatus.CLAIMED:
logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
logger.info(results_dict[site.name])
changes['disabled'] = True
else:
logger.warning(f'Found `{username}` in {site.name}, must be available')
logger.info(results_dict[site.name])
changes['disabled'] = True
logger.info(f'Site {site.name} checking is finished')
if changes['disabled'] != site.disabled:
site.disabled = changes['disabled']
db.update_site(site)
if not silent:
action = 'Disabled' if site.disabled else 'Enabled'
print(f'{action} site {site.name}...')
return changes
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
max_connections=10) -> bool:
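"""Run self-checks for all given sites in parallel and return True if the number of disabled sites has changed."""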
sem = asyncio.Semaphore(max_connections)
tasks = []
all_sites = site_data
def disabled_count(lst):
return len(list(filter(lambda x: x.disabled, lst)))
disabled_old_count = disabled_count(all_sites.values())
for _, site in all_sites.items():
check_coro = site_self_check(site, logger, sem, db, silent)
future = asyncio.ensure_future(check_coro)
tasks.append(future)
for f in tqdm.asyncio.tqdm.as_completed(tasks):
await f
disabled_new_count = disabled_count(all_sites.values())
total_disabled = disabled_new_count - disabled_old_count
if total_disabled >= 0:
message = 'Disabled'
else:
message = 'Enabled'
total_disabled *= -1
if not silent:
print(f'{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. Run with `--info` flag to get more information')
return total_disabled != 0
__version__ = '0.1.19'
async def main():
@@ -650,9 +61,9 @@ async def main():
action="store", dest="proxy", default=None,
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
)
parser.add_argument("--json", "-j", metavar="JSON_FILE",
dest="json_file", default=None,
help="Load data from a JSON file or an online, valid, JSON file.")
parser.add_argument("--db", metavar="DB_FILE",
dest="db_file", default=None,
help="Load Maigret database from a JSON file or an online, valid, JSON file.")
parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE",
dest="cookie_file", default=None,
help="File with cookies.")
@@ -660,7 +71,7 @@ async def main():
action="store", metavar='TIMEOUT',
dest="timeout", type=timeout_check, default=10,
help="Time (in seconds) to wait for response to requests."
"Default timeout of 10.0s."
"Default timeout of 10.0s. "
"A longer timeout will be more likely to get results from slow sites."
"On the other hand, this may cause a long delay to gather all results."
)
@@ -685,22 +96,38 @@ async def main():
action="store_true", dest="print_check_errors", default=False,
help="Print errors messages: connection, captcha, site country ban, etc."
)
parser.add_argument("--submit", metavar='EXISTING_USER_URL',
type=str, dest="new_site_to_submit", default=False,
help="URL of existing profile in new site to submit."
)
parser.add_argument("--no-color",
action="store_true", dest="no_color", default=False,
help="Don't color terminal output"
)
parser.add_argument("--no-progressbar",
action="store_true", dest="no_progressbar", default=False,
help="Don't show progressbar."
)
parser.add_argument("--browse", "-b",
action="store_true", dest="browse", default=False,
help="Browse to all results on default bowser."
)
parser.add_argument("--no-recursion",
action="store_true", dest="disable_recursive_search", default=False,
help="Disable parsing pages for other usernames and recursive search by them."
help="Disable recursive search by additional data extracted from pages."
)
parser.add_argument("--no-extracting",
action="store_true", dest="disable_extracting", default=False,
help="Disable parsing pages for additional data and other usernames."
)
parser.add_argument("--self-check",
action="store_true", default=False,
help="Do self check for sites and database and disable non-working ones."
)
parser.add_argument("--stats",
action="store_true", default=False,
help="Show database statistics."
)
parser.add_argument("--use-disabled-sites",
action="store_true", default=False,
help="Use disabled sites to search (may cause many false positives)."
@@ -713,6 +140,11 @@ async def main():
dest="id_type", default='username',
help="Specify identifier(s) type (default: username)."
)
parser.add_argument("--ignore-ids",
action="append", metavar='IGNORED_IDS',
dest="ignore_ids_list", default=[],
help="Do not make search by the specified username or other ids."
)
parser.add_argument("username",
nargs='+', metavar='USERNAMES',
action="store",
@@ -738,7 +170,7 @@ async def main():
action="store_true", dest="html", default=False,
help="Create an HTML report file (general report on all usernames)."
)
parser.add_argument("-X","--xmind",
parser.add_argument("-X", "--xmind",
action="store_true",
dest="xmind", default=False,
help="Generate an XMind 8 mindmap report (one report per username)."
@@ -748,6 +180,12 @@ async def main():
dest="pdf", default=False,
help="Generate a PDF report (general report on all usernames)."
)
parser.add_argument("-J", "--json",
action="store", metavar='REPORT_TYPE',
dest="json", default='', type=check_supported_json_format,
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
" (one report per username)."
)
args = parser.parse_args()
@@ -774,8 +212,10 @@ async def main():
u: args.id_type
for u in args.username
if u not in ['-']
and u not in args.ignore_ids_list
}
parsing_enabled = not args.disable_extracting
recursive_search_enabled = not args.disable_recursive_search
# Make prompts
@@ -783,21 +223,35 @@ async def main():
print("Using the proxy: " + args.proxy)
if args.parse_url:
page, _ = parse(args.parse_url, cookies_str='')
info = extract(page)
text = 'Extracted ID data from webpage: ' + ', '.join([f'{a}: {b}' for a, b in info.items()])
print(text)
for k, v in info.items():
if 'username' in k:
usernames[v] = 'username'
if k in supported_recursive_search_ids:
usernames[v] = k
# url, headers
reqs = [(args.parse_url, set())]
try:
# temporary workaround for URL mutations MVP
from socid_extractor import mutate_url
reqs += list(mutate_url(args.parse_url))
except:
pass
for req in reqs:
url, headers = req
print(f'Scanning webpage by URL {url}...')
page, _ = parse(url, cookies_str='', headers=headers)
info = extract(page)
if not info:
print('Nothing extracted')
else:
print(get_dict_ascii_tree(info.items(), new_line=False), ' ')
for k, v in info.items():
if 'username' in k:
usernames[v] = 'username'
if k in supported_recursive_search_ids:
usernames[v] = k
if args.tags:
args.tags = list(set(str(args.tags).split(',')))
if args.json_file is None:
args.json_file = \
if args.db_file is None:
args.db_file = \
os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/data.json"
)
@@ -813,28 +267,35 @@ async def main():
color=not args.no_color)
# Create object with all information about sites we are aware of.
db = MaigretDatabase().load_from_file(args.json_file)
db = MaigretDatabase().load_from_file(args.db_file)
get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
names=args.site_list,
disabled=False, id_type=x)
site_data = get_top_sites_for_id(args.id_type)
if args.new_site_to_submit:
is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file)
if is_submitted:
db.save_to_file(args.db_file)
# Database self-checking
if args.self_check:
print('Maigret sites database self-checking...')
is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
if is_need_update:
if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
db.save_to_file(args.json_file)
if input('Do you want to save changes permanently? [Yn]\n').lower() == 'y':
db.save_to_file(args.db_file)
print('Database was successfully updated.')
else:
print('Updates will be applied only for current search session.')
print(db.get_stats(site_data))
print(db.get_scan_stats(site_data))
if args.stats:
print(db.get_db_stats(db.sites_dict))
# Make reports folder if it does not exist
os.makedirs(args.folderoutput, exist_ok=True)
report_path = args.folderoutput
# Define one report filename template
report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
@@ -869,44 +330,56 @@ async def main():
else:
already_checked.add(username.lower())
if username in args.ignore_ids_list:
query_notify.warning(f'Skip a search by username {username} because it\'s marked as ignored.')
continue
# check for characters generally not supported by sites
found_unsupported_chars = set(unsupported_characters).intersection(set(username))
if found_unsupported_chars:
pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
query_notify.warning(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
query_notify.warning(
f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
continue
sites_to_check = get_top_sites_for_id(id_type)
results = await maigret(username,
dict(sites_to_check),
query_notify,
results = await maigret(username=username,
site_dict=dict(sites_to_check),
query_notify=query_notify,
proxy=args.proxy,
timeout=args.timeout,
recursive_search=recursive_search_enabled,
is_parsing_enabled=parsing_enabled,
id_type=id_type,
debug=args.verbose,
logger=logger,
cookies=args.cookie_file,
forced=args.use_disabled_sites,
max_connections=args.connections,
no_progressbar=args.no_progressbar,
)
username_result = (username, id_type, results)
general_results.append((username, id_type, results))
# TODO: tests
for website_name in results:
dictionary = results[website_name]
# TODO: fix no site data issue
if not dictionary:
if not dictionary or not recursive_search_enabled:
continue
new_usernames = dictionary.get('ids_usernames')
if new_usernames:
for u, utype in new_usernames.items():
usernames[u] = utype
for url in dictionary.get('ids_links', []):
for s in db.sites:
u = s.detect_username(url)
if u:
usernames[u] = 'username'
# reporting for a single username
if args.xmind:
filename = report_filepath_tpl.format(username=username, postfix='.xmind')
@@ -923,6 +396,11 @@ async def main():
save_txt_report(filename, username, results)
query_notify.warning(f'TXT report for {username} saved in {filename}')
if args.json:
filename = report_filepath_tpl.format(username=username, postfix=f'_{args.json}.json')
save_json_report(filename, username, results, report_type=args.json)
query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
# reporting for all the results
if general_results:
if args.html or args.pdf:
@@ -941,7 +419,7 @@ async def main():
save_pdf_report(filename, report_context)
query_notify.warning(f'PDF report on all usernames saved in {filename}')
# update database
db.save_to_file(args.json_file)
db.save_to_file(args.db_file)
def run():
@@ -952,5 +430,6 @@ def run():
print('Maigret is interrupted.')
sys.exit(1)
if __name__ == "__main__":
run()
run()
+3 -17
@@ -4,9 +4,11 @@ This module defines the objects for notifying the caller about the
results of queries.
"""
import sys
from colorama import Fore, Style, init
from .result import QueryStatus
from .utils import get_dict_ascii_tree
class QueryNotify():
@@ -175,22 +177,6 @@ class QueryNotifyPrint(QueryNotify):
else:
print(msg)
def get_additional_data_text(self, items, prepend=''):
text = ''
for num, item in enumerate(items):
box_symbol = '┣╸' if num != len(items) - 1 else '┗╸'
if type(item) == tuple:
field_name, field_value = item
if field_value.startswith('[\''):
is_last_item = num == len(items) - 1
prepend_symbols = ' ' * 3 if is_last_item else ''
field_value = self.get_additional_data_text(eval(field_value), prepend_symbols)
text += f'\n{prepend}{box_symbol}{field_name}: {field_value}'
else:
text += f'\n{prepend}{box_symbol} {item}'
return text
def update(self, result, is_similar=False):
"""Notify Update.
@@ -210,7 +196,7 @@ class QueryNotifyPrint(QueryNotify):
if not self.result.ids_data:
ids_data_text = ""
else:
ids_data_text = self.get_additional_data_text(self.result.ids_data.items(), ' ')
ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), ' ')
def make_colored_terminal_notify(status, text, status_color, text_color, appendix):
text = [
+60 -9
@@ -1,21 +1,30 @@
import csv
import io
import json
import logging
import os
from argparse import ArgumentTypeError
from datetime import datetime
import pycountry
import xmind
from datetime import datetime
from dateutil.parser import parse as parse_datetime_str
from jinja2 import Template
from xhtml2pdf import pisa
from dateutil.parser import parse as parse_datetime_str
from .result import QueryStatus
from .utils import is_country_tag, CaseConverter, enrich_link_str
SUPPORTED_JSON_REPORT_FORMATS = [
'simple',
'ndjson',
]
'''
UTILS
'''
def filter_supposed_data(data):
### interesting fields
allowed_fields = ['fullname', 'gender', 'location', 'age']
@@ -28,6 +37,8 @@ def filter_supposed_data(data):
'''
REPORTS SAVING
'''
def save_csv_report(filename: str, username: str, results: dict):
with open(filename, 'w', newline='', encoding='utf-8') as f:
generate_csv_report(username, results, f)
@@ -52,13 +63,21 @@ def save_pdf_report(filename: str, context: dict):
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
def save_json_report(filename: str, username: str, results: dict, report_type: str):
with open(filename, 'w', encoding='utf-8') as f:
generate_json_report(username, results, f, report_type=report_type)
'''
REPORTS GENERATING
'''
def generate_report_template(is_pdf: bool):
"""
HTML/PDF template generation
"""
def get_resource_content(filename):
return open(os.path.join(maigret_path, 'resources', filename)).read()
@@ -101,6 +120,9 @@ def generate_report_context(username_results: list):
continue
status = dictionary.get('status')
if not status: # FIXME: currently in case of timeout
continue
if status.ids_data:
dictionary['ids_data'] = status.ids_data
extended_info_count += 1
@@ -155,7 +177,6 @@ def generate_report_context(username_results: list):
for t in status.tags:
tags[t] = tags.get(t, 0) + 1
brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')
if new_ids:
@@ -166,8 +187,6 @@ def generate_report_context(username_results: list):
brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
brief = ' '.join(brief_text).strip()
tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
@@ -210,7 +229,7 @@ def generate_csv_report(username: str, results: dict, csvfile):
results[site]['url_user'],
str(results[site]['status'].status),
results[site]['http_status'],
])
])
def generate_txt_report(username: str, results: dict, file):
@@ -225,9 +244,36 @@ def generate_txt_report(username: str, results: dict, file):
file.write(dictionary["url_user"] + "\n")
file.write(f'Total Websites Username Detected On : {exists_counter}')
def generate_json_report(username: str, results: dict, file, report_type):
exists_counter = 0
is_report_per_line = report_type.startswith('ndjson')
all_json = {}
for sitename in results:
site_result = results[sitename]
# TODO: fix no site data issue
if not site_result or site_result.get("status").status != QueryStatus.CLAIMED:
continue
data = dict(site_result)
data['status'] = data['status'].json()
if is_report_per_line:
data['sitename'] = sitename
file.write(json.dumps(data) + '\n')
else:
all_json[sitename] = data
if not is_report_per_line:
file.write(json.dumps(all_json))
'''
XMIND 8 Functions
'''
def save_xmind_report(filename, username, results):
if os.path.exists(filename):
os.remove(filename)
@@ -242,9 +288,9 @@ def design_sheet(sheet, username, results):
alltags = {}
supposed_data = {}
sheet.setTitle("%s Analysis"%(username))
sheet.setTitle("%s Analysis" % (username))
root_topic1 = sheet.getRootTopic()
root_topic1.setTitle("%s"%(username))
root_topic1.setTitle("%s" % (username))
undefinedsection = root_topic1.addSubTopic()
undefinedsection.setTitle("Undefined")
@@ -298,7 +344,7 @@ def design_sheet(sheet, username, results):
currentsublabel.setTitle("%s: %s" % (k, currentval))
### Add Supposed DATA
filterede_supposed_data = filter_supposed_data(supposed_data)
if(len(filterede_supposed_data) >0):
if (len(filterede_supposed_data) > 0):
undefinedsection = root_topic1.addSubTopic()
undefinedsection.setTitle("SUPPOSED DATA")
for k, v in filterede_supposed_data.items():
@@ -306,3 +352,8 @@ def design_sheet(sheet, username, results):
currentsublabel.setTitle("%s: %s" % (k, v))
def check_supported_json_format(value):
if value and not value in SUPPORTED_JSON_REPORT_FORMATS:
raise ArgumentTypeError(f'JSON report type must be one of the following types: '
+ ', '.join(SUPPORTED_JSON_REPORT_FORMATS))
return value
+2947 -2367
File diff suppressed because it is too large
+1 -1
@@ -68,7 +68,7 @@
<div class="row-mb">
<div class="col-md">
<div class="card flex-md-row mb-4 box-shadow h-md-250">
<img class="card-img-right flex-auto d-none d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
<h3 class="mb-0" style="padding-top: 1rem;">
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
+13 -1
@@ -1,4 +1,4 @@
"""Sherlock Result Module
"""Maigret Result Module
This module defines various objects for recording the results of queries.
"""
@@ -74,6 +74,18 @@ class QueryResult():
self.ids_data = ids_data
self.tags = tags
def json(self):
return {
'username': self.username,
'site_name': self.site_name,
'url': self.site_url_user,
'status': str(self.status),
'ids': self.ids_data or {},
'tags': self.tags,
}
def is_found(self):
return self.status == QueryStatus.CLAIMED
def __str__(self):
"""Convert Object To String.
+97 -9
@@ -6,7 +6,17 @@ import sys
import requests
from .utils import CaseConverter
from .utils import CaseConverter, URLMatcher, is_country_tag
# TODO: move to data.json
SUPPORTED_TAGS = [
'gaming', 'coding', 'photo', 'music', 'blog', 'finance', 'freelance', 'dating',
'tech', 'forum', 'porn', 'erotic', 'webcam', 'video', 'movies', 'hacking', 'art',
'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport',
'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified',
'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent',
'science', 'medicine',
]
class MaigretEngine:
@@ -21,12 +31,22 @@ class MaigretEngine:
class MaigretSite:
NOT_SERIALIZABLE_FIELDS = [
'name',
'engineData',
'requestFuture',
'detectedEngine',
'engineObj',
'stats',
'urlRegexp',
]
def __init__(self, name, information):
self.name = name
self.disabled = False
self.similar_search = False
self.ignore_403 = False
self.ignore403 = False
self.tags = []
self.type = 'username'
@@ -57,10 +77,28 @@ class MaigretSite:
# We do not know the popularity, so make site go to bottom of list.
self.alexa_rank = sys.maxsize
self.update_detectors()
def __str__(self):
return f"{self.name} ({self.url_main})"
def update_detectors(self):
if 'url' in self.__dict__:
url = self.url
for group in ['urlMain', 'urlSubpath']:
if group in url:
url = url.replace('{' + group + '}', self.__dict__[CaseConverter.camel_to_snake(group)])
self.url_regexp = URLMatcher.make_profile_url_regexp(url, self.regex_check)
def detect_username(self, url: str) -> str:
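"""Extract a username from a profile URL using the site's URL regexp; return None if nothing matches."""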
if self.url_regexp:
match_groups = self.url_regexp.match(url)
if match_groups:
return match_groups.groups()[-1].rstrip('/')
return None
@property
def json(self):
result = {}
@@ -70,7 +108,7 @@ class MaigretSite:
# strip empty elements
if v in (False, '', [], {}, None, sys.maxsize, 'username'):
continue
if field in ['name', 'engineData', 'requestFuture', 'detectedEngine', 'engineObj', 'stats']:
if field in self.NOT_SERIALIZABLE_FIELDS:
continue
result[field] = v
@@ -78,6 +116,7 @@ class MaigretSite:
def update(self, updates: dict) -> MaigretSite:
self.__dict__.update(updates)
self.update_detectors()
return self
@@ -95,6 +134,7 @@ class MaigretSite:
self.__dict__[field] = v
self.engine_obj = engine
self.update_detectors()
return self
@@ -103,6 +143,8 @@ class MaigretSite:
return self
self.request_future = None
self.url_regexp = None
self_copy = copy.deepcopy(self)
engine_data = self_copy.engine_obj.site
site_data_keys = list(self_copy.__dict__.keys())
@@ -113,7 +155,8 @@ class MaigretSite:
# remove dict keys
if isinstance(engine_data[k], dict) and is_exists:
for f in engine_data[k].keys():
del self_copy.__dict__[field][f]
if f in self_copy.__dict__[field]:
del self_copy.__dict__[field][f]
continue
# remove list items
if isinstance(engine_data[k], list) and is_exists:
@@ -195,7 +238,6 @@ class MaigretDatabase:
return self
def load_from_json(self, json_data: dict) -> MaigretDatabase:
# Add all of site information from the json file to internal site list.
site_data = json_data.get("sites", {})
@@ -220,7 +262,6 @@ class MaigretDatabase:
return self
def load_from_str(self, db_str: str) -> MaigretDatabase:
try:
data = json.loads(db_str)
@@ -231,7 +272,6 @@ class MaigretDatabase:
return self.load_from_json(data)
def load_from_url(self, url: str) -> MaigretDatabase:
is_url_valid = url.startswith('http://') or url.startswith('https://')
@@ -260,7 +300,6 @@ class MaigretDatabase:
return self.load_from_json(data)
def load_from_file(self, filename: str) -> MaigretDatabase:
try:
with open(filename, 'r', encoding='utf-8') as file:
@@ -277,7 +316,7 @@ class MaigretDatabase:
return self.load_from_json(data)
def get_stats(self, sites_dict):
def get_scan_stats(self, sites_dict):
sites = sites_dict or self.sites_dict
found_flags = {}
for _, s in sites.items():
@@ -286,3 +325,52 @@ class MaigretDatabase:
found_flags[flag] = found_flags.get(flag, 0) + 1
return found_flags
def get_db_stats(self, sites_dict):
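"""Return a text summary of the database: enabled/total sites, top profile URL patterns, and tag counts."""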
if not sites_dict:
sites_dict = self.sites_dict()
output = ''
disabled_count = 0
total_count = len(sites_dict)
urls = {}
tags = {}
for _, site in sites_dict.items():
if site.disabled:
disabled_count += 1
url = URLMatcher.extract_main_part(site.url)
if url.startswith('{username}'):
url = 'SUBDOMAIN'
elif url == '':
url = f'{site.url} ({site.engine})'
else:
parts = url.split('/')
url = '/' + '/'.join(parts[1:])
urls[url] = urls.get(url, 0) + 1
if not site.tags:
tags['NO_TAGS'] = tags.get('NO_TAGS', 0) + 1
for tag in site.tags:
if is_country_tag(tag):
# currently do not display country tags
continue
tags[tag] = tags.get(tag, 0) + 1
output += f'Enabled/total sites: {total_count - disabled_count}/{total_count}\n'
output += 'Top sites\' profile URLs:\n'
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
if count == 1:
break
output += f'{count}\t{url}\n'
output += 'Top sites\' tags:\n'
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True):
mark = ''
if not tag in SUPPORTED_TAGS:
mark = ' (non-standard)'
output += f'{count}\t{tag}{mark}\n'
return output
+232
@@ -0,0 +1,232 @@
import difflib
import requests
from .checking import *
DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
"birthday", "репутация", "информация", "e-mail"]
SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
RATIO = 0.6
TOP_FEATURES = 5
URL_RE = re.compile(r'https?://(www\.)?')
def get_match_ratio(x):
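"""Return the highest similarity ratio between the given string and any of the desired feature strings, rounded to 2 digits."""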
return round(max([
difflib.SequenceMatcher(a=x.lower(), b=y).ratio()
for y in DESIRED_STRINGS
]), 2)
def extract_mainpage_url(url):
return '/'.join(url.split('/', 3)[:3])
async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
changes = {
'disabled': False,
}
check_data = [
(site.username_claimed, QueryStatus.CLAIMED),
(site.username_unclaimed, QueryStatus.AVAILABLE),
]
logger.info(f'Checking {site.name}...')
for username, status in check_data:
results_dict = await maigret(
username=username,
site_dict={site.name: site},
logger=logger,
timeout=30,
id_type=site.type,
forced=True,
no_progressbar=True,
)
# don't disable entries with other ids types
# TODO: make normal checking
if site.name not in results_dict:
logger.info(results_dict)
changes['disabled'] = True
continue
result = results_dict[site.name]['status']
site_status = result.status
if site_status != status:
if site_status == QueryStatus.UNKNOWN:
msgs = site.absence_strs
etype = site.check_type
logger.warning(
f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
# don't disable in case of available username
if status == QueryStatus.CLAIMED:
changes['disabled'] = True
elif status == QueryStatus.CLAIMED:
logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
logger.info(results_dict[site.name])
changes['disabled'] = True
else:
logger.warning(f'Found `{username}` in {site.name}, must be available')
logger.info(results_dict[site.name])
changes['disabled'] = True
logger.info(f'Site {site.name} checking is finished')
return changes
async def detect_known_engine(db, url_exists, url_mainpage):
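"""Try to detect a known engine by its fingerprint strings on the site's main page; return candidate MaigretSite objects built for supposed usernames, or None."""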
try:
r = requests.get(url_mainpage)
except Exception as e:
print(e)
print('Some error while checking main page')
return None
for e in db.engines:
strs_to_check = e.__dict__.get('presenseStrs')
if strs_to_check and r and r.text:
all_strs_in_response = True
for s in strs_to_check:
if not s in r.text:
all_strs_in_response = False
if all_strs_in_response:
engine_name = e.__dict__.get('name')
print(f'Detected engine {engine_name} for site {url_mainpage}')
sites = []
for u in SUPPOSED_USERNAMES:
site_data = {
'urlMain': url_mainpage,
'name': url_mainpage.split('//')[0],
'engine': engine_name,
'usernameClaimed': u,
'usernameUnclaimed': 'noonewouldeverusethis7',
}
maigret_site = MaigretSite(url_mainpage.split('/')[-1], site_data)
maigret_site.update_from_engine(db.engines_dict[engine_name])
sites.append(maigret_site)
return sites
return None
async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
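"""Interactively build presence/absence text features by diffing the pages of an existing and a non-existing account."""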
url_parts = url_exists.split('/')
supposed_username = url_parts[-1]
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
if new_name:
supposed_username = new_name
non_exist_username = 'noonewouldeverusethis7'
url_user = url_exists.replace(supposed_username, '{username}')
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
# cookies
cookie_dict = None
if cookie_file:
cookie_jar = await import_aiohttp_cookies(cookie_file)
cookie_dict = {c.key: c.value for c in cookie_jar}
a = requests.get(url_exists, cookies=cookie_dict).text
b = requests.get(url_not_exists, cookies=cookie_dict).text
tokens_a = set(a.split('"'))
tokens_b = set(b.split('"'))
a_minus_b = tokens_a.difference(tokens_b)
b_minus_a = tokens_b.difference(tokens_a)
top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]
print('Detected text features of existing account: ' + ', '.join(presence_list))
features = input('If features were not detected correctly, write them manually: ')
if features:
presence_list = features.split(',')
absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[:top_features_count]
print('Detected text features of non-existing account: ' + ', '.join(absence_list))
features = input('If features were not detected correctly, write them manually: ')
if features:
absence_list = features.split(',')
site_data = {
'absenceStrs': absence_list,
'presenseStrs': presence_list,
'url': url_user,
'urlMain': url_mainpage,
'usernameClaimed': supposed_username,
'usernameUnclaimed': non_exist_username,
'checkType': 'message',
}
site = MaigretSite(url_mainpage.split('/')[-1], site_data)
return site
async def submit_dialog(db, url_exists, cookie_file):
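"""Dialog for adding a new site by the URL of an existing profile: detect the engine or build checking features manually, self-check the result, and optionally save it to the database."""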
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
domain_raw = domain_raw.split('/')[0]
# check for existence
matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
if matched_sites:
print(f'Sites with domain "{domain_raw}" already exist in the Maigret database!')
status = lambda s: '(disabled)' if s.disabled else ''
url_block = lambda s: f'\n\t{s.url_main}\n\t{s.url}'
print('\n'.join([f'{site.name} {status(site)}{url_block(site)}' for site in matched_sites]))
return False
url_mainpage = extract_mainpage_url(url_exists)
sites = await detect_known_engine(db, url_exists, url_mainpage)
if not sites:
print('Unable to detect site engine, let\'s generate checking features')
sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file)]
print(sites[0].__dict__)
sem = asyncio.Semaphore(1)
log_level = logging.INFO
logging.basicConfig(
format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
datefmt='%H:%M:%S',
level=log_level
)
logger = logging.getLogger('site-submit')
logger.setLevel(log_level)
found = False
chosen_site = None
for s in sites:
chosen_site = s
result = await site_self_check(s, logger, sem, db)
if not result['disabled']:
found = True
break
if not found:
print(f'Sorry, we couldn\'t find params to detect account presence/absence in {chosen_site.name}.')
print('Try to run this mode again and increase features count or choose others.')
else:
if input(f'Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y':
print(chosen_site.json)
site_data = chosen_site.strip_engine_data()
print(site_data.json)
db.update_site(site_data)
return True
return False
+48 -1
@@ -28,4 +28,51 @@ def enrich_link_str(link: str) -> str:
link = link.strip()
if link.startswith('www.') or (link.startswith('http') and '//' in link):
return f'<a class="auto-link" href="{link}">{link}</a>'
return link
return link
class URLMatcher:
_HTTP_URL_RE_STR = '^https?://(www.)?(.+)$'
HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
UNSAFE_SYMBOLS = '.?'
@classmethod
def extract_main_part(self, url: str) -> str:
match = self.HTTP_URL_RE.search(url)
if match and match.group(2):
return match.group(2).rstrip('/')
return ''
@classmethod
def make_profile_url_regexp(self, url: str, username_regexp: str = ''):
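"""Build a regexp that matches the site's profile URL and captures the username part."""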
url_main_part = self.extract_main_part(url)
for c in self.UNSAFE_SYMBOLS:
url_main_part = url_main_part.replace(c, f'\\{c}')
username_regexp = username_regexp or '.+?'
url_regexp = url_main_part.replace('{username}', f'({username_regexp})')
regexp_str = self._HTTP_URL_RE_STR.replace('(.+)', url_regexp)
return re.compile(regexp_str)
def get_dict_ascii_tree(items, prepend='', new_line=True):
text = ''
for num, item in enumerate(items):
box_symbol = '┣╸' if num != len(items) - 1 else '┗╸'
if type(item) == tuple:
field_name, field_value = item
if field_value.startswith('[\''):
is_last_item = num == len(items) - 1
prepend_symbols = ' ' * 3 if is_last_item else ''
field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
text += f'\n{prepend}{box_symbol}{field_name}: {field_value}'
else:
text += f'\n{prepend}{box_symbol} {item}'
if not new_line:
text = text[1:]
return text
+5 -8
@@ -1,4 +1,4 @@
aiohttp==3.7.3
aiohttp==3.7.4
aiohttp-socks==0.5.5
arabic-reshaper==2.1.1
async-timeout==3.0.1
@@ -13,28 +13,25 @@ future==0.18.2
future-annotations==1.0.0
html5lib==1.1
idna==2.10
Jinja2==2.11.2
lxml==4.6.2
Jinja2==2.11.3
lxml==4.6.3
MarkupSafe==1.1.1
mock==4.0.2
multidict==5.1.0
Pillow==8.1.0
pycountry==20.7.3
PyPDF2==1.26.0
PySocks==1.7.1
python-bidi==0.4.2
python-socks==1.1.2
reportlab==3.5.59
requests==2.25.1
requests>=2.24.0
requests-futures==1.0.0
six==1.15.0
socid-extractor>=0.0.4
socid-extractor>=0.0.16
soupsieve==2.1
stem==1.8.0
torrequest==0.1.0
tqdm==4.55.0
typing-extensions==3.7.4.3
urllib3==1.26.2
webencodings==0.5.1
xhtml2pdf==0.2.5
XMind==1.2.0
+1 -1
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
requires = rf.read().splitlines()
setup(name='maigret',
version='0.1.13',
version='0.1.19',
description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description,
long_description_content_type="text/markdown",
+2302 -2242
File diff suppressed because it is too large
+2 -2
@@ -1,11 +1,11 @@
import glob
import logging
import os
import pytest
from _pytest.mark import Mark
from mock import Mock
from maigret.sites import MaigretDatabase, MaigretSite
from maigret.sites import MaigretDatabase
CUR_PATH = os.path.dirname(os.path.realpath(__file__))
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
+1
@@ -1,5 +1,6 @@
"""Maigret activation test functions"""
import json
import aiohttp
import pytest
from mock import Mock
+66
@@ -0,0 +1,66 @@
"""Maigret checking logic test functions"""
import pytest
import asyncio
import logging
from maigret.checking import AsyncioSimpleExecutor, AsyncioProgressbarExecutor, AsyncioProgressbarSemaphoreExecutor, AsyncioProgressbarQueueExecutor
logger = logging.getLogger(__name__)
async def func(n):
await asyncio.sleep(0.1 * (n % 3))
return n
@pytest.mark.asyncio
async def test_simple_asyncio_executor():
tasks = [(func, [n], {}) for n in range(10)]
executor = AsyncioSimpleExecutor(logger=logger)
assert await executor.run(tasks) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
assert executor.execution_time > 0.2
assert executor.execution_time < 0.3
@pytest.mark.asyncio
async def test_asyncio_progressbar_executor():
tasks = [(func, [n], {}) for n in range(10)]
executor = AsyncioProgressbarExecutor(logger=logger)
# no guarantees for the results order
assert sorted(await executor.run(tasks)) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
assert executor.execution_time > 0.2
assert executor.execution_time < 0.3
@pytest.mark.asyncio
async def test_asyncio_progressbar_semaphore_executor():
tasks = [(func, [n], {}) for n in range(10)]
executor = AsyncioProgressbarSemaphoreExecutor(logger=logger, in_parallel=5)
# no guarantees for the results order
assert sorted(await executor.run(tasks)) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
assert executor.execution_time > 0.2
assert executor.execution_time < 0.4
@pytest.mark.asyncio
async def test_asyncio_progressbar_queue_executor():
tasks = [(func, [n], {}) for n in range(10)]
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=2)
assert await executor.run(tasks) == [0, 1, 3, 2, 4, 6, 7, 5, 9, 8]
assert executor.execution_time > 0.5
assert executor.execution_time < 0.6
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=3)
assert await executor.run(tasks) == [0, 3, 1, 4, 6, 2, 7, 9, 5, 8]
assert executor.execution_time > 0.4
assert executor.execution_time < 0.5
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=5)
assert await executor.run(tasks) == [0, 3, 6, 1, 4, 7, 9, 2, 5, 8]
assert executor.execution_time > 0.3
assert executor.execution_time < 0.4
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=10)
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
assert executor.execution_time > 0.2
assert executor.execution_time < 0.3
+2 -1
@@ -1,10 +1,11 @@
"""Maigret main module test functions"""
import asyncio
import pytest
from mock import Mock
from maigret.maigret import self_check
from maigret.sites import MaigretDatabase, MaigretSite
from maigret.sites import MaigretDatabase
EXAMPLE_DB = {
'engines': {
+28 -1
@@ -1,5 +1,6 @@
"""Maigret reports test functions"""
import copy
import json
import os
from io import StringIO
@@ -7,7 +8,7 @@ import xmind
from jinja2 import Template
from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \
save_pdf_report, generate_report_template, generate_report_context
save_pdf_report, generate_report_template, generate_report_context, generate_json_report
from maigret.result import QueryResult, QueryStatus
EXAMPLE_RESULTS = {
@@ -146,6 +147,32 @@ def test_generate_txt_report():
]
def test_generate_json_simple_report():
jsonfile = StringIO()
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
MODIFIED_RESULTS['GitHub2'] = EXAMPLE_RESULTS['GitHub']
generate_json_report('test', MODIFIED_RESULTS, jsonfile, 'simple')
jsonfile.seek(0)
data = jsonfile.readlines()
assert len(data) == 1
assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2']
def test_generate_json_ndjson_report():
jsonfile = StringIO()
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
MODIFIED_RESULTS['GitHub2'] = EXAMPLE_RESULTS['GitHub']
generate_json_report('test', MODIFIED_RESULTS, jsonfile, 'ndjson')
jsonfile.seek(0)
data = jsonfile.readlines()
assert len(data) == 2
assert json.loads(data[0])['sitename'] == 'GitHub'
def test_save_xmind_report():
filename = 'report_test.xmind'
save_xmind_report(filename, 'test', EXAMPLE_RESULTS)
+28 -20
@@ -1,33 +1,32 @@
"""Maigret Database test functions"""
from maigret.sites import MaigretDatabase, MaigretSite
EXAMPLE_DB = {
'engines': {
"XenForo": {
"presenseStrs": ["XenForo"],
"site": {
"absenceStrs": [
"The specified member cannot be found. Please enter a member's entire name.",
],
"checkType": "message",
"errors": {
"You must be logged-in to do that.": "Login required"
},
"url": "{urlMain}{urlSubpath}/members/?username={username}"
}
"presenseStrs": ["XenForo"],
"site": {
"absenceStrs": [
"The specified member cannot be found. Please enter a member's entire name.",
],
"checkType": "message",
"errors": {
"You must be logged-in to do that.": "Login required"
},
"url": "{urlMain}{urlSubpath}/members/?username={username}"
}
},
},
'sites': {
"Amperka": {
"engine": "XenForo",
"rank": 121613,
"tags": [
"ru"
],
"urlMain": "http://forum.amperka.ru",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
"engine": "XenForo",
"rank": 121613,
"tags": [
"ru"
],
"urlMain": "http://forum.amperka.ru",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
},
}
}
@@ -113,6 +112,14 @@ def test_saving_site_error():
assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}
def test_site_url_detector():
db = MaigretDatabase()
db.load_from_json(EXAMPLE_DB)
assert db.sites[0].url_regexp.pattern == r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
assert db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test') == 'test'
def test_ranked_sites_dict():
db = MaigretDatabase()
db.update_site(MaigretSite('3', {'alexaRank': 1000, 'engine': 'ucoz'}))
@@ -159,6 +166,7 @@ def test_ranked_sites_dict_disabled():
assert len(db.ranked_sites_dict()) == 2
assert len(db.ranked_sites_dict(disabled=False)) == 1
def test_ranked_sites_dict_id_type():
db = MaigretDatabase()
db.update_site(MaigretSite('1', {}))
+83 -17
@@ -1,34 +1,100 @@
"""Maigret utils test functions"""
from maigret.utils import CaseConverter, is_country_tag, enrich_link_str
import itertools
import re
from maigret.utils import CaseConverter, is_country_tag, enrich_link_str, URLMatcher, get_dict_ascii_tree
def test_case_convert_camel_to_snake():
a = 'SnakeCasedString'
b = CaseConverter.camel_to_snake(a)
a = 'SnakeCasedString'
b = CaseConverter.camel_to_snake(a)
assert b == 'snake_cased_string'
assert b == 'snake_cased_string'
def test_case_convert_snake_to_camel():
a = 'camel_cased_string'
b = CaseConverter.snake_to_camel(a)
a = 'camel_cased_string'
b = CaseConverter.snake_to_camel(a)
assert b == 'camelCasedString'
assert b == 'camelCasedString'
def test_case_convert_snake_to_title():
a = 'camel_cased_string'
b = CaseConverter.snake_to_title(a)
a = 'camel_cased_string'
b = CaseConverter.snake_to_title(a)
assert b == 'Camel cased string'
def test_case_convert_camel_with_digits_to_snake():
a = 'ignore403'
b = CaseConverter.camel_to_snake(a)
assert b == 'ignore403'
assert b == 'Camel cased string'
def test_is_country_tag():
assert is_country_tag('ru') == True
assert is_country_tag('FR') == True
assert is_country_tag('ru') == True
assert is_country_tag('FR') == True
assert is_country_tag('a1') == False
assert is_country_tag('dating') == False
assert is_country_tag('a1') == False
assert is_country_tag('dating') == False
assert is_country_tag('global') == True
assert is_country_tag('global') == True
def test_enrich_link_str():
assert enrich_link_str('test') == 'test'
assert enrich_link_str(' www.flickr.com/photos/alexaimephotography/') == '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'
assert enrich_link_str('test') == 'test'
assert enrich_link_str(
' www.flickr.com/photos/alexaimephotography/') == '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'
def test_url_extract_main_part():
url_main_part = 'flickr.com/photos/alexaimephotography'
parts = [
['http://', 'https://'],
['www.', ''],
[url_main_part],
['/', ''],
]
url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
for url_parts in itertools.product(*parts):
url = ''.join(url_parts)
assert URLMatcher.extract_main_part(url) == url_main_part
assert not url_regexp.match(url) is None
def test_url_make_profile_url_regexp():
url_main_part = 'flickr.com/photos/{username}'
parts = [
['http://', 'https://'],
['www.', ''],
[url_main_part],
['/', ''],
]
for url_parts in itertools.product(*parts):
url = ''.join(url_parts)
assert URLMatcher.make_profile_url_regexp(url).pattern == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
def test_get_dict_ascii_tree():
data = {'uid': 'dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==', 'legacy_id': '26403415', 'username': 'alexaimephotographycars', 'name': 'Alex Aimé', 'created_at': '2018-05-04T10:17:01.000+0000', 'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b', 'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201', 'website': 'www.instagram.com/street.reality.photography/', 'facebook_link': ' www.instagram.com/street.reality.photography/', 'instagram_username': 'Street.Reality.Photography', 'twitter_username': 'Alexaimephotogr'}
ascii_tree = get_dict_ascii_tree(data.items())
assert ascii_tree == """
┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
┣╸legacy_id: 26403415
┣╸username: alexaimephotographycars
┣╸name: Alex Aimé
┣╸created_at: 2018-05-04T10:17:01.000+0000
┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
┣╸website: www.instagram.com/street.reality.photography/
┣╸facebook_link: www.instagram.com/street.reality.photography/
┣╸instagram_username: Street.Reality.Photography
┗╸twitter_username: Alexaimephotogr"""
+12 -4
@@ -20,8 +20,9 @@ RANKS.update({
'5000': '5K',
'10000': '10K',
'100000': '100K',
'10000000': '1M',
'50000000': '10M',
'10000000': '10M',
'50000000': '50M',
'100000000': '100M',
})
SEMAPHORE = threading.Semaphore(10)
@@ -58,8 +59,9 @@ def get_rank(domain_to_query, site, print_errors=True):
def get_step_rank(rank):
def get_readable_rank(r):
return RANKS[str(r)]
valid_step_ranks = sorted(map(int, RANKS.keys()))
if rank == 0:
if rank == 0 or rank == sys.maxsize:
return get_readable_rank(valid_step_ranks[-1])
else:
return get_readable_rank(list(filter(lambda x: x >= rank, valid_step_ranks))[0])
@@ -73,6 +75,8 @@ if __name__ == '__main__':
help="JSON file with sites data to update.")
parser.add_argument('--empty-only', help='update only sites without rating', action='store_true')
parser.add_argument('--exclude-engine', help='do not update score with certain engine',
action="append", dest="exclude_engine_list", default=[])
pool = list()
@@ -92,6 +96,8 @@ Rank data fetched from Alexa by domains.
url_main = site.url_main
if site.alexa_rank < sys.maxsize and args.empty_only:
continue
if args.exclude_engine_list and site.engine in args.exclude_engine_list:
continue
site.alexa_rank = 0
th = threading.Thread(target=get_rank, args=(url_main, site))
pool.append((site.name, url_main, th))
@@ -121,7 +127,9 @@ Rank data fetched from Alexa by domains.
note = ''
if site.disabled:
note = ', search is disabled'
site_file.write(f'1. [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
favicon = f"![](https://www.google.com/s2/favicons?domain={url_main})"
site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
db.update_site(site)
site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
Executable
+64
@@ -0,0 +1,64 @@
#!/usr/bin/env python3
import asyncio
import logging
import maigret
# top popular sites from the Maigret database
TOP_SITES_COUNT = 300
# Maigret HTTP requests timeout
TIMEOUT = 10
# max parallel requests
MAX_CONNECTIONS = 50
if __name__ == '__main__':
# setup logging and asyncio
logger = logging.getLogger('maigret')
logger.setLevel(logging.WARNING)
loop = asyncio.get_event_loop()
# setup Maigret
db = maigret.MaigretDatabase().load_from_file('./maigret/resources/data.json')
# also can be downloaded from web
# db = MaigretDatabase().load_from_url(MAIGRET_DB_URL)
# user input
username = input('Enter username to search: ')
sites_count_raw = input(f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): ')
sites_count = int(sites_count_raw) or TOP_SITES_COUNT
sites = db.ranked_sites_dict(top=sites_count)
show_progressbar_raw = input('Do you want to show a progressbar? [Yn] ')
show_progressbar = show_progressbar_raw.lower() != 'n'
extract_info_raw = input('Do you want to extract additional info from accounts\' pages? [Yn] ')
extract_info = extract_info_raw.lower() != 'n'
use_notifier_raw = input('Do you want to use notifier for displaying results while searching? [Yn] ')
use_notifier = use_notifier_raw.lower() != 'n'
notifier = None
if use_notifier:
notifier = maigret.Notifier(print_found_only=True, skip_check_errors=True)
# search!
search_func = maigret.search(username=username,
site_dict=sites,
timeout=TIMEOUT,
logger=logger,
max_connections=MAX_CONNECTIONS,
query_notify=notifier,
no_progressbar=(not show_progressbar),
is_parsing_enabled=extract_info,
)
results = loop.run_until_complete(search_func)
input('Search completed. Press any key to show results.')
for sitename, data in results.items():
is_found = data['status'].is_found()
print(f'{sitename} - {"Found!" if is_found else "Not found"}')