Mirror of https://github.com/soxoj/maigret.git, synced 2026-05-06 14:08:59 +00:00
Merge pull request #51 from soxoj/submit-mode
Experimental site submit mode
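The new mode takes the URL of an existing profile on an as-yet-unsupported site, interactively extracts presence/absence text features, self-checks them against a claimed and an unclaimed username, and offers to save the resulting entry into the sites database. A minimal sketch of the programmatic equivalent of `maigret --submit <url>`; the database path and profile URL are placeholder assumptions:

import asyncio

from maigret.sites import MaigretDatabase
from maigret.submit import submit_dialog

# Placeholder path; the real CLI resolves its own DB file.
DB_FILE = 'maigret/resources/data.json'

db = MaigretDatabase().load_from_file(DB_FILE)

# Interactive dialog; returns True if the new site entry passed
# self-checking and the user chose to save it.
if asyncio.run(submit_dialog(db, 'https://example.com/someuser')):
    db.save_to_file(DB_FILE)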
@@ -0,0 +1,601 @@
import asyncio
import logging
import re
import ssl

import aiohttp
import tqdm.asyncio
from aiohttp_socks import ProxyConnector
from mock import Mock
from python_socks import _errors as proxy_errors
from socid_extractor import extract

from .activation import ParsingActivator, import_aiohttp_cookies
from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite

supported_recursive_search_ids = (
    'yandex_public_id',
    'gaia_id',
    'vk_id',
    'ok_id',
    'wikimapia_uid',
)

common_errors = {
    '<title>Attention Required! | Cloudflare</title>': 'Cloudflare captcha',
    'Please stand by, while we are checking your browser': 'Cloudflare captcha',
    '<title>Доступ ограничен</title>': 'Rostelecom censorship',
    'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha',
    'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection',
    '404</h1><p class="error-card__description">Мы не нашли страницу': 'MegaFon 404 page',
    'Доступ к информационному ресурсу ограничен на основании Федерального закона': 'MGTS censorship',
    'Incapsula incident ID': 'Incapsula antibot protection',
}

unsupported_characters = '#'


async def get_response(request_future, site_name, logger):
    html_text = None
    status_code = 0

    error_text = "General Unknown Error"
    exception_text = None

    try:
        response = await request_future

        status_code = response.status
        response_content = await response.content.read()
        charset = response.charset or 'utf-8'
        decoded_content = response_content.decode(charset, 'ignore')
        html_text = decoded_content

        if status_code > 0:
            error_text = None

        logger.debug(html_text)

    except asyncio.TimeoutError as errt:
        error_text = "Timeout Error"
        exception_text = str(errt)
    except (ssl.SSLCertVerificationError, ssl.SSLError) as err:
        error_text = "SSL Error"
        exception_text = str(err)
    except aiohttp.client_exceptions.ClientConnectorError as err:
        error_text = "Error Connecting"
        exception_text = str(err)
    except aiohttp.http_exceptions.BadHttpMessage as err:
        error_text = "HTTP Error"
        exception_text = str(err)
    except proxy_errors.ProxyError as err:
        error_text = "Proxy Error"
        exception_text = str(err)
    except Exception as err:
        logger.warning(f'Unhandled error while requesting {site_name}: {err}')
        logger.debug(err, exc_info=True)
        error_text = "Some Error"
        exception_text = str(err)

    # TODO: return only needed information
    return html_text, status_code, error_text, exception_text


async def update_site_dict_from_response(sitename, site_dict, results_info, semaphore, logger, query_notify):
    async with semaphore:
        site_obj = site_dict[sitename]
        future = site_obj.request_future
        if not future:
            # ignore: search by incompatible id type
            return

        response = await get_response(request_future=future,
                                      site_name=sitename,
                                      logger=logger)

        site_dict[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj)


# TODO: move into separate module
def detect_error_page(html_text, status_code, fail_flags, ignore_403):
    # Detect service restrictions such as a country restriction
    for flag, msg in fail_flags.items():
        if flag in html_text:
            return 'Some site error', msg

    # Detect common restrictions such as provider censorship and bot protection
    for flag, msg in common_errors.items():
        if flag in html_text:
            return 'Error', msg

    # Detect common site errors
    if status_code == 403 and not ignore_403:
        return 'Access denied', 'Access denied, use proxy/vpn'
    elif status_code >= 500:
        return f'Error {status_code}', f'Site error {status_code}'

    return None, None


def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
    if not response:
        return results_info

    fulltags = site.tags

    # Retrieve other site information again
    username = results_info['username']
    is_parsing_enabled = results_info['parsing_enabled']
    url = results_info.get("url_user")
    logger.debug(url)

    status = results_info.get("status")
    if status is not None:
        # We have already determined the user doesn't exist here
        return results_info

    # Get the expected check type
    check_type = site.check_type

    # Get the failure messages and comments
    failure_errors = site.errors

    # TODO: refactor
    if not response:
        logger.error(f'No response for {site.name}')
        return results_info

    html_text, status_code, error_text, exception_text = response
    site_error_text = '?'

    # TODO: add elapsed request time counting
    response_time = None

    if logger.level == logging.DEBUG:
        with open('debug.txt', 'a') as f:
            status = status_code or 'No response'
            f.write(f'url: {url}\nerror: {str(error_text)}\nr: {status}\n')
            if html_text:
                f.write(f'code: {status}\nresponse: {str(html_text)}\n')

    if status_code and not error_text:
        error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
                                                        site.ignore_403)

    if site.activation and html_text:
        is_need_activation = any(s in html_text for s in site.activation['marks'])
        if is_need_activation:
            method = site.activation['method']
            try:
                activate_fun = getattr(ParsingActivator(), method)
                # TODO: async call
                activate_fun(site, logger)
            except AttributeError:
                logger.warning(f'Activation method {method} for site {site.name} not found!')

    # presence flags
    # True by default
    presense_flags = site.presense_strs
    is_presense_detected = False
    if html_text:
        if not presense_flags:
            is_presense_detected = True
            site.stats['presense_flag'] = None
        else:
            for presense_flag in presense_flags:
                if presense_flag in html_text:
                    is_presense_detected = True
                    site.stats['presense_flag'] = presense_flag
                    logger.info(presense_flag)
                    break

    if error_text is not None:
        logger.debug(error_text)
        result = QueryResult(username,
                             site.name,
                             url,
                             QueryStatus.UNKNOWN,
                             query_time=response_time,
                             context=f'{error_text}: {site_error_text}', tags=fulltags)
    elif check_type == "message":
        absence_flags = site.absence_strs
        is_absence_flags_list = isinstance(absence_flags, list)
        absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
        # Checks if the error message is in the HTML
        is_absence_detected = any(absence_flag in html_text for absence_flag in absence_flags_set)
        if not is_absence_detected and is_presense_detected:
            result = QueryResult(username,
                                 site.name,
                                 url,
                                 QueryStatus.CLAIMED,
                                 query_time=response_time, tags=fulltags)
        else:
            result = QueryResult(username,
                                 site.name,
                                 url,
                                 QueryStatus.AVAILABLE,
                                 query_time=response_time, tags=fulltags)
    elif check_type == "status_code":
        # Checks if the status code of the response is 2XX
        if 200 <= status_code < 300 and is_presense_detected:
            result = QueryResult(username,
                                 site.name,
                                 url,
                                 QueryStatus.CLAIMED,
                                 query_time=response_time, tags=fulltags)
        else:
            result = QueryResult(username,
                                 site.name,
                                 url,
                                 QueryStatus.AVAILABLE,
                                 query_time=response_time, tags=fulltags)
    elif check_type == "response_url":
        # For this detection method, we have turned off the redirect.
        # So, there is no need to check the response URL: it will always
        # match the request. Instead, we will ensure that the response
        # code indicates that the request was successful (i.e. no 404, or
        # forward to some odd redirect).
        if 200 <= status_code < 300 and is_presense_detected:
            result = QueryResult(username,
                                 site.name,
                                 url,
                                 QueryStatus.CLAIMED,
                                 query_time=response_time, tags=fulltags)
        else:
            result = QueryResult(username,
                                 site.name,
                                 url,
                                 QueryStatus.AVAILABLE,
                                 query_time=response_time, tags=fulltags)
    else:
        # It should be impossible to ever get here...
        raise ValueError(f"Unknown check type '{check_type}' for "
                         f"site '{site.name}'")

    extracted_ids_data = {}

    if is_parsing_enabled and result.status == QueryStatus.CLAIMED:
        try:
            extracted_ids_data = extract(html_text)
        except Exception as e:
            logger.warning(f'Error while parsing {site.name}: {e}', exc_info=True)

        if extracted_ids_data:
            new_usernames = {}
            for k, v in extracted_ids_data.items():
                if 'username' in k:
                    new_usernames[v] = 'username'
                if k in supported_recursive_search_ids:
                    new_usernames[v] = k

            results_info['ids_usernames'] = new_usernames
            result.ids_data = extracted_ids_data

    # Notify caller about results of query.
    query_notify.update(result, site.similar_search)

    # Save status of request
    results_info['status'] = result

    # Save results from request
    results_info['http_status'] = status_code
    results_info['is_similar'] = site.similar_search
    # results_site['response_text'] = html_text
    results_info['rank'] = site.alexa_rank
    return results_info


async def maigret(username, site_dict, query_notify, logger,
                  proxy=None, timeout=None, recursive_search=False,
                  id_type='username', debug=False, forced=False,
                  max_connections=100, no_progressbar=False,
                  cookies=None):
    """Main search function

    Checks for existence of username on various social media sites.

    Keyword Arguments:
    username         -- String indicating username that report
                        should be created against.
    site_dict        -- Dictionary containing all of the site data.
    query_notify     -- Object with base type of QueryNotify().
                        This will be used to notify the caller about
                        query results.
    proxy            -- String indicating the proxy URL
    timeout          -- Time in seconds to wait before timing out request.
                        Default is no timeout.
    recursive_search -- Search for other usernames in website pages & recursive search by them.

    Return Value:
    Dictionary containing results from report. Key of dictionary is the name
    of the social network site, and the value is another dictionary with
    the following keys:
        url_main:      URL of main site.
        url_user:      URL of user on site (if account exists).
        status:        QueryResult() object indicating results of test for
                       account existence.
        http_status:   HTTP status code of query which checked for existence on
                       site.
        response_text: Text that came back from request. May be None if
                       there was an HTTP error when checking for existence.
    """

    # Notify caller that we are starting the query.
    query_notify.start(username, id_type)

    # TODO: connector
    connector = ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
    # connector = aiohttp.TCPConnector(ssl=False)
    connector.verify_ssl = False

    cookie_jar = None
    if cookies:
        cookie_jar = await import_aiohttp_cookies(cookies)

    session = aiohttp.ClientSession(connector=connector, trust_env=True, cookie_jar=cookie_jar)

    if logger.level == logging.DEBUG:
        future = session.get(url='https://icanhazip.com')
        ip, status, error, exception = await get_response(future, None, logger)
        if ip:
            logger.debug(f'My IP is: {ip.strip()}')
        else:
            logger.debug(f'IP requesting {error}: {exception}')

    # Results from analysis of all sites
    results_total = {}

    # First create futures for all requests. This allows for the requests to run in parallel
    for site_name, site in site_dict.items():

        if site.type != id_type:
            continue

        if site.disabled and not forced:
            logger.debug(f'Site {site.name} is disabled, skipping...')
            continue

        # Results from analysis of this specific site
        results_site = {}

        # Record URL of main site and username
        results_site['username'] = username
        results_site['parsing_enabled'] = recursive_search
        results_site['url_main'] = site.url_main
        results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None

        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
        }

        headers.update(site.headers)

        if 'url' not in site.__dict__:
            logger.error('No URL for site %s', site.name)
            # skip the entry: formatting site.url below would fail anyway
            continue
        # URL of user on site (if it exists)
        url = site.url.format(
            urlMain=site.url_main,
            urlSubpath=site.url_subpath,
            username=username
        )
        # workaround to prevent slash errors
        url = re.sub('(?<!:)/+', '/', url)

        # Don't make request if username is invalid for the site
        if site.regex_check and re.search(site.regex_check, username) is None:
            # No need to do the check at the site: this username is not allowed.
            results_site['status'] = QueryResult(username,
                                                 site_name,
                                                 url,
                                                 QueryStatus.ILLEGAL)
            results_site["url_user"] = ""
            results_site['http_status'] = ""
            results_site['response_text'] = ""
            query_notify.update(results_site['status'])
        else:
            # URL of user on site (if it exists)
            results_site["url_user"] = url
            url_probe = site.url_probe
            if url_probe is None:
                # Probe URL is normal one seen by people out on the web.
                url_probe = url
            else:
                # There is a special URL for probing existence separate
                # from where the user profile normally can be found.
                url_probe = url_probe.format(
                    urlMain=site.url_main,
                    urlSubpath=site.url_subpath,
                    username=username,
                )

            for k, v in site.get_params.items():
                url_probe += f'&{k}={v}'

            if site.check_type == 'status_code' and site.request_head_only:
                # In most cases when we are detecting by status code,
                # it is not necessary to get the entire body: we can
                # detect fine with just the HEAD response.
                request_method = session.head
            else:
                # Either this detect method needs the content associated
                # with the GET response, or this specific website will
                # not respond properly unless we request the whole page.
                request_method = session.get

            if site.check_type == "response_url":
                # Site forwards request to a different URL if username not
                # found. Disallow the redirect so we can capture the
                # HTTP status from the original URL request.
                allow_redirects = False
            else:
                # Allow whatever redirect that the site wants to do.
                # The final result of the request will be what is available.
                allow_redirects = True

            future = request_method(url=url_probe, headers=headers,
                                    allow_redirects=allow_redirects,
                                    timeout=timeout,
                                    )

            # Store future in data for access later
            # TODO: move to separate obj
            site.request_future = future

        # Add this site's results into final dictionary with all of the other results.
        results_total[site_name] = results_site

    # TODO: move into top-level function

    sem = asyncio.Semaphore(max_connections)

    tasks = []
    for sitename, result_obj in results_total.items():
        update_site_coro = update_site_dict_from_response(sitename, site_dict, result_obj, sem, logger, query_notify)
        future = asyncio.ensure_future(update_site_coro)
        tasks.append(future)

    if no_progressbar:
        await asyncio.gather(*tasks)
    else:
        for f in tqdm.asyncio.tqdm.as_completed(tasks):
            await f

    await session.close()

    # Notify caller that all queries are finished.
    query_notify.finish()

    return results_total


def timeout_check(value):
    """Check Timeout Argument.

    Checks timeout for validity.

    Keyword Arguments:
    value -- Time in seconds to wait before timing out request.

    Return Value:
    Floating point number representing the time (in seconds) that should be
    used for the timeout.

    NOTE: Will raise an exception if the timeout is invalid.
    """
    from argparse import ArgumentTypeError

    try:
        timeout = float(value)
    except ValueError:
        raise ArgumentTypeError(f"Timeout '{value}' must be a number.")
    if timeout <= 0:
        raise ArgumentTypeError(f"Timeout '{value}' must be greater than 0.0s.")
    return timeout


async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
    query_notify = Mock()
    changes = {
        'disabled': False,
    }

    try:
        check_data = [
            (site.username_claimed, QueryStatus.CLAIMED),
            (site.username_unclaimed, QueryStatus.AVAILABLE),
        ]
    except Exception as e:
        logger.error(e)
        logger.error(site.__dict__)
        check_data = []

    logger.info(f'Checking {site.name}...')

    for username, status in check_data:
        async with semaphore:
            results_dict = await maigret(
                username,
                {site.name: site},
                query_notify,
                logger,
                timeout=30,
                id_type=site.type,
                forced=True,
                no_progressbar=True,
            )

            # don't disable entries with other id types
            # TODO: make normal checking
            if site.name not in results_dict:
                logger.info(results_dict)
                changes['disabled'] = True
                continue

            result = results_dict[site.name]['status']

            site_status = result.status

            if site_status != status:
                if site_status == QueryStatus.UNKNOWN:
                    msgs = site.absence_strs
                    etype = site.check_type
                    logger.warning(
                        f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
                    # don't disable in case of available username
                    if status == QueryStatus.CLAIMED:
                        changes['disabled'] = True
                elif status == QueryStatus.CLAIMED:
                    logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
                    logger.info(results_dict[site.name])
                    changes['disabled'] = True
                else:
                    logger.warning(f'Found `{username}` in {site.name}, must be available')
                    logger.info(results_dict[site.name])
                    changes['disabled'] = True

    logger.info(f'Site {site.name} checking is finished')

    if changes['disabled'] != site.disabled:
        site.disabled = changes['disabled']
        db.update_site(site)
        if not silent:
            action = 'Disabled' if site.disabled else 'Enabled'
            print(f'{action} site {site.name}...')

    return changes


async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
                     max_connections=10) -> bool:
    sem = asyncio.Semaphore(max_connections)
    tasks = []
    all_sites = site_data

    def disabled_count(lst):
        return len(list(filter(lambda x: x.disabled, lst)))

    disabled_old_count = disabled_count(all_sites.values())

    for _, site in all_sites.items():
        check_coro = site_self_check(site, logger, sem, db, silent)
        future = asyncio.ensure_future(check_coro)
        tasks.append(future)

    for f in tqdm.asyncio.tqdm.as_completed(tasks):
        await f

    disabled_new_count = disabled_count(all_sites.values())
    total_disabled = disabled_new_count - disabled_old_count

    if total_disabled >= 0:
        message = 'Disabled'
    else:
        message = 'Enabled'
        total_disabled *= -1

    if not silent:
        print(
            f'{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. Run with `--info` flag to get more information')

    return total_disabled != 0
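For orientation, a minimal sketch of driving this module from a script. The username and database path are illustrative assumptions; Mock() swallows query notifications exactly as site_self_check above does, while a real run would pass a QueryNotifyPrint object from .notify:

import asyncio
import logging

from mock import Mock

from maigret.checking import maigret
from maigret.result import QueryStatus
from maigret.sites import MaigretDatabase


async def demo():
    logger = logging.getLogger('demo')
    # Assumption: the bundled JSON database path and the `sites` accessor.
    db = MaigretDatabase().load_from_file('maigret/resources/data.json')
    site_dict = {site.name: site for site in db.sites}

    results = await maigret('soxoj', site_dict, Mock(), logger,
                            timeout=10, no_progressbar=True)

    # Print only confirmed accounts, using the result keys documented above.
    for name, info in results.items():
        status = info.get('status')
        if status and status.status == QueryStatus.CLAIMED:
            print(name, info.get('url_user'))


asyncio.run(demo())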
+17 -600
@@ -2,616 +2,22 @@
Maigret main module
"""

import asyncio
import logging
import os
import platform
import re
import ssl
import sys
from argparse import ArgumentParser, RawDescriptionHelpFormatter

import aiohttp
import requests
import tqdm.asyncio
from aiohttp_socks import ProxyConnector
from mock import Mock
from python_socks import _errors as proxy_errors
-from socid_extractor import parse, extract, __version__ as socid_version
+from socid_extractor import parse, __version__ as socid_version

from .activation import ParsingActivator, import_aiohttp_cookies
+from .checking import *
from .notify import QueryNotifyPrint
from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
    generate_report_context, save_txt_report
from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite
+from .submit import submit_dialog

__version__ = '0.1.13'
[~600 removed lines omitted: the same checking logic as the new checking.py above (get_response, update_site_dict_from_response, detect_error_page, process_site_result, maigret, timeout_check, site_self_check, self_check), moved out of maigret.py]
async def main():
    version_string = '\n'.join([
@@ -685,6 +91,10 @@ async def main():
                        action="store_true", dest="print_check_errors", default=False,
                        help="Print error messages: connection, captcha, site country ban, etc."
                        )
    parser.add_argument("--submit",
                        type=str, dest="new_site_to_submit", default=False,
                        help="URL of an existing profile on the new site to submit."
                        )
    parser.add_argument("--no-color",
                        action="store_true", dest="no_color", default=False,
                        help="Don't color terminal output"
@@ -738,7 +148,7 @@ async def main():
                        action="store_true", dest="html", default=False,
                        help="Create an HTML report file (general report on all usernames)."
                        )
-    parser.add_argument("-X","--xmind",
+    parser.add_argument("-X", "--xmind",
                        action="store_true",
                        dest="xmind", default=False,
                        help="Generate an XMind 8 mindmap report (one report per username)."
@@ -820,6 +230,11 @@ async def main():
    site_data = get_top_sites_for_id(args.id_type)

    if args.new_site_to_submit:
        is_submitted = await submit_dialog(db, args.new_site_to_submit)
        if is_submitted:
            db.save_to_file(args.json_file)

    # Database self-checking
    if args.self_check:
        print('Maigret sites database self-checking...')
@@ -874,7 +289,8 @@ async def main():
        if found_unsupported_chars:
            pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
-            query_notify.warning(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
+            query_notify.warning(
+                f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
            continue

        sites_to_check = get_top_sites_for_id(id_type)
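The reflowed warning above sits in the per-username loop; `unsupported_characters = '#'` is defined in checking.py. A plausible reconstruction of the guard, where only the warning text is taken from the diff and the intersection line is an assumption:

unsupported_characters = '#'

username = 'user#name'
# assumption: how found_unsupported_chars is computed upstream of this hunk
found_unsupported_chars = set(unsupported_characters).intersection(set(username))

if found_unsupported_chars:
    pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
    print(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')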
@@ -952,5 +368,6 @@ def run():
        print('Maigret is interrupted.')
        sys.exit(1)


if __name__ == "__main__":
    run()
@@ -13590,7 +13590,7 @@
      "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
      "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
      "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
-     "x-guest-token": "1358064134064140290"
+     "x-guest-token": "1358893858789208065"
    },
    "errors": {
      "Bad guest token": "x-guest-token update required"
@@ -13956,7 +13956,7 @@
      "video"
    ],
    "headers": {
-     "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTI2MjQ4NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.kgp8r380d1aDWcd-ROncr0Tqf8EdA-l35EeEY9is6TI"
+     "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTI4MjE0MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.TXUhqilVT25xN4lZeoki6hEmbtcOiy7FKxTm5PWOMVs"
    },
    "activation": {
      "url": "https://vimeo.com/_rv/viewer",
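Both rotated tokens above expire periodically; the "activation" block on the vimeo entry is what lets the checker refresh them at runtime (see the activation handling in checking.py). A hedged sketch of the refresh idea, where the "jwt" response field and header shape are assumptions and the real logic lives in maigret/activation.py:

import requests


def refresh_vimeo_jwt(site_headers: dict):
    # The activation URL from the entry above returns JSON that
    # contains a fresh viewer token (field name assumed).
    data = requests.get('https://vimeo.com/_rv/viewer').json()
    site_headers['Authorization'] = 'jwt ' + data['jwt']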
@@ -23070,6 +23070,32 @@
    "urlMain": "https://protovary.style",
    "usernameClaimed": "alex",
    "usernameUnclaimed": "noonewouldeverusethis7"
  },
  "beacons.ai": {
    "checkType": "message",
    "presenseStrs": [
      "https://cdn.beacons.ai/profile_pictures"
    ],
    "absenceStrs": [
      "https://beacons.ai/bw_logo_full.png"
    ],
    "url": "https://beacons.ai/{username}",
    "urlMain": "https://beacons.ai",
    "usernameClaimed": "pasteljellies",
    "usernameUnclaimed": "noonewouldeverusethis7"
  },
  "are.na": {
    "checkType": "message",
    "presenseStrs": [
      "Profile--view"
    ],
    "absenceStrs": [
      "Are.na home"
    ],
    "url": "https://www.are.na/{username}",
    "urlMain": "https://www.are.na",
    "usernameClaimed": "nate-cassel",
    "usernameUnclaimed": "noonewouldeverusethis7"
  }
},
"engines": {
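Both new entries are "message"-type checks: process_site_result in checking.py reports CLAIMED when a presence string occurs in the page and no absence string does, otherwise AVAILABLE. A self-contained illustration with the beacons.ai flags (the HTML snippets are simulated):

# Flags copied from the beacons.ai entry; the "presense" spelling follows the DB schema.
presense_strs = ["https://cdn.beacons.ai/profile_pictures"]
absence_strs = ["https://beacons.ai/bw_logo_full.png"]


def message_check(html_text):
    is_presense_detected = any(s in html_text for s in presense_strs)
    is_absence_detected = any(s in html_text for s in absence_strs)
    return 'CLAIMED' if is_presense_detected and not is_absence_detected else 'AVAILABLE'


assert message_check('<img src="https://cdn.beacons.ai/profile_pictures/p/x.png">') == 'CLAIMED'
assert message_check('<img src="https://beacons.ai/bw_logo_full.png">') == 'AVAILABLE'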
@@ -0,0 +1,161 @@
import difflib

import requests
from mock import Mock

from .checking import *

DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
                   "birthday", "репутация", "информация", "e-mail"]

RATIO = 0.6
TOP_FEATURES = 5


def get_match_ratio(x):
    return round(max([
        difflib.SequenceMatcher(a=x.lower(), b=y).ratio()
        for y in DESIRED_STRINGS
    ]), 2)


def extract_domain(url):
    return '/'.join(url.split('/', 3)[:3])


async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
    query_notify = Mock()
    changes = {
        'disabled': False,
    }

    check_data = [
        (site.username_claimed, QueryStatus.CLAIMED),
        (site.username_unclaimed, QueryStatus.AVAILABLE),
    ]

    logger.info(f'Checking {site.name}...')

    for username, status in check_data:
        async with semaphore:
            results_dict = await maigret(
                username,
                {site.name: site},
                query_notify,
                logger,
                timeout=30,
                id_type=site.type,
                forced=True,
                no_progressbar=True,
            )

            # don't disable entries with other id types
            # TODO: make normal checking
            if site.name not in results_dict:
                logger.info(results_dict)
                changes['disabled'] = True
                continue

            result = results_dict[site.name]['status']

            site_status = result.status

            if site_status != status:
                if site_status == QueryStatus.UNKNOWN:
                    msgs = site.absence_strs
                    etype = site.check_type
                    logger.warning(
                        f'Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}')
                    # don't disable in case of available username
                    if status == QueryStatus.CLAIMED:
                        changes['disabled'] = True
                elif status == QueryStatus.CLAIMED:
                    logger.warning(f'Not found `{username}` in {site.name}, must be claimed')
                    logger.info(results_dict[site.name])
                    changes['disabled'] = True
                else:
                    logger.warning(f'Found `{username}` in {site.name}, must be available')
                    logger.info(results_dict[site.name])
                    changes['disabled'] = True

    logger.info(f'Site {site.name} checking is finished')

    return changes


async def submit_dialog(db, url_exists):
    url_parts = url_exists.split('/')
    supposed_username = url_parts[-1]
    new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
    if new_name:
        supposed_username = new_name
    non_exist_username = 'noonewouldeverusethis7'

    url_user = url_exists.replace(supposed_username, '{username}')
    url_not_exists = url_exists.replace(supposed_username, non_exist_username)

    a = requests.get(url_exists).text
    b = requests.get(url_not_exists).text

    tokens_a = set(a.split('"'))
    tokens_b = set(b.split('"'))

    a_minus_b = tokens_a.difference(tokens_b)
    b_minus_a = tokens_b.difference(tokens_a)

    top_features_count_str = input(f'Specify count of features to extract [default {TOP_FEATURES}]: ')
    top_features_count = int(top_features_count_str) if top_features_count_str else TOP_FEATURES

    presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]

    print('Detected text features of existing account: ' + ', '.join(presence_list))
    features = input('If the features were not detected correctly, enter them manually (comma-separated): ')

    if features:
        presence_list = features.split(',')

    absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[:top_features_count]
    print('Detected text features of non-existing account: ' + ', '.join(absence_list))
    features = input('If the features were not detected correctly, enter them manually (comma-separated): ')

    if features:
        absence_list = features.split(',')

    url_main = extract_domain(url_exists)

    site_data = {
        'absenceStrs': absence_list,
        'presenseStrs': presence_list,
        'url': url_user,
        'urlMain': url_main,
        'usernameClaimed': supposed_username,
        'usernameUnclaimed': non_exist_username,
        'checkType': 'message',
    }

    site = MaigretSite(url_main.split('/')[-1], site_data)

    print(site.__dict__)

    sem = asyncio.Semaphore(1)
    log_level = logging.INFO
    logging.basicConfig(
        format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=log_level
    )
    logger = logging.getLogger('site-submit')
    logger.setLevel(log_level)

    result = await site_self_check(site, logger, sem, db)

    if result['disabled']:
        print(f'Sorry, we couldn\'t find params to detect account presence/absence in {site.name}.')
        print('Try running this mode again with a larger features count, or choose other features.')
    else:
        if input(f'Site {site.name} successfully checked. Do you want to save it in the Maigret DB? [yY] ') in ('y', 'Y'):
            db.update_site(site)
            return True

    return False
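The heart of submit_dialog, in isolation: fetch the same page for an existing and a non-existing account, split both bodies on double quotes, and rank the tokens unique to each side by difflib similarity to DESIRED_STRINGS; the top candidates become presenseStrs/absenceStrs. A standalone sketch, with placeholder profile URLs:

import difflib

import requests

DESIRED_STRINGS = ["username", "not found", "profile"]  # shortened for the example


def get_match_ratio(x):
    return round(max(difflib.SequenceMatcher(a=x.lower(), b=y).ratio()
                     for y in DESIRED_STRINGS), 2)


a = requests.get('https://example.com/soxoj').text                    # existing profile
b = requests.get('https://example.com/noonewouldeverusethis7').text   # absent profile

presence_candidates = set(a.split('"')) - set(b.split('"'))
absence_candidates = set(b.split('"')) - set(a.split('"'))

print(sorted(presence_candidates, key=get_match_ratio, reverse=True)[:5])
print(sorted(absence_candidates, key=get_match_ratio, reverse=True)[:5])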