Improved "submit new site" mode, added tests, fixed top-500 sites (#1952)

This commit is contained in:
Soxoj
2024-12-10 18:02:43 +01:00
committed by GitHub
parent 51ab988e36
commit 81a817a39f
12 changed files with 691 additions and 194 deletions
+107 -46
View File
@@ -1970,6 +1970,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"BeerMoneyForum": {
"disabled": true,
"ignore403": true,
"tags": [
"finance",
@@ -2366,19 +2367,30 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"BoardGameGeek": {
"checkType": "message",
"tags": [
"gaming",
"us"
],
"checkType": "message",
"absenceStrs": [
"User does not exist."
"\t\tUser not found",
"messagebox error",
">\t<div class=",
"\t\t\t<title>Profile | BoardGameGeek</title>",
"\t</div></div>"
],
"alexaRank": 4327,
"urlMain": "https://www.boardgamegeek.com",
"url": "https://www.boardgamegeek.com/user/{username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
"urlMain": "https://boardgamegeek.com",
"url": "https://boardgamegeek.com/user/{username}",
"usernameClaimed": "ZakuBG",
"usernameUnclaimed": "uzytnhstvj",
"presenseStrs": [
"username",
" style=",
"mail",
" \tstyle=",
" data-username="
]
},
"Bobrdobr": {
"tags": [
@@ -3005,7 +3017,8 @@
"alexaRank": 2689,
"urlMain": "https://community.cbr.com",
"usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
"disabled": true
},
"Ccdi": {
"tags": [
@@ -4645,21 +4658,6 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"alexaRank": 301125
},
"Eksisozluk": {
"tags": [
"tr"
],
"checkType": "message",
"absenceStrs": [
"isimli bir yazar kayd\u0131 mevcut de\u011fil",
"olmaz \u00f6yle \u015fey"
],
"alexaRank": 977,
"urlMain": "https://eksisozluk.com/biri/",
"url": "https://eksisozluk.com/biri/{username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Elakiri": {
"tags": [
"lk"
@@ -5747,6 +5745,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Folkd": {
"disabled": true,
"tags": [
"eu",
"in"
@@ -7678,17 +7677,28 @@
},
"Hotcopper": {
"tags": [
"au"
"finance"
],
"checkType": "message",
"absenceStrs": [
"The following error occurred"
"error-page",
"error-page home container",
"card-footer-item",
"><main id=",
"card-content"
],
"alexaRank": 7767,
"urlMain": "https://hotcopper.com.au",
"url": "https://hotcopper.com.au/search/search?type=post&users={username}",
"usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "ggyeplcpod",
"presenseStrs": [
"title-td",
"title is-1",
"pagination ",
"toggle",
"active "
]
},
"House-Mixes.com": {
"tags": [
@@ -9056,16 +9066,27 @@
"Lichess": {
"checkType": "message",
"absenceStrs": [
"Page not found!"
"page-small box box-pad page",
"><h1 class=",
">No such player</h1><div><p>This username doesn",
"})()</script></body></html>",
"IR0Cf7qpkpcOhvI9r03a0QbI"
],
"alexaRank": 2374,
"urlMain": "https://lichess.org",
"url": "https://lichess.org/@/{username}",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7",
"usernameClaimed": "adam",
"usernameUnclaimed": "efxvyhnwrh",
"tags": [
"gaming",
"hobby"
],
"presenseStrs": [
"us_profile",
"og:title",
"profile-side",
" data-username=",
"og:site_name"
]
},
"Liebe69": {
@@ -14739,16 +14760,25 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"SlideShare": {
"tags": [
"documents",
"sharing"
],
"checkType": "status_code",
"checkType": "message",
"alexaRank": 158,
"urlMain": "https://slideshare.net/",
"url": "https://slideshare.net/{username}",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7"
"urlMain": "https://www.slideshare.net",
"url": "https://www.slideshare.net/{username}",
"usernameClaimed": "KumarSurya7",
"usernameUnclaimed": "kwbmsonxvp",
"presenseStrs": [
"user-name",
"pageInfo",
"listitem",
"polite",
"strippedTitle"
],
"absenceStrs": [
"blankProfile",
"username-available",
"robots",
"noindex,nofollow"
]
},
"Slides": {
"tags": [
@@ -15447,7 +15477,8 @@
"urlMain": "https://www.strava.com/",
"url": "https://www.strava.com/athletes/{username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
"disabled": true
},
"Studfile": {
"tags": [
@@ -16771,13 +16802,20 @@
"regexCheck": "^[^\\.]+$",
"checkType": "message",
"absenceStrs": [
"There's nothing here."
"Not found.",
":404,",
"userAgent",
",displayStatus:"
],
"alexaRank": 112,
"urlMain": "https://tumblr.com/",
"url": "https://{username}.tumblr.com/",
"usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7"
"urlMain": "https://www.tumblr.com",
"url": "https://www.tumblr.com/{username}",
"usernameClaimed": "soxoj",
"usernameUnclaimed": "zdbimdoqyt",
"presenseStrs": [
"profile",
" title="
]
},
"Tunefind": {
"checkType": "message",
@@ -17114,7 +17152,8 @@
"urlMain": "https://vc.ru",
"url": "https://vc.ru/search/v2/subsite/relevant?query={username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
"disabled": true
},
"Viddler": {
"checkType": "message",
@@ -17377,7 +17416,7 @@
"video"
],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM2MTc5MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNGYxM2M4N2ItYWMwMy00Y2JhLWExMDctNmNiODhmM2U3NjZjIn0.Y7CWEWckdSMsmJ8ROPmhHR6el2QCYJRDl0RLPpdJOKc"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4MzkwODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiOWNjMjk0ZjktZGZhOS00NDI0LWE0OGEtN2JjYzkwYjM2NTMyIn0.wG0kC7fWtrdKI9ccS-LE81lVgQRfYobrqCAPWxr1wzc"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -18971,7 +19010,8 @@
"urlMain": "https://aminoapps.com/",
"url": "https://aminoapps.com/u/{username}",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis77777"
"usernameUnclaimed": "noonewouldeverusethis77777",
"disabled": true
},
"analitika-forex.ru": {
"engine": "uCoz",
@@ -35419,6 +35459,27 @@
"Cache-Control": "no-cache",
"TE": "trailers"
}
},
"Eksisozluk": {
"absenceStrs": [
" <h1>b\u00f6yle bir yazar yok</h1>\r"
],
"presenseStrs": [
"profile-dots",
"profile-logo",
"profile-cards",
"profile-biography",
" data-title="
],
"alexaRank": 977,
"url": "https://eksisozluk.com/biri/{username}",
"urlMain": "https://eksisozluk.com",
"usernameClaimed": "kartalbafilerrr",
"usernameUnclaimed": "rlcvuwlxqh",
"checkType": "message",
"tags": [
"tr"
]
}
},
"engines": {
+1 -1
View File
@@ -96,7 +96,7 @@ class MaigretCheckResult:
return self.status == MaigretCheckStatus.CLAIMED
def __repr__(self):
return f"'{self.__str__()}'"
return f"<{self.__str__()}>"
def __str__(self):
"""Convert Object To String.
+262 -121
View File
@@ -2,7 +2,8 @@ import asyncio
import json
import re
import os
from typing import Any, Dict, List, Optional
import logging
from typing import Any, Dict, List, Optional, Tuple
from aiohttp import ClientSession, TCPConnector
from aiohttp_socks import ProxyConnector
@@ -15,7 +16,7 @@ from .settings import Settings
from .sites import MaigretDatabase, MaigretEngine, MaigretSite
from .utils import get_random_user_agent
from .checking import site_self_check
from .utils import get_match_ratio
from .utils import get_match_ratio, generate_random_username
class CloudflareSession:
@@ -125,21 +126,13 @@ class Submitter:
return fields
async def detect_known_engine(
self, url_exists, url_mainpage
self, url_exists, url_mainpage, session, follow_redirects, headers
) -> [List[MaigretSite], str]:
resp_text = ''
try:
r = await self.session.get(url_mainpage)
content = await r.content.read()
charset = r.charset or "utf-8"
resp_text = content.decode(charset, "ignore")
self.logger.debug(resp_text)
except Exception as e:
self.logger.warning(e, exc_info=True)
print(f"Some error while checking main page: {e}")
return [], resp_text
session = session or self.session
resp_text, _ = await self.get_html_response_to_compare(
url_exists, session, follow_redirects, headers
)
for engine in self.db.engines:
strs_to_check = engine.__dict__.get("presenseStrs")
@@ -195,113 +188,134 @@ class Submitter:
)
return entered_username if entered_username else supposed_username
async def check_features_manually(
self, url_exists, url_mainpage, cookie_file, redirects=False
@staticmethod
async def get_html_response_to_compare(
url: str, session: ClientSession = None, redirects=False, headers: Dict = None
):
custom_headers = {}
while self.args.verbose:
header_key = input(
'Specify custom header if you need or just press Enter to skip. Header name: '
async with session.get(
url, allow_redirects=redirects, headers=headers
) as response:
# Try different encodings or fallback to 'ignore' errors
try:
html_response = await response.text(encoding='utf-8')
except UnicodeDecodeError:
try:
html_response = await response.text(encoding='latin1')
except UnicodeDecodeError:
html_response = await response.text(errors='ignore')
return html_response, response.status
async def check_features_manually(
self,
username: str,
url_exists: str,
cookie_filename="", # TODO: use cookies
session: ClientSession = None,
follow_redirects=False,
headers: dict = None,
) -> Tuple[List[str], List[str], str, str]:
random_username = generate_random_username()
url_of_non_existing_account = url_exists.lower().replace(
username.lower(), random_username
)
if not header_key:
break
header_value = input('Header value: ')
custom_headers[header_key.strip()] = header_value.strip()
supposed_username = self.extract_username_dialog(url_exists)
non_exist_username = "noonewouldeverusethis7"
url_user = url_exists.replace(supposed_username, "{username}")
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
headers = dict(self.HEADERS)
headers.update(custom_headers)
exists_resp = await self.session.get(
url_exists,
headers=headers,
allow_redirects=redirects,
try:
session = session or self.session
first_html_response, first_status = await self.get_html_response_to_compare(
url_exists, session, follow_redirects, headers
)
exists_resp_text = await exists_resp.text()
self.logger.debug(url_exists)
self.logger.debug(exists_resp.status)
self.logger.debug(exists_resp_text)
non_exists_resp = await self.session.get(
url_not_exists,
headers=headers,
allow_redirects=redirects,
second_html_response, second_status = (
await self.get_html_response_to_compare(
url_of_non_existing_account, session, follow_redirects, headers
)
non_exists_resp_text = await non_exists_resp.text()
self.logger.debug(url_not_exists)
self.logger.debug(non_exists_resp.status)
self.logger.debug(non_exists_resp_text)
)
await session.close()
except Exception as e:
self.logger.error(
f"Error while getting HTTP response for username {username}: {e}",
exc_info=True,
)
return None, None, str(e), random_username
a = exists_resp_text
b = non_exists_resp_text
self.logger.info(f"URL with existing account: {url_exists}")
self.logger.info(
f"HTTP response status for URL with existing account: {first_status}"
)
self.logger.info(
f"HTTP response length URL with existing account: {len(first_html_response)}"
)
self.logger.debug(first_html_response)
tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
self.logger.info(f"URL with existing account: {url_of_non_existing_account}")
self.logger.info(
f"HTTP response status for URL with non-existing account: {second_status}"
)
self.logger.info(
f"HTTP response length URL with non-existing account: {len(second_html_response)}"
)
self.logger.debug(second_html_response)
# TODO: filter by errors, move to dialog function
if (
"/cdn-cgi/challenge-platform" in first_html_response
or "\t\t\t\tnow: " in first_html_response
or "Sorry, you have been blocked" in first_html_response
):
self.logger.info("Cloudflare detected, skipping")
return None, None, "Cloudflare detected, skipping", random_username
tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response))
tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response))
a_minus_b = tokens_a.difference(tokens_b)
b_minus_a = tokens_b.difference(tokens_a)
# additional filtering by html response
a_minus_b = [t for t in a_minus_b if t not in non_exists_resp_text]
b_minus_a = [t for t in b_minus_a if t not in exists_resp_text]
a_minus_b = list(map(lambda x: x.strip('\\'), a_minus_b))
b_minus_a = list(map(lambda x: x.strip('\\'), b_minus_a))
# Filter out strings containing usernames
a_minus_b = [s for s in a_minus_b if username.lower() not in s.lower()]
b_minus_a = [s for s in b_minus_a if random_username.lower() not in s.lower()]
def filter_tokens(token: str, html_response: str) -> bool:
is_in_html = token in html_response
is_long_str = len(token) >= 50
is_number = re.match(r'^\d\.?\d+$', token) or re.match(r':^\d+$', token)
is_whitelisted_number = token in ['200', '404', '403']
return not (
is_in_html or is_long_str or (is_number and not is_whitelisted_number)
)
a_minus_b = list(
filter(lambda t: filter_tokens(t, second_html_response), a_minus_b)
)
b_minus_a = list(
filter(lambda t: filter_tokens(t, first_html_response), b_minus_a)
)
if len(a_minus_b) == len(b_minus_a) == 0:
print("The pages for existing and non-existing account are the same!")
top_features_count = int(
input(
f"Specify count of features to extract [default {self.TOP_FEATURES}]: "
)
or self.TOP_FEATURES
return (
None,
None,
"HTTP responses for pages with existing and non-existing accounts are the same",
random_username,
)
match_fun = get_match_ratio(self.settings.presence_strings)
presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[
:top_features_count
: self.TOP_FEATURES
]
self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list])
print("Detected text features of existing account: " + ", ".join(presence_list))
features = input("If features was not detected correctly, write it manually: ")
if features:
presence_list = list(map(str.strip, features.split(",")))
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
:top_features_count
: self.TOP_FEATURES
]
self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list])
print(
"Detected text features of non-existing account: " + ", ".join(absence_list)
)
features = input("If features was not detected correctly, write it manually: ")
self.logger.info(f"Detected presence features: {presence_list}")
self.logger.info(f"Detected absence features: {absence_list}")
if features:
absence_list = list(map(str.strip, features.split(",")))
site_data = {
"absenceStrs": absence_list,
"presenseStrs": presence_list,
"url": url_user,
"urlMain": url_mainpage,
"usernameClaimed": supposed_username,
"usernameUnclaimed": non_exist_username,
"checkType": "message",
}
if headers != self.HEADERS:
site_data['headers'] = headers
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
return site
return presence_list, absence_list, "Found", random_username
async def add_site(self, site):
sem = asyncio.Semaphore(1)
@@ -376,6 +390,12 @@ class Submitter:
}
async def dialog(self, url_exists, cookie_file):
old_site = None
additional_options_enabled = self.logger.level in (
logging.DEBUG,
logging.WARNING,
)
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
domain_raw = domain_raw.split("/")[0]
self.logger.info('Domain is %s', domain_raw)
@@ -386,9 +406,11 @@ class Submitter:
)
if matched_sites:
# TODO: update the existing site
print(
f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
f"{Fore.YELLOW}[!] Sites with domain \"{domain_raw}\" already exists in the Maigret database!{Style.RESET_ALL}"
)
status = lambda s: "(disabled)" if s.disabled else ""
url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
print(
@@ -400,16 +422,62 @@ class Submitter:
)
)
if input("Do you want to continue? [yN] ").lower() in "n":
if (
input(
f"{Fore.GREEN}[?] Do you want to continue? [yN] {Style.RESET_ALL}"
).lower()
in "n"
):
return False
site_names = [site.name for site in matched_sites]
site_name = (
input(
f"{Fore.GREEN}[?] Which site do you want to update in case of success? 1st by default. [{', '.join(site_names)}] {Style.RESET_ALL}"
)
or matched_sites[0].name
)
old_site = next(
(site for site in matched_sites if site.name == site_name), None
)
print(
f'{Fore.GREEN}[+] We will update site "{old_site.name}" in case of success.{Style.RESET_ALL}'
)
url_mainpage = self.extract_mainpage_url(url_exists)
# headers update
custom_headers = dict(self.HEADERS)
while additional_options_enabled:
header_key = input(
f'{Fore.GREEN}[?] Specify custom header if you need or just press Enter to skip. Header name: {Style.RESET_ALL}'
)
if not header_key:
break
header_value = input(f'{Fore.GREEN}[?] Header value: {Style.RESET_ALL}')
custom_headers[header_key.strip()] = header_value.strip()
# redirects settings update
redirects = False
if additional_options_enabled:
redirects = (
'y'
in input(
f'{Fore.GREEN}[?] Should we do redirects automatically? [yN] {Style.RESET_ALL}'
).lower()
)
print('Detecting site engine, please wait...')
sites = []
text = None
try:
sites, text = await self.detect_known_engine(url_exists, url_exists)
sites, text = await self.detect_known_engine(
url_exists,
url_exists,
session=None,
follow_redirects=redirects,
headers=custom_headers,
)
except KeyboardInterrupt:
print('Engine detect process is interrupted.')
@@ -422,26 +490,48 @@ class Submitter:
if not sites:
print("Unable to detect site engine, lets generate checking features")
redirects = False
if self.args.verbose:
redirects = (
'y' in input('Should we do redirects automatically? [yN] ').lower()
supposed_username = self.extract_username_dialog(url_exists)
self.logger.info(f"Supposed username: {supposed_username}")
presence_list, absence_list, status, non_exist_username = (
await self.check_features_manually(
username=supposed_username,
url_exists=url_exists,
cookie_filename=cookie_file,
follow_redirects=redirects,
headers=custom_headers,
)
)
sites = [
await self.check_features_manually(
url_exists,
url_mainpage,
cookie_file,
redirects,
if status == "Found":
site_data = {
"absenceStrs": absence_list,
"presenseStrs": presence_list,
"url": url_exists.replace(supposed_username, '{username}'),
"urlMain": url_mainpage,
"usernameClaimed": supposed_username,
"usernameUnclaimed": non_exist_username,
"checkType": "message",
}
self.logger.info(json.dumps(site_data, indent=4))
if custom_headers != self.HEADERS:
site_data['headers'] = custom_headers
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
sites.append(site)
else:
print(
f"{Fore.RED}[!] The check for site failed! Reason: {status}{Style.RESET_ALL}"
)
]
return False
self.logger.debug(sites[0].__dict__)
sem = asyncio.Semaphore(1)
print("Checking, please wait...")
print(f"{Fore.GREEN}[*] Checking, please wait...{Style.RESET_ALL}")
found = False
chosen_site = None
for s in sites:
@@ -463,7 +553,7 @@ class Submitter:
else:
if (
input(
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
f"{Fore.GREEN}[?] Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] {Style.RESET_ALL}"
)
.lower()
.strip("y")
@@ -471,22 +561,73 @@ class Submitter:
return False
if self.args.verbose:
source = input("Name the source site if it is mirror: ")
self.logger.info(
"Verbose mode is enabled, additional settings are available"
)
source = input(
f"{Fore.GREEN}[?] Name the source site if it is mirror: {Style.RESET_ALL}"
)
if source:
chosen_site.source = source
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
default_site_name = old_site.name if old_site else chosen_site.name
new_name = (
input(
f"{Fore.GREEN}[?] Change site name if you want [{default_site_name}]: {Style.RESET_ALL}"
)
or default_site_name
)
if new_name != default_site_name:
self.logger.info(f"New site name is {new_name}")
chosen_site.name = new_name
# TODO: remove empty tags
new_tags = input(f"{Fore.GREEN}[?] Site tags: {Style.RESET_ALL}")
if new_tags:
chosen_site.tags = list(map(str.strip, new_tags.split(',')))
else:
chosen_site.tags = []
self.logger.info(f"Site tags are: {', '.join(chosen_site.tags)}")
# rank = Submitter.get_alexa_rank(chosen_site.url_main)
# if rank:
# print(f'New alexa rank: {rank}')
# chosen_site.alexa_rank = rank
self.logger.debug(chosen_site.json)
self.logger.info(chosen_site.json)
site_data = chosen_site.strip_engine_data()
self.logger.debug(site_data.json)
self.db.update_site(site_data)
self.logger.info(site_data.json)
if old_site:
# Update old site with new values and log changes
fields_to_check = {
'url': 'URL',
'url_main': 'Main URL',
'username_claimed': 'Username claimed',
'username_unclaimed': 'Username unclaimed',
'check_type': 'Check type',
'presense_strs': 'Presence strings',
'absence_strs': 'Absence strings',
'tags': 'Tags',
'source': 'Source',
'headers': 'Headers',
}
for field, display_name in fields_to_check.items():
old_value = getattr(old_site, field)
new_value = getattr(site_data, field)
if field == 'tags' and not new_tags:
continue
if str(old_value) != str(new_value):
print(
f"{Fore.YELLOW}[*] '{display_name}' updated: {Fore.RED}{old_value} {Fore.YELLOW}to {Fore.GREEN}{new_value}{Style.RESET_ALL}"
)
old_site.__dict__[field] = new_value
# update the site
final_site = old_site if old_site else site_data
self.db.update_site(final_site)
# save the db in file
if self.args.db_file != self.settings.sites_db_path:
print(
f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}"
+5
View File
@@ -3,6 +3,7 @@ import ast
import difflib
import re
import random
import string
from typing import Any
@@ -119,3 +120,7 @@ def get_match_ratio(base_strs: list):
)
return get_match_inner
def generate_random_username():
return ''.join(random.choices(string.ascii_lowercase, k=10))
+22 -22
View File
@@ -77,7 +77,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://open.spotify.com/) [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.tiktok.com/) [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video*
1. ![](https://www.google.com/s2/favicons?domain=https://xvideos.com/) [Xvideos (https://xvideos.com/)](https://xvideos.com/)*: top 500, porn, us*
1. ![](https://www.google.com/s2/favicons?domain=https://tumblr.com/) [Tumblr (https://tumblr.com/)](https://tumblr.com/)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.tumblr.com) [Tumblr (https://www.tumblr.com)](https://www.tumblr.com)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.roblox.com/) [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://soundcloud.com/) [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music*
1. ![](https://www.google.com/s2/favicons?domain=https://www.udemy.com) [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*
@@ -92,7 +92,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.pinterest.com/) [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://www.fiverr.com/) [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
1. ![](https://www.google.com/s2/favicons?domain=https://t.me/) [Telegram (https://t.me/)](https://t.me/)*: top 500, messaging*
1. ![](https://www.google.com/s2/favicons?domain=https://slideshare.net/) [SlideShare (https://slideshare.net/)](https://slideshare.net/)*: top 500, documents, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://www.slideshare.net) [SlideShare (https://www.slideshare.net)](https://www.slideshare.net)*: top 500*
1. ![](https://www.google.com/s2/favicons?domain=https://theguardian.com) [TheGuardian (https://theguardian.com)](https://theguardian.com)*: top 500, news, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://trello.com/) [Trello (https://trello.com/)](https://trello.com/)*: top 500, tasks*
1. ![](https://www.google.com/s2/favicons?domain=https://support.mozilla.org) [Mozilla Support (https://support.mozilla.org)](https://support.mozilla.org)*: top 500, us*
@@ -187,7 +187,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://community.brave.com) [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://tinder.com/) [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us*
1. ![](https://www.google.com/s2/favicons?domain=https://community.cloudflare.com/) [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, forum, tech*
1. ![](https://www.google.com/s2/favicons?domain=https://eksisozluk.com/biri/) [Eksisozluk (https://eksisozluk.com/biri/)](https://eksisozluk.com/biri/)*: top 1K, tr*
1. ![](https://www.google.com/s2/favicons?domain=https://eksisozluk.com) [Eksisozluk (https://eksisozluk.com)](https://eksisozluk.com)*: top 1K, tr*
1. ![](https://www.google.com/s2/favicons?domain=https://www.allrecipes.com/) [AllRecipes (https://www.allrecipes.com/)](https://www.allrecipes.com/)*: top 1K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://support.t-mobile.com) [T-MobileSupport (https://support.t-mobile.com)](https://support.t-mobile.com)*: top 1K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.tinkoff.ru/invest/) [Tinkoff Invest (https://www.tinkoff.ru/invest/)](https://www.tinkoff.ru/invest/)*: top 5K, ru*
@@ -195,7 +195,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://discuss.python.org/) [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.nairaland.com/) [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng*
1. ![](https://www.google.com/s2/favicons?domain=https://ru.redtube.com/) [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.strava.com/) [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.strava.com/) [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://profile.ameba.jp) [Ameba (https://profile.ameba.jp)](https://profile.ameba.jp)*: top 5K, jp*
1. ![](https://www.google.com/s2/favicons?domain=https://adblockplus.org) [adblockplus.org (https://adblockplus.org)](https://adblockplus.org)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://houzz.com/) [Houzz (https://houzz.com/)](https://houzz.com/)*: top 5K, us*, search is disabled
@@ -265,7 +265,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://lichess.org) [Lichess (https://lichess.org)](https://lichess.org)*: top 5K, gaming, hobby*
1. ![](https://www.google.com/s2/favicons?domain=https://jsfiddle.net) [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://ru.pathofexile.com) [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us*
1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.metacritic.com/) [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.digitalocean.com/) [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech*
1. ![](https://www.google.com/s2/favicons?domain=http://www.jeuxvideo.com) [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming*
@@ -281,7 +281,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://archiveofourown.org) [ArchiveOfOurOwn (https://archiveofourown.org)](https://archiveofourown.org)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://bit.ly) [Bit.ly (https://bit.ly)](https://bit.ly)*: top 5K, links*
1. ![](https://www.google.com/s2/favicons?domain=https://infourok.ru) [Infourok (https://infourok.ru)](https://infourok.ru)*: top 5K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://community.cbr.com) [Cbr (https://community.cbr.com)](https://community.cbr.com)*: top 5K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://community.cbr.com) [Cbr (https://community.cbr.com)](https://community.cbr.com)*: top 5K, forum, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://segmentfault.com/) [segmentfault (https://segmentfault.com/)](https://segmentfault.com/)*: top 5K, cn*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.warriorforum.com/) [Warrior Forum (https://www.warriorforum.com/)](https://www.warriorforum.com/)*: top 5K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://hub.docker.com/) [Docker Hub (https://hub.docker.com/)](https://hub.docker.com/)*: top 5K, coding*
@@ -295,7 +295,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://creativemarket.com/) [CreativeMarket (https://creativemarket.com/)](https://creativemarket.com/)*: top 5K, art, stock*
1. ![](https://www.google.com/s2/favicons?domain=https://bitbucket.org/) [BitBucket (https://bitbucket.org/)](https://bitbucket.org/)*: top 5K, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://www.techrepublic.com) [Techrepublic (https://www.techrepublic.com)](https://www.techrepublic.com)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://aminoapps.com/) [aminoapp (https://aminoapps.com/)](https://aminoapps.com/)*: top 5K, br, us*
1. ![](https://www.google.com/s2/favicons?domain=https://aminoapps.com/) [aminoapp (https://aminoapps.com/)](https://aminoapps.com/)*: top 5K, br, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.mixcloud.com/) [MixCloud (https://www.mixcloud.com/)](https://www.mixcloud.com/)*: top 5K, music*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.xda-developers.com) [XDA (https://forum.xda-developers.com)](https://forum.xda-developers.com)*: top 5K, apps, forum*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://thechive.com/) [Thechive (https://thechive.com/)](https://thechive.com/)*: top 5K, us*
@@ -321,7 +321,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=http://forums.bulbagarden.net) [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://videohive.net) [videohive.net (https://videohive.net)](https://videohive.net)*: top 5K, video*
1. ![](https://www.google.com/s2/favicons?domain=https://imginn.com) [ImgInn (https://imginn.com)](https://imginn.com)*: top 5K, photo*
1. ![](https://www.google.com/s2/favicons?domain=https://www.boardgamegeek.com) [BoardGameGeek (https://www.boardgamegeek.com)](https://www.boardgamegeek.com)*: top 5K, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://boardgamegeek.com) [BoardGameGeek (https://boardgamegeek.com)](https://boardgamegeek.com)*: top 5K, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://osu.ppy.sh/) [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://app.pluralsight.com) [Pluralsight (https://app.pluralsight.com)](https://app.pluralsight.com)*: top 5K, in, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.techpowerup.com) [TechPowerUp (https://www.techpowerup.com)](https://www.techpowerup.com)*: top 5K, us*
@@ -406,7 +406,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.reverbnation.com/) [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.glavbukh.ru) [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.trakt.tv/) [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr*
1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, au*
1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, finance*
1. ![](https://www.google.com/s2/favicons?domain=https://pandia.ru) [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forums.majorgeeks.com) [forums.majorgeeks.com (https://forums.majorgeeks.com)](https://forums.majorgeeks.com)*: top 10K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.hackerearth.com) [Hackerearth (https://www.hackerearth.com)](https://www.hackerearth.com)*: top 10K, freelance*
@@ -472,7 +472,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://3ddd.ru) [3ddd (https://3ddd.ru)](https://3ddd.ru)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://namemc.com/) [NameMC (https://namemc.com/)](https://namemc.com/)*: top 100K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.b17.ru/) [B17 (https://www.b17.ru/)](https://www.b17.ru/)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.beermoneyforum.com) [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*
1. ![](https://www.google.com/s2/favicons?domain=https://www.beermoneyforum.com) [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://diary.ru) [Diary.ru (https://diary.ru)](https://diary.ru)*: top 100K, blog, nl, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.americanthinker.com/) [Americanthinker (https://www.americanthinker.com/)](https://www.americanthinker.com/)*: top 100K*
1. ![](https://www.google.com/s2/favicons?domain=https://contently.com/) [Contently (https://contently.com/)](https://contently.com/)*: top 100K, freelance, in*
@@ -497,7 +497,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://pbase.com/) [Pbase (https://pbase.com/)](https://pbase.com/)*: top 100K, in*
1. ![](https://www.google.com/s2/favicons?domain=https://www.native-instruments.com/forum/) [NICommunityForum (https://www.native-instruments.com/forum/)](https://www.native-instruments.com/forum/)*: top 100K, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://spletnik.ru/) [spletnik (https://spletnik.ru/)](https://spletnik.ru/)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=http://www.folkd.com/profile/) [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*
1. ![](https://www.google.com/s2/favicons?domain=http://www.folkd.com/profile/) [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.iphones.ru) [Iphones.ru (https://www.iphones.ru)](https://www.iphones.ru)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.oper.ru/) [Oper (https://www.oper.ru/)](https://www.oper.ru/)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.interpals.net/) [interpals (https://www.interpals.net/)](https://www.interpals.net/)*: top 100K, dating*
@@ -3141,20 +3141,20 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://pubg.op.gg) [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://valorant.op.gg) [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
The list was updated at (2024-12-09)
The list was updated at (2024-12-10)
## Statistics
Enabled/total sites: 2699/3137 = 86.04%
Enabled/total sites: 2693/3137 = 85.85%
Incomplete message checks: 406/2699 = 15.04% (false positive risks)
Incomplete message checks: 397/2693 = 14.74% (false positive risks)
Status code checks: 720/2699 = 26.68% (false positive risks)
Status code checks: 719/2693 = 26.7% (false positive risks)
False positive risk (total): 41.72%
False positive risk (total): 41.44%
Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
- (300) `/{username}`
- (301) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (161) `/user/{username}`
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
@@ -3165,8 +3165,8 @@ Top 20 profile URLs:
- (87) `{urlMain}/u/{username}/summary (Discourse)`
- (54) `/wiki/User:{username}`
- (52) `/@{username}`
- (42) `SUBDOMAIN`
- (41) `/members/?username={username}`
- (41) `SUBDOMAIN`
- (32) `/members/{username}`
- (29) `/author/{username}`
- (27) `{urlMain}{urlSubpath}/memberlist.php?username={username} (phpBB)`
@@ -3177,21 +3177,21 @@ Top 20 profile URLs:
Top 20 tags:
- (328) `NO_TAGS` (non-standard)
- (307) `forum`
- (52) `gaming`
- (50) `gaming`
- (26) `coding`
- (21) `photo`
- (21) `blog`
- (20) `blog`
- (19) `news`
- (15) `music`
- (14) `tech`
- (12) `sharing`
- (12) `freelance`
- (12) `finance`
- (11) `sharing`
- (10) `dating`
- (10) `art`
- (10) `shopping`
- (10) `movies`
- (8) `hobby`
- (8) `crypto`
- (7) `sport`
- (7) `hobby`
- (7) `hacking`
+7
View File
@@ -79,6 +79,13 @@ def reports_autoclean():
remove_test_reports()
@pytest.fixture(scope='session')
def settings():
    """Session-wide Maigret Settings instance loaded from the test settings file."""
    loaded = Settings()
    loaded.load([SETTINGS_FILE])
    return loaded
@pytest.fixture(scope='session')
def argparser():
settings = Settings()
+4 -4
View File
@@ -26,7 +26,7 @@
"alexaRank": 1,
"url": "https://play.google.com/store/apps/developer?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "OpenAI",
"usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"InvalidActive": {
@@ -36,7 +36,7 @@
"alexaRank": 1,
"url": "https://play.google.com/store/apps/dev?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "OpenAI",
"usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"ValidInactive": {
@@ -46,7 +46,7 @@
"alexaRank": 1,
"url": "https://play.google.com/store/apps/developer?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "OpenAI",
"usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"InvalidInactive": {
@@ -56,7 +56,7 @@
"alexaRank": 1,
"url": "https://play.google.com/store/apps/dev?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "OpenAI",
"usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7"
}
}
+1
View File
@@ -34,6 +34,7 @@ def test_vimeo_activation(default_db):
assert token1 != token2
@pytest.mark.slow
@pytest.mark.asyncio
async def test_import_aiohttp_cookies():
cookies_filename = 'cookies_test.txt'
+2
View File
@@ -1,8 +1,10 @@
"""Maigret data test functions"""
import pytest
from maigret.utils import is_country_tag
@pytest.mark.slow
def test_tags_validity(default_db):
unknown_tags = set()
+1
View File
@@ -49,6 +49,7 @@ async def test_asyncio_progressbar_semaphore_executor():
assert executor.execution_time < 0.4
@pytest.mark.slow
@pytest.mark.asyncio
async def test_asyncio_progressbar_queue_executor():
tasks = [(func, [n], {}) for n in range(10)]
+1
View File
@@ -84,6 +84,7 @@ def test_maigret_results(test_db):
assert results == RESULTS_EXAMPLE
@pytest.mark.slow
def test_extract_ids_from_url(default_db):
assert default_db.extract_ids_from_url('https://www.reddit.com/user/test') == {
'test': 'username'
+278
View File
@@ -0,0 +1,278 @@
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from maigret.submit import Submitter, MaigretSite, MaigretEngine
from aiohttp import ClientSession
from maigret.sites import MaigretDatabase
from maigret.settings import Settings
import logging
@pytest.mark.slow
@pytest.mark.asyncio
async def test_detect_known_engine(test_db, local_test_db):
    """Engine detection should recognize a Discourse forum from a profile URL
    and build candidate site entries; with an engine-less database it should
    detect nothing.

    Fix: removed the dead local ``mock_db = test_db`` — it was assigned but
    never used (the Submitter is constructed with ``test_db`` directly).
    """
    mock_settings = MagicMock()
    mock_logger = MagicMock()
    mock_args = MagicMock()
    mock_args.cookie_file = ""
    mock_args.proxy = ""
    # Candidate usernames the submitter may probe against the site
    mock_settings.supposed_usernames = ["adam"]

    # Create the Submitter instance backed by the full test database
    submitter = Submitter(test_db, mock_settings, mock_logger, mock_args)

    # Test URLs: an existing profile and the site's main page
    url_exists = "https://devforum.zoom.us/u/adam"
    url_mainpage = "https://devforum.zoom.us/"

    # Bypass the interactive username prompt
    submitter.extract_username_dialog = MagicMock(return_value="adam")

    sites, resp_text = await submitter.detect_known_engine(
        url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
    )

    # Assertions
    assert len(sites) == 2
    assert sites[0].name == "devforum.zoom.us"
    assert sites[0].url_main == "https://devforum.zoom.us/"
    assert sites[0].engine == "Discourse"
    assert sites[0].username_claimed == "adam"
    assert sites[0].username_unclaimed == "noonewouldeverusethis7"
    assert resp_text != ""
    await submitter.close()

    # A Submitter over a database without engine definitions finds nothing
    submitter = Submitter(local_test_db, mock_settings, mock_logger, mock_args)
    sites, resp_text = await submitter.detect_known_engine(
        url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
    )
    assert len(sites) == 0
    await submitter.close()
@pytest.mark.slow
@pytest.mark.asyncio
async def test_check_features_manually_success(settings):
    """Manual feature extraction against a live Google Play developer page:
    the profile must be reported as found with stable presence/absence markers."""
    database = MaigretDatabase()
    log = logging.getLogger("test_logger")
    cli_args = type(
        'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
    )()
    submitter = Submitter(database, settings, log, cli_args)

    username = "KONAMI"
    url_exists = "https://play.google.com/store/apps/developer?id=KONAMI"

    # Run the manual check against the live page
    presence_list, absence_list, status, random_username = (
        await submitter.check_features_manually(
            username=username,
            url_exists=url_exists,
            session=ClientSession(),
            follow_redirects=False,
            headers=None,
        )
    )
    await submitter.close()

    # Verify outcome and returned types
    assert status == "Found", "Expected status to be 'Found'"
    assert isinstance(presence_list, list), "Presence list should be a list"
    assert isinstance(absence_list, list), "Absence list should be a list"
    assert isinstance(random_username, str), "Random username should be a string"
    assert (
        random_username != username
    ), "Random username should not be the same as the input username"

    expected_presence = [
        ' title=',
        'og:title',
        'display: none;',
        '4;0',
        'main-title',
    ]
    expected_absence = [
        ' body {',
        ' </style>',
        '><title>Not Found</title>',
        ' <style nonce=',
        ' .rounded {',
    ]
    assert sorted(presence_list) == sorted(expected_presence)
    assert sorted(absence_list) == sorted(expected_absence)
@pytest.mark.slow
@pytest.mark.asyncio
async def test_check_features_manually_cloudflare(settings):
    """Manual feature check must bail out when Cloudflare protection is detected.

    Fix: this function was originally named ``test_check_features_manually_success``,
    duplicating the previous test's name — Python module semantics made the second
    definition shadow the first, so pytest collected only one of the two tests.
    Renamed so both tests run.
    """
    db = MaigretDatabase()
    logger = logging.getLogger("test_logger")
    args = type(
        'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
    )()
    submitter = Submitter(db, settings, logger, args)
    username = "abel"
    url_exists = "https://community.cloudflare.com/badges/1/basic?username=abel"

    # Execute against a Cloudflare-protected page
    presence_list, absence_list, status, random_username = (
        await submitter.check_features_manually(
            username=username,
            url_exists=url_exists,
            session=ClientSession(),
            follow_redirects=False,
            headers=None,
        )
    )
    await submitter.close()

    # No feature lists can be extracted; an explicit skip status is expected
    assert status == "Cloudflare detected, skipping"
    assert presence_list is None
    assert absence_list is None
    assert random_username != username
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_adds_site_positive(settings):
    """End-to-end submit dialog: a new site is detected from a profile URL
    and saved into an initially empty database."""
    database = MaigretDatabase()
    log = logging.getLogger("test_logger")
    log.setLevel(logging.INFO)
    arg_fields = {
        'proxy': None,
        'cookie_file': None,
        'verbose': False,
        'db_file': 'test_db.json',
        'db': 'test_db.json',
    }
    cli_args = type('Args', (object,), arg_fields)()
    submitter = Submitter(database, settings, log, cli_args)

    # Scripted answers for the interactive dialog prompts
    scripted_answers = [
        'KONAMI',  # Manually input username
        'y',  # Save the site in the Maigret DB
        'GooglePlayStore',  # Custom site name
        '',  # no custom tags
    ]
    with patch('builtins.input', side_effect=scripted_answers):
        result = await submitter.dialog(
            "https://play.google.com/store/apps/developer?id=KONAMI", None
        )
    await submitter.close()

    assert result is True
    assert len(database.sites) == 1

    new_site = database.sites[0]
    assert new_site.url_main == "https://play.google.com"
    assert new_site.name == "GooglePlayStore"
    assert new_site.tags == []
    assert new_site.presense_strs != []
    assert new_site.absence_strs != []
    assert new_site.username_claimed == "KONAMI"
    assert new_site.check_type == "message"
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_replace_site(settings, test_db):
    """Submit dialog over a pre-populated database: the user chooses to
    replace an existing similar entry, so the site count stays the same."""
    db = test_db
    log = logging.getLogger("test_logger")
    log.setLevel(logging.DEBUG)
    arg_fields = {
        'proxy': None,
        'cookie_file': None,
        'verbose': False,
        'db_file': 'test_db.json',
        'db': 'test_db.json',
    }
    cli_args = type('Args', (object,), arg_fields)()

    # Sanity check the fixture's initial size before running the dialog
    assert len(db.sites) == 4

    submitter = Submitter(db, settings, log, cli_args)

    # Scripted answers for the interactive dialog prompts
    scripted_answers = [
        'y',  # Similar sites found, continue
        'InvalidActive',  # Choose site to replace
        '',  # Custom headers
        'y',  # Should we do redirects automatically?
        'KONAMI',  # Manually input username
        'y',  # Save the site in the Maigret DB
        '',  # Custom site name
        '',  # no custom tags
    ]
    with patch('builtins.input', side_effect=scripted_answers):
        result = await submitter.dialog(
            "https://play.google.com/store/apps/developer?id=KONAMI", None
        )
    await submitter.close()

    assert result is True
    # Replacement, not addition: the count is unchanged
    assert len(db.sites) == 4

    replaced = db.sites_dict["InvalidActive"]
    assert replaced.name == "InvalidActive"
    assert replaced.url_main == "https://play.google.com"
    assert replaced.tags == ['global', 'us']
    assert replaced.presense_strs != []
    assert replaced.absence_strs != []
    assert replaced.username_claimed == "KONAMI"
    assert replaced.check_type == "message"
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_adds_site_negative(settings):
    """Submit dialog failure path: a URL the submitter cannot turn into a
    valid site entry must return False and leave nothing in the database."""
    database = MaigretDatabase()
    log = logging.getLogger("test_logger")
    log.setLevel(logging.INFO)
    arg_fields = {
        'proxy': None,
        'cookie_file': None,
        'verbose': False,
        'db_file': 'test_db.json',
        'db': 'test_db.json',
    }
    cli_args = type('Args', (object,), arg_fields)()
    submitter = Submitter(database, settings, log, cli_args)

    # Scripted answers for the interactive dialog prompts
    scripted_answers = [
        'sokrat',  # Manually input username
        'y',  # Save the site in the Maigret DB
    ]
    with patch('builtins.input', side_effect=scripted_answers):
        result = await submitter.dialog("https://icq.im/sokrat", None)
    await submitter.close()

    assert result is False