Refactoring of submit module, some fixes

Soxoj
2021-06-13 00:43:28 +03:00
parent eb721dc7e3
commit 9b0acc092a
11 changed files with 534 additions and 438 deletions
+1 -1
@@ -25,7 +25,7 @@ format:
 pull:
 	git stash
 	git checkout main
-	git pull origin head
+	git pull origin main
 	git stash pop

 clean:
+10 -4
@@ -36,9 +36,10 @@ from .report import (
     sort_report_by_data_points,
 )
 from .sites import MaigretDatabase
-from .submit import submit_dialog
+from .submit import Submitter
 from .types import QueryResultWrapper
 from .utils import get_dict_ascii_tree
+from .settings import Settings


 def notify_about_errors(search_results: QueryResultWrapper, query_notify):
@@ -496,6 +497,12 @@ async def main():
     if args.tags:
         args.tags = list(set(str(args.tags).split(',')))

+    settings = Settings(
+        os.path.join(
+            os.path.dirname(os.path.realpath(__file__)), "resources/settings.json"
+        )
+    )
+
     if args.db_file is None:
         args.db_file = os.path.join(
             os.path.dirname(os.path.realpath(__file__)), "resources/data.json"
@@ -526,9 +533,8 @@ async def main():
         site_data = get_top_sites_for_id(args.id_type)

     if args.new_site_to_submit:
-        is_submitted = await submit_dialog(
-            db, args.new_site_to_submit, args.cookie_file, logger
-        )
+        submitter = Submitter(db=db, logger=logger, settings=settings)
+        is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
         if is_submitted:
             db.save_to_file(args.db_file)
+64 -5
@@ -13036,7 +13036,7 @@
                 "us"
             ],
             "headers": {
-                "authorization": "Bearer BQCypIuUtz7zDFov8xN86mj1BelLf7Apf9WBaC5yYfNkmGe4r7Hz4Awp6dqPuCAP9K9F5yYtjbyZX_vlr4I"
+                "authorization": "Bearer BQAkHoH1XLhjIl6oh6r9YzH3kHC1OZg3UXgLiz39FzqRFh_xQrFaVrZcU-esM-t87B6Hqdc4L1HBgukKnWE"
             },
             "errors": {
                 "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -13990,7 +13990,8 @@
                 "us"
             ],
             "errors": {
-                "Website unavailable": "Site error"
+                "Website unavailable": "Site error",
+                "is currently offline": "Site error"
             },
             "checkType": "message",
             "absenceStrs": [
@@ -14462,7 +14463,7 @@
                 "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
                 "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
                 "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
-                "x-guest-token": "1400174453577900043"
+                "x-guest-token": "1403829602053771266"
             },
             "errors": {
                 "Bad guest token": "x-guest-token update required"
@@ -14869,7 +14870,7 @@
                 "video"
             ],
             "headers": {
-                "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjI2NjcxMjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.V4VVbLzNwPU21rNP5moSxrPcPw--C7_Qz9VHgcJc1CA"
+                "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjM1MzQ5NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.5T8_p_q9zXOHXI2FT_XtMhsZUJMtPgCIaqwVF2u4aZI"
             },
             "activation": {
                 "url": "https://vimeo.com/_rv/viewer",
@@ -28457,5 +28458,63 @@
                 ]
             }
         }
-    }
+    },
+    "tags": [
+        "gaming",
+        "coding",
+        "photo",
+        "music",
+        "blog",
+        "finance",
+        "freelance",
+        "dating",
+        "tech",
+        "forum",
+        "porn",
+        "erotic",
+        "webcam",
+        "video",
+        "movies",
+        "hacking",
+        "art",
+        "discussion",
+        "sharing",
+        "writing",
+        "wiki",
+        "business",
+        "shopping",
+        "sport",
+        "books",
+        "news",
+        "documents",
+        "travel",
+        "maps",
+        "hobby",
+        "apps",
+        "classified",
+        "career",
+        "geosocial",
+        "streaming",
+        "education",
+        "networking",
+        "torrent",
+        "science",
+        "medicine",
+        "reading",
+        "stock",
+        "messaging",
+        "trading",
+        "links",
+        "fashion",
+        "tasks",
+        "military",
+        "auto",
+        "gambling",
+        "cybercriminal",
+        "review",
+        "bookmarks",
+        "design",
+        "tor",
+        "i2p"
+    ]
 }
+17
@@ -0,0 +1,17 @@
+{
+    "presence_strings": [
+        "username",
+        "not found",
+        "пользователь",
+        "profile",
+        "lastname",
+        "firstname",
+        "biography",
+        "birthday",
+        "репутация",
+        "информация",
+        "e-mail"
+    ],
+    "supposed_usernames": [
+        "alex", "god", "admin", "red", "blue", "john"]
+}
+29
@@ -0,0 +1,29 @@
+import json
+
+
+class Settings:
+    presence_strings: list
+    supposed_usernames: list
+
+    def __init__(self, filename):
+        data = {}
+
+        try:
+            with open(filename, "r", encoding="utf-8") as file:
+                try:
+                    data = json.load(file)
+                except Exception as error:
+                    raise ValueError(
+                        f"Problem with parsing json contents of "
+                        f"settings file '{filename}': {str(error)}."
+                    )
+        except FileNotFoundError as error:
+            raise FileNotFoundError(
+                f"Problem while attempting to access settings file '{filename}'."
+            ) from error
+
+        self.__dict__.update(data)
+
+    @property
+    def json(self):
+        return self.__dict__
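Since the keys of the JSON land in the instance via self.__dict__.update(data), the class is effectively a typed view over the settings file. A minimal usage sketch (illustrative, not part of this commit; keys mirror resources/settings.json above):

from maigret.settings import Settings

# Load the bundled settings file (path is illustrative).
settings = Settings("resources/settings.json")

print(settings.presence_strings[:2])   # ['username', 'not found']
print(settings.supposed_usernames[0])  # 'alex'
print(settings.json)                   # the full settings dict back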
+11 -66
@@ -9,66 +9,6 @@ import requests

 from .utils import CaseConverter, URLMatcher, is_country_tag

-# TODO: move to data.json
-SUPPORTED_TAGS = [
-    "gaming",
-    "coding",
-    "photo",
-    "music",
-    "blog",
-    "finance",
-    "freelance",
-    "dating",
-    "tech",
-    "forum",
-    "porn",
-    "erotic",
-    "webcam",
-    "video",
-    "movies",
-    "hacking",
-    "art",
-    "discussion",
-    "sharing",
-    "writing",
-    "wiki",
-    "business",
-    "shopping",
-    "sport",
-    "books",
-    "news",
-    "documents",
-    "travel",
-    "maps",
-    "hobby",
-    "apps",
-    "classified",
-    "career",
-    "geosocial",
-    "streaming",
-    "education",
-    "networking",
-    "torrent",
-    "science",
-    "medicine",
-    "reading",
-    "stock",
-    "messaging",
-    "trading",
-    "links",
-    "fashion",
-    "tasks",
-    "military",
-    "auto",
-    "gambling",
-    "cybercriminal",
-    "review",
-    "bookmarks",
-    "design",
-    "tor",
-    "i2p",
-]
-

 class MaigretEngine:
     site: Dict[str, Any] = {}
@@ -204,12 +144,12 @@ class MaigretSite:
         errors.update(self.errors)
         return errors

-    def get_url_type(self) -> str:
+    def get_url_template(self) -> str:
         url = URLMatcher.extract_main_part(self.url)
         if url.startswith("{username}"):
             url = "SUBDOMAIN"
         elif url == "":
-            url = f"{self.url} ({self.engine})"
+            url = f"{self.url} ({self.engine or 'no engine'})"
         else:
             parts = url.split("/")
             url = "/" + "/".join(parts[1:])
@@ -273,8 +213,9 @@ class MaigretSite:

 class MaigretDatabase:
     def __init__(self):
-        self._sites = []
-        self._engines = []
+        self._tags: list = []
+        self._sites: list = []
+        self._engines: list = []

     @property
     def sites(self):
@@ -354,6 +295,7 @@ class MaigretDatabase:
         db_data = {
             "sites": {site.name: site.strip_engine_data().json for site in self._sites},
             "engines": {engine.name: engine.json for engine in self._engines},
+            "tags": self._tags,
         }

         json_data = json.dumps(db_data, indent=4)
@@ -367,6 +309,9 @@ class MaigretDatabase:
         # Add all of site information from the json file to internal site list.
         site_data = json_data.get("sites", {})
         engines_data = json_data.get("engines", {})
+        tags = json_data.get("tags", [])
+
+        self._tags += tags

         for engine_name in engines_data:
             self._engines.append(MaigretEngine(engine_name, engines_data[engine_name]))
@@ -469,7 +414,7 @@ class MaigretDatabase:
             if site.disabled:
                 disabled_count += 1

-            url_type = site.get_url_type()
+            url_type = site.get_url_template()
             urls[url_type] = urls.get(url_type, 0) + 1

             if not site.tags:
@@ -488,7 +433,7 @@ class MaigretDatabase:
         output += "Top tags:\n"
         for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
             mark = ""
-            if tag not in SUPPORTED_TAGS:
+            if tag not in self._tags:
                 mark = " (non-standard)"
             output += f"{count}\t{tag}{mark}\n"
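With the SUPPORTED_TAGS constant gone, the canonical tag list travels inside data.json itself. A minimal round-trip sketch, assuming the existing load_from_file/save_to_file helpers and the bundled database path:

from maigret.sites import MaigretDatabase

# Loading now also picks up the top-level "tags" key (see the load hunk above)
# and appends it to db._tags.
db = MaigretDatabase().load_from_file("resources/data.json")
print("gaming" in db._tags)  # True for the bundled database

# Saving writes the tags back alongside "sites" and "engines".
db.save_to_file("resources/data.json")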
+93 -101
@@ -1,5 +1,4 @@
 import asyncio
-import difflib
 import re
 from typing import List
 import xml.etree.ElementTree as ET
@@ -8,47 +7,29 @@ import requests
 from .activation import import_aiohttp_cookies
 from .checking import maigret
 from .result import QueryStatus
+from .settings import Settings
 from .sites import MaigretDatabase, MaigretSite, MaigretEngine
-from .utils import get_random_user_agent
+from .utils import get_random_user_agent, get_match_ratio


-DESIRED_STRINGS = [
-    "username",
-    "not found",
-    "пользователь",
-    "profile",
-    "lastname",
-    "firstname",
-    "biography",
-    "birthday",
-    "репутация",
-    "информация",
-    "e-mail",
-]
-
-SUPPOSED_USERNAMES = ["alex", "god", "admin", "red", "blue", "john"]
-
-HEADERS = {
-    "User-Agent": get_random_user_agent(),
-}
-
-SEPARATORS = "\"'"
-RATIO = 0.6
-TOP_FEATURES = 5
-URL_RE = re.compile(r"https?://(www\.)?")
+class Submitter:
+    HEADERS = {
+        "User-Agent": get_random_user_agent(),
+    }

+    SEPARATORS = "\"'"
+    RATIO = 0.6
+    TOP_FEATURES = 5
+    URL_RE = re.compile(r"https?://(www\.)?")

-def get_match_ratio(x):
-    return round(
-        max(
-            [difflib.SequenceMatcher(a=x.lower(), b=y).ratio() for y in DESIRED_STRINGS]
-        ),
-        2,
-    )
+    def __init__(self, db: MaigretDatabase, settings: Settings, logger):
+        self.settings = settings
+        self.db = db
+        self.logger = logger

-
-def get_alexa_rank(site_url_main):
+    @staticmethod
+    def get_alexa_rank(site_url_main):
         url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
         xml_data = requests.get(url).text
         root = ET.fromstring(xml_data)
@@ -61,12 +42,11 @@ def get_alexa_rank(site_url_main):
         return alexa_rank

-
-def extract_mainpage_url(url):
-    return "/".join(url.split("/", 3)[:3])
+    @staticmethod
+    def extract_mainpage_url(url):
+        return "/".join(url.split("/", 3)[:3])

-
-async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
+    async def site_self_check(self, site, semaphore, silent=False):
         changes = {
             "disabled": False,
         }
@@ -76,13 +56,13 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
             (site.username_unclaimed, QueryStatus.AVAILABLE),
         ]

-    logger.info(f"Checking {site.name}...")
+        self.logger.info(f"Checking {site.name}...")

         for username, status in check_data:
             results_dict = await maigret(
                 username=username,
                 site_dict={site.name: site},
-                logger=logger,
+                logger=self.logger,
                 timeout=30,
                 id_type=site.type,
                 forced=True,
@@ -92,7 +72,7 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
             # don't disable entries with other ids types
             # TODO: make normal checking
             if site.name not in results_dict:
-                logger.info(results_dict)
+                self.logger.info(results_dict)
                 changes["disabled"] = True
                 continue
@@ -104,7 +84,7 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
             if site_status == QueryStatus.UNKNOWN:
                 msgs = site.absence_strs
                 etype = site.check_type
-                logger.warning(
+                self.logger.warning(
                     "Error while searching '%s' in %s: %s, %s, check type %s",
                     username,
                     site.name,
@@ -116,22 +96,23 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
                 if status == QueryStatus.CLAIMED:
                     changes["disabled"] = True
             elif status == QueryStatus.CLAIMED:
-                logger.warning(
+                self.logger.warning(
                     f"Not found `{username}` in {site.name}, must be claimed"
                 )
-                logger.info(results_dict[site.name])
+                self.logger.info(results_dict[site.name])
                 changes["disabled"] = True
             else:
-                logger.warning(f"Found `{username}` in {site.name}, must be available")
-                logger.info(results_dict[site.name])
+                self.logger.warning(
+                    f"Found `{username}` in {site.name}, must be available"
+                )
+                self.logger.info(results_dict[site.name])
                 changes["disabled"] = True

-    logger.info(f"Site {site.name} checking is finished")
+        self.logger.info(f"Site {site.name} checking is finished")
         return changes

-def generate_additional_fields_dialog(engine: MaigretEngine, dialog):
+    def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog):
         fields = {}

         if 'urlSubpath' in engine.site.get('url', ''):
             msg = (
@@ -143,19 +124,16 @@ def generate_additional_fields_dialog(engine: MaigretEngine, dialog):
             fields['urlSubpath'] = f'/{subpath}'

         return fields

-async def detect_known_engine(
-    db, url_exists, url_mainpage, logger
-) -> List[MaigretSite]:
+    async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
         try:
             r = requests.get(url_mainpage)
-            logger.debug(r.text)
+            self.logger.debug(r.text)
         except Exception as e:
-            logger.warning(e)
+            self.logger.warning(e)
             print("Some error while checking main page")
             return []

-    for engine in db.engines:
+        for engine in self.db.engines:
             strs_to_check = engine.__dict__.get("presenseStrs")
             if strs_to_check and r and r.text:
                 all_strs_in_response = True
@@ -168,12 +146,14 @@ async def detect_known_engine(
                 print(f"Detected engine {engine_name} for site {url_mainpage}")

-                usernames_to_check = SUPPOSED_USERNAMES
-                supposed_username = extract_username_dialog(url_exists)
+                usernames_to_check = self.settings.supposed_usernames
+                supposed_username = self.extract_username_dialog(url_exists)
                 if supposed_username:
                     usernames_to_check = [supposed_username] + usernames_to_check

-                add_fields = generate_additional_fields_dialog(engine, url_exists)
+                add_fields = self.generate_additional_fields_dialog(
+                    engine, url_exists
+                )

                 for u in usernames_to_check:
                     site_data = {
@@ -184,18 +164,21 @@ async def detect_known_engine(
                         "usernameUnclaimed": "noonewouldeverusethis7",
                         **add_fields,
                     }

-                    logger.info(site_data)
+                    self.logger.info(site_data)

-                    maigret_site = MaigretSite(url_mainpage.split("/")[-1], site_data)
-                    maigret_site.update_from_engine(db.engines_dict[engine_name])
+                    maigret_site = MaigretSite(
+                        url_mainpage.split("/")[-1], site_data
+                    )
+                    maigret_site.update_from_engine(
+                        self.db.engines_dict[engine_name]
+                    )
                     sites.append(maigret_site)

                 return sites

         return []

-def extract_username_dialog(url):
+    def extract_username_dialog(self, url):
         url_parts = url.rstrip("/").split("/")
         supposed_username = url_parts[-1].strip('@')
         entered_username = input(
@@ -203,10 +186,9 @@ def extract_username_dialog(url):
         )
         return entered_username if entered_username else supposed_username

-async def check_features_manually(
-    db, url_exists, url_mainpage, cookie_file, logger, redirects=False
-):
+    async def check_features_manually(
+        self, url_exists, url_mainpage, cookie_file, redirects=False
+    ):
         custom_headers = {}
         while True:
             header_key = input(
@@ -217,41 +199,44 @@ async def check_features_manually(
             header_value = input('Header value: ')
             custom_headers[header_key.strip()] = header_value.strip()

-    supposed_username = extract_username_dialog(url_exists)
+        supposed_username = self.extract_username_dialog(url_exists)
         non_exist_username = "noonewouldeverusethis7"

         url_user = url_exists.replace(supposed_username, "{username}")
         url_not_exists = url_exists.replace(supposed_username, non_exist_username)

-    headers = dict(HEADERS)
+        headers = dict(self.HEADERS)
         headers.update(custom_headers)

         # cookies
         cookie_dict = None
         if cookie_file:
-            logger.info(f'Use {cookie_file} for cookies')
+            self.logger.info(f'Use {cookie_file} for cookies')
             cookie_jar = import_aiohttp_cookies(cookie_file)
             cookie_dict = {c.key: c.value for c in cookie_jar}

         exists_resp = requests.get(
             url_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
         )
-    logger.debug(url_exists)
-    logger.debug(exists_resp.status_code)
-    logger.debug(exists_resp.text)
+        self.logger.debug(url_exists)
+        self.logger.debug(exists_resp.status_code)
+        self.logger.debug(exists_resp.text)

-    non_exists_resp = requests.get(
-        url_not_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
-    )
+        non_exists_resp = requests.get(
+            url_not_exists,
+            cookies=cookie_dict,
+            headers=headers,
+            allow_redirects=redirects,
+        )
-    logger.debug(url_not_exists)
-    logger.debug(non_exists_resp.status_code)
-    logger.debug(non_exists_resp.text)
+        self.logger.debug(url_not_exists)
+        self.logger.debug(non_exists_resp.status_code)
+        self.logger.debug(non_exists_resp.text)

         a = exists_resp.text
         b = non_exists_resp.text

-    tokens_a = set(re.split(f'[{SEPARATORS}]', a))
-    tokens_b = set(re.split(f'[{SEPARATORS}]', b))
+        tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
+        tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))

         a_minus_b = tokens_a.difference(tokens_b)
         b_minus_a = tokens_b.difference(tokens_a)
@@ -260,11 +245,15 @@ async def check_features_manually(
             print("The pages for existing and non-existing account are the same!")

         top_features_count = int(
-        input(f"Specify count of features to extract [default {TOP_FEATURES}]: ")
-        or TOP_FEATURES
+            input(
+                f"Specify count of features to extract [default {self.TOP_FEATURES}]: "
+            )
+            or self.TOP_FEATURES
         )

-    presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[
+        match_fun = get_match_ratio(self.settings.presence_strings)
+
+        presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[
             :top_features_count
         ]
@@ -274,10 +263,12 @@ async def check_features_manually(
         if features:
             presence_list = list(map(str.strip, features.split(",")))

-    absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
+        absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
             :top_features_count
         ]

-    print("Detected text features of non-existing account: " + ", ".join(absence_list))
+        print(
+            "Detected text features of non-existing account: " + ", ".join(absence_list)
+        )

         features = input("If features was not detected correctly, write it manually: ")
         if features:
@@ -293,20 +284,21 @@ async def check_features_manually(
             "checkType": "message",
         }

-    if headers != HEADERS:
+        if headers != self.HEADERS:
             site_data['headers'] = headers

         site = MaigretSite(url_mainpage.split("/")[-1], site_data)
         return site

-async def submit_dialog(db, url_exists, cookie_file, logger):
-    domain_raw = URL_RE.sub("", url_exists).strip().strip("/")
+    async def dialog(self, url_exists, cookie_file):
+        domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
         domain_raw = domain_raw.split("/")[0]
-    logger.info('Domain is %s', domain_raw)
+        self.logger.info('Domain is %s', domain_raw)

         # check for existence
-    matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
+        matched_sites = list(
+            filter(lambda x: domain_raw in x.url_main + x.url, self.db.sites)
+        )

         if matched_sites:
             print(
@@ -326,24 +318,24 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
         if input("Do you want to continue? [yN] ").lower() in "n":
             return False

-    url_mainpage = extract_mainpage_url(url_exists)
+        url_mainpage = self.extract_mainpage_url(url_exists)

         print('Detecting site engine, please wait...')
         sites = []
         try:
-        sites = await detect_known_engine(db, url_exists, url_mainpage, logger)
+            sites = await self.detect_known_engine(url_exists, url_mainpage)
         except KeyboardInterrupt:
             print('Engine detect process is interrupted.')

         if not sites:
             print("Unable to detect site engine, lets generate checking features")
             sites = [
-            await check_features_manually(
-                db, url_exists, url_mainpage, cookie_file, logger
-            )
+                await self.check_features_manually(
+                    url_exists, url_mainpage, cookie_file
+                )
             ]

-    logger.debug(sites[0].__dict__)
+        self.logger.debug(sites[0].__dict__)

         sem = asyncio.Semaphore(1)
@@ -352,7 +344,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
         chosen_site = None
         for s in sites:
             chosen_site = s
-        result = await site_self_check(s, logger, sem, db)
+            result = await self.site_self_check(s, sem)
             if not result["disabled"]:
                 found = True
                 break
@@ -377,13 +369,13 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
         chosen_site.name = input("Change site name if you want: ") or chosen_site.name
         chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))

-    rank = get_alexa_rank(chosen_site.url_main)
+        rank = Submitter.get_alexa_rank(chosen_site.url_main)
         if rank:
             print(f'New alexa rank: {rank}')
             chosen_site.alexa_rank = rank

-    logger.debug(chosen_site.json)
+        self.logger.debug(chosen_site.json)
         site_data = chosen_site.strip_engine_data()
-    logger.debug(site_data.json)
+        self.logger.debug(site_data.json)

-    db.update_site(site_data)
+        self.db.update_site(site_data)
         return True
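Taken together, the module-level dialog functions collapse into one stateful object: the database, settings, and logger are bound once in __init__, and dialog() only needs the URL and cookie file. A minimal driving sketch (illustrative, assuming the bundled resource paths; dialog() is interactive and prompts on stdin):

import asyncio
import logging

from maigret.settings import Settings
from maigret.sites import MaigretDatabase
from maigret.submit import Submitter

async def submit_new_site(url):
    db = MaigretDatabase().load_from_file("resources/data.json")
    settings = Settings("resources/settings.json")
    submitter = Submitter(db=db, settings=settings, logger=logging.getLogger("maigret"))
    # dialog() walks engine detection / manual feature extraction and self-check.
    if await submitter.dialog(url, None):  # second argument is an optional cookie file
        db.save_to_file("resources/data.json")

asyncio.run(submit_new_site("https://example.com/user/alice"))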
+16
@@ -1,4 +1,5 @@
 import ast
+import difflib
 import re
 import random
 from typing import Any
@@ -95,3 +96,18 @@ def get_dict_ascii_tree(items, prepend="", new_line=True):

 def get_random_user_agent():
     return random.choice(DEFAULT_USER_AGENTS)
+
+
+def get_match_ratio(base_strs: list):
+    def get_match_inner(s: str):
+        return round(
+            max(
+                [
+                    difflib.SequenceMatcher(a=s.lower(), b=s2.lower()).ratio()
+                    for s2 in base_strs
+                ]
+            ),
+            2,
+        )
+
+    return get_match_inner
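get_match_ratio is now a closure factory: it binds the base strings once and returns a scorer, which submit.py reuses as a sort key over page tokens. An illustrative sketch (values are examples, not from this commit):

from maigret.utils import get_match_ratio

match = get_match_ratio(["username", "profile", "e-mail"])
print(match("Username"))  # 1.0: comparison lowercases both sides

# Rank candidate tokens by similarity to profile-related strings,
# the same way check_features_manually sorts a_minus_b / b_minus_a.
tokens = ["footer", "profile-card", "user_name_box"]
print(sorted(tokens, key=match, reverse=True))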
+3 -2
@@ -1,15 +1,16 @@
 """Maigret data test functions"""
 from maigret.utils import is_country_tag
-from maigret.sites import SUPPORTED_TAGS


 def test_tags_validity(default_db):
     unknown_tags = set()
+    tags = default_db._tags
+
     for site in default_db.sites:
         for tag in filter(lambda x: not is_country_tag(x), site.tags):
-            if tag not in SUPPORTED_TAGS:
+            if tag not in tags:
                 unknown_tags.add(tag)

     assert unknown_tags == set()
+24
@@ -1,5 +1,6 @@
 """Maigret Database test functions"""
 from maigret.sites import MaigretDatabase, MaigretSite
+from maigret.utils import URLMatcher

 EXAMPLE_DB = {
     'engines': {
@@ -179,3 +180,26 @@ def test_ranked_sites_dict_id_type():
     assert len(db.ranked_sites_dict()) == 2
     assert len(db.ranked_sites_dict(id_type='username')) == 2
     assert len(db.ranked_sites_dict(id_type='gaia_id')) == 1
+
+
+def test_get_url_template():
+    site = MaigretSite(
+        "test",
+        {
+            "urlMain": "https://ya.ru/",
+            "url": "{urlMain}{urlSubpath}/members/?username={username}",
+        },
+    )
+    assert (
+        site.get_url_template()
+        == "{urlMain}{urlSubpath}/members/?username={username} (no engine)"
+    )
+
+    site = MaigretSite(
+        "test",
+        {
+            "urlMain": "https://ya.ru/",
+            "url": "https://{username}.ya.ru",
+        },
+    )
+    assert site.get_url_template() == "SUBDOMAIN"
+7
@@ -8,6 +8,7 @@ from maigret.utils import (
     enrich_link_str,
     URLMatcher,
     get_dict_ascii_tree,
+    get_match_ratio,
 )
@@ -136,3 +137,9 @@ def test_get_dict_ascii_tree():
 instagram_username: Street.Reality.Photography
 twitter_username: Alexaimephotogr"""
     )
+
+
+def test_get_match_ratio():
+    fun = get_match_ratio(["test", "maigret", "username"])
+
+    assert fun("test") == 1