Refactoring of submit module, some fixes

This commit is contained in:
Soxoj
2021-06-13 00:43:28 +03:00
parent eb721dc7e3
commit 9b0acc092a
11 changed files with 534 additions and 438 deletions
+11 -66
View File
@@ -9,66 +9,6 @@ import requests
from .utils import CaseConverter, URLMatcher, is_country_tag
# TODO: move to data.json
# Tags considered standard. The "Top tags" statistics output marks any tag
# that is not in this list as "(non-standard)". Order is kept as originally
# declared.
SUPPORTED_TAGS = [
    "gaming", "coding", "photo", "music", "blog", "finance", "freelance",
    "dating", "tech", "forum", "porn", "erotic", "webcam", "video",
    "movies", "hacking", "art", "discussion", "sharing", "writing", "wiki",
    "business", "shopping", "sport", "books", "news", "documents", "travel",
    "maps", "hobby", "apps", "classified", "career", "geosocial",
    "streaming", "education", "networking", "torrent", "science",
    "medicine", "reading", "stock", "messaging", "trading", "links",
    "fashion", "tasks", "military", "auto", "gambling", "cybercriminal",
    "review", "bookmarks", "design", "tor", "i2p",
]
class MaigretEngine:
site: Dict[str, Any] = {}
@@ -204,12 +144,12 @@ class MaigretSite:
errors.update(self.errors)
return errors
def get_url_type(self) -> str:
def get_url_template(self) -> str:
url = URLMatcher.extract_main_part(self.url)
if url.startswith("{username}"):
url = "SUBDOMAIN"
elif url == "":
url = f"{self.url} ({self.engine})"
url = f"{self.url} ({self.engine or 'no engine'})"
else:
parts = url.split("/")
url = "/" + "/".join(parts[1:])
@@ -273,8 +213,9 @@ class MaigretSite:
class MaigretDatabase:
def __init__(self):
self._sites = []
self._engines = []
self._tags: list = []
self._sites: list = []
self._engines: list = []
@property
def sites(self):
@@ -354,6 +295,7 @@ class MaigretDatabase:
db_data = {
"sites": {site.name: site.strip_engine_data().json for site in self._sites},
"engines": {engine.name: engine.json for engine in self._engines},
"tags": self._tags,
}
json_data = json.dumps(db_data, indent=4)
@@ -367,6 +309,9 @@ class MaigretDatabase:
# Add all of site information from the json file to internal site list.
site_data = json_data.get("sites", {})
engines_data = json_data.get("engines", {})
tags = json_data.get("tags", [])
self._tags += tags
for engine_name in engines_data:
self._engines.append(MaigretEngine(engine_name, engines_data[engine_name]))
@@ -469,7 +414,7 @@ class MaigretDatabase:
if site.disabled:
disabled_count += 1
url_type = site.get_url_type()
url_type = site.get_url_template()
urls[url_type] = urls.get(url_type, 0) + 1
if not site.tags:
@@ -488,7 +433,7 @@ class MaigretDatabase:
output += "Top tags:\n"
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
mark = ""
if tag not in SUPPORTED_TAGS:
if tag not in self._tags:
mark = " (non-standard)"
output += f"{count}\t{tag}{mark}\n"