mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Sites checks fixes (#1896)
Fixed incorrect site names, added method to compare sites
This commit is contained in:
@@ -31025,7 +31025,7 @@
|
||||
"qa-part-form-profile"
|
||||
]
|
||||
},
|
||||
".com": {
|
||||
"{username}.com": {
|
||||
"protocol": "dns",
|
||||
"url": "{username}.com",
|
||||
"urlMain": "{username}.com",
|
||||
@@ -31033,7 +31033,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "status_code"
|
||||
},
|
||||
".pro": {
|
||||
"{username}.pro": {
|
||||
"protocol": "dns",
|
||||
"url": "{username}.pro",
|
||||
"urlMain": "{username}.pro",
|
||||
@@ -31041,7 +31041,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "status_code"
|
||||
},
|
||||
".me": {
|
||||
"{username}.me": {
|
||||
"protocol": "dns",
|
||||
"url": "{username}.me",
|
||||
"urlMain": "{username}.me",
|
||||
@@ -31049,7 +31049,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "status_code"
|
||||
},
|
||||
".biz": {
|
||||
"{username}.biz": {
|
||||
"protocol": "dns",
|
||||
"url": "{username}.biz",
|
||||
"urlMain": "{username}.biz",
|
||||
@@ -31057,7 +31057,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "status_code"
|
||||
},
|
||||
".email": {
|
||||
"{username}.email": {
|
||||
"protocol": "dns",
|
||||
"url": "{username}.email",
|
||||
"urlMain": "{username}.email",
|
||||
@@ -31065,7 +31065,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "status_code"
|
||||
},
|
||||
".guru": {
|
||||
"{username}.guru": {
|
||||
"protocol": "dns",
|
||||
"url": "{username}.guru",
|
||||
"urlMain": "{username}.guru",
|
||||
@@ -31073,7 +31073,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"checkType": "status_code"
|
||||
},
|
||||
".ddns.net": {
|
||||
"{username}.ddns.net": {
|
||||
"protocol": "dns",
|
||||
"url": "{username}.ddns.net",
|
||||
"urlMain": "{username}.ddns.net",
|
||||
@@ -35201,6 +35201,27 @@
|
||||
"urlMain": "https://massagerepublic.com",
|
||||
"usernameClaimed": "lily88",
|
||||
"usernameUnclaimed": "xzhsxfyfzi"
|
||||
},
|
||||
"mynickname.com": {
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"<h1>Error 404: Page not found</h1>",
|
||||
"Nickname , certificate for username ",
|
||||
"btn green",
|
||||
"mailto:info@mynickname.com",
|
||||
">Register nickname</span></a></p>"
|
||||
],
|
||||
"presenseStrs": [
|
||||
" title=",
|
||||
"bold",
|
||||
"title-line",
|
||||
"codehtml",
|
||||
"User offline"
|
||||
],
|
||||
"url": "https://mynickname.com/{username}",
|
||||
"urlMain": "https://mynickname.com",
|
||||
"usernameClaimed": "godbrithil",
|
||||
"usernameUnclaimed": "fqiakbtdhu"
|
||||
}
|
||||
},
|
||||
"engines": {
|
||||
|
||||
@@ -80,6 +80,36 @@ class MaigretSite:
|
||||
def __str__(self):
    """Render the site as its name followed by its main URL in parentheses."""
    text = f"{self.name} ({self.url_main})"
    return text
|
||||
|
||||
def __is_equal_by_url_or_name(self, url_or_name_str: str):
    """Tell whether a free-form string refers to this site.

    The comparison is case-insensitive. A match is reported when the string

    * equals the site name, or
    * contains ``url_main`` or is contained in it, or
    * occurs inside ``url``.

    An empty string never matches. ``"" in text`` is true for any text, so
    without the guard every site with a non-empty ``url_main`` or ``url``
    would compare equal to ``""``.

    :param url_or_name_str: site name or (part of) a URL to look for.
    :return: ``True`` on a match, ``False`` otherwise.
    """
    needle = url_or_name_str.lower()
    if not needle:
        return False

    if needle == self.name.lower():
        return True

    main_url = self.url_main.lower()
    # Containment in either direction also covers plain equality, so no
    # separate ``==`` test is needed. An empty ``url_main`` is skipped
    # because it would be "contained" in every query.
    if main_url and (main_url in needle or needle in main_url):
        return True

    profile_url = self.url.lower()
    return bool(profile_url) and needle in profile_url
|
||||
|
||||
def __eq__(self, other):
    """Compare this site with another site or with a name/URL string.

    * Another ``MaigretSite``: equal when every attribute listed in
      ``attrs_to_compare`` is equal on both objects.
    * A ``str``: delegated to ``__is_equal_by_url_or_name``.
    * Any other type: ``NotImplemented``, so Python can try the reflected
      comparison and otherwise fall back to identity, which still makes
      ``site == <unrelated object>`` evaluate to ``False``.

    NOTE(review): a class that defines ``__eq__`` without ``__hash__``
    becomes unhashable - confirm no caller keeps sites in sets or uses
    them as dict keys.
    """
    if isinstance(other, MaigretSite):
        # Compare only relevant attributes, not internal state like request_future
        attrs_to_compare = (
            'name', 'url_main', 'url_subpath', 'type', 'headers',
            'errors', 'activation', 'regex_check', 'url_probe',
            'check_type', 'request_head_only', 'get_params',
            'presense_strs', 'absence_strs', 'stats', 'engine',
            'engine_data', 'alexa_rank', 'source', 'protocol',
        )
        return all(
            getattr(self, attr) == getattr(other, attr)
            for attr in attrs_to_compare
        )

    if isinstance(other, str):
        # Compare by name (exactly) or by URL (partial similarity)
        return self.__is_equal_by_url_or_name(other)

    return NotImplemented
|
||||
|
||||
|
||||
def update_detectors(self):
|
||||
if "url" in self.__dict__:
|
||||
url = self.url
|
||||
@@ -101,6 +131,10 @@ class MaigretSite:
|
||||
return None
|
||||
|
||||
def extract_id_from_url(self, url: str) -> Optional[Tuple[str, str]]:
|
||||
"""
|
||||
Extracts username from url.
|
||||
It's outdated, detects only a format of https://example.com/{username}
|
||||
"""
|
||||
if not self.url_regexp:
|
||||
return None
|
||||
|
||||
@@ -223,6 +257,16 @@ class MaigretDatabase:
|
||||
def sites_dict(self):
    """Build a mapping from site name to site object for all of ``self._sites``.

    When several sites share a name, the one that appears last wins.
    """
    by_name = {}
    for entry in self._sites:
        by_name[entry.name] = entry
    return by_name
|
||||
|
||||
def has_site(self, site: "MaigretSite"):
    """Report whether ``site`` equals any entry of ``self._sites``.

    Matching is delegated to ``==`` with ``site`` as the left operand, so
    the result follows whatever equality the operands define.

    :param site: value to look for among the stored sites.
    :return: ``True`` if at least one stored site compares equal,
        ``False`` otherwise (including when no sites are stored).
    """
    # No output here: this also backs the ``in`` operator, and a lookup
    # must not write to stdout.
    return any(site == known for known in self._sites)
|
||||
|
||||
def __contains__(self, site):
    """Back the ``in`` operator by forwarding the lookup to ``has_site``."""
    found = self.has_site(site)
    return found
|
||||
|
||||
def ranked_sites_dict(
|
||||
self,
|
||||
reverse=False,
|
||||
|
||||
@@ -154,6 +154,11 @@ class Submitter:
|
||||
|
||||
self.logger.info(f"Site {site.name} checking is finished")
|
||||
|
||||
# remove service tag "unchecked"
|
||||
if "unchecked" in site.tags:
|
||||
site.tags.remove("unchecked")
|
||||
changes["tags"] = site.tags
|
||||
|
||||
return changes
|
||||
|
||||
def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog):
|
||||
|
||||
Reference in New Issue
Block a user