diff --git a/maigret/resources/data.json b/maigret/resources/data.json
index 1814779..78ee883 100644
--- a/maigret/resources/data.json
+++ b/maigret/resources/data.json
@@ -31025,7 +31025,7 @@
"qa-part-form-profile"
]
},
- ".com": {
+ "{username}.com": {
"protocol": "dns",
"url": "{username}.com",
"urlMain": "{username}.com",
@@ -31033,7 +31033,7 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
- ".pro": {
+ "{username}.pro": {
"protocol": "dns",
"url": "{username}.pro",
"urlMain": "{username}.pro",
@@ -31041,7 +31041,7 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
- ".me": {
+ "{username}.me": {
"protocol": "dns",
"url": "{username}.me",
"urlMain": "{username}.me",
@@ -31049,7 +31049,7 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
- ".biz": {
+ "{username}.biz": {
"protocol": "dns",
"url": "{username}.biz",
"urlMain": "{username}.biz",
@@ -31057,7 +31057,7 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
- ".email": {
+ "{username}.email": {
"protocol": "dns",
"url": "{username}.email",
"urlMain": "{username}.email",
@@ -31065,7 +31065,7 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
- ".guru": {
+ "{username}.guru": {
"protocol": "dns",
"url": "{username}.guru",
"urlMain": "{username}.guru",
@@ -31073,7 +31073,7 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
- ".ddns.net": {
+ "{username}.ddns.net": {
"protocol": "dns",
"url": "{username}.ddns.net",
"urlMain": "{username}.ddns.net",
@@ -35201,6 +35201,27 @@
"urlMain": "https://massagerepublic.com",
"usernameClaimed": "lily88",
"usernameUnclaimed": "xzhsxfyfzi"
+ },
+ "mynickname.com": {
+ "checkType": "message",
+ "absenceStrs": [
+ "
Error 404: Page not found
",
+ "Nickname , certificate for username ",
+ "btn green",
+ "mailto:info@mynickname.com",
+ ">Register nickname"
+ ],
+ "presenseStrs": [
+ " title=",
+ "bold",
+ "title-line",
+ "codehtml",
+ "User offline"
+ ],
+ "url": "https://mynickname.com/{username}",
+ "urlMain": "https://mynickname.com",
+ "usernameClaimed": "godbrithil",
+ "usernameUnclaimed": "fqiakbtdhu"
}
},
"engines": {
diff --git a/maigret/sites.py b/maigret/sites.py
index bc71f84..267928f 100644
--- a/maigret/sites.py
+++ b/maigret/sites.py
@@ -80,6 +80,36 @@ class MaigretSite:
def __str__(self):
return f"{self.name} ({self.url_main})"
+    def __is_equal_by_url_or_name(self, url_or_name_str: str) -> bool:
+        """
+        Check whether a string refers to this site by name or by URL.
+
+        Matching is case-insensitive. The string matches when it equals the
+        site name, when it and `url_main` contain one another, or when it is
+        a substring of the `url` template.
+
+        :param url_or_name_str: site name or (partial) site URL to look for
+        :return: True if the string is considered to refer to this site
+        """
+        needle = url_or_name_str.lower()
+        if not needle:
+            # An empty string is a substring of every URL, so without this
+            # guard it would "match" every site.
+            return False
+
+        if needle == self.name.lower():
+            return True
+
+        url_main = self.url_main.lower()
+        # Two-way containment also covers the exact url_main match.
+        if url_main and (url_main in needle or needle in url_main):
+            return True
+
+        # Return a real bool; the former and/or chain could yield '' here.
+        return needle in self.url.lower()
+
+    def __eq__(self, other):
+        """
+        Compare this site with another site or with a name/URL string.
+
+        Two `MaigretSite` objects are equal when all of the descriptive
+        attributes listed below match; internal state such as
+        `request_future` is ignored. A string is equal to the site when it
+        matches by name or URL (see `__is_equal_by_url_or_name`).
+
+        NOTE: Python sets `__hash__` to None on a class that defines `__eq__`
+        without `__hash__`, so instances cannot be put in sets or used as
+        dict keys unless the class defines `__hash__` as well.
+        """
+        if isinstance(other, MaigretSite):
+            # Compare only relevant attributes, not internal state like request_future
+            attrs_to_compare = (
+                'name', 'url_main', 'url_subpath', 'type', 'headers',
+                'errors', 'activation', 'regex_check', 'url_probe',
+                'check_type', 'request_head_only', 'get_params',
+                'presense_strs', 'absence_strs', 'stats', 'engine',
+                'engine_data', 'alexa_rank', 'source', 'protocol',
+            )
+            return all(
+                getattr(self, attr) == getattr(other, attr)
+                for attr in attrs_to_compare
+            )
+
+        if isinstance(other, str):
+            # Compare only by name (exactly) or url_main (partial similarity)
+            return self.__is_equal_by_url_or_name(other)
+
+        # Unsupported operand type: let Python try the reflected comparison;
+        # `==` still evaluates to False when nothing else handles it.
+        return NotImplemented
+
+
def update_detectors(self):
if "url" in self.__dict__:
url = self.url
@@ -101,6 +131,10 @@ class MaigretSite:
return None
def extract_id_from_url(self, url: str) -> Optional[Tuple[str, str]]:
+ """
+ Extracts username from url.
+ It's outdated, detects only a format of https://example.com/{username}
+ """
if not self.url_regexp:
return None
@@ -223,6 +257,16 @@ class MaigretDatabase:
def sites_dict(self):
return {site.name: site for site in self._sites}
+    def has_site(self, site: "MaigretSite | str") -> bool:
+        """
+        Tell whether the database already contains the given site.
+
+        :param site: a `MaigretSite` to compare against the stored sites, or
+            a string with a site name / URL (the comparison itself is
+            delegated to `MaigretSite.__eq__`)
+        :return: True if any stored site compares equal to `site`
+        """
+        # No debug output here: this is a pure lookup.
+        return any(site == known for known in self._sites)
+
+ def __contains__(self, site):
+ """Support `site in db` by delegating to `has_site`."""
+ return self.has_site(site)
+
def ranked_sites_dict(
self,
reverse=False,
diff --git a/maigret/submit.py b/maigret/submit.py
index e623ed7..f980082 100644
--- a/maigret/submit.py
+++ b/maigret/submit.py
@@ -154,6 +154,11 @@ class Submitter:
self.logger.info(f"Site {site.name} checking is finished")
+ # remove service tag "unchecked"
+ if "unchecked" in site.tags:
+ site.tags.remove("unchecked")
+ changes["tags"] = site.tags
+
return changes
def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog):
diff --git a/sites.md b/sites.md
index 7c6c26a..3481cb1 100644
--- a/sites.md
+++ b/sites.md
@@ -1,5 +1,5 @@
-## List of supported sites (search methods): total 3125
+## List of supported sites (search methods): total 3126
Rank data fetched from Alexa by domains.
@@ -2864,13 +2864,13 @@ Rank data fetched from Alexa by domains.
1.  [ovnl.in (https://ovnl.in)](https://ovnl.in)*: top 100M, forum*, search is disabled
1.  [wls.social (https://wls.social)](https://wls.social)*: top 100M, blog*, search is disabled
1.  [HiddenAnswers (http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion)](http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion)*: top 100M, q&a, tor*
-1.  [.com ({username}.com)]({username}.com)*: top 100M*
-1.  [.pro ({username}.pro)]({username}.pro)*: top 100M*
-1.  [.me ({username}.me)]({username}.me)*: top 100M*
-1.  [.biz ({username}.biz)]({username}.biz)*: top 100M*
-1.  [.email ({username}.email)]({username}.email)*: top 100M*
-1.  [.guru ({username}.guru)]({username}.guru)*: top 100M*
-1.  [.ddns.net ({username}.ddns.net)]({username}.ddns.net)*: top 100M*
+1.  [{username}.com ({username}.com)]({username}.com)*: top 100M*
+1.  [{username}.pro ({username}.pro)]({username}.pro)*: top 100M*
+1.  [{username}.me ({username}.me)]({username}.me)*: top 100M*
+1.  [{username}.biz ({username}.biz)]({username}.biz)*: top 100M*
+1.  [{username}.email ({username}.email)]({username}.email)*: top 100M*
+1.  [{username}.guru ({username}.guru)]({username}.guru)*: top 100M*
+1.  [{username}.ddns.net ({username}.ddns.net)]({username}.ddns.net)*: top 100M*
1.  [forum-history.ru (http://forum-history.ru)](http://forum-history.ru)*: top 100M*
1.  [forum.alconar.ru (https://forum.alconar.ru)](https://forum.alconar.ru)*: top 100M*
1.  [krskforum.com (https://krskforum.com)](https://krskforum.com)*: top 100M*
@@ -3117,6 +3117,7 @@ Rank data fetched from Alexa by domains.
1.  [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M*
1.  [www.polywork.com (https://www.polywork.com)](https://www.polywork.com)*: top 100M*
1.  [oshwlab.com (https://oshwlab.com)](https://oshwlab.com)*: top 100M*
+1.  [www.xshaker.net (https://www.xshaker.net)](https://www.xshaker.net)*: top 100M*
1.  [chaturbator.su (https://chaturbator.su)](https://chaturbator.su)*: top 100M*
1.  [imgflip.com (https://imgflip.com)](https://imgflip.com)*: top 100M*
1.  [www.flickr.com (https://www.flickr.com)](https://www.flickr.com)*: top 100M*
@@ -3127,21 +3128,22 @@ Rank data fetched from Alexa by domains.
1.  [archive.transformativeworks.org (https://archive.transformativeworks.org)](https://archive.transformativeworks.org)*: top 100M*
1.  [www.tnaflix.com (https://www.tnaflix.com)](https://www.tnaflix.com)*: top 100M*
1.  [massagerepublic.com (https://massagerepublic.com)](https://massagerepublic.com)*: top 100M*
+1.  [mynickname.com (https://mynickname.com)](https://mynickname.com)*: top 100M, unchecked*
-The list was updated at (2024-11-25 17:22:43.959448+00:00 UTC)
+The list was updated at (2024-11-26 10:27:01.383232+00:00 UTC)
## Statistics
-Enabled/total sites: 2693/3125 = 86.18%
+Enabled/total sites: 2694/3126 = 86.18%
-Incomplete message checks: 405/2693 = 15.04% (false positive risks)
+Incomplete message checks: 405/2694 = 15.03% (false positive risks)
-Status code checks: 720/2693 = 26.74% (false positive risks)
+Status code checks: 720/2694 = 26.73% (false positive risks)
-False positive risk (total): 41.78%
+False positive risk (total): 41.76%
Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
-- (301) `/{username}`
+- (302) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (160) `/user/{username}`
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
diff --git a/tests/test_sites.py b/tests/test_sites.py
index f0a4092..7b386cf 100644
--- a/tests/test_sites.py
+++ b/tests/test_sites.py
@@ -202,3 +202,20 @@ def test_get_url_template():
},
)
assert site.get_url_template() == "SUBDOMAIN"
+
+
+def test_has_site_url_or_name(default_db):
+    """has_site() finds a site by name, by URL and by partial URL."""
+    # by the same url or partial match
+    assert default_db.has_site("https://aback.com.ua/user/")
+    assert default_db.has_site("https://aback.com.ua")
+
+    # acceptable partial match
+    assert default_db.has_site("https://aback.com.ua/use")
+    assert default_db.has_site("https://aback.com")
+
+    # by name
+    assert default_db.has_site("Aback")
+
+    # false
+    assert not default_db.has_site("https://aeifgoai3h4g8a3u4g5")
+    assert not default_db.has_site("aeifgoai3h4g8a3u4g5")