Sites fixes (#1917)

* Some sites fixes

* Sites stats updated
This commit is contained in:
Soxoj
2024-12-01 03:19:36 +01:00
committed by GitHub
parent 0c7e3898e8
commit d15e12750b
3 changed files with 44 additions and 25 deletions
+27 -18
View File
@@ -10640,10 +10640,11 @@
"type": "ok_id", "type": "ok_id",
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"profile__menu" "profile__content_header_user"
], ],
"absenceStrs": [ "absenceStrs": [
"mm-profile_not-found_content" "mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
], ],
"alexaRank": 49, "alexaRank": 49,
"urlMain": "https://my.mail.ru/", "urlMain": "https://my.mail.ru/",
@@ -10658,10 +10659,11 @@
"type": "vk_id", "type": "vk_id",
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"profile__menu" "profile__content_header_user"
], ],
"absenceStrs": [ "absenceStrs": [
"mm-profile_not-found_content" "mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
], ],
"alexaRank": 49, "alexaRank": 49,
"urlMain": "https://my.mail.ru/", "urlMain": "https://my.mail.ru/",
@@ -10675,10 +10677,11 @@
], ],
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"profile__menu" "profile__content_header_user"
], ],
"absenceStrs": [ "absenceStrs": [
"mm-profile_not-found_content" "mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
], ],
"alexaRank": 49, "alexaRank": 49,
"urlMain": "https://my.mail.ru/", "urlMain": "https://my.mail.ru/",
@@ -10692,10 +10695,11 @@
], ],
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"profile__menu" "profile__content_header_user"
], ],
"absenceStrs": [ "absenceStrs": [
"mm-profile_not-found_content" "mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
], ],
"alexaRank": 49, "alexaRank": 49,
"urlMain": "https://my.mail.ru/", "urlMain": "https://my.mail.ru/",
@@ -10709,10 +10713,11 @@
], ],
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"profile__menu" "profile__content_header_user"
], ],
"absenceStrs": [ "absenceStrs": [
"mm-profile_not-found_content" "mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
], ],
"alexaRank": 49, "alexaRank": 49,
"urlMain": "https://my.mail.ru/", "urlMain": "https://my.mail.ru/",
@@ -10726,10 +10731,11 @@
], ],
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"profile__menu" "profile__content_header_user"
], ],
"absenceStrs": [ "absenceStrs": [
"mm-profile_not-found_content" "mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
], ],
"alexaRank": 49, "alexaRank": 49,
"urlMain": "https://my.mail.ru/", "urlMain": "https://my.mail.ru/",
@@ -10743,16 +10749,17 @@
], ],
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"profile__menu" "profile__content_header_user"
], ],
"absenceStrs": [ "absenceStrs": [
"mm-profile_not-found_content" "mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
], ],
"alexaRank": 49, "alexaRank": 49,
"urlMain": "https://my.mail.ru/", "urlMain": "https://my.mail.ru/",
"url": "https://my.mail.ru/ya.ru/{username}/", "url": "https://my.mail.ru/ya.ru/{username}/",
"usernameClaimed": "hovsepovich", "usernameClaimed": "hovsepovich",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "MAlKOVyd"
}, },
"My.Mail.ru@yandex.ru": { "My.Mail.ru@yandex.ru": {
"tags": [ "tags": [
@@ -10760,10 +10767,11 @@
], ],
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"profile__menu" "profile__content_header_user"
], ],
"absenceStrs": [ "absenceStrs": [
"mm-profile_not-found_content" "mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
], ],
"alexaRank": 49, "alexaRank": 49,
"urlMain": "https://my.mail.ru/", "urlMain": "https://my.mail.ru/",
@@ -18773,7 +18781,8 @@
"\u7528\u6237\u4e0d\u5b58\u5728" "\u7528\u6237\u4e0d\u5b58\u5728"
], ],
"usernameClaimed": "blue", "usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7",
"disabled": true
}, },
"Zhyk": { "Zhyk": {
"disabled": true, "disabled": true,
+11 -1
View File
@@ -260,7 +260,6 @@ class MaigretDatabase:
def has_site(self, site: MaigretSite): def has_site(self, site: MaigretSite):
for s in self._sites: for s in self._sites:
if site == s: if site == s:
print(f"input == site: {site} == {s}")
return True return True
return False return False
@@ -278,6 +277,17 @@ class MaigretDatabase:
): ):
""" """
Ranking and filtering of the sites list Ranking and filtering of the sites list
Args:
reverse (bool, optional): Reverse the sorting order. Defaults to False.
top (int, optional): Maximum number of sites to return. Defaults to sys.maxsize.
tags (list, optional): List of tags to filter sites by. Defaults to empty list.
names (list, optional): List of site names (or URLs, see MaigretSite.__eq__) to filter by. Defaults to empty list.
disabled (bool, optional): Whether to include disabled sites. Defaults to True.
id_type (str, optional): Type of identifier to filter by. Defaults to "username".
Returns:
dict: Dictionary of filtered and ranked sites, with site names as keys and MaigretSite objects as values
""" """
normalized_names = list(map(str.lower, names)) normalized_names = list(map(str.lower, names))
normalized_tags = list(map(str.lower, tags)) normalized_tags = list(map(str.lower, tags))
+6 -6
View File
@@ -84,7 +84,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://discourse.mozilla.org) [discourse.mozilla.org (https://discourse.mozilla.org)](https://discourse.mozilla.org)*: top 500* 1. ![](https://www.google.com/s2/favicons?domain=https://discourse.mozilla.org) [discourse.mozilla.org (https://discourse.mozilla.org)](https://discourse.mozilla.org)*: top 500*
1. ![](https://www.google.com/s2/favicons?domain=https://linktr.ee) [linktr.ee (https://linktr.ee)](https://linktr.ee)*: top 500, links* 1. ![](https://www.google.com/s2/favicons?domain=https://linktr.ee) [linktr.ee (https://linktr.ee)](https://linktr.ee)*: top 500, links*
1. ![](https://www.google.com/s2/favicons?domain=https://xhamster.com) [xHamster (https://xhamster.com)](https://xhamster.com)*: top 500, porn, us* 1. ![](https://www.google.com/s2/favicons?domain=https://xhamster.com) [xHamster (https://xhamster.com)](https://xhamster.com)*: top 500, porn, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.zhihu.com/) [Zhihu (https://www.zhihu.com/)](https://www.zhihu.com/)*: top 500, cn* 1. ![](https://www.google.com/s2/favicons?domain=https://www.zhihu.com/) [Zhihu (https://www.zhihu.com/)](https://www.zhihu.com/)*: top 500, cn*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog* 1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock* 1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock*
@@ -3130,16 +3130,16 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://massagerepublic.com) [massagerepublic.com (https://massagerepublic.com)](https://massagerepublic.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://massagerepublic.com) [massagerepublic.com (https://massagerepublic.com)](https://massagerepublic.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://mynickname.com) [mynickname.com (https://mynickname.com)](https://mynickname.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://mynickname.com) [mynickname.com (https://mynickname.com)](https://mynickname.com)*: top 100M*
The list was updated at (2024-11-27 UTC) The list was updated at (2024-11-29 UTC)
## Statistics ## Statistics
Enabled/total sites: 2694/3126 = 86.18% Enabled/total sites: 2693/3126 = 86.15%
Incomplete message checks: 405/2694 = 15.03% (false positive risks) Incomplete message checks: 404/2693 = 15.0% (false positive risks)
Status code checks: 720/2694 = 26.73% (false positive risks) Status code checks: 720/2693 = 26.74% (false positive risks)
False positive risk (total): 41.76% False positive risk (total): 41.74%
Top 20 profile URLs: Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)` - (796) `{urlMain}/index/8-0-{username} (uCoz)`