Sites fixes (#1917)

* Some sites fixes

* Sites stats updated
This commit is contained in:
Soxoj
2024-12-01 03:19:36 +01:00
committed by GitHub
parent 0c7e3898e8
commit d15e12750b
3 changed files with 44 additions and 25 deletions
+27 -18
View File
@@ -10640,10 +10640,11 @@
"type": "ok_id",
"checkType": "message",
"presenceStrs": [
"profile__menu"
"profile__content_header_user"
],
"absenceStrs": [
"mm-profile_not-found_content"
"mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
],
"alexaRank": 49,
"urlMain": "https://my.mail.ru/",
@@ -10658,10 +10659,11 @@
"type": "vk_id",
"checkType": "message",
"presenceStrs": [
"profile__menu"
"profile__content_header_user"
],
"absenceStrs": [
"mm-profile_not-found_content"
"mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
],
"alexaRank": 49,
"urlMain": "https://my.mail.ru/",
@@ -10675,10 +10677,11 @@
],
"checkType": "message",
"presenceStrs": [
"profile__menu"
"profile__content_header_user"
],
"absenceStrs": [
"mm-profile_not-found_content"
"mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
],
"alexaRank": 49,
"urlMain": "https://my.mail.ru/",
@@ -10692,10 +10695,11 @@
],
"checkType": "message",
"presenceStrs": [
"profile__menu"
"profile__content_header_user"
],
"absenceStrs": [
"mm-profile_not-found_content"
"mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
],
"alexaRank": 49,
"urlMain": "https://my.mail.ru/",
@@ -10709,10 +10713,11 @@
],
"checkType": "message",
"presenceStrs": [
"profile__menu"
"profile__content_header_user"
],
"absenceStrs": [
"mm-profile_not-found_content"
"mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
],
"alexaRank": 49,
"urlMain": "https://my.mail.ru/",
@@ -10726,10 +10731,11 @@
],
"checkType": "message",
"presenceStrs": [
"profile__menu"
"profile__content_header_user"
],
"absenceStrs": [
"mm-profile_not-found_content"
"mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
],
"alexaRank": 49,
"urlMain": "https://my.mail.ru/",
@@ -10743,16 +10749,17 @@
],
"checkType": "message",
"presenceStrs": [
"profile__menu"
"profile__content_header_user"
],
"absenceStrs": [
"mm-profile_not-found_content"
"mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
],
"alexaRank": 49,
"urlMain": "https://my.mail.ru/",
"url": "https://my.mail.ru/ya.ru/{username}/",
"usernameClaimed": "hovsepovich",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "MAlKOVyd"
},
"My.Mail.ru@yandex.ru": {
"tags": [
@@ -10760,10 +10767,11 @@
],
"checkType": "message",
"presenceStrs": [
"profile__menu"
"profile__content_header_user"
],
"absenceStrs": [
"mm-profile_not-found_content"
"mm-profile_not-found_content",
"<title>\u041c\u043e\u0439 \u041c\u0438\u0440@Mail.Ru</title>"
],
"alexaRank": 49,
"urlMain": "https://my.mail.ru/",
@@ -18773,7 +18781,8 @@
"\u7528\u6237\u4e0d\u5b58\u5728"
],
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
"disabled": true
},
"Zhyk": {
"disabled": true,
+11 -1
View File
@@ -260,7 +260,6 @@ class MaigretDatabase:
def has_site(self, site: MaigretSite):
    """
    Check whether an equal site is already stored in the database.

    Args:
        site (MaigretSite): Site to look for. It is compared with ``==``
            against each entry of ``self._sites``.

    Returns:
        bool: True if at least one stored site compares equal to ``site``,
        False otherwise (including when ``self._sites`` is empty).
    """
    # Pure lookup: the former debug print on a match was removed so that
    # callers do not get unexpected output on stdout.
    return any(site == s for s in self._sites)
@@ -278,6 +277,17 @@ class MaigretDatabase:
):
"""
Ranking and filtering of the sites list
Args:
reverse (bool, optional): Reverse the sorting order. Defaults to False.
top (int, optional): Maximum number of sites to return. Defaults to sys.maxsize.
tags (list, optional): List of tags to filter sites by. Defaults to empty list.
names (list, optional): List of site names (or urls, see MaigretSite.__eq__) to filter by. Defaults to empty list.
disabled (bool, optional): Whether to include disabled sites. Defaults to True.
id_type (str, optional): Type of identifier to filter by. Defaults to "username".
Returns:
dict: Dictionary of filtered and ranked sites, with site names as keys and MaigretSite objects as values
"""
normalized_names = list(map(str.lower, names))
normalized_tags = list(map(str.lower, tags))
+6 -6
View File
@@ -84,7 +84,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://discourse.mozilla.org) [discourse.mozilla.org (https://discourse.mozilla.org)](https://discourse.mozilla.org)*: top 500*
1. ![](https://www.google.com/s2/favicons?domain=https://linktr.ee) [linktr.ee (https://linktr.ee)](https://linktr.ee)*: top 500, links*
1. ![](https://www.google.com/s2/favicons?domain=https://xhamster.com) [xHamster (https://xhamster.com)](https://xhamster.com)*: top 500, porn, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.zhihu.com/) [Zhihu (https://www.zhihu.com/)](https://www.zhihu.com/)*: top 500, cn*
1. ![](https://www.google.com/s2/favicons?domain=https://www.zhihu.com/) [Zhihu (https://www.zhihu.com/)](https://www.zhihu.com/)*: top 500, cn*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock*
@@ -3130,16 +3130,16 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://massagerepublic.com) [massagerepublic.com (https://massagerepublic.com)](https://massagerepublic.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://mynickname.com) [mynickname.com (https://mynickname.com)](https://mynickname.com)*: top 100M*
The list was updated at (2024-11-27 UTC)
The list was updated at (2024-11-29 UTC)
## Statistics
Enabled/total sites: 2694/3126 = 86.18%
Enabled/total sites: 2693/3126 = 86.15%
Incomplete message checks: 405/2694 = 15.03% (false positive risks)
Incomplete message checks: 404/2693 = 15.0% (false positive risks)
Status code checks: 720/2694 = 26.73% (false positive risks)
Status code checks: 720/2693 = 26.74% (false positive risks)
False positive risk (total): 41.76%
False positive risk (total): 41.74%
Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)`