Fixed/Disabled sites. Update requirements.txt (#1517)

* Fixed/Disabled sites. Update requirements.txt

fixed_sites: AllRecipes, Linktree, CreativeMarket, ImgInn, Shutterstock, Contently

disabled_sites: Forums.ea.com, CrunchyRoll, Windy, MetaCritic, InfosecInstitute, Armchairgm.fandom.com, Bleach.fandom.com

Update requirements to prevent dependency conflicts.

* Update requirements.txt

Update requirements.txt to prevent dependency conflicts

* Update requirements.txt

* Update sites.md

* fixed_sites: Armchairgm.fandom.com, Bleach.fandom.com, Battleraprus. disabled_sites: MicrosoftTechNet, club.cnews.ru, Scorcher

* fixed_sites: Armchairgm.fandom.com, Bleach.fandom.com, Battleraprus. disabled_sites: MicrosoftTechNet, club.cnews.ru, Scorcher
This commit is contained in:
Richard Mwewa
2024-05-14 15:11:17 +02:00
committed by GitHub
parent 7a8c077c57
commit f7f77e587c
5 changed files with 121 additions and 99 deletions
+55 -33
View File
@@ -656,7 +656,12 @@
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"<title>Allrecipes Member | Allrecipes</title>" "Page Not Found.",
"You may have mistyped the address, or the page may have moved."
],
"presenseStrs": [
"Saved Items & Collections",
"{username}"
], ],
"alexaRank": 983, "alexaRank": 983,
"urlMain": "https://www.allrecipes.com/", "urlMain": "https://www.allrecipes.com/",
@@ -1314,11 +1319,7 @@
"us", "us",
"wiki" "wiki"
], ],
"checkType": "message", "checkType": "status_code",
"absenceStrs": [
"does not exist",
"This user has not filled out their profile page yet."
],
"alexaRank": 80, "alexaRank": 80,
"urlMain": "https://armchairgm.fandom.com/", "urlMain": "https://armchairgm.fandom.com/",
"url": "https://armchairgm.fandom.com/wiki/User:{username}", "url": "https://armchairgm.fandom.com/wiki/User:{username}",
@@ -1909,13 +1910,7 @@
"us", "us",
"wiki" "wiki"
], ],
"checkType": "message", "checkType": "status_code",
"presenseStrs": [
"user-profile-navigation"
],
"absenceStrs": [
"\u041e\u0448\u0438\u0431\u043a\u0430</h1>"
],
"alexaRank": 80, "alexaRank": 80,
"urlMain": "https://battleraprus.fandom.com/ru", "urlMain": "https://battleraprus.fandom.com/ru",
"url": "https://battleraprus.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}", "url": "https://battleraprus.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}",
@@ -2293,11 +2288,7 @@
"ru", "ru",
"wiki" "wiki"
], ],
"checkType": "message", "checkType": "status_code",
"absenceStrs": [
"does not exist",
"\u042d\u0442\u043e\u0442 \u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a \u043f\u043e\u043a\u0430 \u043d\u0435 \u0437\u0430\u043f\u043e\u043b\u043d\u0438\u043b \u0441\u0432\u043e\u0439 \u043f\u0440\u043e\u0444\u0438\u043b\u044c."
],
"alexaRank": 80, "alexaRank": 80,
"urlMain": "https://bleach.fandom.com/ru", "urlMain": "https://bleach.fandom.com/ru",
"url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}", "url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}",
@@ -3484,10 +3475,12 @@
"freelance", "freelance",
"in" "in"
], ],
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"We can't find that page!" "Request A Meeting</a></div>"
],
"presenseStrs": [
"<h4>\nPROJECTS"
], ],
"alexaRank": 11587, "alexaRank": 11587,
"urlMain": "https://contently.com/", "urlMain": "https://contently.com/",
@@ -3579,7 +3572,11 @@
}, },
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"The page you were looking for was not found." "Whoomp, there it isn't...",
"It looks like the page you\u2019re looking for is no longer available. "
],
"presenseStrs": [
"Likes"
], ],
"alexaRank": 3054, "alexaRank": 3054,
"urlMain": "https://creativemarket.com/", "urlMain": "https://creativemarket.com/",
@@ -3616,6 +3613,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Crunchyroll": { "Crunchyroll": {
"disabled": true,
"tags": [ "tags": [
"forum", "forum",
"movies", "movies",
@@ -7939,6 +7937,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"InfosecInstitute": { "InfosecInstitute": {
"disabled": true,
"tags": [ "tags": [
"us" "us"
], ],
@@ -9267,6 +9266,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Lolchess": { "Lolchess": {
"disabled": true,
"tags": [ "tags": [
"kr" "kr"
], ],
@@ -9277,6 +9277,9 @@
"absenceStrs": [ "absenceStrs": [
"No search results" "No search results"
], ],
"presenseStrs": [
"results were displayed out of"
],
"alexaRank": 4911, "alexaRank": 4911,
"urlMain": "https://lolchess.gg/", "urlMain": "https://lolchess.gg/",
"url": "https://lolchess.gg/profile/na/{username}", "url": "https://lolchess.gg/profile/na/{username}",
@@ -10060,6 +10063,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"MicrosoftTechNet": { "MicrosoftTechNet": {
"disabled": true,
"tags": [ "tags": [
"us" "us"
], ],
@@ -14050,6 +14054,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Scorcher": { "Scorcher": {
"disabled": true,
"tags": [ "tags": [
"ru" "ru"
], ],
@@ -14432,11 +14437,17 @@
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"<img loading=\"lazy\" src=\"/assets/images/ent_not_found_404.png\" alt=\"404\"" "This surprising...",
"Unfortunately, we can't find what you're looking for.",
"<title>Not Found | Shutterstock</title>"
],
"presenseStrs": [
"{username}",
"Information"
], ],
"alexaRank": 184, "alexaRank": 184,
"urlMain": "https://www.shutterstock.com", "urlMain": "https://www.shutterstock.com",
"url": "https://www.shutterstock.com/fi/g/{username}/about", "url": "https://www.shutterstock.com/g/{username}/about",
"usernameClaimed": "adam", "usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
@@ -17846,6 +17857,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Windy": { "Windy": {
"disabled": true,
"tags": [ "tags": [
"in", "in",
"jp", "jp",
@@ -19158,6 +19170,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"club.cnews.ru": { "club.cnews.ru": {
"disabled": true,
"tags": [ "tags": [
"blog", "blog",
"ru" "ru"
@@ -20715,13 +20728,17 @@
"usernameUnclaimed": "noonewouldeverusethis77777" "usernameUnclaimed": "noonewouldeverusethis77777"
}, },
"metacritic": { "metacritic": {
"disabled": true,
"tags": [ "tags": [
"us" "us"
], ],
"regexCheck": "^(?![-_])[A-Za-z0-9-_]{3,15}$", "regexCheck": "^(?![-_])[A-Za-z0-9-_]{3,15}$",
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"User not found" "This user hasn\u2019t rated anything yet"
],
"presenseStrs": [
"Avg. User score"
], ],
"alexaRank": 2409, "alexaRank": 2409,
"urlMain": "https://www.metacritic.com/", "urlMain": "https://www.metacritic.com/",
@@ -26829,11 +26846,18 @@
"tags": [ "tags": [
"links" "links"
], ],
"engine": "engine404", "checkType": "message",
"absenceStrs": [
"The page you\u2019re looking for doesn\u2019t exist.",
"Want this to be your username?"
],
"presenseStrs": [
"Join {username} on Linktree today"
],
"urlMain": "https://linktr.ee", "urlMain": "https://linktr.ee",
"url": "https://linktr.ee/{username}", "url": "https://linktr.ee/{username}",
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
"usernameClaimed": "red", "usernameClaimed": "Blisscartoos",
"alexaRank": 134 "alexaRank": 134
}, },
"jsfiddle.net": { "jsfiddle.net": {
@@ -34067,16 +34091,14 @@
}, },
"ImgInn": { "ImgInn": {
"absenceStrs": [ "absenceStrs": [
"Page Not Found</div>" "Page Not Found",
"The content has been deleted"
], ],
"presenseStrs": [ "presenseStrs": [
"username", "followers",
"/{username}/", "{username}"
" data-username=",
"name",
" data-name="
], ],
"url": "https://imginn.com/tagged/{username}/", "url": "https://imginn.com/{username}/",
"urlMain": "https://imginn.com", "urlMain": "https://imginn.com",
"usernameClaimed": "morgen_shtern", "usernameClaimed": "morgen_shtern",
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
+36 -36
View File
@@ -1,39 +1,39 @@
aiodns==3.0.0 aiodns>=3.0.0
aiohttp==3.8.6 aiohttp>=3.8.6
aiohttp-socks==0.7.1 aiohttp-socks>=0.7.1
arabic-reshaper~=3.0.0 arabic-reshaper~=3.0.0
async-timeout==4.0.3 async-timeout>=4.0.3
attrs==22.2.0 attrs>=22.2.0
certifi==2023.7.22 certifi>=2023.7.22
chardet==5.0.0 chardet>=5.0.0
colorama==0.4.6 colorama>=0.4.6
future==0.18.3 future>=0.18.3
future-annotations==1.0.0 future-annotations>=1.0.0
html5lib==1.1 html5lib>=1.1
idna==3.4 idna>=3.4
Jinja2==3.1.3 Jinja2>=3.1.3
lxml==4.9.2 lxml>=4.9.2
MarkupSafe==2.1.1 MarkupSafe>=2.1.1
mock==4.0.3 mock>=4.0.3
multidict==6.0.4 multidict>=6.0.4
pycountry==22.3.5 pycountry>=22.3.5
PyPDF2==3.0.1 PyPDF2>=3.0.1
PySocks==1.7.1 PySocks>=1.7.1
python-bidi==0.4.2 python-bidi>=0.4.2
requests==2.31.0 requests>=2.31.0
requests-futures==1.0.0 requests-futures>=1.0.0
six==1.16.0 six>=1.16.0
socid-extractor>=0.0.24 socid-extractor>=0.0.24
soupsieve==2.3.2.post1 soupsieve>=2.3.2.post1
stem==1.8.1 stem>=1.8.1
torrequest==0.1.0 torrequest>=0.1.0
tqdm==4.66.1 tqdm>=4.66.1
typing-extensions==4.8.0 typing-extensions>=4.8.0
webencodings==0.5.1 webencodings>=0.5.1
xhtml2pdf~=0.2.11 xhtml2pdf~=0.2.11
XMind==1.2.0 XMind>=1.2.0
yarl==1.8.2 yarl>=1.8.2
networkx==2.6.3 networkx>=2.6.3
pyvis==0.2.1 pyvis>=0.2.1
reportlab==3.6.13 reportlab>=3.6.13
cloudscraper==1.2.71 cloudscraper>=1.2.71
+18 -18
View File
@@ -14,7 +14,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.wikipedia.org/) [Wikipedia (https://www.wikipedia.org/)](https://www.wikipedia.org/)*: top 50, wiki* 1. ![](https://www.google.com/s2/favicons?domain=https://www.wikipedia.org/) [Wikipedia (https://www.wikipedia.org/)](https://www.wikipedia.org/)*: top 50, wiki*
1. ![](https://www.google.com/s2/favicons?domain=https://www.reddit.com/) [Reddit (https://www.reddit.com/)](https://www.reddit.com/)*: top 50, discussion, news* 1. ![](https://www.google.com/s2/favicons?domain=https://www.reddit.com/) [Reddit (https://www.reddit.com/)](https://www.reddit.com/)*: top 50, discussion, news*
1. ![](https://www.google.com/s2/favicons?domain=https://social.msdn.microsoft.com) [social.msdn.microsoft.com (https://social.msdn.microsoft.com)](https://social.msdn.microsoft.com)*: top 50, us* 1. ![](https://www.google.com/s2/favicons?domain=https://social.msdn.microsoft.com) [social.msdn.microsoft.com (https://social.msdn.microsoft.com)](https://social.msdn.microsoft.com)*: top 50, us*
1. ![](https://www.google.com/s2/favicons?domain=https://social.technet.microsoft.com) [MicrosoftTechNet (https://social.technet.microsoft.com)](https://social.technet.microsoft.com)*: top 50, us* 1. ![](https://www.google.com/s2/favicons?domain=https://social.technet.microsoft.com) [MicrosoftTechNet (https://social.technet.microsoft.com)](https://social.technet.microsoft.com)*: top 50, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://weibo.com) [Weibo (https://weibo.com)](https://weibo.com)*: top 50, cn, networking* 1. ![](https://www.google.com/s2/favicons?domain=https://weibo.com) [Weibo (https://weibo.com)](https://weibo.com)*: top 50, cn, networking*
1. ![](https://www.google.com/s2/favicons?domain=https://gist.github.com) [GitHubGist (https://gist.github.com)](https://gist.github.com)*: top 50, coding, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://gist.github.com) [GitHubGist (https://gist.github.com)](https://gist.github.com)*: top 50, coding, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://vk.com/) [VK (https://vk.com/)](https://vk.com/)*: top 50, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://vk.com/) [VK (https://vk.com/)](https://vk.com/)*: top 50, ru*
@@ -127,7 +127,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://tripadvisor.com/) [TripAdvisor (https://tripadvisor.com/)](https://tripadvisor.com/)*: top 500, travel* 1. ![](https://www.google.com/s2/favicons?domain=https://tripadvisor.com/) [TripAdvisor (https://tripadvisor.com/)](https://tripadvisor.com/)*: top 500, travel*
1. ![](https://www.google.com/s2/favicons?domain=https://www.academia.edu/) [Academia.edu (https://www.academia.edu/)](https://www.academia.edu/)*: top 500, id* 1. ![](https://www.google.com/s2/favicons?domain=https://www.academia.edu/) [Academia.edu (https://www.academia.edu/)](https://www.academia.edu/)*: top 500, id*
1. ![](https://www.google.com/s2/favicons?domain=https://www.mercadolivre.com.br) [mercadolivre (https://www.mercadolivre.com.br)](https://www.mercadolivre.com.br)*: top 500, br* 1. ![](https://www.google.com/s2/favicons?domain=https://www.mercadolivre.com.br) [mercadolivre (https://www.mercadolivre.com.br)](https://www.mercadolivre.com.br)*: top 500, br*
1. ![](https://www.google.com/s2/favicons?domain=https://www.crunchyroll.com/) [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 500, forum, movies, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.crunchyroll.com/) [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 500, forum, movies, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://wordpress.org/) [WordPressOrg (https://wordpress.org/)](https://wordpress.org/)*: top 500, in* 1. ![](https://www.google.com/s2/favicons?domain=https://wordpress.org/) [WordPressOrg (https://wordpress.org/)](https://wordpress.org/)*: top 500, in*
1. ![](https://www.google.com/s2/favicons?domain=https://ameblo.jp) [Ameblo (https://ameblo.jp)](https://ameblo.jp)*: top 500, blog, jp* 1. ![](https://www.google.com/s2/favicons?domain=https://ameblo.jp) [Ameblo (https://ameblo.jp)](https://ameblo.jp)*: top 500, blog, jp*
1. ![](https://www.google.com/s2/favicons?domain=https://unsplash.com/) [Unsplash (https://unsplash.com/)](https://unsplash.com/)*: top 500, art, photo* 1. ![](https://www.google.com/s2/favicons?domain=https://unsplash.com/) [Unsplash (https://unsplash.com/)](https://unsplash.com/)*: top 500, art, photo*
@@ -242,7 +242,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://pastebin.com/) [Pastebin (https://pastebin.com/)](https://pastebin.com/)*: top 5K, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://pastebin.com/) [Pastebin (https://pastebin.com/)](https://pastebin.com/)*: top 5K, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://gfycat.com/) [gfycat (https://gfycat.com/)](https://gfycat.com/)*: top 5K, photo, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://gfycat.com/) [gfycat (https://gfycat.com/)](https://gfycat.com/)*: top 5K, photo, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://last.fm/) [last.fm (https://last.fm/)](https://last.fm/)*: top 5K, music* 1. ![](https://www.google.com/s2/favicons?domain=https://last.fm/) [last.fm (https://last.fm/)](https://last.fm/)*: top 5K, music*
1. ![](https://www.google.com/s2/favicons?domain=https://windy.com/) [Windy (https://windy.com/)](https://windy.com/)*: top 5K, in, jp, kr, pl, us* 1. ![](https://www.google.com/s2/favicons?domain=https://windy.com/) [Windy (https://windy.com/)](https://windy.com/)*: top 5K, in, jp, kr, pl, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://profile.hatena.ne.jp) [profile.hatena.ne.jp (https://profile.hatena.ne.jp)](https://profile.hatena.ne.jp)*: top 5K, jp* 1. ![](https://www.google.com/s2/favicons?domain=https://profile.hatena.ne.jp) [profile.hatena.ne.jp (https://profile.hatena.ne.jp)](https://profile.hatena.ne.jp)*: top 5K, jp*
1. ![](https://www.google.com/s2/favicons?domain=https://bodyspace.bodybuilding.com/) [BodyBuilding (https://bodyspace.bodybuilding.com/)](https://bodyspace.bodybuilding.com/)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://bodyspace.bodybuilding.com/) [BodyBuilding (https://bodyspace.bodybuilding.com/)](https://bodyspace.bodybuilding.com/)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://community.icons8.com) [community.icons8.com (https://community.icons8.com)](https://community.icons8.com)*: top 5K, forum, in* 1. ![](https://www.google.com/s2/favicons?domain=https://community.icons8.com) [community.icons8.com (https://community.icons8.com)](https://community.icons8.com)*: top 5K, forum, in*
@@ -258,7 +258,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://jsfiddle.net) [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://jsfiddle.net) [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://ru.pathofexile.com) [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us* 1. ![](https://www.google.com/s2/favicons?domain=https://ru.pathofexile.com) [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us*
1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.metacritic.com/) [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.metacritic.com/) [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.digitalocean.com/) [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech* 1. ![](https://www.google.com/s2/favicons?domain=https://www.digitalocean.com/) [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech*
1. ![](https://www.google.com/s2/favicons?domain=http://www.jeuxvideo.com) [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming* 1. ![](https://www.google.com/s2/favicons?domain=http://www.jeuxvideo.com) [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.shiftdelete.net) [ShiftDelete (https://forum.shiftdelete.net)](https://forum.shiftdelete.net)*: top 5K, forum, tr*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://forum.shiftdelete.net) [ShiftDelete (https://forum.shiftdelete.net)](https://forum.shiftdelete.net)*: top 5K, forum, tr*, search is disabled
@@ -337,7 +337,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.buymeacoffee.com/) [BuyMeACoffee (https://www.buymeacoffee.com/)](https://www.buymeacoffee.com/)*: top 5K, in* 1. ![](https://www.google.com/s2/favicons?domain=https://www.buymeacoffee.com/) [BuyMeACoffee (https://www.buymeacoffee.com/)](https://www.buymeacoffee.com/)*: top 5K, in*
1. ![](https://www.google.com/s2/favicons?domain=https://muckrack.com) [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://muckrack.com) [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.fixya.com) [fixya (https://www.fixya.com)](https://www.fixya.com)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.fixya.com) [fixya (https://www.fixya.com)](https://www.fixya.com)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://lolchess.gg/) [Lolchess (https://lolchess.gg/)](https://lolchess.gg/)*: top 5K, kr* 1. ![](https://www.google.com/s2/favicons?domain=https://lolchess.gg/) [Lolchess (https://lolchess.gg/)](https://lolchess.gg/)*: top 5K, kr*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.ifttt.com/) [IFTTT (https://www.ifttt.com/)](https://www.ifttt.com/)*: top 5K, tech* 1. ![](https://www.google.com/s2/favicons?domain=https://www.ifttt.com/) [IFTTT (https://www.ifttt.com/)](https://www.ifttt.com/)*: top 5K, tech*
1. ![](https://www.google.com/s2/favicons?domain=https://www.minds.com) [www.minds.com (https://www.minds.com)](https://www.minds.com)*: top 5K, in* 1. ![](https://www.google.com/s2/favicons?domain=https://www.minds.com) [www.minds.com (https://www.minds.com)](https://www.minds.com)*: top 5K, in*
1. ![](https://www.google.com/s2/favicons?domain=https://forums.imore.com) [forums.imore.com (https://forums.imore.com)](https://forums.imore.com)*: top 5K, forum, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://forums.imore.com) [forums.imore.com (https://forums.imore.com)](https://forums.imore.com)*: top 5K, forum, us*, search is disabled
@@ -396,7 +396,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://about.me/) [About.me (https://about.me/)](https://about.me/)*: top 10K, blog, in* 1. ![](https://www.google.com/s2/favicons?domain=https://about.me/) [About.me (https://about.me/)](https://about.me/)*: top 10K, blog, in*
1. ![](https://www.google.com/s2/favicons?domain=https://www.fark.com/) [Fark (https://www.fark.com/)](https://www.fark.com/)*: top 10K, forum, news* 1. ![](https://www.google.com/s2/favicons?domain=https://www.fark.com/) [Fark (https://www.fark.com/)](https://www.fark.com/)*: top 10K, forum, news*
1. ![](https://www.google.com/s2/favicons?domain=https://www.reverbnation.com/) [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.reverbnation.com/) [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.glavbukh.ru) [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://www.glavbukh.ru) [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.trakt.tv/) [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr* 1. ![](https://www.google.com/s2/favicons?domain=https://www.trakt.tv/) [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr*
1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, au* 1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, au*
1. ![](https://www.google.com/s2/favicons?domain=https://pandia.ru) [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://pandia.ru) [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru*
@@ -515,7 +515,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://forums.indiegala.com) [forums.indiegala.com (https://forums.indiegala.com)](https://forums.indiegala.com)*: top 100K, forum, us* 1. ![](https://www.google.com/s2/favicons?domain=https://forums.indiegala.com) [forums.indiegala.com (https://forums.indiegala.com)](https://forums.indiegala.com)*: top 100K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://ptvintern.picarto.tv) [Picarto (https://ptvintern.picarto.tv)](https://ptvintern.picarto.tv)*: top 100K, art, streaming* 1. ![](https://www.google.com/s2/favicons?domain=https://ptvintern.picarto.tv) [Picarto (https://ptvintern.picarto.tv)](https://ptvintern.picarto.tv)*: top 100K, art, streaming*
1. ![](https://www.google.com/s2/favicons?domain=https://www.neoseeker.com) [Neoseeker (https://www.neoseeker.com)](https://www.neoseeker.com)*: top 100K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.neoseeker.com) [Neoseeker (https://www.neoseeker.com)](https://www.neoseeker.com)*: top 100K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://community.infosecinstitute.com) [InfosecInstitute (https://community.infosecinstitute.com)](https://community.infosecinstitute.com)*: top 100K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://community.infosecinstitute.com) [InfosecInstitute (https://community.infosecinstitute.com)](https://community.infosecinstitute.com)*: top 100K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://armorgames.com) [Armorgames (https://armorgames.com)](https://armorgames.com)*: top 100K, gaming, us* 1. ![](https://www.google.com/s2/favicons?domain=https://armorgames.com) [Armorgames (https://armorgames.com)](https://armorgames.com)*: top 100K, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://giters.com) [giters.com (https://giters.com)](https://giters.com)*: top 100K, coding* 1. ![](https://www.google.com/s2/favicons?domain=https://giters.com) [giters.com (https://giters.com)](https://giters.com)*: top 100K, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://teamtreehouse.com) [teamtreehouse.com (https://teamtreehouse.com)](https://teamtreehouse.com)*: top 100K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://teamtreehouse.com) [teamtreehouse.com (https://teamtreehouse.com)](https://teamtreehouse.com)*: top 100K, us*
@@ -556,7 +556,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.donationalerts.com/) [DonationsAlerts (https://www.donationalerts.com/)](https://www.donationalerts.com/)*: top 100K, finance, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://www.donationalerts.com/) [DonationsAlerts (https://www.donationalerts.com/)](https://www.donationalerts.com/)*: top 100K, finance, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.trueachievements.com) [TrueAchievements (https://www.trueachievements.com)](https://www.trueachievements.com)*: top 100K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.trueachievements.com) [TrueAchievements (https://www.trueachievements.com)](https://www.trueachievements.com)*: top 100K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://jimdosite.com/) [Jimdo (https://jimdosite.com/)](https://jimdosite.com/)*: top 100K, jp* 1. ![](https://www.google.com/s2/favicons?domain=https://jimdosite.com/) [Jimdo (https://jimdosite.com/)](https://jimdosite.com/)*: top 100K, jp*
1. ![](https://www.google.com/s2/favicons?domain=https://club.cnews.ru/) [club.cnews.ru (https://club.cnews.ru/)](https://club.cnews.ru/)*: top 100K, blog, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://club.cnews.ru/) [club.cnews.ru (https://club.cnews.ru/)](https://club.cnews.ru/)*: top 100K, blog, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://psnprofiles.com/) [PSNProfiles.com (https://psnprofiles.com/)](https://psnprofiles.com/)*: top 100K, gaming*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://psnprofiles.com/) [PSNProfiles.com (https://psnprofiles.com/)](https://psnprofiles.com/)*: top 100K, gaming*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://donorbox.org) [donorbox (https://donorbox.org)](https://donorbox.org)*: top 100K, finance* 1. ![](https://www.google.com/s2/favicons?domain=https://donorbox.org) [donorbox (https://donorbox.org)](https://donorbox.org)*: top 100K, finance*
1. ![](https://www.google.com/s2/favicons?domain=https://www.sbazar.cz/) [Sbazar.cz (https://www.sbazar.cz/)](https://www.sbazar.cz/)*: top 100K, cz, shopping* 1. ![](https://www.google.com/s2/favicons?domain=https://www.sbazar.cz/) [Sbazar.cz (https://www.sbazar.cz/)](https://www.sbazar.cz/)*: top 100K, cz, shopping*
@@ -3100,20 +3100,20 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://ngl.link) [ngl.link (https://ngl.link)](https://ngl.link)*: top 100M, q&a* 1. ![](https://www.google.com/s2/favicons?domain=https://ngl.link) [ngl.link (https://ngl.link)](https://ngl.link)*: top 100M, q&a*
1. ![](https://www.google.com/s2/favicons?domain=https://bitpapa.com) [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto* 1. ![](https://www.google.com/s2/favicons?domain=https://bitpapa.com) [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto*
The list was updated at (2023-10-27 19:46:13.899883 UTC) The list was updated at (2024-05-13 20:09:33.626841+00:00 UTC)
## Statistics ## Statistics
Enabled/total sites: 2802/3096 = 90.5% Enabled/total sites: 2794/3096 = 90.25%
Incomplete message checks: 447/2802 = 15.95% (false positive risks) Incomplete message checks: 438/2794 = 15.68% (false positive risks)
Status code checks: 720/2802 = 25.7% (false positive risks) Status code checks: 722/2794 = 25.84% (false positive risks)
False positive risk (total): 41.65% False positive risk (total): 41.52%
Top 20 profile URLs: Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)` - (796) `{urlMain}/index/8-0-{username} (uCoz)`
- (294) `/{username}` - (295) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)` - (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (158) `/user/{username}` - (158) `/user/{username}`
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)` - (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
@@ -3138,16 +3138,16 @@ Top 20 tags:
- (279) `forum` - (279) `forum`
- (49) `gaming` - (49) `gaming`
- (25) `coding` - (25) `coding`
- (22) `photo` - (21) `photo`
- (19) `news` - (19) `news`
- (18) `blog` - (18) `blog`
- (16) `music` - (15) `music`
- (14) `tech` - (14) `tech`
- (13) `freelance` - (12) `freelance`
- (11) `sharing` - (11) `sharing`
- (11) `art`
- (11) `finance` - (11) `finance`
- (10) `dating` - (10) `dating`
- (10) `art`
- (10) `shopping` - (10) `shopping`
- (9) `movies` - (9) `movies`
- (8) `hobby` - (8) `hobby`
+8 -8
View File
@@ -1,8 +1,8 @@
reportlab==4.0.4 reportlab>=4.0.4
flake8==6.1.0 flake8>=6.1.0
pytest==7.2.0 pytest>=7.2.0
pytest-asyncio==0.16.0;python_version<"3.7" pytest-asyncio>=0.16.0;python_version<"3.7"
pytest-asyncio==0.20.1;python_version>="3.7" pytest-asyncio>=0.20.1;python_version>="3.7"
pytest-cov==4.0.0 pytest-cov>=4.0.0
pytest-httpserver~=1.0.8 pytest-httpserver>=1.0.8
pytest-rerunfailures==12.0 pytest-rerunfailures>=12.0
+3 -3
View File
@@ -3,13 +3,12 @@
This module generates the listing of supported sites in file `SITES.md` This module generates the listing of supported sites in file `SITES.md`
and pretty prints file with sites data. and pretty prints file with sites data.
""" """
import json
import sys import sys
import requests import requests
import logging import logging
import threading import threading
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from datetime import datetime from datetime import datetime, timezone
from argparse import ArgumentParser, RawDescriptionHelpFormatter from argparse import ArgumentParser, RawDescriptionHelpFormatter
from maigret.maigret import MaigretDatabase from maigret.maigret import MaigretDatabase
@@ -27,6 +26,7 @@ RANKS.update({
SEMAPHORE = threading.Semaphore(20) SEMAPHORE = threading.Semaphore(20)
def get_rank(domain_to_query, site, print_errors=True): def get_rank(domain_to_query, site, print_errors=True):
with SEMAPHORE: with SEMAPHORE:
# Retrieve ranking data via alexa API # Retrieve ranking data via alexa API
@@ -137,7 +137,7 @@ Rank data fetched from Alexa by domains.
site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n') site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
db.update_site(site) db.update_site(site)
site_file.write(f'\nThe list was updated at ({datetime.utcnow()} UTC)\n') site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc)} UTC)\n')
db.save_to_file(args.base_file) db.save_to_file(args.base_file)
statistics_text = db.get_db_stats(is_markdown=True) statistics_text = db.get_db_stats(is_markdown=True)