From f7f77e587ce89ce9e789924e2383de4dc5f37f87 Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Tue, 14 May 2024 15:11:17 +0200 Subject: [PATCH] Fixed/Disabled sites. Update requirements.txt (#1517) * Fixed/Disabled sites. Update requirements.txt fixed_sites: AllRecipes, Linktree, CreativeMarket, ImgInn, Shutterstock, Contently disabled_sites: Forums.ea.com. CrunchyRoll, Windy, MetaCritic, InfosecInstitute, Armchairgm.fandom.com, Bleach.fandom.com Update requirements to prevent dependency conflicts. * Update requirements.txt Update requirements.txt to prevent dependency conflicts * Update requirements.txt * Update sites.md * fixed_sites: Armchairgm.fandom.com, Bleach.fandom.com, Battleraprus. disabled_sites: MicrosoftTechNet, club.cnews.ru, Scorcher * fixed_sites: Armchairgm.fandom.com, Bleach.fandom.com, Battleraprus. disabled_sites: MicrosoftTechNet, club.cnews.ru, Scorcher --- maigret/resources/data.json | 88 +++++++++++++++++++++++-------------- requirements.txt | 72 +++++++++++++++--------------- sites.md | 36 +++++++-------- test-requirements.txt | 16 +++---- utils/update_site_data.py | 8 ++-- 5 files changed, 121 insertions(+), 99 deletions(-) diff --git a/maigret/resources/data.json b/maigret/resources/data.json index d6f9a8b..09ee849 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -656,7 +656,12 @@ ], "checkType": "message", "absenceStrs": [ - "Allrecipes Member | Allrecipes" + "Page Not Found.", + "You may have mistyped the address, or the page may have moved." + ], + "presenseStrs": [ + "Saved Items & Collections", + "{username}" ], "alexaRank": 983, "urlMain": "https://www.allrecipes.com/", @@ -1314,11 +1319,7 @@ "us", "wiki" ], - "checkType": "message", - "absenceStrs": [ - "does not exist", - "This user has not filled out their profile page yet." - ], + "checkType": "status_code", "alexaRank": 80, "urlMain": "https://armchairgm.fandom.com/", "url": "https://armchairgm.fandom.com/wiki/User:{username}", @@ -1909,13 +1910,7 @@ "us", "wiki" ], - "checkType": "message", - "presenseStrs": [ - "user-profile-navigation" - ], - "absenceStrs": [ - "\u041e\u0448\u0438\u0431\u043a\u0430" - ], + "checkType": "status_code", "alexaRank": 80, "urlMain": "https://battleraprus.fandom.com/ru", "url": "https://battleraprus.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}", @@ -2293,11 +2288,7 @@ "ru", "wiki" ], - "checkType": "message", - "absenceStrs": [ - "does not exist", - "\u042d\u0442\u043e\u0442 \u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a \u043f\u043e\u043a\u0430 \u043d\u0435 \u0437\u0430\u043f\u043e\u043b\u043d\u0438\u043b \u0441\u0432\u043e\u0439 \u043f\u0440\u043e\u0444\u0438\u043b\u044c." - ], + "checkType": "status_code", "alexaRank": 80, "urlMain": "https://bleach.fandom.com/ru", "url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}", @@ -3484,10 +3475,12 @@ "freelance", "in" ], - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", "checkType": "message", "absenceStrs": [ - "We can't find that page!" + "Request A Meeting" + ], + "presenseStrs": [ + "

\nPROJECTS" ], "alexaRank": 11587, "urlMain": "https://contently.com/", @@ -3579,7 +3572,11 @@ }, "checkType": "message", "absenceStrs": [ - "The page you were looking for was not found." + "Whoomp, there it isn't...", + "It looks like the page you\u2019re looking for is no longer available. " + ], + "presenseStrs": [ + "Likes" ], "alexaRank": 3054, "urlMain": "https://creativemarket.com/", @@ -3616,6 +3613,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "Crunchyroll": { + "disabled": true, "tags": [ "forum", "movies", @@ -7939,6 +7937,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "InfosecInstitute": { + "disabled": true, "tags": [ "us" ], @@ -9267,6 +9266,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "Lolchess": { + "disabled": true, "tags": [ "kr" ], @@ -9277,6 +9277,9 @@ "absenceStrs": [ "No search results" ], + "presenseStrs": [ + "results were displayed out of" + ], "alexaRank": 4911, "urlMain": "https://lolchess.gg/", "url": "https://lolchess.gg/profile/na/{username}", @@ -10060,6 +10063,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "MicrosoftTechNet": { + "disabled": true, "tags": [ "us" ], @@ -14050,6 +14054,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "Scorcher": { + "disabled": true, "tags": [ "ru" ], @@ -14432,11 +14437,17 @@ ], "checkType": "message", "absenceStrs": [ - "\"404\""Not Found | Shutterstock" + ], + "presenseStrs": [ + "{username}", + "Information" ], "alexaRank": 184, "urlMain": "https://www.shutterstock.com", - "url": "https://www.shutterstock.com/fi/g/{username}/about", + "url": "https://www.shutterstock.com/g/{username}/about", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" }, @@ -17846,6 +17857,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "Windy": { + "disabled": true, "tags": [ "in", "jp", @@ -19158,6 +19170,7 @@ "usernameUnclaimed": "noonewouldeverusethis7" }, "club.cnews.ru": { + "disabled": true, "tags": [ "blog", "ru" @@ -20715,13 +20728,17 @@ "usernameUnclaimed": "noonewouldeverusethis77777" }, "metacritic": { + "disabled": true, "tags": [ "us" ], "regexCheck": "^(?![-_])[A-Za-z0-9-_]{3,15}$", "checkType": "message", "absenceStrs": [ - "User not found" + "This user hasn\u2019t rated anything yet" + ], + "presenseStrs": [ + "Avg. User score" ], "alexaRank": 2409, "urlMain": "https://www.metacritic.com/", @@ -26829,11 +26846,18 @@ "tags": [ "links" ], - "engine": "engine404", + "checkType": "message", + "absenceStrs": [ + "The page you\u2019re looking for doesn\u2019t exist.", + "Want this to be your username?" + ], + "presenseStrs": [ + "Join {username} on Linktree today" + ], "urlMain": "https://linktr.ee", "url": "https://linktr.ee/{username}", "usernameUnclaimed": "noonewouldeverusethis7", - "usernameClaimed": "red", + "usernameClaimed": "Blisscartoos", "alexaRank": 134 }, "jsfiddle.net": { @@ -34067,16 +34091,14 @@ }, "ImgInn": { "absenceStrs": [ - "Page Not Found" + "Page Not Found", + "The content has been deleted" ], "presenseStrs": [ - "username", - "/{username}/", - " data-username=", - "name", - " data-name=" + "followers", + "{username}" ], - "url": "https://imginn.com/tagged/{username}/", + "url": "https://imginn.com/{username}/", "urlMain": "https://imginn.com", "usernameClaimed": "morgen_shtern", "usernameUnclaimed": "noonewouldeverusethis7", diff --git a/requirements.txt b/requirements.txt index 4f5299a..66cfd0d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,39 +1,39 @@ -aiodns==3.0.0 -aiohttp==3.8.6 -aiohttp-socks==0.7.1 +aiodns>=3.0.0 +aiohttp>=3.8.6 +aiohttp-socks>=0.7.1 arabic-reshaper~=3.0.0 -async-timeout==4.0.3 -attrs==22.2.0 -certifi==2023.7.22 -chardet==5.0.0 -colorama==0.4.6 -future==0.18.3 -future-annotations==1.0.0 -html5lib==1.1 -idna==3.4 -Jinja2==3.1.3 -lxml==4.9.2 -MarkupSafe==2.1.1 -mock==4.0.3 -multidict==6.0.4 -pycountry==22.3.5 -PyPDF2==3.0.1 -PySocks==1.7.1 -python-bidi==0.4.2 -requests==2.31.0 -requests-futures==1.0.0 -six==1.16.0 +async-timeout>=4.0.3 +attrs>=22.2.0 +certifi>=2023.7.22 +chardet>=5.0.0 +colorama>=0.4.6 +future>=0.18.3 +future-annotations>=1.0.0 +html5lib>=1.1 +idna>=3.4 +Jinja2>=3.1.3 +lxml>=4.9.2 +MarkupSafe>=2.1.1 +mock>=4.0.3 +multidict>=6.0.4 +pycountry>=22.3.5 +PyPDF2>=3.0.1 +PySocks>=1.7.1 +python-bidi>=0.4.2 +requests>=2.31.0 +requests-futures>=1.0.0 +six>=1.16.0 socid-extractor>=0.0.24 -soupsieve==2.3.2.post1 -stem==1.8.1 -torrequest==0.1.0 -tqdm==4.66.1 -typing-extensions==4.8.0 -webencodings==0.5.1 +soupsieve>=2.3.2.post1 +stem>=1.8.1 +torrequest>=0.1.0 +tqdm>=4.66.1 +typing-extensions>=4.8.0 +webencodings>=0.5.1 xhtml2pdf~=0.2.11 -XMind==1.2.0 -yarl==1.8.2 -networkx==2.6.3 -pyvis==0.2.1 -reportlab==3.6.13 -cloudscraper==1.2.71 +XMind>=1.2.0 +yarl>=1.8.2 +networkx>=2.6.3 +pyvis>=0.2.1 +reportlab>=3.6.13 +cloudscraper>=1.2.71 diff --git a/sites.md b/sites.md index 322aa17..d9a1ded 100644 --- a/sites.md +++ b/sites.md @@ -14,7 +14,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://www.wikipedia.org/) [Wikipedia (https://www.wikipedia.org/)](https://www.wikipedia.org/)*: top 50, wiki* 1. ![](https://www.google.com/s2/favicons?domain=https://www.reddit.com/) [Reddit (https://www.reddit.com/)](https://www.reddit.com/)*: top 50, discussion, news* 1. ![](https://www.google.com/s2/favicons?domain=https://social.msdn.microsoft.com) [social.msdn.microsoft.com (https://social.msdn.microsoft.com)](https://social.msdn.microsoft.com)*: top 50, us* -1. ![](https://www.google.com/s2/favicons?domain=https://social.technet.microsoft.com) [MicrosoftTechNet (https://social.technet.microsoft.com)](https://social.technet.microsoft.com)*: top 50, us* +1. ![](https://www.google.com/s2/favicons?domain=https://social.technet.microsoft.com) [MicrosoftTechNet (https://social.technet.microsoft.com)](https://social.technet.microsoft.com)*: top 50, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://weibo.com) [Weibo (https://weibo.com)](https://weibo.com)*: top 50, cn, networking* 1. ![](https://www.google.com/s2/favicons?domain=https://gist.github.com) [GitHubGist (https://gist.github.com)](https://gist.github.com)*: top 50, coding, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://vk.com/) [VK (https://vk.com/)](https://vk.com/)*: top 50, ru* @@ -127,7 +127,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://tripadvisor.com/) [TripAdvisor (https://tripadvisor.com/)](https://tripadvisor.com/)*: top 500, travel* 1. ![](https://www.google.com/s2/favicons?domain=https://www.academia.edu/) [Academia.edu (https://www.academia.edu/)](https://www.academia.edu/)*: top 500, id* 1. ![](https://www.google.com/s2/favicons?domain=https://www.mercadolivre.com.br) [mercadolivre (https://www.mercadolivre.com.br)](https://www.mercadolivre.com.br)*: top 500, br* -1. ![](https://www.google.com/s2/favicons?domain=https://www.crunchyroll.com/) [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 500, forum, movies, us* +1. ![](https://www.google.com/s2/favicons?domain=https://www.crunchyroll.com/) [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 500, forum, movies, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://wordpress.org/) [WordPressOrg (https://wordpress.org/)](https://wordpress.org/)*: top 500, in* 1. ![](https://www.google.com/s2/favicons?domain=https://ameblo.jp) [Ameblo (https://ameblo.jp)](https://ameblo.jp)*: top 500, blog, jp* 1. ![](https://www.google.com/s2/favicons?domain=https://unsplash.com/) [Unsplash (https://unsplash.com/)](https://unsplash.com/)*: top 500, art, photo* @@ -242,7 +242,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://pastebin.com/) [Pastebin (https://pastebin.com/)](https://pastebin.com/)*: top 5K, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://gfycat.com/) [gfycat (https://gfycat.com/)](https://gfycat.com/)*: top 5K, photo, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://last.fm/) [last.fm (https://last.fm/)](https://last.fm/)*: top 5K, music* -1. ![](https://www.google.com/s2/favicons?domain=https://windy.com/) [Windy (https://windy.com/)](https://windy.com/)*: top 5K, in, jp, kr, pl, us* +1. ![](https://www.google.com/s2/favicons?domain=https://windy.com/) [Windy (https://windy.com/)](https://windy.com/)*: top 5K, in, jp, kr, pl, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://profile.hatena.ne.jp) [profile.hatena.ne.jp (https://profile.hatena.ne.jp)](https://profile.hatena.ne.jp)*: top 5K, jp* 1. ![](https://www.google.com/s2/favicons?domain=https://bodyspace.bodybuilding.com/) [BodyBuilding (https://bodyspace.bodybuilding.com/)](https://bodyspace.bodybuilding.com/)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://community.icons8.com) [community.icons8.com (https://community.icons8.com)](https://community.icons8.com)*: top 5K, forum, in* @@ -258,7 +258,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://jsfiddle.net) [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing* 1. ![](https://www.google.com/s2/favicons?domain=https://ru.pathofexile.com) [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us* 1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru* -1. ![](https://www.google.com/s2/favicons?domain=https://www.metacritic.com/) [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us* +1. ![](https://www.google.com/s2/favicons?domain=https://www.metacritic.com/) [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.digitalocean.com/) [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech* 1. ![](https://www.google.com/s2/favicons?domain=http://www.jeuxvideo.com) [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming* 1. ![](https://www.google.com/s2/favicons?domain=https://forum.shiftdelete.net) [ShiftDelete (https://forum.shiftdelete.net)](https://forum.shiftdelete.net)*: top 5K, forum, tr*, search is disabled @@ -337,7 +337,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://www.buymeacoffee.com/) [BuyMeACoffee (https://www.buymeacoffee.com/)](https://www.buymeacoffee.com/)*: top 5K, in* 1. ![](https://www.google.com/s2/favicons?domain=https://muckrack.com) [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.fixya.com) [fixya (https://www.fixya.com)](https://www.fixya.com)*: top 5K, us* -1. ![](https://www.google.com/s2/favicons?domain=https://lolchess.gg/) [Lolchess (https://lolchess.gg/)](https://lolchess.gg/)*: top 5K, kr* +1. ![](https://www.google.com/s2/favicons?domain=https://lolchess.gg/) [Lolchess (https://lolchess.gg/)](https://lolchess.gg/)*: top 5K, kr*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.ifttt.com/) [IFTTT (https://www.ifttt.com/)](https://www.ifttt.com/)*: top 5K, tech* 1. ![](https://www.google.com/s2/favicons?domain=https://www.minds.com) [www.minds.com (https://www.minds.com)](https://www.minds.com)*: top 5K, in* 1. ![](https://www.google.com/s2/favicons?domain=https://forums.imore.com) [forums.imore.com (https://forums.imore.com)](https://forums.imore.com)*: top 5K, forum, us*, search is disabled @@ -396,7 +396,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://about.me/) [About.me (https://about.me/)](https://about.me/)*: top 10K, blog, in* 1. ![](https://www.google.com/s2/favicons?domain=https://www.fark.com/) [Fark (https://www.fark.com/)](https://www.fark.com/)*: top 10K, forum, news* 1. ![](https://www.google.com/s2/favicons?domain=https://www.reverbnation.com/) [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us* -1. ![](https://www.google.com/s2/favicons?domain=https://www.glavbukh.ru) [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru* +1. ![](https://www.google.com/s2/favicons?domain=https://www.glavbukh.ru) [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.trakt.tv/) [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr* 1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, au* 1. ![](https://www.google.com/s2/favicons?domain=https://pandia.ru) [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru* @@ -515,7 +515,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://forums.indiegala.com) [forums.indiegala.com (https://forums.indiegala.com)](https://forums.indiegala.com)*: top 100K, forum, us* 1. ![](https://www.google.com/s2/favicons?domain=https://ptvintern.picarto.tv) [Picarto (https://ptvintern.picarto.tv)](https://ptvintern.picarto.tv)*: top 100K, art, streaming* 1. ![](https://www.google.com/s2/favicons?domain=https://www.neoseeker.com) [Neoseeker (https://www.neoseeker.com)](https://www.neoseeker.com)*: top 100K, us* -1. ![](https://www.google.com/s2/favicons?domain=https://community.infosecinstitute.com) [InfosecInstitute (https://community.infosecinstitute.com)](https://community.infosecinstitute.com)*: top 100K, us* +1. ![](https://www.google.com/s2/favicons?domain=https://community.infosecinstitute.com) [InfosecInstitute (https://community.infosecinstitute.com)](https://community.infosecinstitute.com)*: top 100K, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://armorgames.com) [Armorgames (https://armorgames.com)](https://armorgames.com)*: top 100K, gaming, us* 1. ![](https://www.google.com/s2/favicons?domain=https://giters.com) [giters.com (https://giters.com)](https://giters.com)*: top 100K, coding* 1. ![](https://www.google.com/s2/favicons?domain=https://teamtreehouse.com) [teamtreehouse.com (https://teamtreehouse.com)](https://teamtreehouse.com)*: top 100K, us* @@ -556,7 +556,7 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://www.donationalerts.com/) [DonationsAlerts (https://www.donationalerts.com/)](https://www.donationalerts.com/)*: top 100K, finance, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://www.trueachievements.com) [TrueAchievements (https://www.trueachievements.com)](https://www.trueachievements.com)*: top 100K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://jimdosite.com/) [Jimdo (https://jimdosite.com/)](https://jimdosite.com/)*: top 100K, jp* -1. ![](https://www.google.com/s2/favicons?domain=https://club.cnews.ru/) [club.cnews.ru (https://club.cnews.ru/)](https://club.cnews.ru/)*: top 100K, blog, ru* +1. ![](https://www.google.com/s2/favicons?domain=https://club.cnews.ru/) [club.cnews.ru (https://club.cnews.ru/)](https://club.cnews.ru/)*: top 100K, blog, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://psnprofiles.com/) [PSNProfiles.com (https://psnprofiles.com/)](https://psnprofiles.com/)*: top 100K, gaming*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://donorbox.org) [donorbox (https://donorbox.org)](https://donorbox.org)*: top 100K, finance* 1. ![](https://www.google.com/s2/favicons?domain=https://www.sbazar.cz/) [Sbazar.cz (https://www.sbazar.cz/)](https://www.sbazar.cz/)*: top 100K, cz, shopping* @@ -3100,20 +3100,20 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=https://ngl.link) [ngl.link (https://ngl.link)](https://ngl.link)*: top 100M, q&a* 1. ![](https://www.google.com/s2/favicons?domain=https://bitpapa.com) [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto* -The list was updated at (2023-10-27 19:46:13.899883 UTC) +The list was updated at (2024-05-13 20:09:33.626841+00:00 UTC) ## Statistics -Enabled/total sites: 2802/3096 = 90.5% +Enabled/total sites: 2794/3096 = 90.25% -Incomplete message checks: 447/2802 = 15.95% (false positive risks) +Incomplete message checks: 438/2794 = 15.68% (false positive risks) -Status code checks: 720/2802 = 25.7% (false positive risks) +Status code checks: 722/2794 = 25.84% (false positive risks) -False positive risk (total): 41.65% +False positive risk (total): 41.52% Top 20 profile URLs: - (796) `{urlMain}/index/8-0-{username} (uCoz)` -- (294) `/{username}` +- (295) `/{username}` - (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)` - (158) `/user/{username}` - (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)` @@ -3138,16 +3138,16 @@ Top 20 tags: - (279) `forum` - (49) `gaming` - (25) `coding` -- (22) `photo` +- (21) `photo` - (19) `news` - (18) `blog` -- (16) `music` +- (15) `music` - (14) `tech` -- (13) `freelance` +- (12) `freelance` - (11) `sharing` -- (11) `art` - (11) `finance` - (10) `dating` +- (10) `art` - (10) `shopping` - (9) `movies` - (8) `hobby` diff --git a/test-requirements.txt b/test-requirements.txt index c8dd66b..3fb2589 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,8 +1,8 @@ -reportlab==4.0.4 -flake8==6.1.0 -pytest==7.2.0 -pytest-asyncio==0.16.0;python_version<"3.7" -pytest-asyncio==0.20.1;python_version>="3.7" -pytest-cov==4.0.0 -pytest-httpserver~=1.0.8 -pytest-rerunfailures==12.0 +reportlab>=4.0.4 +flake8>=6.1.0 +pytest>=7.2.0 +pytest-asyncio>=0.16.0;python_version<"3.7" +pytest-asyncio>=0.20.1;python_version>="3.7" +pytest-cov>=4.0.0 +pytest-httpserver>=1.0.8 +pytest-rerunfailures>=12.0 diff --git a/utils/update_site_data.py b/utils/update_site_data.py index f197475..39fe98e 100755 --- a/utils/update_site_data.py +++ b/utils/update_site_data.py @@ -3,13 +3,12 @@ This module generates the listing of supported sites in file `SITES.md` and pretty prints file with sites data. """ -import json import sys import requests import logging import threading import xml.etree.ElementTree as ET -from datetime import datetime +from datetime import datetime, timezone from argparse import ArgumentParser, RawDescriptionHelpFormatter from maigret.maigret import MaigretDatabase @@ -27,9 +26,10 @@ RANKS.update({ SEMAPHORE = threading.Semaphore(20) + def get_rank(domain_to_query, site, print_errors=True): with SEMAPHORE: - #Retrieve ranking data via alexa API + # Retrieve ranking data via alexa API url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}" xml_data = requests.get(url).text root = ET.fromstring(xml_data) @@ -137,7 +137,7 @@ Rank data fetched from Alexa by domains. site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n') db.update_site(site) - site_file.write(f'\nThe list was updated at ({datetime.utcnow()} UTC)\n') + site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc)} UTC)\n') db.save_to_file(args.base_file) statistics_text = db.get_db_stats(is_markdown=True)