mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
Fixed/Disabled sites. Update requirements.txt (#1517)
* Fixed/Disabled sites. Update requirements.txt
  fixed_sites: AllRecipes, Linktree, CreativeMarket, ImgInn, Shutterstock, Contently
  disabled_sites: Forums.ea.com, CrunchyRoll, Windy, MetaCritic, InfosecInstitute, Armchairgm.fandom.com, Bleach.fandom.com
  Update requirements to prevent dependency conflicts.
* Update requirements.txt
  Update requirements.txt to prevent dependency conflicts
* Update requirements.txt
* Update sites.md
* fixed_sites: Armchairgm.fandom.com, Bleach.fandom.com, Battleraprus. disabled_sites: MicrosoftTechNet, club.cnews.ru, Scorcher
* fixed_sites: Armchairgm.fandom.com, Bleach.fandom.com, Battleraprus. disabled_sites: MicrosoftTechNet, club.cnews.ru, Scorcher
This commit is contained in:
+55
-33
@@ -656,7 +656,12 @@
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"<title>Allrecipes Member | Allrecipes</title>"
|
||||
"Page Not Found.",
|
||||
"You may have mistyped the address, or the page may have moved."
|
||||
],
|
||||
"presenseStrs": [
|
||||
"Saved Items & Collections",
|
||||
"{username}"
|
||||
],
|
||||
"alexaRank": 983,
|
||||
"urlMain": "https://www.allrecipes.com/",
|
||||
@@ -1314,11 +1319,7 @@
|
||||
"us",
|
||||
"wiki"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"does not exist",
|
||||
"This user has not filled out their profile page yet."
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 80,
|
||||
"urlMain": "https://armchairgm.fandom.com/",
|
||||
"url": "https://armchairgm.fandom.com/wiki/User:{username}",
|
||||
@@ -1909,13 +1910,7 @@
|
||||
"us",
|
||||
"wiki"
|
||||
],
|
||||
"checkType": "message",
|
||||
"presenseStrs": [
|
||||
"user-profile-navigation"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"\u041e\u0448\u0438\u0431\u043a\u0430</h1>"
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 80,
|
||||
"urlMain": "https://battleraprus.fandom.com/ru",
|
||||
"url": "https://battleraprus.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}",
|
||||
@@ -2293,11 +2288,7 @@
|
||||
"ru",
|
||||
"wiki"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"does not exist",
|
||||
"\u042d\u0442\u043e\u0442 \u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a \u043f\u043e\u043a\u0430 \u043d\u0435 \u0437\u0430\u043f\u043e\u043b\u043d\u0438\u043b \u0441\u0432\u043e\u0439 \u043f\u0440\u043e\u0444\u0438\u043b\u044c."
|
||||
],
|
||||
"checkType": "status_code",
|
||||
"alexaRank": 80,
|
||||
"urlMain": "https://bleach.fandom.com/ru",
|
||||
"url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}",
|
||||
@@ -3484,10 +3475,12 @@
|
||||
"freelance",
|
||||
"in"
|
||||
],
|
||||
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"We can't find that page!"
|
||||
"Request A Meeting</a></div>"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"<h4>\nPROJECTS"
|
||||
],
|
||||
"alexaRank": 11587,
|
||||
"urlMain": "https://contently.com/",
|
||||
@@ -3579,7 +3572,11 @@
|
||||
},
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"The page you were looking for was not found."
|
||||
"Whoomp, there it isn't...",
|
||||
"It looks like the page you\u2019re looking for is no longer available. "
|
||||
],
|
||||
"presenseStrs": [
|
||||
"Likes"
|
||||
],
|
||||
"alexaRank": 3054,
|
||||
"urlMain": "https://creativemarket.com/",
|
||||
@@ -3616,6 +3613,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Crunchyroll": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"forum",
|
||||
"movies",
|
||||
@@ -7939,6 +7937,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"InfosecInstitute": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"us"
|
||||
],
|
||||
@@ -9267,6 +9266,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Lolchess": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"kr"
|
||||
],
|
||||
@@ -9277,6 +9277,9 @@
|
||||
"absenceStrs": [
|
||||
"No search results"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"results were displayed out of"
|
||||
],
|
||||
"alexaRank": 4911,
|
||||
"urlMain": "https://lolchess.gg/",
|
||||
"url": "https://lolchess.gg/profile/na/{username}",
|
||||
@@ -10060,6 +10063,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"MicrosoftTechNet": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"us"
|
||||
],
|
||||
@@ -14050,6 +14054,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Scorcher": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"ru"
|
||||
],
|
||||
@@ -14432,11 +14437,17 @@
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"<img loading=\"lazy\" src=\"/assets/images/ent_not_found_404.png\" alt=\"404\""
|
||||
"This surprising...",
|
||||
"Unfortunately, we can't find what you're looking for.",
|
||||
"<title>Not Found | Shutterstock</title>"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"{username}",
|
||||
"Information"
|
||||
],
|
||||
"alexaRank": 184,
|
||||
"urlMain": "https://www.shutterstock.com",
|
||||
"url": "https://www.shutterstock.com/fi/g/{username}/about",
|
||||
"url": "https://www.shutterstock.com/g/{username}/about",
|
||||
"usernameClaimed": "adam",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
@@ -17846,6 +17857,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Windy": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"in",
|
||||
"jp",
|
||||
@@ -19158,6 +19170,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"club.cnews.ru": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"blog",
|
||||
"ru"
|
||||
@@ -20715,13 +20728,17 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis77777"
|
||||
},
|
||||
"metacritic": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"us"
|
||||
],
|
||||
"regexCheck": "^(?![-_])[A-Za-z0-9-_]{3,15}$",
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"User not found"
|
||||
"This user hasn\u2019t rated anything yet"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"Avg. User score"
|
||||
],
|
||||
"alexaRank": 2409,
|
||||
"urlMain": "https://www.metacritic.com/",
|
||||
@@ -26829,11 +26846,18 @@
|
||||
"tags": [
|
||||
"links"
|
||||
],
|
||||
"engine": "engine404",
|
||||
"checkType": "message",
|
||||
"absenceStrs": [
|
||||
"The page you\u2019re looking for doesn\u2019t exist.",
|
||||
"Want this to be your username?"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"Join {username} on Linktree today"
|
||||
],
|
||||
"urlMain": "https://linktr.ee",
|
||||
"url": "https://linktr.ee/{username}",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
"usernameClaimed": "red",
|
||||
"usernameClaimed": "Blisscartoos",
|
||||
"alexaRank": 134
|
||||
},
|
||||
"jsfiddle.net": {
|
||||
@@ -34067,16 +34091,14 @@
|
||||
},
|
||||
"ImgInn": {
|
||||
"absenceStrs": [
|
||||
"Page Not Found</div>"
|
||||
"Page Not Found",
|
||||
"The content has been deleted"
|
||||
],
|
||||
"presenseStrs": [
|
||||
"username",
|
||||
"/{username}/",
|
||||
" data-username=",
|
||||
"name",
|
||||
" data-name="
|
||||
"followers",
|
||||
"{username}"
|
||||
],
|
||||
"url": "https://imginn.com/tagged/{username}/",
|
||||
"url": "https://imginn.com/{username}/",
|
||||
"urlMain": "https://imginn.com",
|
||||
"usernameClaimed": "morgen_shtern",
|
||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||
|
||||
+36
-36
@@ -1,39 +1,39 @@
|
||||
aiodns==3.0.0
|
||||
aiohttp==3.8.6
|
||||
aiohttp-socks==0.7.1
|
||||
aiodns>=3.0.0
|
||||
aiohttp>=3.8.6
|
||||
aiohttp-socks>=0.7.1
|
||||
arabic-reshaper~=3.0.0
|
||||
async-timeout==4.0.3
|
||||
attrs==22.2.0
|
||||
certifi==2023.7.22
|
||||
chardet==5.0.0
|
||||
colorama==0.4.6
|
||||
future==0.18.3
|
||||
future-annotations==1.0.0
|
||||
html5lib==1.1
|
||||
idna==3.4
|
||||
Jinja2==3.1.3
|
||||
lxml==4.9.2
|
||||
MarkupSafe==2.1.1
|
||||
mock==4.0.3
|
||||
multidict==6.0.4
|
||||
pycountry==22.3.5
|
||||
PyPDF2==3.0.1
|
||||
PySocks==1.7.1
|
||||
python-bidi==0.4.2
|
||||
requests==2.31.0
|
||||
requests-futures==1.0.0
|
||||
six==1.16.0
|
||||
async-timeout>=4.0.3
|
||||
attrs>=22.2.0
|
||||
certifi>=2023.7.22
|
||||
chardet>=5.0.0
|
||||
colorama>=0.4.6
|
||||
future>=0.18.3
|
||||
future-annotations>=1.0.0
|
||||
html5lib>=1.1
|
||||
idna>=3.4
|
||||
Jinja2>=3.1.3
|
||||
lxml>=4.9.2
|
||||
MarkupSafe>=2.1.1
|
||||
mock>=4.0.3
|
||||
multidict>=6.0.4
|
||||
pycountry>=22.3.5
|
||||
PyPDF2>=3.0.1
|
||||
PySocks>=1.7.1
|
||||
python-bidi>=0.4.2
|
||||
requests>=2.31.0
|
||||
requests-futures>=1.0.0
|
||||
six>=1.16.0
|
||||
socid-extractor>=0.0.24
|
||||
soupsieve==2.3.2.post1
|
||||
stem==1.8.1
|
||||
torrequest==0.1.0
|
||||
tqdm==4.66.1
|
||||
typing-extensions==4.8.0
|
||||
webencodings==0.5.1
|
||||
soupsieve>=2.3.2.post1
|
||||
stem>=1.8.1
|
||||
torrequest>=0.1.0
|
||||
tqdm>=4.66.1
|
||||
typing-extensions>=4.8.0
|
||||
webencodings>=0.5.1
|
||||
xhtml2pdf~=0.2.11
|
||||
XMind==1.2.0
|
||||
yarl==1.8.2
|
||||
networkx==2.6.3
|
||||
pyvis==0.2.1
|
||||
reportlab==3.6.13
|
||||
cloudscraper==1.2.71
|
||||
XMind>=1.2.0
|
||||
yarl>=1.8.2
|
||||
networkx>=2.6.3
|
||||
pyvis>=0.2.1
|
||||
reportlab>=3.6.13
|
||||
cloudscraper>=1.2.71
|
||||
|
||||
@@ -14,7 +14,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Wikipedia (https://www.wikipedia.org/)](https://www.wikipedia.org/)*: top 50, wiki*
|
||||
1.  [Reddit (https://www.reddit.com/)](https://www.reddit.com/)*: top 50, discussion, news*
|
||||
1.  [social.msdn.microsoft.com (https://social.msdn.microsoft.com)](https://social.msdn.microsoft.com)*: top 50, us*
|
||||
1.  [MicrosoftTechNet (https://social.technet.microsoft.com)](https://social.technet.microsoft.com)*: top 50, us*
|
||||
1.  [MicrosoftTechNet (https://social.technet.microsoft.com)](https://social.technet.microsoft.com)*: top 50, us*, search is disabled
|
||||
1.  [Weibo (https://weibo.com)](https://weibo.com)*: top 50, cn, networking*
|
||||
1.  [GitHubGist (https://gist.github.com)](https://gist.github.com)*: top 50, coding, sharing*
|
||||
1.  [VK (https://vk.com/)](https://vk.com/)*: top 50, ru*
|
||||
@@ -127,7 +127,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [TripAdvisor (https://tripadvisor.com/)](https://tripadvisor.com/)*: top 500, travel*
|
||||
1.  [Academia.edu (https://www.academia.edu/)](https://www.academia.edu/)*: top 500, id*
|
||||
1.  [mercadolivre (https://www.mercadolivre.com.br)](https://www.mercadolivre.com.br)*: top 500, br*
|
||||
1.  [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 500, forum, movies, us*
|
||||
1.  [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 500, forum, movies, us*, search is disabled
|
||||
1.  [WordPressOrg (https://wordpress.org/)](https://wordpress.org/)*: top 500, in*
|
||||
1.  [Ameblo (https://ameblo.jp)](https://ameblo.jp)*: top 500, blog, jp*
|
||||
1.  [Unsplash (https://unsplash.com/)](https://unsplash.com/)*: top 500, art, photo*
|
||||
@@ -242,7 +242,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Pastebin (https://pastebin.com/)](https://pastebin.com/)*: top 5K, sharing*
|
||||
1.  [gfycat (https://gfycat.com/)](https://gfycat.com/)*: top 5K, photo, sharing*
|
||||
1.  [last.fm (https://last.fm/)](https://last.fm/)*: top 5K, music*
|
||||
1.  [Windy (https://windy.com/)](https://windy.com/)*: top 5K, in, jp, kr, pl, us*
|
||||
1.  [Windy (https://windy.com/)](https://windy.com/)*: top 5K, in, jp, kr, pl, us*, search is disabled
|
||||
1.  [profile.hatena.ne.jp (https://profile.hatena.ne.jp)](https://profile.hatena.ne.jp)*: top 5K, jp*
|
||||
1.  [BodyBuilding (https://bodyspace.bodybuilding.com/)](https://bodyspace.bodybuilding.com/)*: top 5K, us*
|
||||
1.  [community.icons8.com (https://community.icons8.com)](https://community.icons8.com)*: top 5K, forum, in*
|
||||
@@ -258,7 +258,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing*
|
||||
1.  [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us*
|
||||
1.  [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*
|
||||
1.  [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*
|
||||
1.  [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*, search is disabled
|
||||
1.  [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech*
|
||||
1.  [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming*
|
||||
1.  [ShiftDelete (https://forum.shiftdelete.net)](https://forum.shiftdelete.net)*: top 5K, forum, tr*, search is disabled
|
||||
@@ -337,7 +337,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [BuyMeACoffee (https://www.buymeacoffee.com/)](https://www.buymeacoffee.com/)*: top 5K, in*
|
||||
1.  [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, us*
|
||||
1.  [fixya (https://www.fixya.com)](https://www.fixya.com)*: top 5K, us*
|
||||
1.  [Lolchess (https://lolchess.gg/)](https://lolchess.gg/)*: top 5K, kr*
|
||||
1.  [Lolchess (https://lolchess.gg/)](https://lolchess.gg/)*: top 5K, kr*, search is disabled
|
||||
1.  [IFTTT (https://www.ifttt.com/)](https://www.ifttt.com/)*: top 5K, tech*
|
||||
1.  [www.minds.com (https://www.minds.com)](https://www.minds.com)*: top 5K, in*
|
||||
1.  [forums.imore.com (https://forums.imore.com)](https://forums.imore.com)*: top 5K, forum, us*, search is disabled
|
||||
@@ -396,7 +396,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [About.me (https://about.me/)](https://about.me/)*: top 10K, blog, in*
|
||||
1.  [Fark (https://www.fark.com/)](https://www.fark.com/)*: top 10K, forum, news*
|
||||
1.  [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us*
|
||||
1.  [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*
|
||||
1.  [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*, search is disabled
|
||||
1.  [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr*
|
||||
1.  [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, au*
|
||||
1.  [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru*
|
||||
@@ -515,7 +515,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [forums.indiegala.com (https://forums.indiegala.com)](https://forums.indiegala.com)*: top 100K, forum, us*
|
||||
1.  [Picarto (https://ptvintern.picarto.tv)](https://ptvintern.picarto.tv)*: top 100K, art, streaming*
|
||||
1.  [Neoseeker (https://www.neoseeker.com)](https://www.neoseeker.com)*: top 100K, us*
|
||||
1.  [InfosecInstitute (https://community.infosecinstitute.com)](https://community.infosecinstitute.com)*: top 100K, us*
|
||||
1.  [InfosecInstitute (https://community.infosecinstitute.com)](https://community.infosecinstitute.com)*: top 100K, us*, search is disabled
|
||||
1.  [Armorgames (https://armorgames.com)](https://armorgames.com)*: top 100K, gaming, us*
|
||||
1.  [giters.com (https://giters.com)](https://giters.com)*: top 100K, coding*
|
||||
1.  [teamtreehouse.com (https://teamtreehouse.com)](https://teamtreehouse.com)*: top 100K, us*
|
||||
@@ -556,7 +556,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [DonationsAlerts (https://www.donationalerts.com/)](https://www.donationalerts.com/)*: top 100K, finance, ru*
|
||||
1.  [TrueAchievements (https://www.trueachievements.com)](https://www.trueachievements.com)*: top 100K, us*
|
||||
1.  [Jimdo (https://jimdosite.com/)](https://jimdosite.com/)*: top 100K, jp*
|
||||
1.  [club.cnews.ru (https://club.cnews.ru/)](https://club.cnews.ru/)*: top 100K, blog, ru*
|
||||
1.  [club.cnews.ru (https://club.cnews.ru/)](https://club.cnews.ru/)*: top 100K, blog, ru*, search is disabled
|
||||
1.  [PSNProfiles.com (https://psnprofiles.com/)](https://psnprofiles.com/)*: top 100K, gaming*, search is disabled
|
||||
1.  [donorbox (https://donorbox.org)](https://donorbox.org)*: top 100K, finance*
|
||||
1.  [Sbazar.cz (https://www.sbazar.cz/)](https://www.sbazar.cz/)*: top 100K, cz, shopping*
|
||||
@@ -3100,20 +3100,20 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [ngl.link (https://ngl.link)](https://ngl.link)*: top 100M, q&a*
|
||||
1.  [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto*
|
||||
|
||||
The list was updated at (2023-10-27 19:46:13.899883 UTC)
|
||||
The list was updated at (2024-05-13 20:09:33.626841+00:00 UTC)
|
||||
## Statistics
|
||||
|
||||
Enabled/total sites: 2802/3096 = 90.5%
|
||||
Enabled/total sites: 2794/3096 = 90.25%
|
||||
|
||||
Incomplete message checks: 447/2802 = 15.95% (false positive risks)
|
||||
Incomplete message checks: 438/2794 = 15.68% (false positive risks)
|
||||
|
||||
Status code checks: 720/2802 = 25.7% (false positive risks)
|
||||
Status code checks: 722/2794 = 25.84% (false positive risks)
|
||||
|
||||
False positive risk (total): 41.65%
|
||||
False positive risk (total): 41.52%
|
||||
|
||||
Top 20 profile URLs:
|
||||
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
|
||||
- (294) `/{username}`
|
||||
- (295) `/{username}`
|
||||
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
|
||||
- (158) `/user/{username}`
|
||||
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
|
||||
@@ -3138,16 +3138,16 @@ Top 20 tags:
|
||||
- (279) `forum`
|
||||
- (49) `gaming`
|
||||
- (25) `coding`
|
||||
- (22) `photo`
|
||||
- (21) `photo`
|
||||
- (19) `news`
|
||||
- (18) `blog`
|
||||
- (16) `music`
|
||||
- (15) `music`
|
||||
- (14) `tech`
|
||||
- (13) `freelance`
|
||||
- (12) `freelance`
|
||||
- (11) `sharing`
|
||||
- (11) `art`
|
||||
- (11) `finance`
|
||||
- (10) `dating`
|
||||
- (10) `art`
|
||||
- (10) `shopping`
|
||||
- (9) `movies`
|
||||
- (8) `hobby`
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
reportlab==4.0.4
|
||||
flake8==6.1.0
|
||||
pytest==7.2.0
|
||||
pytest-asyncio==0.16.0;python_version<"3.7"
|
||||
pytest-asyncio==0.20.1;python_version>="3.7"
|
||||
pytest-cov==4.0.0
|
||||
pytest-httpserver~=1.0.8
|
||||
pytest-rerunfailures==12.0
|
||||
reportlab>=4.0.4
|
||||
flake8>=6.1.0
|
||||
pytest>=7.2.0
|
||||
pytest-asyncio>=0.16.0;python_version<"3.7"
|
||||
pytest-asyncio>=0.20.1;python_version>="3.7"
|
||||
pytest-cov>=4.0.0
|
||||
pytest-httpserver>=1.0.8
|
||||
pytest-rerunfailures>=12.0
|
||||
|
||||
@@ -3,13 +3,12 @@
|
||||
This module generates the listing of supported sites in file `SITES.md`
|
||||
and pretty prints file with sites data.
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import requests
|
||||
import logging
|
||||
import threading
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
from maigret.maigret import MaigretDatabase
|
||||
@@ -27,9 +26,10 @@ RANKS.update({
|
||||
|
||||
SEMAPHORE = threading.Semaphore(20)
|
||||
|
||||
|
||||
def get_rank(domain_to_query, site, print_errors=True):
|
||||
with SEMAPHORE:
|
||||
#Retrieve ranking data via alexa API
|
||||
# Retrieve ranking data via alexa API
|
||||
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
|
||||
xml_data = requests.get(url).text
|
||||
root = ET.fromstring(xml_data)
|
||||
@@ -137,7 +137,7 @@ Rank data fetched from Alexa by domains.
|
||||
site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
|
||||
db.update_site(site)
|
||||
|
||||
site_file.write(f'\nThe list was updated at ({datetime.utcnow()} UTC)\n')
|
||||
site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc)} UTC)\n')
|
||||
db.save_to_file(args.base_file)
|
||||
|
||||
statistics_text = db.get_db_stats(is_markdown=True)
|
||||
|
||||
Reference in New Issue
Block a user