From d4d525647ce1c1e84717a9e4d3c9f35b751e5ffd Mon Sep 17 00:00:00 2001
From: fen0s <37670363+fen0s@users.noreply.github.com>
Date: Mon, 3 Oct 2022 23:00:48 +0300
Subject: [PATCH] fix sites from issues (#680)
* Update data.json
* Update sites.md
---
maigret/resources/data.json | 18 ++++++++++--------
sites.md | 24 ++++++++++++++----------
2 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/maigret/resources/data.json b/maigret/resources/data.json
index d181b1c..3b41fe3 100644
--- a/maigret/resources/data.json
+++ b/maigret/resources/data.json
@@ -3152,7 +3152,8 @@
"checkType": "message",
"absenceStrs": [
"
Internet Chess Club Forum | Forum Home",
- "The member profile you requested is currently not available"
+ "The member profile you requested is currently not available",
+ "There are no records on this user."
],
"alexaRank": 325766,
"urlMain": "https://www.chessclub.com",
@@ -29684,6 +29685,7 @@
"alexaRank": 349163
},
"Reddit Search (Pushshift)": {
+ "disabled": true,
"urlMain": "https://camas.github.io/reddit-search/",
"url": "https://camas.github.io/reddit-search/#{{\"author\":\"{username}\",\"resultSize\":100}}",
"urlProbe": "https://api.pushshift.io/reddit/comment/search?html_decode=true&author={username}&size=100",
@@ -33756,23 +33758,23 @@
"checkType": "status_code",
"url": "https://scholar.harvard.edu/{username}",
"urlMain": "https://scholar.harvard.edu/",
- "usernameClaimed": "ousmanekane",
+ "usernameClaimed": "ousmanekane",
"usernameUnclaimed": "noonewouldeverusethis7"
- },
- "Google Scholar": {
+ },
+ "Google Scholar": {
"checkType": "status_code",
"url": "https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q={username}&btnG=",
"urlMain": "https://scholar.google.com/",
"usernameClaimed": "Blue",
"usernameUnclaimed": "noonewouldeverusethis7"
- },
- "HuggingFace": {
+ },
+ "HuggingFace": {
"checkType": "status_code",
"url": "https://huggingface.co/{username}",
"urlMain": "https://huggingface.co/",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7"
- },
+ },
"dlive.tv": {
"absenceStrs": [
"Channel not found"
@@ -33798,7 +33800,7 @@
"urlMain": "https://manifold.markets/",
"usernameClaimed": "ManifoldMarkets",
"usernameUnclaimed": "noonewouldeverusethis7"
- },
+ },
"instaprofi.ru": {
"absenceStrs": [
"/static/img/pages/profile/nobody.jpg"
diff --git a/sites.md b/sites.md
index a183dca..1d1c6c1 100644
--- a/sites.md
+++ b/sites.md
@@ -1,5 +1,5 @@
-## List of supported sites (search methods): total 3063
+## List of supported sites (search methods): total 3067
Rank data fetched from Alexa by domains.
@@ -851,7 +851,7 @@ Rank data fetched from Alexa by domains.
1.  [dumskaya.net (https://dumskaya.net)](https://dumskaya.net)*: top 100K, ru*
1.  [WOW Circle (https://forum.wowcircle.net)](https://forum.wowcircle.net)*: top 100K, forum, it, ru*
1.  [Dating.Ru (http://dating.ru)](http://dating.ru)*: top 100K, dating, ru, us*
-1.  [Reddit Search (Pushshift) (https://camas.github.io/reddit-search/)](https://camas.github.io/reddit-search/)*: top 100K, discussion, news*
+1.  [Reddit Search (Pushshift) (https://camas.github.io/reddit-search/)](https://camas.github.io/reddit-search/)*: top 100K, discussion, news*, search is disabled
1.  [Antichat (https://forum.antichat.ru/)](https://forum.antichat.ru/)*: top 100K, forum, ru, us*
1.  [F-droid (https://forum.f-droid.org)](https://forum.f-droid.org)*: top 100K, forum, in, us*
1.  [Math10 (https://www.math10.com/)](https://www.math10.com/)*: top 100K, forum, ru, us*
@@ -3066,21 +3066,25 @@ Rank data fetched from Alexa by domains.
1.  [lifeintravel.ru (https://lifeintravel.ru/forum)](https://lifeintravel.ru/forum)*: top 100M, forum, ru*
1.  [make-ups.ru (http://make-ups.ru/forum)](http://make-ups.ru/forum)*: top 100M, forum, ru*
1.  [rest.feo.ru (https://rest.feo.ru/forum)](https://rest.feo.ru/forum)*: top 100M, forum, ru*
+1.  [Harvard Scholar (https://scholar.harvard.edu/)](https://scholar.harvard.edu/)*: top 100M*
+1.  [Google Scholar (https://scholar.google.com/)](https://scholar.google.com/)*: top 100M*
+1.  [HuggingFace (https://huggingface.co/)](https://huggingface.co/)*: top 100M*
+1.  [ManifoldMarkets (https://manifold.markets/)](https://manifold.markets/)*: top 100M*
-The list was updated at (2022-09-29 16:30:45.625433 UTC)
+The list was updated at (2022-10-03 19:57:24.497052 UTC)
## Statistics
-Enabled/total sites: 2825/3063 = 92.23%
+Enabled/total sites: 2828/3067 = 92.21%
-Incomplete message checks: 474/2825 = 16.78% (false positive risks)
+Incomplete message checks: 474/2828 = 16.76% (false positive risks)
-Status code checks: 718/2825 = 25.42% (false positive risks)
+Status code checks: 722/2828 = 25.53% (false positive risks)
-False positive risk (total): 42.2%
+False positive risk (total): 42.29%
Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
-- (289) `/{username}`
+- (292) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (160) `/user/{username}`
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
@@ -3101,12 +3105,12 @@ Top 20 profile URLs:
- (17) `/search.php?keywords=&terms=all&author={username}`
Top 20 tags:
-- (297) `NO_TAGS` (non-standard)
+- (301) `NO_TAGS` (non-standard)
- (275) `forum`
- (47) `gaming`
- (23) `photo`
- (23) `coding`
-- (19) `news`
+- (20) `news`
- (19) `blog`
- (16) `music`
- (16) `tech`