Fix false positives (#370)

* Fixed several false positives, improved statistics info

* Disabled some sites, fixed fp percent count method

* Updated site list and statistics
This commit is contained in:
Soxoj
2022-02-26 16:01:22 +03:00
committed by GitHub
parent dcf5181e28
commit bc787cdf51
3 changed files with 35 additions and 29 deletions
+19 -13
View File
@@ -1880,7 +1880,7 @@
],
"checkType": "message",
"absenceStrs": [
"\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044f \u0441 \u0442\u0430\u043a\u0438\u043c \u0438\u043c\u0435\u043d\u0435\u043c \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442!"
"\u041e\u0448\u0438\u0431\u043a\u0430 / \u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044f \u0441 \u0442\u0430\u043a\u0438\u043c \u0438\u043c\u0435\u043d\u0435\u043c \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442"
],
"alexaRank": 2303903,
"urlMain": "https://bgforum.ru",
@@ -3598,6 +3598,7 @@
"tags": [
"ru"
],
"disabled": true,
"checkType": "status_code",
"urlMain": "https://dinsk.su",
"url": "https://dinsk.su/user/{username}",
@@ -4571,6 +4572,7 @@
"tags": [
"ru"
],
"disabled": true,
"checkType": "status_code",
"alexaRank": 1225740,
"urlMain": "https://favera.ru",
@@ -8546,6 +8548,7 @@
"tags": [
"forum"
],
"disabled": true,
"checkType": "message",
"absenceStrs": [
"The specified member cannot be found"
@@ -9082,12 +9085,9 @@
},
"Movescount": {
"tags": [
"es",
"in",
"pk",
"ru",
"us"
"maps"
],
"disabled": true,
"checkType": "message",
"absenceStrs": [
"error=4&"
@@ -12891,16 +12891,19 @@
},
"Snooth": {
"tags": [
"in"
"news"
],
"checkType": "message",
"absenceStrs": [
"<title>Profiles on Snooth</title>"
"<title>Page not found"
],
"presenseStrs": [
"content=\"https://www.snooth.com/author/"
],
"alexaRank": 4088489,
"urlMain": "https://www.snooth.com/",
"url": "https://www.snooth.com/profiles/{username}/",
"usernameClaimed": "GregT",
"url": "https://www.snooth.com/author/{username}/",
"usernameClaimed": "joshua",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"SocialLibremOne": {
@@ -14989,7 +14992,7 @@
"video"
],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2NDU4Nzg1NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.Bs6VBcKPsl-5dqoThdAImBIex1mas1UcyG2pSnIYqYk"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2NDU4Nzk3NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.T8E8Vrx0sO-9WP4RdZGNQZw2EB1hYTIXbIguXIZbfNQ"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -24833,7 +24836,7 @@
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"hashnode.com": {
"hashnode": {
"tags": [
"in"
],
@@ -24846,7 +24849,7 @@
" name="
],
"absenceStrs": [
"></title><meta name="
"We can\u2019t find the page you\u2019re looking for!"
],
"urlMain": "https://hashnode.com",
"url": "https://hashnode.com/@{username}",
@@ -26947,6 +26950,9 @@
},
"forum.rastrnet.ru": {
"urlMain": "http://forum.rastrnet.ru",
"errors": {
"\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u043f\u0440\u043e\u0432\u043e\u0434\u044f\u0442\u0441\u044f \u0442\u0435\u0445\u043d\u0438\u0447\u0435\u0441\u043a\u0438\u0435 \u0440\u0430\u0431\u043e\u0442\u044b.": "Site error"
},
"engine": "vBulletin",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
+1 -1
View File
@@ -438,7 +438,7 @@ class MaigretDatabase:
url_type = site.get_url_template()
urls[url_type] = urls.get(url_type, 0) + 1
if site.check_type == 'message':
if site.check_type == 'message' and not site.disabled:
message_checks += 1
if site.absence_strs and site.presense_strs:
continue
+15 -15
View File
@@ -812,7 +812,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://bbs.huami.com) [bbs.huami.com (https://bbs.huami.com)](https://bbs.huami.com)*: top 10M, cn, in, ir, ru, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://amazfitwatchfaces.com) [AmazfitWatchFaces (https://amazfitwatchfaces.com)](https://amazfitwatchfaces.com)*: top 10M, ae, es, forum, gr, id, ir, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://moikrug.ru/) [MoiKrug (https://moikrug.ru/)](https://moikrug.ru/)*: top 10M, career, us*
1. ![](https://www.google.com/s2/favicons?domain=http://www.movescount.com) [Movescount (http://www.movescount.com)](http://www.movescount.com)*: top 10M, es, in, pk, ru, us*
1. ![](https://www.google.com/s2/favicons?domain=http://www.movescount.com) [Movescount (http://www.movescount.com)](http://www.movescount.com)*: top 10M, maps*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://tamtam.chat/) [TamTam (https://tamtam.chat/)](https://tamtam.chat/)*: top 10M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.velomania.ru/) [Velomania (https://forum.velomania.ru/)](https://forum.velomania.ru/)*: top 10M, forum, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.itvdn.com) [ITVDN Forum (https://forum.itvdn.com)](https://forum.itvdn.com)*: top 10M, forum, ru, ua*
@@ -1199,7 +1199,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://mastodon.xyz/) [mastodon.xyz (https://mastodon.xyz/)](https://mastodon.xyz/)*: top 10M, th*
1. ![](https://www.google.com/s2/favicons?domain=https://www.gays.com) [Gays (https://www.gays.com)](https://www.gays.com)*: top 10M, in*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=http://transit-club.com) [transit-club.com (http://transit-club.com)](http://transit-club.com)*: top 10M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://favera.ru) [Favera (https://favera.ru)](https://favera.ru)*: top 10M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://favera.ru) [Favera (https://favera.ru)](https://favera.ru)*: top 10M, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://soylentnews.org) [soylentnews (https://soylentnews.org)](https://soylentnews.org)*: top 10M, us*
1. ![](https://www.google.com/s2/favicons?domain=http://chan4chan.com/) [Chan4chan (http://chan4chan.com/)](http://chan4chan.com/)*: top 10M, hu*
1. ![](https://www.google.com/s2/favicons?domain=http://the-mainboard.com/index.php) [the-mainboard.com (http://the-mainboard.com/index.php)](http://the-mainboard.com/index.php)*: top 10M, forum, us*
@@ -1390,7 +1390,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://uaksu.forum24.ru/) [Uaksu (https://uaksu.forum24.ru/)](https://uaksu.forum24.ru/)*: top 10M, forum, ru, ua*
1. ![](https://www.google.com/s2/favicons?domain=http://0-3.ru) [0-3.RU (http://0-3.ru)](http://0-3.ru)*: top 10M, forum, ru*
1. ![](https://www.google.com/s2/favicons?domain=http://www.forumsi.org) [Forumsi (http://www.forumsi.org)](http://www.forumsi.org)*: top 10M, forum, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.snooth.com/) [Snooth (https://www.snooth.com/)](https://www.snooth.com/)*: top 10M, in*
1. ![](https://www.google.com/s2/favicons?domain=https://www.snooth.com/) [Snooth (https://www.snooth.com/)](https://www.snooth.com/)*: top 10M, news*
1. ![](https://www.google.com/s2/favicons?domain=http://soft-deniz.ucoz.ru) [soft-deniz.ucoz.ru (http://soft-deniz.ucoz.ru)](http://soft-deniz.ucoz.ru)*: top 10M*
1. ![](https://www.google.com/s2/favicons?domain=http://oih.at.ua) [oih.at.ua (http://oih.at.ua)](http://oih.at.ua)*: top 10M, ua*
1. ![](https://www.google.com/s2/favicons?domain=http://gorodanapa.ru/) [Gorodanapa (http://gorodanapa.ru/)](http://gorodanapa.ru/)*: top 10M, ru*, search is disabled
@@ -1559,7 +1559,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://cyberclock.cc) [Cyberclock (https://cyberclock.cc)](https://cyberclock.cc)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=http://www.cydak.ru) [Cydak (http://www.cydak.ru)](http://www.cydak.ru)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.designspiration.net/) [Designspiration (https://www.designspiration.net/)](https://www.designspiration.net/)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://dinsk.su) [Dinsk (https://dinsk.su)](https://dinsk.su)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://dinsk.su) [Dinsk (https://dinsk.su)](https://dinsk.su)*: top 100M, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://forum.djangoproject.co) [Djangoproject.co (https://forum.djangoproject.co)](https://forum.djangoproject.co)*: top 100M, coding, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://www.dublikat.shop) [Dublikat (https://www.dublikat.shop)](https://www.dublikat.shop)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=http://eightbit.me/) [Eightbit (http://eightbit.me/)](http://eightbit.me/)*: top 100M*
@@ -1586,7 +1586,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://macqa.ru) [Macqa (https://macqa.ru)](https://macqa.ru)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://magiimir.com) [Magiimir (https://magiimir.com)](https://magiimir.com)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://mamochki.by/) [Mamochki (https://mamochki.by/)](https://mamochki.by/)*: top 100M, by, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://mastersofcrypto.com) [Mastersofcrypto (https://mastersofcrypto.com)](https://mastersofcrypto.com)*: top 100M, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://mastersofcrypto.com) [Mastersofcrypto (https://mastersofcrypto.com)](https://mastersofcrypto.com)*: top 100M, forum*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=http://mixlr.com/) [Mixlr (http://mixlr.com/)](http://mixlr.com/)*: top 100M, gb*
1. ![](https://www.google.com/s2/favicons?domain=https://www.munzee.com/) [Munzee (https://www.munzee.com/)](https://www.munzee.com/)*: top 100M, gb*
1. ![](https://www.google.com/s2/favicons?domain=http://murmansk-life.ru) [MurmanskLife (http://murmansk-life.ru)](http://murmansk-life.ru)*: top 100M, ru*
@@ -2313,7 +2313,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=http://mednolit.ru) [mednolit.ru (http://mednolit.ru)](http://mednolit.ru)*: top 100M, ru*
1. ![](https://www.google.com/s2/favicons?domain=http://mikele-loconte.ru) [mikele-loconte.ru (http://mikele-loconte.ru)](http://mikele-loconte.ru)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=http://mkuniverse.ru) [mkuniverse.ru (http://mkuniverse.ru)](http://mkuniverse.ru)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://hashnode.com) [hashnode.com (https://hashnode.com)](https://hashnode.com)*: top 100M, in*
1. ![](https://www.google.com/s2/favicons?domain=https://hashnode.com) [hashnode (https://hashnode.com)](https://hashnode.com)*: top 100M, in*
1. ![](https://www.google.com/s2/favicons?domain=https://www.change.org) [www.change.org (https://www.change.org)](https://www.change.org)*: top 100M, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.ifunny.co) [www.ifunny.co (https://www.ifunny.co)](https://www.ifunny.co)*: top 100M, us*
1. ![](https://www.google.com/s2/favicons?domain=https://localcryptosapi.com) [LocalCryptos (https://localcryptosapi.com)](https://localcryptosapi.com)*: top 100M*
@@ -2599,12 +2599,12 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.hozpitality.com) [hozpitality (https://www.hozpitality.com)](https://www.hozpitality.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://kazanlashkigalab.com) [kazanlashkigalab.com (https://kazanlashkigalab.com)](https://kazanlashkigalab.com)*: top 100M, kz*
Alexa.com rank data fetched at (2022-02-26 12:19:53.127789 UTC)
Alexa.com rank data fetched at (2022-02-26 12:55:54.605333 UTC)
## Statistics
Enabled/total sites: 2447/2595 = 94.3%
Enabled/total sites: 2443/2595 = 94.14%
Incomplete checks: 582/1978 = 29.42% (false positive risks)
Incomplete checks: 525/1853 = 28.33% (false positive risks)
Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
@@ -2625,18 +2625,18 @@ Top 20 profile URLs:
- (18) `/forum/members/?username={username}`
- (18) `/forum/search.php?keywords=&terms=all&author={username}`
- (17) `/search.php?keywords=&terms=all&author={username}`
- (15) `/author/{username}`
- (16) `/author/{username}`
- (14) `/profile.php?mode=viewprofile&u={username}`
Top 20 tags:
- (255) `forum`
- (271) `forum`
- (79) `NO_TAGS` (non-standard)
- (50) `gaming`
- (40) `NO_TAGS` (non-standard)
- (24) `photo`
- (24) `coding`
- (23) `photo`
- (18) `news`
- (18) `blog`
- (17) `music`
- (18) `music`
- (15) `tech`
- (13) `freelance`
- (12) `sharing`
@@ -2645,7 +2645,7 @@ Top 20 tags:
- (10) `dating`
- (10) `art`
- (9) `hobby`
- (8) `movies`
- (9) `movies`
- (7) `sport`
- (7) `hacking`
- (5) `stock`