Fixed several false positives, improved statistics info (#368)

* Fixed several false positives, improved statistics info

* Updated site list and statistics
This commit is contained in:
Soxoj
2022-02-26 15:31:15 +03:00
committed by GitHub
parent 61452d56d3
commit dcf5181e28
3 changed files with 40 additions and 21 deletions
+29 -13
View File
@@ -5203,11 +5203,14 @@
], ],
"checkType": "message", "checkType": "message",
"presenceStrs": [ "presenceStrs": [
"userStatsTitle" "<meta content=\"name=Profile"
],
"absenceStrs": [
"<title>Foursquare "
], ],
"alexaRank": 3413, "alexaRank": 3413,
"urlMain": "https://ru.foursquare.com/", "urlMain": "https://foursquare.com/",
"url": "https://ru.foursquare.com/{username}", "url": "https://foursquare.com/{username}",
"usernameClaimed": "adam", "usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
@@ -6310,7 +6313,10 @@
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"Page not found." "Page not found"
],
"presenseStrs": [
"title=\"Gumroad\""
], ],
"alexaRank": 4728, "alexaRank": 4728,
"urlMain": "https://www.gumroad.com/", "urlMain": "https://www.gumroad.com/",
@@ -8857,7 +8863,10 @@
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"\u0417\u0434\u0435\u0441\u044c \u043f\u043e\u043a\u0430 \u043d\u0438\u0447\u0435\u0433\u043e \u043d\u0435\u0442" "\u041f\u043e \u0412\u0430\u0448\u0435\u043c\u0443 \u0437\u0430\u043f\u0440\u043e\u0441\u0443 \u043d\u0438\u0447\u0435\u0433\u043e \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u043e"
],
"presenseStrs": [
"<span>\u041b\u044e\u0434\u0438</span>"
], ],
"alexaRank": 6409, "alexaRank": 6409,
"urlMain": "https://mirtesen.ru", "urlMain": "https://mirtesen.ru",
@@ -10166,10 +10175,7 @@
"tags": [ "tags": [
"ru" "ru"
], ],
"checkType": "message", "checkType": "status_code",
"absenceStrs": [
"404 - Not Found"
],
"alexaRank": 25200, "alexaRank": 25200,
"urlMain": "https://overclockers.ru", "urlMain": "https://overclockers.ru",
"url": "https://overclockers.ru/cpubase/user/{username}", "url": "https://overclockers.ru/cpubase/user/{username}",
@@ -10714,7 +10720,11 @@
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"Hmm, it seems that you've come across an invalid username", "Hmm, it seems that you've come across an invalid username",
"404 Not Found" "404 Not Found",
"Member Not Found"
],
"presenseStrs": [
"profile on Planet Minecraft to see their public Minecraft community activity"
], ],
"alexaRank": 9050, "alexaRank": 9050,
"urlMain": "https://www.planetminecraft.com", "urlMain": "https://www.planetminecraft.com",
@@ -12851,7 +12861,13 @@
"tags": [ "tags": [
"music" "music"
], ],
"checkType": "status_code", "checkType": "message",
"presenseStrs": [
"Profile: "
],
"absenceStrs": [
"Smule | Page Not Found (404)"
],
"alexaRank": 11742, "alexaRank": 11742,
"urlMain": "https://www.smule.com/", "urlMain": "https://www.smule.com/",
"url": "https://www.smule.com/{username}", "url": "https://www.smule.com/{username}",
@@ -13117,7 +13133,7 @@
"us" "us"
], ],
"headers": { "headers": {
"authorization": "Bearer BQC-v69M-AcXsPLrSktz0Era-J2P1SXWB42HLKRHnCNpj00jLEbbbDFpIFo1UhBKrHrL7FqLQd-X4MIuhFo" "authorization": "Bearer BQBFTijjpshGAhX7n9-sO46wb8zJIkhu6TT3Ss7b-0V1dw_jXZhcff1agUpqRgbhznOG8pSIRlHtJAtd2TU"
}, },
"errors": { "errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14973,7 +14989,7 @@
"video" "video"
], ],
"headers": { "headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2NDExNzg4NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.9rznMue0JmX9SAPuWQDIYR-mmsozFq5PoKUvlvElpkQ" "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2NDU4Nzg1NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.Bs6VBcKPsl-5dqoThdAImBIex1mas1UcyG2pSnIYqYk"
}, },
"activation": { "activation": {
"url": "https://vimeo.com/_rv/viewer", "url": "https://vimeo.com/_rv/viewer",
+5 -2
View File
@@ -450,8 +450,11 @@ class MaigretDatabase:
for tag in filter(lambda x: not is_country_tag(x), site.tags): for tag in filter(lambda x: not is_country_tag(x), site.tags):
tags[tag] = tags.get(tag, 0) + 1 tags[tag] = tags.get(tag, 0) + 1
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n\n" enabled_perc = round(100*(total_count-disabled_count)/total_count, 2)
output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} (false positive risks)\n\n" output += f"Enabled/total sites: {total_count - disabled_count}/{total_count} = {enabled_perc}%\n\n"
checks_perc = round(100*message_checks_one_factor/message_checks, 2)
output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} = {checks_perc}% (false positive risks)\n\n"
top_urls_count = 20 top_urls_count = 20
output += f"Top {top_urls_count} profile URLs:\n" output += f"Top {top_urls_count} profile URLs:\n"
+6 -6
View File
@@ -249,7 +249,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://forum.xda-developers.com) [XDA (https://forum.xda-developers.com)](https://forum.xda-developers.com)*: top 5K, apps, forum*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://forum.xda-developers.com) [XDA (https://forum.xda-developers.com)](https://forum.xda-developers.com)*: top 5K, apps, forum*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://i.thechive.com/) [Thechive (https://i.thechive.com/)](https://i.thechive.com/)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://i.thechive.com/) [Thechive (https://i.thechive.com/)](https://i.thechive.com/)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://999.md) [999.md (https://999.md)](https://999.md)*: top 5K, freelance, md, shopping* 1. ![](https://www.google.com/s2/favicons?domain=https://999.md) [999.md (https://999.md)](https://999.md)*: top 5K, freelance, md, shopping*
1. ![](https://www.google.com/s2/favicons?domain=https://ru.foursquare.com/) [Foursquare (https://ru.foursquare.com/)](https://ru.foursquare.com/)*: top 5K, geosocial, in* 1. ![](https://www.google.com/s2/favicons?domain=https://foursquare.com/) [Foursquare (https://foursquare.com/)](https://foursquare.com/)*: top 5K, geosocial, in*
1. ![](https://www.google.com/s2/favicons?domain=https://4pda.ru/) [4pda (https://4pda.ru/)](https://4pda.ru/)*: top 5K, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://4pda.ru/) [4pda (https://4pda.ru/)](https://4pda.ru/)*: top 5K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.weforum.org) [Weforum (https://www.weforum.org)](https://www.weforum.org)*: top 5K, forum, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.weforum.org) [Weforum (https://www.weforum.org)](https://www.weforum.org)*: top 5K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=http://www.techspot.com/community/) [techspot.com (http://www.techspot.com/community/)](http://www.techspot.com/community/)*: top 5K, forum, us* 1. ![](https://www.google.com/s2/favicons?domain=http://www.techspot.com/community/) [techspot.com (http://www.techspot.com/community/)](http://www.techspot.com/community/)*: top 5K, forum, us*
@@ -2599,12 +2599,12 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.hozpitality.com) [hozpitality (https://www.hozpitality.com)](https://www.hozpitality.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://www.hozpitality.com) [hozpitality (https://www.hozpitality.com)](https://www.hozpitality.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://kazanlashkigalab.com) [kazanlashkigalab.com (https://kazanlashkigalab.com)](https://kazanlashkigalab.com)*: top 100M, kz* 1. ![](https://www.google.com/s2/favicons?domain=https://kazanlashkigalab.com) [kazanlashkigalab.com (https://kazanlashkigalab.com)](https://kazanlashkigalab.com)*: top 100M, kz*
Alexa.com rank data fetched at (2022-02-26 11:41:48.847517 UTC) Alexa.com rank data fetched at (2022-02-26 12:19:53.127789 UTC)
## Statistics ## Statistics
Enabled/total sites: 2447/2595 Enabled/total sites: 2447/2595 = 94.3%
Incomplete checks: 586/1978 (false positive risks) Incomplete checks: 582/1978 = 29.42% (false positive risks)
Top 20 profile URLs: Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)` - (796) `{urlMain}/index/8-0-{username} (uCoz)`
@@ -2634,9 +2634,9 @@ Top 20 tags:
- (40) `NO_TAGS` (non-standard) - (40) `NO_TAGS` (non-standard)
- (24) `coding` - (24) `coding`
- (23) `photo` - (23) `photo`
- (19) `news` - (18) `news`
- (18) `blog` - (18) `blog`
- (18) `music` - (17) `music`
- (15) `tech` - (15) `tech`
- (13) `freelance` - (13) `freelance`
- (12) `sharing` - (12) `sharing`