Updated the logic for estimating false positive risk (#475)

This commit is contained in:
Soxoj
2022-05-10 14:54:09 +03:00
committed by GitHub
parent 6400d83a46
commit bb6ed59e44
3 changed files with 28 additions and 9 deletions
+6 -2
View File
@@ -31965,8 +31965,12 @@
}, },
"wiki.tfes.org": { "wiki.tfes.org": {
"checkType": "message", "checkType": "message",
"presenseStrs": ["History"], "presenseStrs": [
"absenceStrs": ["is not registered."], "History"
],
"absenceStrs": [
"is not registered."
],
"usernameClaimed": "Tom_Bishop", "usernameClaimed": "Tom_Bishop",
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
"url": "https://wiki.tfes.org/User:{username}" "url": "https://wiki.tfes.org/User:{username}"
+15 -4
View File
@@ -431,6 +431,8 @@ class MaigretDatabase:
message_checks = 0 message_checks = 0
message_checks_one_factor = 0 message_checks_one_factor = 0
status_checks = 0
for _, site in sites_dict.items(): for _, site in sites_dict.items():
if site.disabled: if site.disabled:
disabled_count += 1 disabled_count += 1
@@ -444,17 +446,26 @@ class MaigretDatabase:
continue continue
message_checks_one_factor += 1 message_checks_one_factor += 1
if site.check_type == 'status_code':
status_checks += 1
if not site.tags: if not site.tags:
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1 tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
for tag in filter(lambda x: not is_country_tag(x), site.tags): for tag in filter(lambda x: not is_country_tag(x), site.tags):
tags[tag] = tags.get(tag, 0) + 1 tags[tag] = tags.get(tag, 0) + 1
enabled_perc = round(100*(total_count-disabled_count)/total_count, 2) enabled_count = total_count-disabled_count
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count} = {enabled_perc}%\n\n" enabled_perc = round(100*enabled_count/total_count, 2)
output += f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n"
checks_perc = round(100*message_checks_one_factor/message_checks, 2) checks_perc = round(100*message_checks_one_factor/enabled_count, 2)
output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} = {checks_perc}% (false positive risks)\n\n" output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n"
status_checks_perc = round(100*status_checks/enabled_count, 2)
output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n"
output += f"False positive risk (total): {checks_perc+status_checks_perc}%\n\n"
top_urls_count = 20 top_urls_count = 20
output += f"Top {top_urls_count} profile URLs:\n" output += f"Top {top_urls_count} profile URLs:\n"
+7 -3
View File
@@ -2968,12 +2968,16 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=) [zeldadungeon.net ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [zeldadungeon.net ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [zoig.com ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [zoig.com ()]()*: top 100M*
The list was updated at (2022-05-10 07:37:30.589483 UTC) The list was updated at (2022-05-10 11:50:34.852022 UTC)
## Statistics ## Statistics
Enabled/total sites: 2793/2964 = 94.23% Enabled/total sites: 2793/2964 = 94.23%
Incomplete checks: 511/1995 = 25.61% (false positive risks) Incomplete message checks: 511/2793 = 18.3% (false positive risks)
Status code checks: 733/2793 = 26.24% (false positive risks)
False positive risk (total): 44.54%
Top 20 profile URLs: Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)` - (796) `{urlMain}/index/8-0-{username} (uCoz)`
@@ -2998,7 +3002,7 @@ Top 20 profile URLs:
- (17) `/search.php?keywords=&terms=all&author={username}` - (17) `/search.php?keywords=&terms=all&author={username}`
Top 20 tags: Top 20 tags:
- (295) `NO_TAGS` (non-standard) - (294) `NO_TAGS` (non-standard)
- (271) `forum` - (271) `forum`
- (50) `gaming` - (50) `gaming`
- (24) `photo` - (24) `photo`