From bb6ed59e448e8e1a7b13245d59df17fbc82c1c5f Mon Sep 17 00:00:00 2001 From: Soxoj <31013580+soxoj@users.noreply.github.com> Date: Tue, 10 May 2022 14:54:09 +0300 Subject: [PATCH] Updated logic of false positive risk estimating (#475) --- maigret/resources/data.json | 8 ++++++-- maigret/sites.py | 19 +++++++++++++++---- sites.md | 10 +++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/maigret/resources/data.json b/maigret/resources/data.json index e18a384..ed528eb 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -31965,8 +31965,12 @@ }, "wiki.tfes.org": { "checkType": "message", - "presenseStrs": ["History"], - "absenceStrs": ["is not registered."], + "presenseStrs": [ + "History" + ], + "absenceStrs": [ + "is not registered." + ], "usernameClaimed": "Tom_Bishop", "usernameUnclaimed": "noonewouldeverusethis7", "url": "https://wiki.tfes.org/User:{username}" diff --git a/maigret/sites.py b/maigret/sites.py index c073fe7..310c4e4 100644 --- a/maigret/sites.py +++ b/maigret/sites.py @@ -431,6 +431,8 @@ class MaigretDatabase: message_checks = 0 message_checks_one_factor = 0 + status_checks = 0 + for _, site in sites_dict.items(): if site.disabled: disabled_count += 1 @@ -444,17 +446,26 @@ class MaigretDatabase: continue message_checks_one_factor += 1 + if site.check_type == 'status_code': + status_checks += 1 + if not site.tags: tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1 for tag in filter(lambda x: not is_country_tag(x), site.tags): tags[tag] = tags.get(tag, 0) + 1 - enabled_perc = round(100*(total_count-disabled_count)/total_count, 2) - output += f"Enabled/total sites: {total_count - disabled_count}/{total_count} = {enabled_perc}%\n\n" + enabled_count = total_count-disabled_count + enabled_perc = round(100*enabled_count/total_count, 2) + output += f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n" - checks_perc = round(100*message_checks_one_factor/message_checks, 2) - output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} = {checks_perc}% (false positive risks)\n\n" + checks_perc = round(100*message_checks_one_factor/enabled_count, 2) + output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n" + + status_checks_perc = round(100*status_checks/enabled_count, 2) + output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n" + + output += f"False positive risk (total): {checks_perc+status_checks_perc}%\n\n" top_urls_count = 20 output += f"Top {top_urls_count} profile URLs:\n" diff --git a/sites.md b/sites.md index 1ecd4a0..2992ad4 100644 --- a/sites.md +++ b/sites.md @@ -2968,12 +2968,16 @@ Rank data fetched from Alexa by domains. 1. ![](https://www.google.com/s2/favicons?domain=) [zeldadungeon.net ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [zoig.com ()]()*: top 100M* -The list was updated at (2022-05-10 07:37:30.589483 UTC) +The list was updated at (2022-05-10 11:50:34.852022 UTC) ## Statistics Enabled/total sites: 2793/2964 = 94.23% -Incomplete checks: 511/1995 = 25.61% (false positive risks) +Incomplete message checks: 511/2793 = 18.3% (false positive risks) + +Status code checks: 733/2793 = 26.24% (false positive risks) + +False positive risk (total): 44.54% Top 20 profile URLs: - (796) `{urlMain}/index/8-0-{username} (uCoz)` @@ -2998,7 +3002,7 @@ Top 20 profile URLs: - (17) `/search.php?keywords=&terms=all&author={username}` Top 20 tags: -- (295) `NO_TAGS` (non-standard) +- (294) `NO_TAGS` (non-standard) - (271) `forum` - (50) `gaming` - (24) `photo`