Added DB statistics auto-update and writing of statistics to sites.md (#357)

This commit is contained in:
Soxoj
2022-02-23 18:01:42 +03:00
committed by GitHub
parent 31fc656721
commit 1683e5b744
3 changed files with 27 additions and 11 deletions
+1 -1
View File
@@ -566,7 +566,7 @@ async def main():
# Database statistics
if args.stats:
print(db.get_db_stats(db.sites_dict))
print(db.get_db_stats())
report_dir = path.join(os.getcwd(), args.folderoutput)
+22 -10
View File
@@ -419,9 +419,8 @@ class MaigretDatabase:
results[_id] = _type
return results
def get_db_stats(self, sites_dict):
if not sites_dict:
sites_dict = self.sites_dict()
def get_db_stats(self, is_markdown=False):
sites_dict = self.sites_dict
urls = {}
tags = {}
@@ -429,6 +428,9 @@ class MaigretDatabase:
disabled_count = 0
total_count = len(sites_dict)
message_checks = 0
message_checks_one_factor = 0
for _, site in sites_dict.items():
if site.disabled:
disabled_count += 1
@@ -436,24 +438,34 @@ class MaigretDatabase:
url_type = site.get_url_template()
urls[url_type] = urls.get(url_type, 0) + 1
if site.check_type == 'message':
message_checks += 1
if site.absence_strs and site.presense_strs:
continue
message_checks_one_factor += 1
if not site.tags:
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
for tag in filter(lambda x: not is_country_tag(x), site.tags):
tags[tag] = tags.get(tag, 0) + 1
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
output += "Top profile URLs:\n"
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n\n"
output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} (false positive risks)\n\n"
top_urls_count = 20
output += f"Top {top_urls_count} profile URLs:\n"
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:top_urls_count]:
if count == 1:
break
output += f"{count}\t{url}\n"
output += f"- ({count})\t`{url}`\n" if is_markdown else f"{count}\t{url}\n"
output += "Top tags:\n"
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
top_tags_count = 20
output += f"\nTop {top_tags_count} tags:\n"
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:top_tags_count]:
mark = ""
if tag not in self._tags:
mark = " (non-standard)"
output += f"{count}\t{tag}{mark}\n"
output += f"- ({count})\t`{tag}`{mark}\n" if is_markdown else f"{count}\t{tag}{mark}\n"
return output
+4
View File
@@ -140,4 +140,8 @@ Rank data fetched from Alexa by domains.
site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
db.save_to_file(args.base_file)
statistics_text = db.get_db_stats(is_markdown=True)
site_file.write('## Statistics\n\n')
site_file.write(statistics_text)
print("\nFinished updating supported site listing!")