Tags updated, script added

This commit is contained in:
Soxoj
2021-05-09 16:20:39 +03:00
parent 5bda7fb339
commit 43f189f774
4 changed files with 1864 additions and 965 deletions
+1791 -963
View File
File diff suppressed because it is too large Load Diff
+4 -1
View File
@@ -61,6 +61,9 @@ SUPPORTED_TAGS = [
"military",
"auto",
"gambling",
"business",
"cybercriminal",
"review",
]
@@ -472,7 +475,7 @@ class MaigretDatabase:
output += f"{count}\t{url}\n"
output += "Top tags:\n"
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:20]:
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
mark = ""
if tag not in SUPPORTED_TAGS:
mark = " (non-standard)"
+15 -1
View File
@@ -2,7 +2,7 @@ import asyncio
import difflib
import re
from typing import List
import xml.etree.ElementTree as ET
import requests
from .activation import import_aiohttp_cookies
@@ -46,6 +46,20 @@ def get_match_ratio(x):
)
def get_alexa_rank(site_url_main, timeout=10):
    """Fetch the Alexa reach rank for a site, or 0 when it is unavailable.

    Requests the XML document served by ``data.alexa.com`` for
    *site_url_main* and reads the ``RANK`` attribute of the first
    ``REACH`` element found anywhere in the document.

    Args:
        site_url_main: Main page URL of the site. It is interpolated into
            the query string as-is (no URL encoding is applied).
        timeout: Seconds to wait for the HTTP response before giving up;
            passed straight to ``requests.get``.

    Returns:
        The rank as an ``int``, or ``0`` when the response is not
        well-formed XML, has no ``REACH`` element, or carries a missing or
        non-numeric ``RANK`` attribute.

    Raises:
        requests.RequestException: On network failures or timeout; these
            are deliberately not swallowed so callers can tell "no rank"
            apart from "could not ask".
    """
    url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
    # Without a timeout a stalled connection would block the caller forever.
    xml_data = requests.get(url, timeout=timeout).text

    try:
        root = ET.fromstring(xml_data)
    except ET.ParseError:
        # Empty or non-XML body (e.g. an error page): treat as "no rank".
        return 0

    reach = root.find('.//REACH')
    if reach is None:
        return 0

    try:
        return int(reach.attrib['RANK'])
    except (KeyError, ValueError):
        # Attribute absent or not an integer.
        return 0
def extract_mainpage_url(url):
    """Return *url* cut down to its first three ``/``-separated pieces.

    For an absolute URL such as ``https://host/path/page`` the three pieces
    are the scheme, the empty string between the double slash, and the
    host, which rejoin to ``https://host``. Strings with fewer than three
    slashes are returned unchanged.
    """
    pieces = url.split("/", 3)
    # At most four pieces exist; drop the trailing path remainder, if any.
    del pieces[3:]
    return "/".join(pieces)