Merge pull request #39 from soxoj/import-404-specific

Default engines for specific cases, sites list updated
This commit is contained in:
soxoj
2021-01-21 22:20:38 +03:00
committed by GitHub
4 changed files with 3156 additions and 2807 deletions
+1887 -1584
View File
File diff suppressed because it is too large Load Diff
+2 -1
View File
@@ -117,7 +117,8 @@ class MaigretSite:
# remove list items
if isinstance(engine_data[k], list) and is_exists:
for f in engine_data[k]:
self_copy.__dict__[field].remove(f)
if f in self_copy.__dict__[field]:
self_copy.__dict__[field].remove(f)
continue
if is_exists:
del self_copy.__dict__[field]
+1241 -1199
View File
File diff suppressed because it is too large Load Diff
+26 -23
View File
@@ -24,32 +24,35 @@ RANKS.update({
'50000000': '10M',
})
SEMAPHORE = threading.Semaphore(10)
def get_rank(domain_to_query, site, print_errors=True):
#Retrieve ranking data via alexa API
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
xml_data = requests.get(url).text
root = ET.fromstring(xml_data)
with SEMAPHORE:
#Retrieve ranking data via alexa API
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
xml_data = requests.get(url).text
root = ET.fromstring(xml_data)
try:
#Get ranking for this site.
site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
country = root.find('.//COUNTRY')
if not country is None and country.attrib:
country_code = country.attrib['CODE']
tags = set(site.tags)
if country_code:
tags.add(country_code.lower())
site.tags = sorted(list(tags))
if site.type != 'username':
site.disabled = False
except Exception as e:
if print_errors:
logging.error(e)
# We did not find the rank for some reason.
print(f"Error retrieving rank information for '{domain_to_query}'")
print(f" Returned XML is |{xml_data}|")
try:
#Get ranking for this site.
site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
country = root.find('.//COUNTRY')
if not country is None and country.attrib:
country_code = country.attrib['CODE']
tags = set(site.tags)
if country_code:
tags.add(country_code.lower())
site.tags = sorted(list(tags))
if site.type != 'username':
site.disabled = False
except Exception as e:
if print_errors:
logging.error(e)
# We did not find the rank for some reason.
print(f"Error retrieving rank information for '{domain_to_query}'")
print(f" Returned XML is |{xml_data}|")
return
return
def get_step_rank(rank):