mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
Merge pull request #39 from soxoj/import-404-specific
Default engines for specific cases, sites list updated
This commit is contained in:
+1887
-1584
File diff suppressed because it is too large
Load Diff
+2
-1
@@ -117,7 +117,8 @@ class MaigretSite:
|
||||
# remove list items
|
||||
if isinstance(engine_data[k], list) and is_exists:
|
||||
for f in engine_data[k]:
|
||||
self_copy.__dict__[field].remove(f)
|
||||
if f in self_copy.__dict__[field]:
|
||||
self_copy.__dict__[field].remove(f)
|
||||
continue
|
||||
if is_exists:
|
||||
del self_copy.__dict__[field]
|
||||
|
||||
+26
-23
@@ -24,32 +24,35 @@ RANKS.update({
|
||||
'50000000': '10M',
|
||||
})
|
||||
|
||||
SEMAPHORE = threading.Semaphore(10)
|
||||
|
||||
def get_rank(domain_to_query, site, print_errors=True):
|
||||
#Retrieve ranking data via alexa API
|
||||
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
|
||||
xml_data = requests.get(url).text
|
||||
root = ET.fromstring(xml_data)
|
||||
with SEMAPHORE:
|
||||
#Retrieve ranking data via alexa API
|
||||
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
|
||||
xml_data = requests.get(url).text
|
||||
root = ET.fromstring(xml_data)
|
||||
|
||||
try:
|
||||
#Get ranking for this site.
|
||||
site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
|
||||
country = root.find('.//COUNTRY')
|
||||
if not country is None and country.attrib:
|
||||
country_code = country.attrib['CODE']
|
||||
tags = set(site.tags)
|
||||
if country_code:
|
||||
tags.add(country_code.lower())
|
||||
site.tags = sorted(list(tags))
|
||||
if site.type != 'username':
|
||||
site.disabled = False
|
||||
except Exception as e:
|
||||
if print_errors:
|
||||
logging.error(e)
|
||||
# We did not find the rank for some reason.
|
||||
print(f"Error retrieving rank information for '{domain_to_query}'")
|
||||
print(f" Returned XML is |{xml_data}|")
|
||||
try:
|
||||
#Get ranking for this site.
|
||||
site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
|
||||
country = root.find('.//COUNTRY')
|
||||
if not country is None and country.attrib:
|
||||
country_code = country.attrib['CODE']
|
||||
tags = set(site.tags)
|
||||
if country_code:
|
||||
tags.add(country_code.lower())
|
||||
site.tags = sorted(list(tags))
|
||||
if site.type != 'username':
|
||||
site.disabled = False
|
||||
except Exception as e:
|
||||
if print_errors:
|
||||
logging.error(e)
|
||||
# We did not find the rank for some reason.
|
||||
print(f"Error retrieving rank information for '{domain_to_query}'")
|
||||
print(f" Returned XML is |{xml_data}|")
|
||||
|
||||
return
|
||||
return
|
||||
|
||||
|
||||
def get_step_rank(rank):
|
||||
|
||||
Reference in New Issue
Block a user