Merge branch 'main' into cookies-support

This commit is contained in:
soxoj
2021-01-27 01:46:47 +03:00
committed by GitHub
5 changed files with 3463 additions and 2906 deletions
+4 -3
View File
@@ -579,8 +579,9 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
return changes
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False) -> bool:
sem = asyncio.Semaphore(10)
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
max_connections=10) -> bool:
sem = asyncio.Semaphore(max_connections)
tasks = []
all_sites = site_data
@@ -815,7 +816,7 @@ async def main():
# Database self-checking
if args.self_check:
print('Maigret sites database self-checking...')
is_need_update = await self_check(db, site_data, logger)
is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
if is_need_update:
if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
db.save_to_file(args.json_file)
+2185 -1679
View File
File diff suppressed because it is too large. (Load Diff)
+7 -2
View File
@@ -118,7 +118,8 @@ class MaigretSite:
# remove list items
if isinstance(engine_data[k], list) and is_exists:
for f in engine_data[k]:
self_copy.__dict__[field].remove(f)
if f in self_copy.__dict__[field]:
self_copy.__dict__[field].remove(f)
continue
if is_exists:
del self_copy.__dict__[field]
@@ -143,7 +144,11 @@ class MaigretDatabase:
normalized_names = list(map(str.lower, names))
normalized_tags = list(map(str.lower, tags))
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
def is_tags_ok(site):
intersected_tags = set(site.tags).intersection(set(normalized_tags))
is_disabled = 'disabled' in tags and site.disabled
return intersected_tags or is_disabled
is_name_ok = lambda x: x.name.lower() in normalized_names
is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
+1241 -1199
View File
File diff suppressed because it is too large. (Load Diff)
+26 -23
View File
@@ -24,32 +24,35 @@ RANKS.update({
'50000000': '10M',
})
SEMAPHORE = threading.Semaphore(10)
def get_rank(domain_to_query, site, print_errors=True):
#Retrieve ranking data via alexa API
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
xml_data = requests.get(url).text
root = ET.fromstring(xml_data)
with SEMAPHORE:
#Retrieve ranking data via alexa API
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
xml_data = requests.get(url).text
root = ET.fromstring(xml_data)
try:
#Get ranking for this site.
site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
country = root.find('.//COUNTRY')
if not country is None and country.attrib:
country_code = country.attrib['CODE']
tags = set(site.tags)
if country_code:
tags.add(country_code.lower())
site.tags = sorted(list(tags))
if site.type != 'username':
site.disabled = False
except Exception as e:
if print_errors:
logging.error(e)
# We did not find the rank for some reason.
print(f"Error retrieving rank information for '{domain_to_query}'")
print(f" Returned XML is |{xml_data}|")
try:
#Get ranking for this site.
site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
country = root.find('.//COUNTRY')
if not country is None and country.attrib:
country_code = country.attrib['CODE']
tags = set(site.tags)
if country_code:
tags.add(country_code.lower())
site.tags = sorted(list(tags))
if site.type != 'username':
site.disabled = False
except Exception as e:
if print_errors:
logging.error(e)
# We did not find the rank for some reason.
print(f"Error retrieving rank information for '{domain_to_query}'")
print(f" Returned XML is |{xml_data}|")
return
return
def get_step_rank(rank):