Merge branch 'main' into cookies-support

This commit is contained in:
soxoj
2021-01-27 01:46:47 +03:00
committed by GitHub
5 changed files with 3463 additions and 2906 deletions
+4 -3
View File
@@ -579,8 +579,9 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
return changes
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False) -> bool:
sem = asyncio.Semaphore(10)
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False,
max_connections=10) -> bool:
sem = asyncio.Semaphore(max_connections)
tasks = []
all_sites = site_data
@@ -815,7 +816,7 @@ async def main():
# Database self-checking
if args.self_check:
print('Maigret sites database self-checking...')
is_need_update = await self_check(db, site_data, logger)
is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
if is_need_update:
if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
db.save_to_file(args.json_file)
+2185 -1679
View File
File diff suppressed because it is too large. Load Diff
+6 -1
View File
@@ -118,6 +118,7 @@ class MaigretSite:
# remove list items
if isinstance(engine_data[k], list) and is_exists:
for f in engine_data[k]:
if f in self_copy.__dict__[field]:
self_copy.__dict__[field].remove(f)
continue
if is_exists:
@@ -143,7 +144,11 @@ class MaigretDatabase:
normalized_names = list(map(str.lower, names))
normalized_tags = list(map(str.lower, tags))
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
def is_tags_ok(site):
    """Tell whether `site` passes the tag filter.

    Returns a truthy value when the site shares at least one tag with
    the requested tags, or when 'disabled' was requested and the site
    is disabled. The result is the non-empty intersection set if there
    is one, otherwise the value of the disabled check.
    """
    # Requested tags were lower-cased into normalized_tags; site.tags is
    # compared as-is.
    intersected_tags = set(site.tags).intersection(set(normalized_tags))
    # NOTE(review): this tests the raw `tags`, not `normalized_tags`, so
    # the 'disabled' pseudo-tag is matched case-sensitively — confirm
    # that this is intended.
    is_disabled = 'disabled' in tags and site.disabled
    return intersected_tags or is_disabled
is_name_ok = lambda x: x.name.lower() in normalized_names
is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
+1241 -1199
View File
File diff suppressed because it is too large. Load Diff
+3
View File
@@ -24,7 +24,10 @@ RANKS.update({
'50000000': '10M',
})
SEMAPHORE = threading.Semaphore(10)
def get_rank(domain_to_query, site, print_errors=True):
with SEMAPHORE:
#Retrieve ranking data via alexa API
url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
xml_data = requests.get(url).text