mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
Parallel execution optimization (#1897)
* Connection failure fix: removed futures, added semaphores * Additional fixes * Replaced tqdm with alive_progress, poetry update * Self-check mode fix, test fixes * Site check fixes (#1896) * Fixed incorrect site names, added method to compare sites
This commit is contained in:
+6
-14
@@ -3,23 +3,13 @@
|
||||
This module generates the listing of supported sites in file `SITES.md`
|
||||
and pretty prints file with sites data.
|
||||
"""
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import requests
|
||||
import logging
|
||||
import threading
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
import tqdm.asyncio
|
||||
|
||||
from maigret.maigret import get_response, site_self_check
|
||||
from maigret.sites import MaigretSite, MaigretDatabase, MaigretEngine
|
||||
from maigret.utils import CaseConverter
|
||||
|
||||
from maigret.maigret import get_response
|
||||
from maigret.sites import MaigretDatabase, MaigretEngine
|
||||
|
||||
async def check_engine_of_site(site_name, sites_with_engines, future, engine_name, semaphore, logger):
|
||||
async with semaphore:
|
||||
@@ -98,8 +88,10 @@ if __name__ == '__main__':
|
||||
tasks.append(future)
|
||||
|
||||
# progress bar
|
||||
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
||||
loop.run_until_complete(f)
|
||||
with alive_progress(len(tasks), title='Checking sites') as progress:
|
||||
for f in asyncio.as_completed(tasks):
|
||||
loop.run_until_complete(f)
|
||||
progress()
|
||||
|
||||
print(f'Total detected {len(new_engine_sites)} sites on engine {engine_name}')
|
||||
# dict with new found engine sites
|
||||
|
||||
@@ -3,7 +3,7 @@ import json
|
||||
import random
|
||||
import re
|
||||
|
||||
import tqdm.asyncio
|
||||
import alive_progress
|
||||
from mock import Mock
|
||||
import requests
|
||||
|
||||
@@ -181,7 +181,7 @@ if __name__ == '__main__':
|
||||
raw_maigret_data = json.dumps({site.name: site.json for site in sites_subset})
|
||||
|
||||
new_sites = []
|
||||
for site in tqdm.asyncio.tqdm(urls):
|
||||
for site in alive_progress.alive_it(urls):
|
||||
site_lowercase = site.lower()
|
||||
|
||||
domain_raw = URL_RE.sub('', site_lowercase).strip().strip('/')
|
||||
@@ -271,7 +271,9 @@ if __name__ == '__main__':
|
||||
future = asyncio.ensure_future(check_coro)
|
||||
tasks.append(future)
|
||||
|
||||
for f in tqdm.asyncio.tqdm.as_completed(tasks, timeout=TIMEOUT):
|
||||
with alive_progress(len(tasks), title='Checking sites') as progress:
|
||||
for f in asyncio.as_completed(tasks):
|
||||
progress()
|
||||
try:
|
||||
loop.run_until_complete(f)
|
||||
except asyncio.exceptions.TimeoutError:
|
||||
|
||||
@@ -137,7 +137,7 @@ Rank data fetched from Alexa by domains.
|
||||
site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
|
||||
db.update_site(site)
|
||||
|
||||
site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc)} UTC)\n')
|
||||
site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc).date()} UTC)\n')
|
||||
db.save_to_file(args.base_file)
|
||||
|
||||
statistics_text = db.get_db_stats(is_markdown=True)
|
||||
|
||||
Reference in New Issue
Block a user