Merge pull request #143 from soxoj/tags-updates-1

Tags sorting and some updates
This commit is contained in:
soxoj
2021-05-09 23:21:57 +03:00
committed by GitHub
4 changed files with 1890 additions and 1814 deletions
+1075 -1032
View File
File diff suppressed because it is too large Load Diff
+799 -774
View File
File diff suppressed because it is too large Load Diff
+4 -1
View File
@@ -46,7 +46,10 @@ if __name__ == '__main__':
while True: while True:
site = random.choice(db.sites) site = random.choice(db.sites)
if site.engine == 'uCoz' or site.tags: if site.engine == 'uCoz':
continue
if not 'in' in site.tags:
continue continue
update_tags(site) update_tags(site)
+12 -7
View File
@@ -74,6 +74,7 @@ if __name__ == '__main__':
dest="base_file", default="maigret/resources/data.json", dest="base_file", default="maigret/resources/data.json",
help="JSON file with sites data to update.") help="JSON file with sites data to update.")
parser.add_argument('--without-rank', help='update with use of local data only', action='store_true')
parser.add_argument('--empty-only', help='update only sites without rating', action='store_true') parser.add_argument('--empty-only', help='update only sites without rating', action='store_true')
parser.add_argument('--exclude-engine', help='do not update score with certain engine', parser.add_argument('--exclude-engine', help='do not update score with certain engine',
action="append", dest="exclude_engine_list", default=[]) action="append", dest="exclude_engine_list", default=[])
@@ -93,22 +94,25 @@ Rank data fetched from Alexa by domains.
""") """)
for site in sites_subset: for site in sites_subset:
if args.without_rank:
break
url_main = site.url_main url_main = site.url_main
if site.alexa_rank < sys.maxsize and args.empty_only: if site.alexa_rank < sys.maxsize and args.empty_only:
continue continue
if args.exclude_engine_list and site.engine in args.exclude_engine_list: if args.exclude_engine_list and site.engine in args.exclude_engine_list:
continue continue
site.alexa_rank = 0 site.alexa_rank = 0
th = threading.Thread(target=get_rank, args=(url_main, site)) th = threading.Thread(target=get_rank, args=(url_main, site,))
pool.append((site.name, url_main, th)) pool.append((site.name, url_main, th))
th.start() th.start()
index = 1 if not args.without_rank:
for site_name, url_main, th in pool: index = 1
th.join() for site_name, url_main, th in pool:
sys.stdout.write("\r{0}".format(f"Updated {index} out of {len(sites_subset)} entries")) th.join()
sys.stdout.flush() sys.stdout.write("\r{0}".format(f"Updated {index} out of {len(sites_subset)} entries"))
index = index + 1 sys.stdout.flush()
index = index + 1
sites_full_list = [(s, s.alexa_rank) for s in sites_subset] sites_full_list = [(s, s.alexa_rank) for s in sites_subset]
@@ -123,6 +127,7 @@ Rank data fetched from Alexa by domains.
url_main = site.url_main url_main = site.url_main
valid_rank = get_step_rank(rank) valid_rank = get_step_rank(rank)
all_tags = site.tags all_tags = site.tags
all_tags.sort()
tags = ', ' + ', '.join(all_tags) if all_tags else '' tags = ', ' + ', '.join(all_tags) if all_tags else ''
note = '' note = ''
if site.disabled: if site.disabled: