From 0f7aa2c456e8647b458aaaa5991be2a3e2ae3c65 Mon Sep 17 00:00:00 2001 From: pykereaper <74609327+pykereaper@users.noreply.github.com> Date: Sat, 28 Jun 2025 23:15:56 +0200 Subject: [PATCH] Pass db_file configuration to web interface (#2019) * pass db_file configuration to web interface * Autoformatting --------- Co-authored-by: Soxoj --- maigret/maigret.py | 21 +++++++------ maigret/report.py | 42 ++++++++++++++++++------- maigret/web/app.py | 76 ++++++++++++++++++++++++++-------------------- 3 files changed, 86 insertions(+), 53 deletions(-) diff --git a/maigret/maigret.py b/maigret/maigret.py index 5c25382..2935409 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -493,15 +493,6 @@ async def main(): log_level = logging.WARNING logger.setLevel(log_level) - if args.web is not None: - from maigret.web.app import app - - port = ( - args.web if args.web else 5000 - ) # args.web is either the specified port or 5000 by default - app.run(port=port) - return - # Usernames initial list usernames = { u: args.id_type @@ -609,6 +600,18 @@ async def main(): # Define one report filename template report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}') + # Web interface + if args.web is not None: + from maigret.web.app import app + + app.config["MAIGRET_DB_FILE"] = db_file + + port = ( + args.web if args.web else 5000 + ) # args.web is either the specified port or 5000 by default + app.run(port=port) + return + if usernames == {}: # magic params to exit after init query_notify.warning('No usernames to check, exiting.') diff --git a/maigret/report.py b/maigret/report.py index 7b76a6b..22814b7 100644 --- a/maigret/report.py +++ b/maigret/report.py @@ -106,7 +106,7 @@ class MaigretGraph: params = dict(self.username_params) elif value.startswith('http'): params = dict(self.site_params) - + params['title'] = node_name if color: params['color'] = color @@ -141,10 +141,12 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase if not status or status.status != MaigretCheckStatus.CLAIMED: continue - # base site node + # base site node site_base_url = website_name if site_base_url not in base_site_nodes: - base_site_nodes[site_base_url] = graph.add_node('site', site_base_url, color='#28a745') # Green color + base_site_nodes[site_base_url] = graph.add_node( + 'site', site_base_url, color='#28a745' + ) # Green color site_base_node_name = base_site_nodes[site_base_url] @@ -152,7 +154,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}') account_node_id = f"{site_base_url}: {account_url}" if account_node_id not in site_account_nodes: - site_account_nodes[account_node_id] = graph.add_node('account', account_url) + site_account_nodes[account_node_id] = graph.add_node( + 'account', account_url + ) account_node_name = site_account_nodes[account_node_id] @@ -162,13 +166,18 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase def process_ids(parent_node, ids): for k, v in ids.items(): - if k.endswith('_count') or k.startswith('is_') or k.endswith('_at') or k in 'image': + if ( + k.endswith('_count') + or k.startswith('is_') + or k.endswith('_at') + or k in 'image' + ): continue # Normalize value if string norm_v = v.lower() if isinstance(v, str) else v value_key = f"{k}:{norm_v}" - + if value_key in processed_values: ids_data_name = processed_values[value_key] else: @@ -187,7 +196,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase data_node_name = graph.add_node(vv, site_base_url) graph.link(list_node_name, data_node_name) - add_ids = {a: b for b, a in db.extract_ids_from_url(vv).items()} + add_ids = { + a: b for b, a in db.extract_ids_from_url(vv).items() + } if add_ids: process_ids(data_node_name, add_ids) ids_data_name = list_node_name @@ -198,11 +209,17 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase if 'username' in k or k in SUPPORTED_IDS: new_username_key = f"username:{norm_v}" if new_username_key not in processed_values: - new_username_node_name = graph.add_node('username', norm_v) - processed_values[new_username_key] = new_username_node_name + new_username_node_name = graph.add_node( + 'username', norm_v + ) + processed_values[new_username_key] = ( + new_username_node_name + ) graph.link(ids_data_name, new_username_node_name) - add_ids = {k: v for v, k in db.extract_ids_from_url(v).items()} + add_ids = { + k: v for v, k in db.extract_ids_from_url(v).items() + } if add_ids: process_ids(ids_data_name, add_ids) @@ -216,11 +233,14 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase G.remove_nodes_from(nodes_to_remove) # Remove site nodes with only one connection - single_degree_sites = [n for n, deg in G.degree() if n.startswith("site:") and deg <= 1] + single_degree_sites = [ + n for n, deg in G.degree() if n.startswith("site:") and deg <= 1 + ] G.remove_nodes_from(single_degree_sites) # Generate interactive visualization from pyvis.network import Network + nt = Network(notebook=True, height="750px", width="100%") nt.from_nx(G) nt.show(filename) diff --git a/maigret/web/app.py b/maigret/web/app.py index 48177dc..e7dda41 100644 --- a/maigret/web/app.py +++ b/maigret/web/app.py @@ -21,18 +21,15 @@ from maigret.report import generate_report_context app = Flask(__name__) app.secret_key = 'your-secret-key-here' -#add background job tracking +# add background job tracking background_jobs = {} job_results = {} # Configuration -MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json') -COOKIES_FILE = "cookies.txt" -UPLOAD_FOLDER = 'uploads' -REPORTS_FOLDER = os.path.abspath('/tmp/maigret_reports') - -os.makedirs(UPLOAD_FOLDER, exist_ok=True) -os.makedirs(REPORTS_FOLDER, exist_ok=True) +app.config["MAIGRET_DB_FILE"] = os.path.join('maigret', 'resources', 'data.json') +app.config["COOKIES_FILE"] = "cookies.txt" +app.config["UPLOAD_FOLDER"] = 'uploads' +app.config["REPORTS_FOLDER"] = os.path.abspath('/tmp/maigret_reports') def setup_logger(log_level, name): @@ -44,24 +41,24 @@ def setup_logger(log_level, name): async def maigret_search(username, options): logger = setup_logger(logging.WARNING, 'maigret') try: - db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE) - - top_sites = int(options.get('top_sites') or 500) + db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]) + + top_sites = int(options.get('top_sites') or 500) if options.get('all_sites'): top_sites = 999999999 # effectively all - + tags = options.get('tags', []) - site_list= options.get('site_list', []) + site_list = options.get('site_list', []) logger.info(f"Filtering sites by tags: {tags}") - + sites = db.ranked_sites_dict( top=top_sites, tags=tags, names=site_list, disabled=False, - id_type='username' + id_type='username', ) - + logger.info(f"Found {len(sites)} sites matching the tag criteria") results = await maigret.search( @@ -70,9 +67,11 @@ async def maigret_search(username, options): timeout=int(options.get('timeout', 30)), logger=logger, id_type='username', - cookies=COOKIES_FILE if options.get('use_cookies') else None, - is_parsing_enabled=(not options.get('disable_extracting', False)), - recursive_search_enabled=(not options.get('disable_recursive_search', False)), + cookies=app.config["COOKIES_FILE"] if options.get('use_cookies') else None, + is_parsing_enabled=(not options.get('disable_extracting', False)), + recursive_search_enabled=( + not options.get('disable_recursive_search', False) + ), check_domains=options.get('with_domains', False), proxy=options.get('proxy', None), tor_proxy=options.get('tor_proxy', None), @@ -104,14 +103,17 @@ def process_search_task(usernames, options, timestamp): search_multiple_usernames(usernames, options) ) - session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}") + os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True) + session_folder = os.path.join( + app.config["REPORTS_FOLDER"], f"search_{timestamp}" + ) os.makedirs(session_folder, exist_ok=True) graph_path = os.path.join(session_folder, "combined_graph.html") maigret.report.save_graph_report( graph_path, general_results, - MaigretDatabase().load_from_path(MAIGRET_DB_FILE), + MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]), ) individual_reports = [] @@ -188,20 +190,20 @@ def process_search_task(usernames, options, timestamp): @app.route('/') def index(): - #load site data for autocomplete - db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE) + # load site data for autocomplete + db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]) site_options = [] - + for site in db.sites: - #add main site name + # add main site name site_options.append(site.name) - #add URL if different from name + # add URL if different from name if site.url_main and site.url_main not in site_options: site_options.append(site.url_main) - - #sort and deduplicate + + # sort and deduplicate site_options = sorted(set(site_options)) - + return render_template('index.html', site_options=site_options) @@ -237,10 +239,14 @@ def search(): 'i2p_proxy': request.form.get('i2p_proxy', None) or None, 'permute': 'permute' in request.form, 'tags': selected_tags, # Pass selected tags as a list - 'site_list': [s.strip() for s in request.form.get('site', '').split(',') if s.strip()], + 'site_list': [ + s.strip() for s in request.form.get('site', '').split(',') if s.strip() + ], } - logging.info(f"Starting search for usernames: {usernames} with tags: {selected_tags}") + logging.info( + f"Starting search for usernames: {usernames} with tags: {selected_tags}" + ) # Start background job background_jobs[timestamp] = { @@ -253,6 +259,7 @@ def search(): return redirect(url_for('status', timestamp=timestamp)) + @app.route('/status/') def status(timestamp): logging.info(f"Status check for timestamp: {timestamp}") @@ -313,8 +320,11 @@ def results(session_id): @app.route('/reports/') def download_report(filename): try: - file_path = os.path.normpath(os.path.join(REPORTS_FOLDER, filename)) - if not file_path.startswith(REPORTS_FOLDER): + os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True) + file_path = os.path.normpath( + os.path.join(app.config["REPORTS_FOLDER"], filename) + ) + if not file_path.startswith(app.config["REPORTS_FOLDER"]): raise Exception("Invalid file path") return send_file(file_path) except Exception as e: