Pass db_file configuration to web interface (#2019)

* Pass db_file configuration to web interface
* Autoformatting

---------

Co-authored-by: Soxoj <soxoj@protonmail.com>
This commit is contained in:
pykereaper
2025-06-28 23:15:56 +02:00
committed by GitHub
parent c0e60e25b8
commit 0f7aa2c456
3 changed files with 86 additions and 53 deletions
+12 -9
View File
@@ -493,15 +493,6 @@ async def main():
log_level = logging.WARNING log_level = logging.WARNING
logger.setLevel(log_level) logger.setLevel(log_level)
if args.web is not None:
from maigret.web.app import app
port = (
args.web if args.web else 5000
) # args.web is either the specified port or 5000 by default
app.run(port=port)
return
# Usernames initial list # Usernames initial list
usernames = { usernames = {
u: args.id_type u: args.id_type
@@ -609,6 +600,18 @@ async def main():
# Define one report filename template # Define one report filename template
report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}') report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
# Web interface
if args.web is not None:
from maigret.web.app import app
app.config["MAIGRET_DB_FILE"] = db_file
port = (
args.web if args.web else 5000
) # args.web is either the specified port or 5000 by default
app.run(port=port)
return
if usernames == {}: if usernames == {}:
# magic params to exit after init # magic params to exit after init
query_notify.warning('No usernames to check, exiting.') query_notify.warning('No usernames to check, exiting.')
+31 -11
View File
@@ -106,7 +106,7 @@ class MaigretGraph:
params = dict(self.username_params) params = dict(self.username_params)
elif value.startswith('http'): elif value.startswith('http'):
params = dict(self.site_params) params = dict(self.site_params)
params['title'] = node_name params['title'] = node_name
if color: if color:
params['color'] = color params['color'] = color
@@ -141,10 +141,12 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
if not status or status.status != MaigretCheckStatus.CLAIMED: if not status or status.status != MaigretCheckStatus.CLAIMED:
continue continue
# base site node # base site node
site_base_url = website_name site_base_url = website_name
if site_base_url not in base_site_nodes: if site_base_url not in base_site_nodes:
base_site_nodes[site_base_url] = graph.add_node('site', site_base_url, color='#28a745') # Green color base_site_nodes[site_base_url] = graph.add_node(
'site', site_base_url, color='#28a745'
) # Green color
site_base_node_name = base_site_nodes[site_base_url] site_base_node_name = base_site_nodes[site_base_url]
@@ -152,7 +154,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}') account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}')
account_node_id = f"{site_base_url}: {account_url}" account_node_id = f"{site_base_url}: {account_url}"
if account_node_id not in site_account_nodes: if account_node_id not in site_account_nodes:
site_account_nodes[account_node_id] = graph.add_node('account', account_url) site_account_nodes[account_node_id] = graph.add_node(
'account', account_url
)
account_node_name = site_account_nodes[account_node_id] account_node_name = site_account_nodes[account_node_id]
@@ -162,13 +166,18 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
def process_ids(parent_node, ids): def process_ids(parent_node, ids):
for k, v in ids.items(): for k, v in ids.items():
if k.endswith('_count') or k.startswith('is_') or k.endswith('_at') or k in 'image': if (
k.endswith('_count')
or k.startswith('is_')
or k.endswith('_at')
or k in 'image'
):
continue continue
# Normalize value if string # Normalize value if string
norm_v = v.lower() if isinstance(v, str) else v norm_v = v.lower() if isinstance(v, str) else v
value_key = f"{k}:{norm_v}" value_key = f"{k}:{norm_v}"
if value_key in processed_values: if value_key in processed_values:
ids_data_name = processed_values[value_key] ids_data_name = processed_values[value_key]
else: else:
@@ -187,7 +196,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
data_node_name = graph.add_node(vv, site_base_url) data_node_name = graph.add_node(vv, site_base_url)
graph.link(list_node_name, data_node_name) graph.link(list_node_name, data_node_name)
add_ids = {a: b for b, a in db.extract_ids_from_url(vv).items()} add_ids = {
a: b for b, a in db.extract_ids_from_url(vv).items()
}
if add_ids: if add_ids:
process_ids(data_node_name, add_ids) process_ids(data_node_name, add_ids)
ids_data_name = list_node_name ids_data_name = list_node_name
@@ -198,11 +209,17 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
if 'username' in k or k in SUPPORTED_IDS: if 'username' in k or k in SUPPORTED_IDS:
new_username_key = f"username:{norm_v}" new_username_key = f"username:{norm_v}"
if new_username_key not in processed_values: if new_username_key not in processed_values:
new_username_node_name = graph.add_node('username', norm_v) new_username_node_name = graph.add_node(
processed_values[new_username_key] = new_username_node_name 'username', norm_v
)
processed_values[new_username_key] = (
new_username_node_name
)
graph.link(ids_data_name, new_username_node_name) graph.link(ids_data_name, new_username_node_name)
add_ids = {k: v for v, k in db.extract_ids_from_url(v).items()} add_ids = {
k: v for v, k in db.extract_ids_from_url(v).items()
}
if add_ids: if add_ids:
process_ids(ids_data_name, add_ids) process_ids(ids_data_name, add_ids)
@@ -216,11 +233,14 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
G.remove_nodes_from(nodes_to_remove) G.remove_nodes_from(nodes_to_remove)
# Remove site nodes with only one connection # Remove site nodes with only one connection
single_degree_sites = [n for n, deg in G.degree() if n.startswith("site:") and deg <= 1] single_degree_sites = [
n for n, deg in G.degree() if n.startswith("site:") and deg <= 1
]
G.remove_nodes_from(single_degree_sites) G.remove_nodes_from(single_degree_sites)
# Generate interactive visualization # Generate interactive visualization
from pyvis.network import Network from pyvis.network import Network
nt = Network(notebook=True, height="750px", width="100%") nt = Network(notebook=True, height="750px", width="100%")
nt.from_nx(G) nt.from_nx(G)
nt.show(filename) nt.show(filename)
+43 -33
View File
@@ -21,18 +21,15 @@ from maigret.report import generate_report_context
app = Flask(__name__) app = Flask(__name__)
app.secret_key = 'your-secret-key-here' app.secret_key = 'your-secret-key-here'
#add background job tracking # add background job tracking
background_jobs = {} background_jobs = {}
job_results = {} job_results = {}
# Configuration # Configuration
MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json') app.config["MAIGRET_DB_FILE"] = os.path.join('maigret', 'resources', 'data.json')
COOKIES_FILE = "cookies.txt" app.config["COOKIES_FILE"] = "cookies.txt"
UPLOAD_FOLDER = 'uploads' app.config["UPLOAD_FOLDER"] = 'uploads'
REPORTS_FOLDER = os.path.abspath('/tmp/maigret_reports') app.config["REPORTS_FOLDER"] = os.path.abspath('/tmp/maigret_reports')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(REPORTS_FOLDER, exist_ok=True)
def setup_logger(log_level, name): def setup_logger(log_level, name):
@@ -44,24 +41,24 @@ def setup_logger(log_level, name):
async def maigret_search(username, options): async def maigret_search(username, options):
logger = setup_logger(logging.WARNING, 'maigret') logger = setup_logger(logging.WARNING, 'maigret')
try: try:
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE) db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
top_sites = int(options.get('top_sites') or 500) top_sites = int(options.get('top_sites') or 500)
if options.get('all_sites'): if options.get('all_sites'):
top_sites = 999999999 # effectively all top_sites = 999999999 # effectively all
tags = options.get('tags', []) tags = options.get('tags', [])
site_list= options.get('site_list', []) site_list = options.get('site_list', [])
logger.info(f"Filtering sites by tags: {tags}") logger.info(f"Filtering sites by tags: {tags}")
sites = db.ranked_sites_dict( sites = db.ranked_sites_dict(
top=top_sites, top=top_sites,
tags=tags, tags=tags,
names=site_list, names=site_list,
disabled=False, disabled=False,
id_type='username' id_type='username',
) )
logger.info(f"Found {len(sites)} sites matching the tag criteria") logger.info(f"Found {len(sites)} sites matching the tag criteria")
results = await maigret.search( results = await maigret.search(
@@ -70,9 +67,11 @@ async def maigret_search(username, options):
timeout=int(options.get('timeout', 30)), timeout=int(options.get('timeout', 30)),
logger=logger, logger=logger,
id_type='username', id_type='username',
cookies=COOKIES_FILE if options.get('use_cookies') else None, cookies=app.config["COOKIES_FILE"] if options.get('use_cookies') else None,
is_parsing_enabled=(not options.get('disable_extracting', False)), is_parsing_enabled=(not options.get('disable_extracting', False)),
recursive_search_enabled=(not options.get('disable_recursive_search', False)), recursive_search_enabled=(
not options.get('disable_recursive_search', False)
),
check_domains=options.get('with_domains', False), check_domains=options.get('with_domains', False),
proxy=options.get('proxy', None), proxy=options.get('proxy', None),
tor_proxy=options.get('tor_proxy', None), tor_proxy=options.get('tor_proxy', None),
@@ -104,14 +103,17 @@ def process_search_task(usernames, options, timestamp):
search_multiple_usernames(usernames, options) search_multiple_usernames(usernames, options)
) )
session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}") os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
session_folder = os.path.join(
app.config["REPORTS_FOLDER"], f"search_{timestamp}"
)
os.makedirs(session_folder, exist_ok=True) os.makedirs(session_folder, exist_ok=True)
graph_path = os.path.join(session_folder, "combined_graph.html") graph_path = os.path.join(session_folder, "combined_graph.html")
maigret.report.save_graph_report( maigret.report.save_graph_report(
graph_path, graph_path,
general_results, general_results,
MaigretDatabase().load_from_path(MAIGRET_DB_FILE), MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]),
) )
individual_reports = [] individual_reports = []
@@ -188,20 +190,20 @@ def process_search_task(usernames, options, timestamp):
@app.route('/') @app.route('/')
def index(): def index():
#load site data for autocomplete # load site data for autocomplete
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE) db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
site_options = [] site_options = []
for site in db.sites: for site in db.sites:
#add main site name # add main site name
site_options.append(site.name) site_options.append(site.name)
#add URL if different from name # add URL if different from name
if site.url_main and site.url_main not in site_options: if site.url_main and site.url_main not in site_options:
site_options.append(site.url_main) site_options.append(site.url_main)
#sort and deduplicate # sort and deduplicate
site_options = sorted(set(site_options)) site_options = sorted(set(site_options))
return render_template('index.html', site_options=site_options) return render_template('index.html', site_options=site_options)
@@ -237,10 +239,14 @@ def search():
'i2p_proxy': request.form.get('i2p_proxy', None) or None, 'i2p_proxy': request.form.get('i2p_proxy', None) or None,
'permute': 'permute' in request.form, 'permute': 'permute' in request.form,
'tags': selected_tags, # Pass selected tags as a list 'tags': selected_tags, # Pass selected tags as a list
'site_list': [s.strip() for s in request.form.get('site', '').split(',') if s.strip()], 'site_list': [
s.strip() for s in request.form.get('site', '').split(',') if s.strip()
],
} }
logging.info(f"Starting search for usernames: {usernames} with tags: {selected_tags}") logging.info(
f"Starting search for usernames: {usernames} with tags: {selected_tags}"
)
# Start background job # Start background job
background_jobs[timestamp] = { background_jobs[timestamp] = {
@@ -253,6 +259,7 @@ def search():
return redirect(url_for('status', timestamp=timestamp)) return redirect(url_for('status', timestamp=timestamp))
@app.route('/status/<timestamp>') @app.route('/status/<timestamp>')
def status(timestamp): def status(timestamp):
logging.info(f"Status check for timestamp: {timestamp}") logging.info(f"Status check for timestamp: {timestamp}")
@@ -313,8 +320,11 @@ def results(session_id):
@app.route('/reports/<path:filename>') @app.route('/reports/<path:filename>')
def download_report(filename): def download_report(filename):
try: try:
file_path = os.path.normpath(os.path.join(REPORTS_FOLDER, filename)) os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
if not file_path.startswith(REPORTS_FOLDER): file_path = os.path.normpath(
os.path.join(app.config["REPORTS_FOLDER"], filename)
)
if not file_path.startswith(app.config["REPORTS_FOLDER"]):
raise Exception("Invalid file path") raise Exception("Invalid file path")
return send_file(file_path) return send_file(file_path)
except Exception as e: except Exception as e: