Pass db_file configuration to web interface (#2019)

* Pass db_file configuration to web interface
* Autoformatting

---------

Co-authored-by: Soxoj <soxoj@protonmail.com>
This commit is contained in:
pykereaper
2025-06-28 23:15:56 +02:00
committed by GitHub
parent c0e60e25b8
commit 0f7aa2c456
3 changed files with 86 additions and 53 deletions
+12 -9
View File
@@ -493,15 +493,6 @@ async def main():
log_level = logging.WARNING
logger.setLevel(log_level)
if args.web is not None:
from maigret.web.app import app
port = (
args.web if args.web else 5000
) # args.web is either the specified port or 5000 by default
app.run(port=port)
return
# Usernames initial list
usernames = {
u: args.id_type
@@ -609,6 +600,18 @@ async def main():
# Define one report filename template
report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
# Web interface
if args.web is not None:
from maigret.web.app import app
app.config["MAIGRET_DB_FILE"] = db_file
port = (
args.web if args.web else 5000
) # args.web is either the specified port or 5000 by default
app.run(port=port)
return
if usernames == {}:
# magic params to exit after init
query_notify.warning('No usernames to check, exiting.')
+28 -8
View File
@@ -144,7 +144,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
# base site node
site_base_url = website_name
if site_base_url not in base_site_nodes:
base_site_nodes[site_base_url] = graph.add_node('site', site_base_url, color='#28a745') # Green color
base_site_nodes[site_base_url] = graph.add_node(
'site', site_base_url, color='#28a745'
) # Green color
site_base_node_name = base_site_nodes[site_base_url]
@@ -152,7 +154,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}')
account_node_id = f"{site_base_url}: {account_url}"
if account_node_id not in site_account_nodes:
site_account_nodes[account_node_id] = graph.add_node('account', account_url)
site_account_nodes[account_node_id] = graph.add_node(
'account', account_url
)
account_node_name = site_account_nodes[account_node_id]
@@ -162,7 +166,12 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
def process_ids(parent_node, ids):
for k, v in ids.items():
if k.endswith('_count') or k.startswith('is_') or k.endswith('_at') or k in 'image':
if (
k.endswith('_count')
or k.startswith('is_')
or k.endswith('_at')
or k in 'image'
):
continue
# Normalize value if string
@@ -187,7 +196,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
data_node_name = graph.add_node(vv, site_base_url)
graph.link(list_node_name, data_node_name)
add_ids = {a: b for b, a in db.extract_ids_from_url(vv).items()}
add_ids = {
a: b for b, a in db.extract_ids_from_url(vv).items()
}
if add_ids:
process_ids(data_node_name, add_ids)
ids_data_name = list_node_name
@@ -198,11 +209,17 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
if 'username' in k or k in SUPPORTED_IDS:
new_username_key = f"username:{norm_v}"
if new_username_key not in processed_values:
new_username_node_name = graph.add_node('username', norm_v)
processed_values[new_username_key] = new_username_node_name
new_username_node_name = graph.add_node(
'username', norm_v
)
processed_values[new_username_key] = (
new_username_node_name
)
graph.link(ids_data_name, new_username_node_name)
add_ids = {k: v for v, k in db.extract_ids_from_url(v).items()}
add_ids = {
k: v for v, k in db.extract_ids_from_url(v).items()
}
if add_ids:
process_ids(ids_data_name, add_ids)
@@ -216,11 +233,14 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
G.remove_nodes_from(nodes_to_remove)
# Remove site nodes with only one connection
single_degree_sites = [n for n, deg in G.degree() if n.startswith("site:") and deg <= 1]
single_degree_sites = [
n for n, deg in G.degree() if n.startswith("site:") and deg <= 1
]
G.remove_nodes_from(single_degree_sites)
# Generate interactive visualization
from pyvis.network import Network
nt = Network(notebook=True, height="750px", width="100%")
nt.from_nx(G)
nt.show(filename)
+28 -18
View File
@@ -26,13 +26,10 @@ background_jobs = {}
job_results = {}
# Configuration
MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json')
COOKIES_FILE = "cookies.txt"
UPLOAD_FOLDER = 'uploads'
REPORTS_FOLDER = os.path.abspath('/tmp/maigret_reports')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(REPORTS_FOLDER, exist_ok=True)
app.config["MAIGRET_DB_FILE"] = os.path.join('maigret', 'resources', 'data.json')
app.config["COOKIES_FILE"] = "cookies.txt"
app.config["UPLOAD_FOLDER"] = 'uploads'
app.config["REPORTS_FOLDER"] = os.path.abspath('/tmp/maigret_reports')
def setup_logger(log_level, name):
@@ -44,7 +41,7 @@ def setup_logger(log_level, name):
async def maigret_search(username, options):
logger = setup_logger(logging.WARNING, 'maigret')
try:
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
top_sites = int(options.get('top_sites') or 500)
if options.get('all_sites'):
@@ -59,7 +56,7 @@ async def maigret_search(username, options):
tags=tags,
names=site_list,
disabled=False,
id_type='username'
id_type='username',
)
logger.info(f"Found {len(sites)} sites matching the tag criteria")
@@ -70,9 +67,11 @@ async def maigret_search(username, options):
timeout=int(options.get('timeout', 30)),
logger=logger,
id_type='username',
cookies=COOKIES_FILE if options.get('use_cookies') else None,
cookies=app.config["COOKIES_FILE"] if options.get('use_cookies') else None,
is_parsing_enabled=(not options.get('disable_extracting', False)),
recursive_search_enabled=(not options.get('disable_recursive_search', False)),
recursive_search_enabled=(
not options.get('disable_recursive_search', False)
),
check_domains=options.get('with_domains', False),
proxy=options.get('proxy', None),
tor_proxy=options.get('tor_proxy', None),
@@ -104,14 +103,17 @@ def process_search_task(usernames, options, timestamp):
search_multiple_usernames(usernames, options)
)
session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}")
os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
session_folder = os.path.join(
app.config["REPORTS_FOLDER"], f"search_{timestamp}"
)
os.makedirs(session_folder, exist_ok=True)
graph_path = os.path.join(session_folder, "combined_graph.html")
maigret.report.save_graph_report(
graph_path,
general_results,
MaigretDatabase().load_from_path(MAIGRET_DB_FILE),
MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]),
)
individual_reports = []
@@ -189,7 +191,7 @@ def process_search_task(usernames, options, timestamp):
@app.route('/')
def index():
# load site data for autocomplete
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
site_options = []
for site in db.sites:
@@ -237,10 +239,14 @@ def search():
'i2p_proxy': request.form.get('i2p_proxy', None) or None,
'permute': 'permute' in request.form,
'tags': selected_tags, # Pass selected tags as a list
'site_list': [s.strip() for s in request.form.get('site', '').split(',') if s.strip()],
'site_list': [
s.strip() for s in request.form.get('site', '').split(',') if s.strip()
],
}
logging.info(f"Starting search for usernames: {usernames} with tags: {selected_tags}")
logging.info(
f"Starting search for usernames: {usernames} with tags: {selected_tags}"
)
# Start background job
background_jobs[timestamp] = {
@@ -253,6 +259,7 @@ def search():
return redirect(url_for('status', timestamp=timestamp))
@app.route('/status/<timestamp>')
def status(timestamp):
logging.info(f"Status check for timestamp: {timestamp}")
@@ -313,8 +320,11 @@ def results(session_id):
@app.route('/reports/<path:filename>')
def download_report(filename):
try:
file_path = os.path.normpath(os.path.join(REPORTS_FOLDER, filename))
if not file_path.startswith(REPORTS_FOLDER):
os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
file_path = os.path.normpath(
os.path.join(app.config["REPORTS_FOLDER"], filename)
)
if not file_path.startswith(app.config["REPORTS_FOLDER"]):
raise Exception("Invalid file path")
return send_file(file_path)
except Exception as e: