mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Pass db_file configuration to web interface (#2019)
* pass db_file configuration to web interface
* Autoformatting

---------

Co-authored-by: Soxoj <soxoj@protonmail.com>
This commit is contained in:
+12
-9
@@ -493,15 +493,6 @@ async def main():
|
|||||||
log_level = logging.WARNING
|
log_level = logging.WARNING
|
||||||
logger.setLevel(log_level)
|
logger.setLevel(log_level)
|
||||||
|
|
||||||
if args.web is not None:
|
|
||||||
from maigret.web.app import app
|
|
||||||
|
|
||||||
port = (
|
|
||||||
args.web if args.web else 5000
|
|
||||||
) # args.web is either the specified port or 5000 by default
|
|
||||||
app.run(port=port)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Usernames initial list
|
# Usernames initial list
|
||||||
usernames = {
|
usernames = {
|
||||||
u: args.id_type
|
u: args.id_type
|
||||||
@@ -609,6 +600,18 @@ async def main():
|
|||||||
# Define one report filename template
|
# Define one report filename template
|
||||||
report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
|
report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
|
||||||
|
|
||||||
|
# Web interface
|
||||||
|
if args.web is not None:
|
||||||
|
from maigret.web.app import app
|
||||||
|
|
||||||
|
app.config["MAIGRET_DB_FILE"] = db_file
|
||||||
|
|
||||||
|
port = (
|
||||||
|
args.web if args.web else 5000
|
||||||
|
) # args.web is either the specified port or 5000 by default
|
||||||
|
app.run(port=port)
|
||||||
|
return
|
||||||
|
|
||||||
if usernames == {}:
|
if usernames == {}:
|
||||||
# magic params to exit after init
|
# magic params to exit after init
|
||||||
query_notify.warning('No usernames to check, exiting.')
|
query_notify.warning('No usernames to check, exiting.')
|
||||||
|
|||||||
+31
-11
@@ -106,7 +106,7 @@ class MaigretGraph:
|
|||||||
params = dict(self.username_params)
|
params = dict(self.username_params)
|
||||||
elif value.startswith('http'):
|
elif value.startswith('http'):
|
||||||
params = dict(self.site_params)
|
params = dict(self.site_params)
|
||||||
|
|
||||||
params['title'] = node_name
|
params['title'] = node_name
|
||||||
if color:
|
if color:
|
||||||
params['color'] = color
|
params['color'] = color
|
||||||
@@ -141,10 +141,12 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
|
|||||||
if not status or status.status != MaigretCheckStatus.CLAIMED:
|
if not status or status.status != MaigretCheckStatus.CLAIMED:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# base site node
|
# base site node
|
||||||
site_base_url = website_name
|
site_base_url = website_name
|
||||||
if site_base_url not in base_site_nodes:
|
if site_base_url not in base_site_nodes:
|
||||||
base_site_nodes[site_base_url] = graph.add_node('site', site_base_url, color='#28a745') # Green color
|
base_site_nodes[site_base_url] = graph.add_node(
|
||||||
|
'site', site_base_url, color='#28a745'
|
||||||
|
) # Green color
|
||||||
|
|
||||||
site_base_node_name = base_site_nodes[site_base_url]
|
site_base_node_name = base_site_nodes[site_base_url]
|
||||||
|
|
||||||
@@ -152,7 +154,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
|
|||||||
account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}')
|
account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}')
|
||||||
account_node_id = f"{site_base_url}: {account_url}"
|
account_node_id = f"{site_base_url}: {account_url}"
|
||||||
if account_node_id not in site_account_nodes:
|
if account_node_id not in site_account_nodes:
|
||||||
site_account_nodes[account_node_id] = graph.add_node('account', account_url)
|
site_account_nodes[account_node_id] = graph.add_node(
|
||||||
|
'account', account_url
|
||||||
|
)
|
||||||
|
|
||||||
account_node_name = site_account_nodes[account_node_id]
|
account_node_name = site_account_nodes[account_node_id]
|
||||||
|
|
||||||
@@ -162,13 +166,18 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
|
|||||||
|
|
||||||
def process_ids(parent_node, ids):
|
def process_ids(parent_node, ids):
|
||||||
for k, v in ids.items():
|
for k, v in ids.items():
|
||||||
if k.endswith('_count') or k.startswith('is_') or k.endswith('_at') or k in 'image':
|
if (
|
||||||
|
k.endswith('_count')
|
||||||
|
or k.startswith('is_')
|
||||||
|
or k.endswith('_at')
|
||||||
|
or k in 'image'
|
||||||
|
):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Normalize value if string
|
# Normalize value if string
|
||||||
norm_v = v.lower() if isinstance(v, str) else v
|
norm_v = v.lower() if isinstance(v, str) else v
|
||||||
value_key = f"{k}:{norm_v}"
|
value_key = f"{k}:{norm_v}"
|
||||||
|
|
||||||
if value_key in processed_values:
|
if value_key in processed_values:
|
||||||
ids_data_name = processed_values[value_key]
|
ids_data_name = processed_values[value_key]
|
||||||
else:
|
else:
|
||||||
@@ -187,7 +196,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
|
|||||||
data_node_name = graph.add_node(vv, site_base_url)
|
data_node_name = graph.add_node(vv, site_base_url)
|
||||||
graph.link(list_node_name, data_node_name)
|
graph.link(list_node_name, data_node_name)
|
||||||
|
|
||||||
add_ids = {a: b for b, a in db.extract_ids_from_url(vv).items()}
|
add_ids = {
|
||||||
|
a: b for b, a in db.extract_ids_from_url(vv).items()
|
||||||
|
}
|
||||||
if add_ids:
|
if add_ids:
|
||||||
process_ids(data_node_name, add_ids)
|
process_ids(data_node_name, add_ids)
|
||||||
ids_data_name = list_node_name
|
ids_data_name = list_node_name
|
||||||
@@ -198,11 +209,17 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
|
|||||||
if 'username' in k or k in SUPPORTED_IDS:
|
if 'username' in k or k in SUPPORTED_IDS:
|
||||||
new_username_key = f"username:{norm_v}"
|
new_username_key = f"username:{norm_v}"
|
||||||
if new_username_key not in processed_values:
|
if new_username_key not in processed_values:
|
||||||
new_username_node_name = graph.add_node('username', norm_v)
|
new_username_node_name = graph.add_node(
|
||||||
processed_values[new_username_key] = new_username_node_name
|
'username', norm_v
|
||||||
|
)
|
||||||
|
processed_values[new_username_key] = (
|
||||||
|
new_username_node_name
|
||||||
|
)
|
||||||
graph.link(ids_data_name, new_username_node_name)
|
graph.link(ids_data_name, new_username_node_name)
|
||||||
|
|
||||||
add_ids = {k: v for v, k in db.extract_ids_from_url(v).items()}
|
add_ids = {
|
||||||
|
k: v for v, k in db.extract_ids_from_url(v).items()
|
||||||
|
}
|
||||||
if add_ids:
|
if add_ids:
|
||||||
process_ids(ids_data_name, add_ids)
|
process_ids(ids_data_name, add_ids)
|
||||||
|
|
||||||
@@ -216,11 +233,14 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
|
|||||||
G.remove_nodes_from(nodes_to_remove)
|
G.remove_nodes_from(nodes_to_remove)
|
||||||
|
|
||||||
# Remove site nodes with only one connection
|
# Remove site nodes with only one connection
|
||||||
single_degree_sites = [n for n, deg in G.degree() if n.startswith("site:") and deg <= 1]
|
single_degree_sites = [
|
||||||
|
n for n, deg in G.degree() if n.startswith("site:") and deg <= 1
|
||||||
|
]
|
||||||
G.remove_nodes_from(single_degree_sites)
|
G.remove_nodes_from(single_degree_sites)
|
||||||
|
|
||||||
# Generate interactive visualization
|
# Generate interactive visualization
|
||||||
from pyvis.network import Network
|
from pyvis.network import Network
|
||||||
|
|
||||||
nt = Network(notebook=True, height="750px", width="100%")
|
nt = Network(notebook=True, height="750px", width="100%")
|
||||||
nt.from_nx(G)
|
nt.from_nx(G)
|
||||||
nt.show(filename)
|
nt.show(filename)
|
||||||
|
|||||||
+43
-33
@@ -21,18 +21,15 @@ from maigret.report import generate_report_context
|
|||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
app.secret_key = 'your-secret-key-here'
|
app.secret_key = 'your-secret-key-here'
|
||||||
|
|
||||||
#add background job tracking
|
# add background job tracking
|
||||||
background_jobs = {}
|
background_jobs = {}
|
||||||
job_results = {}
|
job_results = {}
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json')
|
app.config["MAIGRET_DB_FILE"] = os.path.join('maigret', 'resources', 'data.json')
|
||||||
COOKIES_FILE = "cookies.txt"
|
app.config["COOKIES_FILE"] = "cookies.txt"
|
||||||
UPLOAD_FOLDER = 'uploads'
|
app.config["UPLOAD_FOLDER"] = 'uploads'
|
||||||
REPORTS_FOLDER = os.path.abspath('/tmp/maigret_reports')
|
app.config["REPORTS_FOLDER"] = os.path.abspath('/tmp/maigret_reports')
|
||||||
|
|
||||||
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
|
||||||
os.makedirs(REPORTS_FOLDER, exist_ok=True)
|
|
||||||
|
|
||||||
|
|
||||||
def setup_logger(log_level, name):
|
def setup_logger(log_level, name):
|
||||||
@@ -44,24 +41,24 @@ def setup_logger(log_level, name):
|
|||||||
async def maigret_search(username, options):
|
async def maigret_search(username, options):
|
||||||
logger = setup_logger(logging.WARNING, 'maigret')
|
logger = setup_logger(logging.WARNING, 'maigret')
|
||||||
try:
|
try:
|
||||||
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
|
db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
|
||||||
|
|
||||||
top_sites = int(options.get('top_sites') or 500)
|
top_sites = int(options.get('top_sites') or 500)
|
||||||
if options.get('all_sites'):
|
if options.get('all_sites'):
|
||||||
top_sites = 999999999 # effectively all
|
top_sites = 999999999 # effectively all
|
||||||
|
|
||||||
tags = options.get('tags', [])
|
tags = options.get('tags', [])
|
||||||
site_list= options.get('site_list', [])
|
site_list = options.get('site_list', [])
|
||||||
logger.info(f"Filtering sites by tags: {tags}")
|
logger.info(f"Filtering sites by tags: {tags}")
|
||||||
|
|
||||||
sites = db.ranked_sites_dict(
|
sites = db.ranked_sites_dict(
|
||||||
top=top_sites,
|
top=top_sites,
|
||||||
tags=tags,
|
tags=tags,
|
||||||
names=site_list,
|
names=site_list,
|
||||||
disabled=False,
|
disabled=False,
|
||||||
id_type='username'
|
id_type='username',
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Found {len(sites)} sites matching the tag criteria")
|
logger.info(f"Found {len(sites)} sites matching the tag criteria")
|
||||||
|
|
||||||
results = await maigret.search(
|
results = await maigret.search(
|
||||||
@@ -70,9 +67,11 @@ async def maigret_search(username, options):
|
|||||||
timeout=int(options.get('timeout', 30)),
|
timeout=int(options.get('timeout', 30)),
|
||||||
logger=logger,
|
logger=logger,
|
||||||
id_type='username',
|
id_type='username',
|
||||||
cookies=COOKIES_FILE if options.get('use_cookies') else None,
|
cookies=app.config["COOKIES_FILE"] if options.get('use_cookies') else None,
|
||||||
is_parsing_enabled=(not options.get('disable_extracting', False)),
|
is_parsing_enabled=(not options.get('disable_extracting', False)),
|
||||||
recursive_search_enabled=(not options.get('disable_recursive_search', False)),
|
recursive_search_enabled=(
|
||||||
|
not options.get('disable_recursive_search', False)
|
||||||
|
),
|
||||||
check_domains=options.get('with_domains', False),
|
check_domains=options.get('with_domains', False),
|
||||||
proxy=options.get('proxy', None),
|
proxy=options.get('proxy', None),
|
||||||
tor_proxy=options.get('tor_proxy', None),
|
tor_proxy=options.get('tor_proxy', None),
|
||||||
@@ -104,14 +103,17 @@ def process_search_task(usernames, options, timestamp):
|
|||||||
search_multiple_usernames(usernames, options)
|
search_multiple_usernames(usernames, options)
|
||||||
)
|
)
|
||||||
|
|
||||||
session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}")
|
os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
|
||||||
|
session_folder = os.path.join(
|
||||||
|
app.config["REPORTS_FOLDER"], f"search_{timestamp}"
|
||||||
|
)
|
||||||
os.makedirs(session_folder, exist_ok=True)
|
os.makedirs(session_folder, exist_ok=True)
|
||||||
|
|
||||||
graph_path = os.path.join(session_folder, "combined_graph.html")
|
graph_path = os.path.join(session_folder, "combined_graph.html")
|
||||||
maigret.report.save_graph_report(
|
maigret.report.save_graph_report(
|
||||||
graph_path,
|
graph_path,
|
||||||
general_results,
|
general_results,
|
||||||
MaigretDatabase().load_from_path(MAIGRET_DB_FILE),
|
MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]),
|
||||||
)
|
)
|
||||||
|
|
||||||
individual_reports = []
|
individual_reports = []
|
||||||
@@ -188,20 +190,20 @@ def process_search_task(usernames, options, timestamp):
|
|||||||
|
|
||||||
@app.route('/')
|
@app.route('/')
|
||||||
def index():
|
def index():
|
||||||
#load site data for autocomplete
|
# load site data for autocomplete
|
||||||
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
|
db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
|
||||||
site_options = []
|
site_options = []
|
||||||
|
|
||||||
for site in db.sites:
|
for site in db.sites:
|
||||||
#add main site name
|
# add main site name
|
||||||
site_options.append(site.name)
|
site_options.append(site.name)
|
||||||
#add URL if different from name
|
# add URL if different from name
|
||||||
if site.url_main and site.url_main not in site_options:
|
if site.url_main and site.url_main not in site_options:
|
||||||
site_options.append(site.url_main)
|
site_options.append(site.url_main)
|
||||||
|
|
||||||
#sort and deduplicate
|
# sort and deduplicate
|
||||||
site_options = sorted(set(site_options))
|
site_options = sorted(set(site_options))
|
||||||
|
|
||||||
return render_template('index.html', site_options=site_options)
|
return render_template('index.html', site_options=site_options)
|
||||||
|
|
||||||
|
|
||||||
@@ -237,10 +239,14 @@ def search():
|
|||||||
'i2p_proxy': request.form.get('i2p_proxy', None) or None,
|
'i2p_proxy': request.form.get('i2p_proxy', None) or None,
|
||||||
'permute': 'permute' in request.form,
|
'permute': 'permute' in request.form,
|
||||||
'tags': selected_tags, # Pass selected tags as a list
|
'tags': selected_tags, # Pass selected tags as a list
|
||||||
'site_list': [s.strip() for s in request.form.get('site', '').split(',') if s.strip()],
|
'site_list': [
|
||||||
|
s.strip() for s in request.form.get('site', '').split(',') if s.strip()
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
logging.info(f"Starting search for usernames: {usernames} with tags: {selected_tags}")
|
logging.info(
|
||||||
|
f"Starting search for usernames: {usernames} with tags: {selected_tags}"
|
||||||
|
)
|
||||||
|
|
||||||
# Start background job
|
# Start background job
|
||||||
background_jobs[timestamp] = {
|
background_jobs[timestamp] = {
|
||||||
@@ -253,6 +259,7 @@ def search():
|
|||||||
|
|
||||||
return redirect(url_for('status', timestamp=timestamp))
|
return redirect(url_for('status', timestamp=timestamp))
|
||||||
|
|
||||||
|
|
||||||
@app.route('/status/<timestamp>')
|
@app.route('/status/<timestamp>')
|
||||||
def status(timestamp):
|
def status(timestamp):
|
||||||
logging.info(f"Status check for timestamp: {timestamp}")
|
logging.info(f"Status check for timestamp: {timestamp}")
|
||||||
@@ -313,8 +320,11 @@ def results(session_id):
|
|||||||
@app.route('/reports/<path:filename>')
|
@app.route('/reports/<path:filename>')
|
||||||
def download_report(filename):
|
def download_report(filename):
|
||||||
try:
|
try:
|
||||||
file_path = os.path.normpath(os.path.join(REPORTS_FOLDER, filename))
|
os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
|
||||||
if not file_path.startswith(REPORTS_FOLDER):
|
file_path = os.path.normpath(
|
||||||
|
os.path.join(app.config["REPORTS_FOLDER"], filename)
|
||||||
|
)
|
||||||
|
if not file_path.startswith(app.config["REPORTS_FOLDER"]):
|
||||||
raise Exception("Invalid file path")
|
raise Exception("Invalid file path")
|
||||||
return send_file(file_path)
|
return send_file(file_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user