mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
Added reports of JSON format (simple, ndjson); improved submit logic; added several sites
This commit is contained in:
+24
-11
@@ -13,7 +13,8 @@ from socid_extractor import parse, __version__ as socid_version
|
|||||||
from .checking import *
|
from .checking import *
|
||||||
from .notify import QueryNotifyPrint
|
from .notify import QueryNotifyPrint
|
||||||
from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
|
from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
|
||||||
generate_report_context, save_txt_report
|
generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, check_supported_json_format, \
|
||||||
|
save_json_report
|
||||||
from .submit import submit_dialog
|
from .submit import submit_dialog
|
||||||
|
|
||||||
__version__ = '0.1.13'
|
__version__ = '0.1.13'
|
||||||
@@ -56,9 +57,9 @@ async def main():
|
|||||||
action="store", dest="proxy", default=None,
|
action="store", dest="proxy", default=None,
|
||||||
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
|
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
|
||||||
)
|
)
|
||||||
parser.add_argument("--json", "-j", metavar="JSON_FILE",
|
parser.add_argument("--db", metavar="DB_FILE",
|
||||||
dest="json_file", default=None,
|
dest="db_file", default=None,
|
||||||
help="Load data from a JSON file or an online, valid, JSON file.")
|
help="Load Maigret database from a JSON file or an online, valid, JSON file.")
|
||||||
parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE",
|
parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE",
|
||||||
dest="cookie_file", default=None,
|
dest="cookie_file", default=None,
|
||||||
help="File with cookies.")
|
help="File with cookies.")
|
||||||
@@ -91,7 +92,7 @@ async def main():
|
|||||||
action="store_true", dest="print_check_errors", default=False,
|
action="store_true", dest="print_check_errors", default=False,
|
||||||
help="Print errors messages: connection, captcha, site country ban, etc."
|
help="Print errors messages: connection, captcha, site country ban, etc."
|
||||||
)
|
)
|
||||||
parser.add_argument("--submit",
|
parser.add_argument("--submit", metavar='EXISTING_USER_URL',
|
||||||
type=str, dest="new_site_to_submit", default=False,
|
type=str, dest="new_site_to_submit", default=False,
|
||||||
help="URL of existing profile in new site to submit."
|
help="URL of existing profile in new site to submit."
|
||||||
)
|
)
|
||||||
@@ -158,6 +159,12 @@ async def main():
|
|||||||
dest="pdf", default=False,
|
dest="pdf", default=False,
|
||||||
help="Generate a PDF report (general report on all usernames)."
|
help="Generate a PDF report (general report on all usernames)."
|
||||||
)
|
)
|
||||||
|
parser.add_argument("-J", "--json",
|
||||||
|
action="store", metavar='REPORT_TYPE',
|
||||||
|
dest="json", default='', type=check_supported_json_format,
|
||||||
|
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
|
||||||
|
" (one report per username)."
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -206,8 +213,8 @@ async def main():
|
|||||||
if args.tags:
|
if args.tags:
|
||||||
args.tags = list(set(str(args.tags).split(',')))
|
args.tags = list(set(str(args.tags).split(',')))
|
||||||
|
|
||||||
if args.json_file is None:
|
if args.db_file is None:
|
||||||
args.json_file = \
|
args.db_file = \
|
||||||
os.path.join(os.path.dirname(os.path.realpath(__file__)),
|
os.path.join(os.path.dirname(os.path.realpath(__file__)),
|
||||||
"resources/data.json"
|
"resources/data.json"
|
||||||
)
|
)
|
||||||
@@ -223,7 +230,7 @@ async def main():
|
|||||||
color=not args.no_color)
|
color=not args.no_color)
|
||||||
|
|
||||||
# Create object with all information about sites we are aware of.
|
# Create object with all information about sites we are aware of.
|
||||||
db = MaigretDatabase().load_from_file(args.json_file)
|
db = MaigretDatabase().load_from_file(args.db_file)
|
||||||
get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
|
get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
|
||||||
names=args.site_list,
|
names=args.site_list,
|
||||||
disabled=False, id_type=x)
|
disabled=False, id_type=x)
|
||||||
@@ -233,7 +240,7 @@ async def main():
|
|||||||
if args.new_site_to_submit:
|
if args.new_site_to_submit:
|
||||||
is_submitted = await submit_dialog(db, args.new_site_to_submit)
|
is_submitted = await submit_dialog(db, args.new_site_to_submit)
|
||||||
if is_submitted:
|
if is_submitted:
|
||||||
db.save_to_file(args.json_file)
|
db.save_to_file(args.db_file)
|
||||||
|
|
||||||
# Database self-checking
|
# Database self-checking
|
||||||
if args.self_check:
|
if args.self_check:
|
||||||
@@ -241,7 +248,7 @@ async def main():
|
|||||||
is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
|
is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
|
||||||
if is_need_update:
|
if is_need_update:
|
||||||
if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
|
if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
|
||||||
db.save_to_file(args.json_file)
|
db.save_to_file(args.db_file)
|
||||||
print('Database was successfully updated.')
|
print('Database was successfully updated.')
|
||||||
else:
|
else:
|
||||||
print('Updates will be applied only for current search session.')
|
print('Updates will be applied only for current search session.')
|
||||||
@@ -339,6 +346,12 @@ async def main():
|
|||||||
save_txt_report(filename, username, results)
|
save_txt_report(filename, username, results)
|
||||||
query_notify.warning(f'TXT report for {username} saved in {filename}')
|
query_notify.warning(f'TXT report for {username} saved in {filename}')
|
||||||
|
|
||||||
|
if args.json:
|
||||||
|
filename = report_filepath_tpl.format(username=username, postfix=f'_{args.json}.json')
|
||||||
|
save_json_report(filename, username, results, report_type=args.json)
|
||||||
|
query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
|
||||||
|
|
||||||
|
|
||||||
# reporting for all the result
|
# reporting for all the result
|
||||||
if general_results:
|
if general_results:
|
||||||
if args.html or args.pdf:
|
if args.html or args.pdf:
|
||||||
@@ -357,7 +370,7 @@ async def main():
|
|||||||
save_pdf_report(filename, report_context)
|
save_pdf_report(filename, report_context)
|
||||||
query_notify.warning(f'PDF report on all usernames saved in {filename}')
|
query_notify.warning(f'PDF report on all usernames saved in {filename}')
|
||||||
# update database
|
# update database
|
||||||
db.save_to_file(args.json_file)
|
db.save_to_file(args.db_file)
|
||||||
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import csv
|
import csv
|
||||||
|
import json
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -7,11 +8,17 @@ import xmind
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
from xhtml2pdf import pisa
|
from xhtml2pdf import pisa
|
||||||
|
from argparse import ArgumentTypeError
|
||||||
from dateutil.parser import parse as parse_datetime_str
|
from dateutil.parser import parse as parse_datetime_str
|
||||||
|
|
||||||
from .result import QueryStatus
|
from .result import QueryStatus
|
||||||
from .utils import is_country_tag, CaseConverter, enrich_link_str
|
from .utils import is_country_tag, CaseConverter, enrich_link_str
|
||||||
|
|
||||||
|
SUPPORTED_JSON_REPORT_FORMATS = [
|
||||||
|
'simple',
|
||||||
|
'ndjson',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
UTILS
|
UTILS
|
||||||
@@ -51,6 +58,10 @@ def save_pdf_report(filename: str, context: dict):
|
|||||||
with open(filename, 'w+b') as f:
|
with open(filename, 'w+b') as f:
|
||||||
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
|
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
|
||||||
|
|
||||||
|
def save_json_report(filename: str, username: str, results: dict, report_type: str):
    """Write a JSON report for *username* to the file at *filename*.

    The file is opened for writing as UTF-8 text and the content is
    produced by ``generate_json_report`` using the given *report_type*.
    """
    with open(filename, 'w', encoding='utf-8') as report_file:
        generate_json_report(
            username,
            results,
            report_file,
            report_type=report_type,
        )
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
REPORTS GENERATING
|
REPORTS GENERATING
|
||||||
@@ -225,6 +236,30 @@ def generate_txt_report(username: str, results: dict, file):
|
|||||||
file.write(dictionary["url_user"] + "\n")
|
file.write(dictionary["url_user"] + "\n")
|
||||||
file.write(f'Total Websites Username Detected On : {exists_counter}')
|
file.write(f'Total Websites Username Detected On : {exists_counter}')
|
||||||
|
|
||||||
|
|
||||||
|
def generate_json_report(username: str, results: dict, file, report_type):
    """Write the claimed accounts found in *results* to *file* as JSON.

    Only entries whose status object reports ``QueryStatus.CLAIMED`` are
    exported. Empty entries and entries that carry no ``"status"`` value are
    skipped.

    Args:
        username: Username the results belong to. It is not used while
            building the report; kept for interface compatibility.
        results: Mapping of site name to that site's result dict.
        file: Writable text file object that receives the report.
        report_type: Report flavour. A value starting with ``'ndjson'``
            produces one JSON object per line, each with an added
            ``'sitename'`` key; any other value produces a single JSON
            object keyed by site name.
    """
    is_report_per_line = report_type.startswith('ndjson')
    all_json = {}

    for sitename, site_result in results.items():
        if not site_result:
            continue

        # An entry may have no status object at all ("no site data");
        # skip it instead of failing on attribute access.
        status = site_result.get("status")
        if status is None or status.status != QueryStatus.CLAIMED:
            continue

        # Shallow copy so the caller's result dict is left untouched.
        data = dict(site_result)
        data['status'] = status.json()

        if is_report_per_line:
            data['sitename'] = sitename
            file.write(json.dumps(data) + '\n')
        else:
            all_json[sitename] = data

    if not is_report_per_line:
        file.write(json.dumps(all_json))
|
||||||
|
|
||||||
'''
|
'''
|
||||||
XMIND 8 Functions
|
XMIND 8 Functions
|
||||||
'''
|
'''
|
||||||
@@ -306,3 +341,9 @@ def design_sheet(sheet, username, results):
|
|||||||
currentsublabel.setTitle("%s: %s" % (k, v))
|
currentsublabel.setTitle("%s: %s" % (k, v))
|
||||||
|
|
||||||
|
|
||||||
|
def check_supported_json_format(value):
    """Validate a JSON report type supplied on the command line.

    Used as the argparse ``type=`` callable of the ``--json`` option.

    Args:
        value: Report type name. A falsy value (such as the empty default)
            is accepted as-is.

    Returns:
        ``value`` unchanged when it is falsy or a supported format.

    Raises:
        ArgumentTypeError: If ``value`` is non-empty and is not listed in
            ``SUPPORTED_JSON_REPORT_FORMATS``.
    """
    if value and value not in SUPPORTED_JSON_REPORT_FORMATS:
        supported = ', '.join(SUPPORTED_JSON_REPORT_FORMATS)
        raise ArgumentTypeError(
            'JSON report type must be one of the following types: ' + supported
        )
    return value
|
||||||
|
|
||||||
|
|||||||
@@ -23096,6 +23096,62 @@
|
|||||||
"urlMain": "https://www.are.na",
|
"urlMain": "https://www.are.na",
|
||||||
"usernameClaimed": "nate-cassel",
|
"usernameClaimed": "nate-cassel",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"mywishboard.com": {
|
||||||
|
"checkType": "message",
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile-header",
|
||||||
|
" profile-header__col"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"This page could not be found"
|
||||||
|
],
|
||||||
|
"url": "https://mywishboard.com/@{username}",
|
||||||
|
"urlMain": "https://mywishboard.com",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"crafta.ua": {
|
||||||
|
"checkType": "message",
|
||||||
|
"presenseStrs": [
|
||||||
|
"cft-profile-about"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"Page not found"
|
||||||
|
],
|
||||||
|
"url": "https://{username}.crafta.ua/",
|
||||||
|
"urlMain": "https://crafta.ua",
|
||||||
|
"usernameClaimed": "test",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"m.smutty.com": {
|
||||||
|
"tags": [
|
||||||
|
"erotic"
|
||||||
|
],
|
||||||
|
"checkType": "message",
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile_stats_n"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"Not Found</span>"
|
||||||
|
],
|
||||||
|
"url": "https://m.smutty.com/user/{username}/",
|
||||||
|
"urlMain": "https://m.smutty.com",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"www.marykay.ru": {
|
||||||
|
"checkType": "message",
|
||||||
|
"presenseStrs": [
|
||||||
|
"email"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"errorPage"
|
||||||
|
],
|
||||||
|
"url": "https://www.marykay.ru/{username}",
|
||||||
|
"urlMain": "https://www.marykay.ru",
|
||||||
|
"usernameClaimed": "anna",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|||||||
+10
-1
@@ -1,4 +1,4 @@
|
|||||||
"""Sherlock Result Module
|
"""Maigret Result Module
|
||||||
|
|
||||||
This module defines various objects for recording the results of queries.
|
This module defines various objects for recording the results of queries.
|
||||||
"""
|
"""
|
||||||
@@ -74,6 +74,15 @@ class QueryResult():
|
|||||||
self.ids_data = ids_data
|
self.ids_data = ids_data
|
||||||
self.tags = tags
|
self.tags = tags
|
||||||
|
|
||||||
|
def json(self):
    """Return this result's fields as a plain dictionary.

    ``status`` is converted with ``str()``, and ``ids`` falls back to an
    empty dict when ``ids_data`` is falsy.
    """
    payload = {}
    payload['username'] = self.username
    payload['site_name'] = self.site_name
    payload['url'] = self.site_url_user
    payload['status'] = str(self.status)
    payload['ids'] = self.ids_data or {}
    payload['tags'] = self.tags
    return payload
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
"""Convert Object To String.
|
"""Convert Object To String.
|
||||||
|
|||||||
+14
-3
@@ -1,4 +1,5 @@
|
|||||||
import difflib
|
import difflib
|
||||||
|
import json
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from mock import Mock
|
from mock import Mock
|
||||||
@@ -10,6 +11,7 @@ DESIRED_STRINGS = ["username", "not found", "пользователь", "profile
|
|||||||
|
|
||||||
RATIO = 0.6
|
RATIO = 0.6
|
||||||
TOP_FEATURES = 5
|
TOP_FEATURES = 5
|
||||||
|
URL_RE = re.compile(r'https?://(www\.)?')
|
||||||
|
|
||||||
|
|
||||||
def get_match_ratio(x):
|
def get_match_ratio(x):
|
||||||
@@ -84,6 +86,17 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
|
|||||||
|
|
||||||
|
|
||||||
async def submit_dialog(db, url_exists):
|
async def submit_dialog(db, url_exists):
|
||||||
|
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
|
||||||
|
domain_raw = domain_raw.split('/')[0]
|
||||||
|
|
||||||
|
matched_sites = list(filter(lambda x: domain_raw in x.url_main+x.url, db.sites))
|
||||||
|
if matched_sites:
|
||||||
|
print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
|
||||||
|
status = lambda s: '(disabled)' if s.disabled else ''
|
||||||
|
url_block = lambda s: f'\n\t{s.url_main}\n\t{s.url}'
|
||||||
|
print('\n'.join([f'{site.name} {status(site)}{url_block(site)}' for site in matched_sites]))
|
||||||
|
return False
|
||||||
|
|
||||||
url_parts = url_exists.split('/')
|
url_parts = url_exists.split('/')
|
||||||
supposed_username = url_parts[-1]
|
supposed_username = url_parts[-1]
|
||||||
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
|
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
|
||||||
@@ -103,9 +116,7 @@ async def submit_dialog(db, url_exists):
|
|||||||
a_minus_b = tokens_a.difference(tokens_b)
|
a_minus_b = tokens_a.difference(tokens_b)
|
||||||
b_minus_a = tokens_b.difference(tokens_a)
|
b_minus_a = tokens_b.difference(tokens_a)
|
||||||
|
|
||||||
top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: '))
|
top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
|
||||||
if not top_features_count:
|
|
||||||
top_features_count = TOP_FEATURES
|
|
||||||
|
|
||||||
presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]
|
presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]
|
||||||
|
|
||||||
|
|||||||
+28
-1
@@ -1,5 +1,6 @@
|
|||||||
"""Maigret reports test functions"""
|
"""Maigret reports test functions"""
|
||||||
import copy
|
import copy
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
@@ -7,7 +8,7 @@ import xmind
|
|||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
|
|
||||||
from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \
|
from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \
|
||||||
save_pdf_report, generate_report_template, generate_report_context
|
save_pdf_report, generate_report_template, generate_report_context, generate_json_report
|
||||||
from maigret.result import QueryResult, QueryStatus
|
from maigret.result import QueryResult, QueryStatus
|
||||||
|
|
||||||
EXAMPLE_RESULTS = {
|
EXAMPLE_RESULTS = {
|
||||||
@@ -146,6 +147,32 @@ def test_generate_txt_report():
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_json_simple_report():
    """The 'simple' report is a single JSON line keyed by site name."""
    results = dict(EXAMPLE_RESULTS)
    results['GitHub2'] = EXAMPLE_RESULTS['GitHub']

    buffer = StringIO()
    generate_json_report('test', results, buffer, 'simple')

    buffer.seek(0)
    lines = buffer.readlines()

    assert len(lines) == 1
    report = json.loads(lines[0])
    assert list(report.keys()) == ['GitHub', 'GitHub2']
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_json_ndjson_report():
    """The 'ndjson' report has one line per exported site with a 'sitename' key."""
    results = dict(EXAMPLE_RESULTS)
    results['GitHub2'] = EXAMPLE_RESULTS['GitHub']

    buffer = StringIO()
    generate_json_report('test', results, buffer, 'ndjson')

    buffer.seek(0)
    lines = buffer.readlines()

    assert len(lines) == 2
    first_record = json.loads(lines[0])
    assert first_record['sitename'] == 'GitHub'
|
||||||
|
|
||||||
|
|
||||||
def test_save_xmind_report():
|
def test_save_xmind_report():
|
||||||
filename = 'report_test.xmind'
|
filename = 'report_test.xmind'
|
||||||
save_xmind_report(filename, 'test', EXAMPLE_RESULTS)
|
save_xmind_report(filename, 'test', EXAMPLE_RESULTS)
|
||||||
|
|||||||
Reference in New Issue
Block a user