Added JSON-format reports (simple, ndjson); improved submit logic; added several sites

This commit is contained in:
Soxoj
2021-02-13 01:06:05 +03:00
parent 7676c053f9
commit 631de7b346
6 changed files with 173 additions and 16 deletions
+24 -11
View File
@@ -13,7 +13,8 @@ from socid_extractor import parse, __version__ as socid_version
from .checking import * from .checking import *
from .notify import QueryNotifyPrint from .notify import QueryNotifyPrint
from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \ from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
generate_report_context, save_txt_report generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, check_supported_json_format, \
save_json_report
from .submit import submit_dialog from .submit import submit_dialog
__version__ = '0.1.13' __version__ = '0.1.13'
@@ -56,9 +57,9 @@ async def main():
action="store", dest="proxy", default=None, action="store", dest="proxy", default=None,
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080" help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
) )
parser.add_argument("--json", "-j", metavar="JSON_FILE", parser.add_argument("--db", metavar="DB_FILE",
dest="json_file", default=None, dest="db_file", default=None,
help="Load data from a JSON file or an online, valid, JSON file.") help="Load Maigret database from a JSON file or an online, valid, JSON file.")
parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE", parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE",
dest="cookie_file", default=None, dest="cookie_file", default=None,
help="File with cookies.") help="File with cookies.")
@@ -91,7 +92,7 @@ async def main():
action="store_true", dest="print_check_errors", default=False, action="store_true", dest="print_check_errors", default=False,
help="Print errors messages: connection, captcha, site country ban, etc." help="Print errors messages: connection, captcha, site country ban, etc."
) )
parser.add_argument("--submit", parser.add_argument("--submit", metavar='EXISTING_USER_URL',
type=str, dest="new_site_to_submit", default=False, type=str, dest="new_site_to_submit", default=False,
help="URL of existing profile in new site to submit." help="URL of existing profile in new site to submit."
) )
@@ -158,6 +159,12 @@ async def main():
dest="pdf", default=False, dest="pdf", default=False,
help="Generate a PDF report (general report on all usernames)." help="Generate a PDF report (general report on all usernames)."
) )
parser.add_argument("-J", "--json",
action="store", metavar='REPORT_TYPE',
dest="json", default='', type=check_supported_json_format,
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
" (one report per username)."
)
args = parser.parse_args() args = parser.parse_args()
@@ -206,8 +213,8 @@ async def main():
if args.tags: if args.tags:
args.tags = list(set(str(args.tags).split(','))) args.tags = list(set(str(args.tags).split(',')))
if args.json_file is None: if args.db_file is None:
args.json_file = \ args.db_file = \
os.path.join(os.path.dirname(os.path.realpath(__file__)), os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/data.json" "resources/data.json"
) )
@@ -223,7 +230,7 @@ async def main():
color=not args.no_color) color=not args.no_color)
# Create object with all information about sites we are aware of. # Create object with all information about sites we are aware of.
db = MaigretDatabase().load_from_file(args.json_file) db = MaigretDatabase().load_from_file(args.db_file)
get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags, get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
names=args.site_list, names=args.site_list,
disabled=False, id_type=x) disabled=False, id_type=x)
@@ -233,7 +240,7 @@ async def main():
if args.new_site_to_submit: if args.new_site_to_submit:
is_submitted = await submit_dialog(db, args.new_site_to_submit) is_submitted = await submit_dialog(db, args.new_site_to_submit)
if is_submitted: if is_submitted:
db.save_to_file(args.json_file) db.save_to_file(args.db_file)
# Database self-checking # Database self-checking
if args.self_check: if args.self_check:
@@ -241,7 +248,7 @@ async def main():
is_need_update = await self_check(db, site_data, logger, max_connections=args.connections) is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
if is_need_update: if is_need_update:
if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y': if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
db.save_to_file(args.json_file) db.save_to_file(args.db_file)
print('Database was successfully updated.') print('Database was successfully updated.')
else: else:
print('Updates will be applied only for current search session.') print('Updates will be applied only for current search session.')
@@ -339,6 +346,12 @@ async def main():
save_txt_report(filename, username, results) save_txt_report(filename, username, results)
query_notify.warning(f'TXT report for {username} saved in {filename}') query_notify.warning(f'TXT report for {username} saved in {filename}')
if args.json:
filename = report_filepath_tpl.format(username=username, postfix=f'_{args.json}.json')
save_json_report(filename, username, results, report_type=args.json)
query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
# reporting for all the result # reporting for all the result
if general_results: if general_results:
if args.html or args.pdf: if args.html or args.pdf:
@@ -357,7 +370,7 @@ async def main():
save_pdf_report(filename, report_context) save_pdf_report(filename, report_context)
query_notify.warning(f'PDF report on all usernames saved in {filename}') query_notify.warning(f'PDF report on all usernames saved in {filename}')
# update database # update database
db.save_to_file(args.json_file) db.save_to_file(args.db_file)
def run(): def run():
+41
View File
@@ -1,4 +1,5 @@
import csv import csv
import json
import io import io
import logging import logging
import os import os
@@ -7,11 +8,17 @@ import xmind
from datetime import datetime from datetime import datetime
from jinja2 import Template from jinja2 import Template
from xhtml2pdf import pisa from xhtml2pdf import pisa
from argparse import ArgumentTypeError
from dateutil.parser import parse as parse_datetime_str from dateutil.parser import parse as parse_datetime_str
from .result import QueryStatus from .result import QueryStatus
from .utils import is_country_tag, CaseConverter, enrich_link_str from .utils import is_country_tag, CaseConverter, enrich_link_str
SUPPORTED_JSON_REPORT_FORMATS = [
'simple',
'ndjson',
]
''' '''
UTILS UTILS
@@ -51,6 +58,10 @@ def save_pdf_report(filename: str, context: dict):
with open(filename, 'w+b') as f: with open(filename, 'w+b') as f:
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css) pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
def save_json_report(filename: str, username: str, results: dict, report_type: str):
    """Write a JSON report for *username* to the file at *filename*.

    The file is opened for writing as UTF-8 text (an existing file is
    truncated) and the actual serialization is delegated to
    ``generate_json_report``.

    Args:
        filename: Path of the report file to create.
        username: Username the report is generated for.
        results: Mapping of site name to the per-site result data.
        report_type: Report flavour, forwarded unchanged to
            ``generate_json_report``.
    """
    with open(filename, mode='w', encoding='utf-8') as report_file:
        generate_json_report(username, results, report_file, report_type=report_type)
''' '''
REPORTS GENERATING REPORTS GENERATING
@@ -225,6 +236,30 @@ def generate_txt_report(username: str, results: dict, file):
file.write(dictionary["url_user"] + "\n") file.write(dictionary["url_user"] + "\n")
file.write(f'Total Websites Username Detected On : {exists_counter}') file.write(f'Total Websites Username Detected On : {exists_counter}')
def generate_json_report(username: str, results: dict, file, report_type):
    """Write the claimed accounts found in *results* to *file* as JSON.

    Only entries whose status object reports ``QueryStatus.CLAIMED`` are
    emitted. Entries that are empty, or that carry no ``status`` object at
    all, are skipped instead of raising ``AttributeError``.

    Args:
        username: Username the report belongs to. Not used by this function.
        results: Mapping of site name to that site's result dict. Each
            emitted dict has its ``status`` value replaced by the result of
            calling ``.json()`` on it.
        file: Writable text file-like object (only ``write`` is called).
        report_type: Report flavour. A value starting with ``'ndjson'``
            produces one JSON object per line, each with an added
            ``sitename`` key; any other value produces a single JSON object
            keyed by site name.
    """
    is_report_per_line = report_type.startswith('ndjson')
    all_json = {}

    for sitename, site_result in results.items():
        # TODO: fix no site data issue
        status = site_result.get("status") if site_result else None
        # Guard the attribute access: a result without a status object
        # cannot be a claimed account, so it is simply left out.
        if status is None or status.status != QueryStatus.CLAIMED:
            continue

        # Shallow copy so the caller's result dict keeps its status object.
        data = dict(site_result)
        data['status'] = data['status'].json()

        if is_report_per_line:
            data['sitename'] = sitename
            file.write(json.dumps(data) + '\n')
        else:
            all_json[sitename] = data

    if not is_report_per_line:
        file.write(json.dumps(all_json))
''' '''
XMIND 8 Functions XMIND 8 Functions
''' '''
@@ -306,3 +341,9 @@ def design_sheet(sheet, username, results):
currentsublabel.setTitle("%s: %s" % (k, v)) currentsublabel.setTitle("%s: %s" % (k, v))
def check_supported_json_format(value):
    """Validate a JSON report type given on the command line.

    Args:
        value: Requested report type. A falsy value (for example an empty
            string) means "no JSON report" and is accepted as-is.

    Returns:
        *value*, unchanged.

    Raises:
        ArgumentTypeError: If *value* is truthy and is not listed in
            ``SUPPORTED_JSON_REPORT_FORMATS``.
    """
    if value and value not in SUPPORTED_JSON_REPORT_FORMATS:
        raise ArgumentTypeError(
            'JSON report type must be one of the following types: '
            + ', '.join(SUPPORTED_JSON_REPORT_FORMATS)
        )
    return value
+56
View File
@@ -23096,6 +23096,62 @@
"urlMain": "https://www.are.na", "urlMain": "https://www.are.na",
"usernameClaimed": "nate-cassel", "usernameClaimed": "nate-cassel",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
},
"mywishboard.com": {
"checkType": "message",
"presenseStrs": [
"profile-header",
" profile-header__col"
],
"absenceStrs": [
"This page could not be found"
],
"url": "https://mywishboard.com/@{username}",
"urlMain": "https://mywishboard.com",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"crafta.ua": {
"checkType": "message",
"presenseStrs": [
"cft-profile-about"
],
"absenceStrs": [
"Page not found"
],
"url": "https://{username}.crafta.ua/",
"urlMain": "https://crafta.ua",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"m.smutty.com": {
"tags": [
"erotic"
],
"checkType": "message",
"presenseStrs": [
"profile_stats_n"
],
"absenceStrs": [
"Not Found</span>"
],
"url": "https://m.smutty.com/user/{username}/",
"urlMain": "https://m.smutty.com",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"www.marykay.ru": {
"checkType": "message",
"presenseStrs": [
"email"
],
"absenceStrs": [
"errorPage"
],
"url": "https://www.marykay.ru/{username}",
"urlMain": "https://www.marykay.ru",
"usernameClaimed": "anna",
"usernameUnclaimed": "noonewouldeverusethis7"
} }
}, },
"engines": { "engines": {
+10 -1
View File
@@ -1,4 +1,4 @@
"""Sherlock Result Module """Maigret Result Module
This module defines various objects for recording the results of queries. This module defines various objects for recording the results of queries.
""" """
@@ -74,6 +74,15 @@ class QueryResult():
self.ids_data = ids_data self.ids_data = ids_data
self.tags = tags self.tags = tags
def json(self):
    """Return this result as a plain dict.

    The dict holds the keys ``username``, ``site_name``, ``url``, ``status``,
    ``ids`` and ``tags``, in that order. ``status`` is the string form of
    ``self.status``; ``ids`` is ``self.ids_data``, or an empty dict when
    that attribute is falsy.
    """
    serialized = dict(
        username=self.username,
        site_name=self.site_name,
        url=self.site_url_user,
        status=str(self.status),
    )
    serialized['ids'] = self.ids_data if self.ids_data else {}
    serialized['tags'] = self.tags
    return serialized
def __str__(self): def __str__(self):
"""Convert Object To String. """Convert Object To String.
+14 -3
View File
@@ -1,4 +1,5 @@
import difflib import difflib
import json
import requests import requests
from mock import Mock from mock import Mock
@@ -10,6 +11,7 @@ DESIRED_STRINGS = ["username", "not found", "пользователь", "profile
RATIO = 0.6 RATIO = 0.6
TOP_FEATURES = 5 TOP_FEATURES = 5
URL_RE = re.compile(r'https?://(www\.)?')
def get_match_ratio(x): def get_match_ratio(x):
@@ -84,6 +86,17 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
async def submit_dialog(db, url_exists): async def submit_dialog(db, url_exists):
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
domain_raw = domain_raw.split('/')[0]
matched_sites = list(filter(lambda x: domain_raw in x.url_main+x.url, db.sites))
if matched_sites:
print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
status = lambda s: '(disabled)' if s.disabled else ''
url_block = lambda s: f'\n\t{s.url_main}\n\t{s.url}'
print('\n'.join([f'{site.name} {status(site)}{url_block(site)}' for site in matched_sites]))
return False
url_parts = url_exists.split('/') url_parts = url_exists.split('/')
supposed_username = url_parts[-1] supposed_username = url_parts[-1]
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ') new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
@@ -103,9 +116,7 @@ async def submit_dialog(db, url_exists):
a_minus_b = tokens_a.difference(tokens_b) a_minus_b = tokens_a.difference(tokens_b)
b_minus_a = tokens_b.difference(tokens_a) b_minus_a = tokens_b.difference(tokens_a)
top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ')) top_features_count = int(input(f'Specify count of features to extract [default {TOP_FEATURES}]: ') or TOP_FEATURES)
if not top_features_count:
top_features_count = TOP_FEATURES
presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count] presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[:top_features_count]
+28 -1
View File
@@ -1,5 +1,6 @@
"""Maigret reports test functions""" """Maigret reports test functions"""
import copy import copy
import json
import os import os
from io import StringIO from io import StringIO
@@ -7,7 +8,7 @@ import xmind
from jinja2 import Template from jinja2 import Template
from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \ from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \
save_pdf_report, generate_report_template, generate_report_context save_pdf_report, generate_report_template, generate_report_context, generate_json_report
from maigret.result import QueryResult, QueryStatus from maigret.result import QueryResult, QueryStatus
EXAMPLE_RESULTS = { EXAMPLE_RESULTS = {
@@ -146,6 +147,32 @@ def test_generate_txt_report():
] ]
def test_generate_json_simple_report():
    """The 'simple' report is one line holding a JSON object keyed by site name."""
    results = dict(EXAMPLE_RESULTS)
    # Add the GitHub entry a second time under another site name.
    results['GitHub2'] = EXAMPLE_RESULTS['GitHub']

    buffer = StringIO()
    generate_json_report('test', results, buffer, 'simple')
    buffer.seek(0)
    lines = buffer.readlines()

    assert len(lines) == 1
    assert list(json.loads(lines[0])) == ['GitHub', 'GitHub2']
def test_generate_json_ndjson_report():
    """The 'ndjson' report has one JSON object per line, tagged with 'sitename'."""
    results = dict(EXAMPLE_RESULTS)
    # Add the GitHub entry a second time under another site name.
    results['GitHub2'] = EXAMPLE_RESULTS['GitHub']

    buffer = StringIO()
    generate_json_report('test', results, buffer, 'ndjson')
    buffer.seek(0)
    lines = buffer.readlines()

    assert len(lines) == 2
    first_record = json.loads(lines[0])
    assert first_record['sitename'] == 'GitHub'
def test_save_xmind_report(): def test_save_xmind_report():
filename = 'report_test.xmind' filename = 'report_test.xmind'
save_xmind_report(filename, 'test', EXAMPLE_RESULTS) save_xmind_report(filename, 'test', EXAMPLE_RESULTS)