Refactoring and linting, added notifications about frequent search errors

Soxoj
2021-04-30 12:03:13 +03:00
parent bfaf276f6e
commit bfa6afac32
20 changed files with 1351 additions and 787 deletions
+380 -204
@@ -12,11 +12,26 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
import requests
from socid_extractor import extract, parse, __version__ as socid_version
from .checking import timeout_check, supported_recursive_search_ids, self_check, unsupported_characters, maigret
from .checking import (
timeout_check,
supported_recursive_search_ids,
self_check,
unsupported_characters,
maigret,
)
from . import errors
from .notify import QueryNotifyPrint
from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, check_supported_json_format, \
save_json_report
from .report import (
save_csv_report,
save_xmind_report,
save_html_report,
save_pdf_report,
generate_report_context,
save_txt_report,
SUPPORTED_JSON_REPORT_FORMATS,
check_supported_json_format,
save_json_report,
)
from .sites import MaigretDatabase
from .submit import submit_dialog
from .utils import get_dict_ascii_tree
@@ -24,168 +39,301 @@ from .utils import get_dict_ascii_tree
__version__ = '0.1.19'
async def main():
version_string = '\n'.join([
f'%(prog)s {__version__}',
f'Socid-extractor: {socid_version}',
f'Aiohttp: {aiohttp.__version__}',
f'Requests: {requests.__version__}',
f'Python: {platform.python_version()}',
])
def notify_about_errors(search_results, query_notify):
errs = errors.extract_and_group(search_results.values())
was_errs_displayed = False
for e in errs:
if not errors.is_important(e):
continue
text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
solution = errors.solution_of(e['err'])
if solution:
text = '. '.join([text, solution])
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description=f"Maigret v{__version__}"
)
parser.add_argument("--version",
action="version", version=version_string,
help="Display version information and dependencies."
)
parser.add_argument("--info", "-vv",
action="store_true", dest="info", default=False,
help="Display service information."
)
parser.add_argument("--verbose", "-v",
action="store_true", dest="verbose", default=False,
help="Display extra information and metrics."
)
parser.add_argument("-d", "--debug", "-vvv",
action="store_true", dest="debug", default=False,
help="Saving debugging information and sites responses in debug.txt."
)
parser.add_argument("--site",
action="append", metavar='SITE_NAME',
dest="site_list", default=[],
help="Limit analysis to just the listed sites (use several times to specify more than one)"
)
parser.add_argument("--proxy", "-p", metavar='PROXY_URL',
action="store", dest="proxy", default=None,
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
)
parser.add_argument("--db", metavar="DB_FILE",
dest="db_file", default=None,
help="Load Maigret database from a JSON file or an online, valid, JSON file.")
parser.add_argument("--cookies-jar-file", metavar="COOKIE_FILE",
dest="cookie_file", default=None,
help="File with cookies.")
parser.add_argument("--timeout",
action="store", metavar='TIMEOUT',
dest="timeout", type=timeout_check, default=10,
help="Time (in seconds) to wait for response to requests."
"Default timeout of 10.0s. "
"A longer timeout will be more likely to get results from slow sites."
"On the other hand, this may cause a long delay to gather all results."
)
parser.add_argument("-n", "--max-connections",
action="store", type=int,
dest="connections", default=100,
help="Allowed number of concurrent connections."
)
parser.add_argument("-a", "--all-sites",
action="store_true", dest="all_sites", default=False,
help="Use all sites for scan."
)
parser.add_argument("--top-sites",
action="store", default=500, type=int,
help="Count of sites for scan ranked by Alexa Top (default: 500)."
)
parser.add_argument("--print-not-found",
action="store_true", dest="print_not_found", default=False,
help="Print sites where the username was not found."
)
parser.add_argument("--print-errors",
action="store_true", dest="print_check_errors", default=False,
help="Print errors messages: connection, captcha, site country ban, etc."
)
parser.add_argument("--submit", metavar='EXISTING_USER_URL',
type=str, dest="new_site_to_submit", default=False,
help="URL of existing profile in new site to submit."
)
parser.add_argument("--no-color",
action="store_true", dest="no_color", default=False,
help="Don't color terminal output"
)
parser.add_argument("--no-progressbar",
action="store_true", dest="no_progressbar", default=False,
help="Don't show progressbar."
)
parser.add_argument("--browse", "-b",
action="store_true", dest="browse", default=False,
help="Browse to all results on default bowser."
)
parser.add_argument("--no-recursion",
action="store_true", dest="disable_recursive_search", default=False,
help="Disable recursive search by additional data extracted from pages."
)
parser.add_argument("--no-extracting",
action="store_true", dest="disable_extracting", default=False,
help="Disable parsing pages for additional data and other usernames."
)
parser.add_argument("--self-check",
action="store_true", default=False,
help="Do self check for sites and database and disable non-working ones."
)
parser.add_argument("--stats",
action="store_true", default=False,
help="Show database statistics."
)
parser.add_argument("--use-disabled-sites",
action="store_true", default=False,
help="Use disabled sites to search (may cause many false positives)."
)
parser.add_argument("--parse",
dest="parse_url", default='',
help="Parse page by URL and extract username and IDs to use for search."
)
parser.add_argument("--id-type",
dest="id_type", default='username',
help="Specify identifier(s) type (default: username)."
)
parser.add_argument("--ignore-ids",
action="append", metavar='IGNORED_IDS',
dest="ignore_ids_list", default=[],
help="Do not make search by the specified username or other ids."
)
parser.add_argument("username",
nargs='+', metavar='USERNAMES',
action="store",
help="One or more usernames to check with social networks."
)
parser.add_argument("--tags",
dest="tags", default='',
help="Specify tags of sites."
)
query_notify.warning(text, '!')
was_errs_displayed = True
if was_errs_displayed:
query_notify.warning(
'You can see detailed site check errors with the `--print-errors` flag'
)
async def main():
version_string = '\n'.join(
[
f'%(prog)s {__version__}',
f'Socid-extractor: {socid_version}',
f'Aiohttp: {aiohttp.__version__}',
f'Requests: {requests.__version__}',
f'Python: {platform.python_version()}',
]
)
parser = ArgumentParser(
formatter_class=RawDescriptionHelpFormatter,
description=f"Maigret v{__version__}",
)
parser.add_argument(
"--version",
action="version",
version=version_string,
help="Display version information and dependencies.",
)
parser.add_argument(
"--info",
"-vv",
action="store_true",
dest="info",
default=False,
help="Display service information.",
)
parser.add_argument(
"--verbose",
"-v",
action="store_true",
dest="verbose",
default=False,
help="Display extra information and metrics.",
)
parser.add_argument(
"-d",
"--debug",
"-vvv",
action="store_true",
dest="debug",
default=False,
help="Saving debugging information and sites responses in debug.txt.",
)
parser.add_argument(
"--site",
action="append",
metavar='SITE_NAME',
dest="site_list",
default=[],
help="Limit analysis to just the listed sites (use several times to specify more than one)",
)
parser.add_argument(
"--proxy",
"-p",
metavar='PROXY_URL',
action="store",
dest="proxy",
default=None,
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
)
parser.add_argument(
"--db",
metavar="DB_FILE",
dest="db_file",
default=None,
help="Load Maigret database from a JSON file or an online, valid, JSON file.",
)
parser.add_argument(
"--cookies-jar-file",
metavar="COOKIE_FILE",
dest="cookie_file",
default=None,
help="File with cookies.",
)
parser.add_argument(
"--timeout",
action="store",
metavar='TIMEOUT',
dest="timeout",
type=timeout_check,
default=30,
help="Time (in seconds) to wait for response to requests. "
"Default timeout of 30.0s. "
"A longer timeout will be more likely to get results from slow sites. "
"On the other hand, this may cause a long delay to gather all results. ",
)
parser.add_argument(
"-n",
"--max-connections",
action="store",
type=int,
dest="connections",
default=100,
help="Allowed number of concurrent connections.",
)
parser.add_argument(
"-a",
"--all-sites",
action="store_true",
dest="all_sites",
default=False,
help="Use all sites for scan.",
)
parser.add_argument(
"--top-sites",
action="store",
default=500,
type=int,
help="Count of sites for scan ranked by Alexa Top (default: 500).",
)
parser.add_argument(
"--print-not-found",
action="store_true",
dest="print_not_found",
default=False,
help="Print sites where the username was not found.",
)
parser.add_argument(
"--print-errors",
action="store_true",
dest="print_check_errors",
default=False,
help="Print errors messages: connection, captcha, site country ban, etc.",
)
parser.add_argument(
"--submit",
metavar='EXISTING_USER_URL',
type=str,
dest="new_site_to_submit",
default=False,
help="URL of existing profile in new site to submit.",
)
parser.add_argument(
"--no-color",
action="store_true",
dest="no_color",
default=False,
help="Don't color terminal output",
)
parser.add_argument(
"--no-progressbar",
action="store_true",
dest="no_progressbar",
default=False,
help="Don't show progressbar.",
)
parser.add_argument(
"--browse",
"-b",
action="store_true",
dest="browse",
default=False,
help="Browse to all results on default bowser.",
)
parser.add_argument(
"--no-recursion",
action="store_true",
dest="disable_recursive_search",
default=False,
help="Disable recursive search by additional data extracted from pages.",
)
parser.add_argument(
"--no-extracting",
action="store_true",
dest="disable_extracting",
default=False,
help="Disable parsing pages for additional data and other usernames.",
)
parser.add_argument(
"--self-check",
action="store_true",
default=False,
help="Do self check for sites and database and disable non-working ones.",
)
parser.add_argument(
"--stats", action="store_true", default=False, help="Show database statistics."
)
parser.add_argument(
"--use-disabled-sites",
action="store_true",
default=False,
help="Use disabled sites to search (may cause many false positives).",
)
parser.add_argument(
"--parse",
dest="parse_url",
default='',
help="Parse page by URL and extract username and IDs to use for search.",
)
parser.add_argument(
"--id-type",
dest="id_type",
default='username',
help="Specify identifier(s) type (default: username).",
)
parser.add_argument(
"--ignore-ids",
action="append",
metavar='IGNORED_IDS',
dest="ignore_ids_list",
default=[],
help="Do not make search by the specified username or other ids.",
)
parser.add_argument(
"username",
nargs='+',
metavar='USERNAMES',
action="store",
help="One or more usernames to check with social networks.",
)
parser.add_argument(
"--tags", dest="tags", default='', help="Specify tags of sites."
)
# reports options
parser.add_argument("--folderoutput", "-fo", dest="folderoutput", default="reports",
help="If using multiple usernames, the output of the results will be saved to this folder."
)
parser.add_argument("-T", "--txt",
action="store_true", dest="txt", default=False,
help="Create a TXT report (one report per username)."
)
parser.add_argument("-C", "--csv",
action="store_true", dest="csv", default=False,
help="Create a CSV report (one report per username)."
)
parser.add_argument("-H", "--html",
action="store_true", dest="html", default=False,
help="Create an HTML report file (general report on all usernames)."
)
parser.add_argument("-X", "--xmind",
action="store_true",
dest="xmind", default=False,
help="Generate an XMind 8 mindmap report (one report per username)."
)
parser.add_argument("-P", "--pdf",
action="store_true",
dest="pdf", default=False,
help="Generate a PDF report (general report on all usernames)."
)
parser.add_argument("-J", "--json",
action="store", metavar='REPORT_TYPE',
dest="json", default='', type=check_supported_json_format,
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
" (one report per username)."
)
parser.add_argument(
"--folderoutput",
"-fo",
dest="folderoutput",
default="reports",
help="If using multiple usernames, the output of the results will be saved to this folder.",
)
parser.add_argument(
"-T",
"--txt",
action="store_true",
dest="txt",
default=False,
help="Create a TXT report (one report per username).",
)
parser.add_argument(
"-C",
"--csv",
action="store_true",
dest="csv",
default=False,
help="Create a CSV report (one report per username).",
)
parser.add_argument(
"-H",
"--html",
action="store_true",
dest="html",
default=False,
help="Create an HTML report file (general report on all usernames).",
)
parser.add_argument(
"-X",
"--xmind",
action="store_true",
dest="xmind",
default=False,
help="Generate an XMind 8 mindmap report (one report per username).",
)
parser.add_argument(
"-P",
"--pdf",
action="store_true",
dest="pdf",
default=False,
help="Generate a PDF report (general report on all usernames).",
)
parser.add_argument(
"-J",
"--json",
action="store",
metavar='REPORT_TYPE',
dest="json",
default='',
type=check_supported_json_format,
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
" (one report per username).",
)
args = parser.parse_args()
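The new notify_about_errors helper depends on three functions imported above via `from . import errors` but not shown in this diff: extract_and_group, is_important, and solution_of. A minimal sketch of the interface implied by the call sites — the function names come from the diff, but the bodies, field names, and threshold below are assumptions, not the project's actual implementation:

# Illustrative sketch of the maigret.errors interface; the bodies are
# guesses reconstructed from how notify_about_errors calls them.
from collections import Counter
from typing import Iterable, List, Optional

def extract_and_group(results: Iterable[dict]) -> List[dict]:
    # Assumed result shape: each site result may carry an 'error' field.
    results = list(results)
    counter = Counter(r.get('error') for r in results if r.get('error'))
    total = max(len(results), 1)
    # Entries use the {'err': ..., 'perc': ...} keys read by the caller.
    return [
        {'err': err, 'perc': round(100 * count / total)}
        for err, count in counter.most_common()
    ]

def is_important(grouped: dict) -> bool:
    # Assumed threshold: only frequent error types trigger a notification.
    return grouped['perc'] >= 10

def solution_of(err_type: str) -> Optional[str]:
    # Assumed lookup of a human-readable hint for known error types.
    hints = {'CAPTCHA': 'Try sending requests through a proxy (--proxy).'}
    return hints.get(err_type)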
@@ -194,7 +342,7 @@ async def main():
logging.basicConfig(
format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
datefmt='%H:%M:%S',
level=log_level
level=log_level,
)
if args.debug:
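For reference, the configured format yields log lines of this shape (the file name, line number, and message are illustrative):

[maigret.py:412] WARNING 12:03:13 Starting a search on top 500 sites...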
@@ -211,8 +359,7 @@ async def main():
usernames = {
u: args.id_type
for u in args.username
if u not in ['-']
and u not in args.ignore_ids_list
if u not in ['-'] and u not in args.ignore_ids_list
}
parsing_enabled = not args.disable_extracting
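A quick worked example of the usernames filter above, under hypothetical inputs ('-' is the placeholder value the comprehension skips):

# Hypothetical inputs for illustration:
#   args.username = ['alice', '-', 'bob']
#   args.id_type = 'username'
#   args.ignore_ids_list = ['bob']
# The comprehension then yields:
#   usernames == {'alice': 'username'}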
@@ -228,8 +375,10 @@ async def main():
try:
# temporary workaround for URL mutations MVP
from socid_extractor import mutate_url
reqs += list(mutate_url(args.parse_url))
except:
except Exception as e:
logger.warning(e)
for req in reqs:
@@ -251,38 +400,47 @@ async def main():
args.tags = list(set(str(args.tags).split(',')))
if args.db_file is None:
args.db_file = \
os.path.join(os.path.dirname(os.path.realpath(__file__)),
"resources/data.json"
)
args.db_file = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "resources/data.json"
)
if args.top_sites == 0 or args.all_sites:
args.top_sites = sys.maxsize
# Create notify object for query results.
query_notify = QueryNotifyPrint(result=None,
verbose=args.verbose,
print_found_only=not args.print_not_found,
skip_check_errors=not args.print_check_errors,
color=not args.no_color)
query_notify = QueryNotifyPrint(
result=None,
verbose=args.verbose,
print_found_only=not args.print_not_found,
skip_check_errors=not args.print_check_errors,
color=not args.no_color,
)
# Create object with all information about sites we are aware of.
db = MaigretDatabase().load_from_file(args.db_file)
get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
names=args.site_list,
disabled=False, id_type=x)
get_top_sites_for_id = lambda x: db.ranked_sites_dict(
top=args.top_sites,
tags=args.tags,
names=args.site_list,
disabled=False,
id_type=x,
)
site_data = get_top_sites_for_id(args.id_type)
if args.new_site_to_submit:
is_submitted = await submit_dialog(db, args.new_site_to_submit, args.cookie_file, logger)
is_submitted = await submit_dialog(
db, args.new_site_to_submit, args.cookie_file, logger
)
if is_submitted:
db.save_to_file(args.db_file)
# Database self-checking
if args.self_check:
print('Maigret sites database self-checking...')
is_need_update = await self_check(db, site_data, logger, max_connections=args.connections)
is_need_update = await self_check(
db, site_data, logger, max_connections=args.connections
)
if is_need_update:
if input('Do you want to save changes permanently? [Yn]\n').lower() == 'y':
db.save_to_file(args.db_file)
@@ -314,9 +472,13 @@ async def main():
query_notify.warning('No sites to check, exiting!')
sys.exit(2)
else:
query_notify.warning(f'Starting a search on top {len(site_data)} sites from the Maigret database...')
query_notify.warning(
f'Starting a search on top {len(site_data)} sites from the Maigret database...'
)
if not args.all_sites:
query_notify.warning(f'You can run a search over the full list of sites with the `-a` flag', '!')
query_notify.warning(
'You can run a search over the full list of sites with the `-a` flag', '!'
)
already_checked = set()
general_results = []
@@ -331,34 +493,44 @@ async def main():
already_checked.add(username.lower())
if username in args.ignore_ids_list:
query_notify.warning(f'Skipping the search by username {username} because it\'s marked as ignored.')
query_notify.warning(
f'Skipping the search by username {username} because it\'s marked as ignored.'
)
continue
# check for characters not supported by sites generally
found_unsupported_chars = set(unsupported_characters).intersection(set(username))
found_unsupported_chars = set(unsupported_characters).intersection(
set(username)
)
if found_unsupported_chars:
pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
pretty_chars_str = ','.join(
map(lambda s: f'"{s}"', found_unsupported_chars)
)
query_notify.warning(
f'Found unsupported URL characters: {pretty_chars_str}, skipping the search by username "{username}"')
f'Found unsupported URL characters: {pretty_chars_str}, skipping the search by username "{username}"'
)
continue
sites_to_check = get_top_sites_for_id(id_type)
results = await maigret(username=username,
site_dict=dict(sites_to_check),
query_notify=query_notify,
proxy=args.proxy,
timeout=args.timeout,
is_parsing_enabled=parsing_enabled,
id_type=id_type,
debug=args.verbose,
logger=logger,
cookies=args.cookie_file,
forced=args.use_disabled_sites,
max_connections=args.connections,
no_progressbar=args.no_progressbar,
)
results = await maigret(
username=username,
site_dict=dict(sites_to_check),
query_notify=query_notify,
proxy=args.proxy,
timeout=args.timeout,
is_parsing_enabled=parsing_enabled,
id_type=id_type,
debug=args.verbose,
logger=logger,
cookies=args.cookie_file,
forced=args.use_disabled_sites,
max_connections=args.connections,
no_progressbar=args.no_progressbar,
)
notify_about_errors(results, query_notify)
general_results.append((username, id_type, results))
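A minimal driver showing the new notification step in action once a search finishes — the result shape and the notifier stub are assumptions consistent with the errors-module sketch above, not maigret's real classes:

# Hypothetical stub standing in for QueryNotifyPrint.
class StubNotify:
    def warning(self, text, symbol='-'):
        print(f'[{symbol}] {text}')

fake_results = {
    'SiteA': {'error': 'CAPTCHA'},
    'SiteB': {'error': 'CAPTCHA'},
    'SiteC': {'error': None},
}
notify_about_errors(fake_results, StubNotify())
# Expected output under the assumed errors module (2 of 3 checks failed):
# [!] Too many errors of type "CAPTCHA" (67%). Try sending requests through a proxy (--proxy).
# [-] You can see detailed site check errors with the `--print-errors` flag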
@@ -397,9 +569,13 @@ async def main():
query_notify.warning(f'TXT report for {username} saved in {filename}')
if args.json:
filename = report_filepath_tpl.format(username=username, postfix=f'_{args.json}.json')
filename = report_filepath_tpl.format(
username=username, postfix=f'_{args.json}.json'
)
save_json_report(filename, username, results, report_type=args.json)
query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
query_notify.warning(
f'JSON {args.json} report for {username} saved in {filename}'
)
# reporting for all the results
if general_results: