diff --git a/.gitignore b/.gitignore index 919b269..eb3759b 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,9 @@ tests/.excluded_sites # MacOS Folder Metadata File .DS_Store /reports/ + +# Testing +.coverage +dist/ +htmlcov/ +/test_* \ No newline at end of file diff --git a/maigret/checking.py b/maigret/checking.py index f7694b9..e19f456 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -27,7 +27,7 @@ from .types import QueryOptions, QueryResultWrapper from .utils import get_random_user_agent -supported_recursive_search_ids = ( +SUPPORTED_IDS = ( "yandex_public_id", "gaia_id", "vk_id", @@ -263,7 +263,7 @@ def process_site_result( for k, v in extracted_ids_data.items(): if "username" in k: new_usernames[v] = "username" - if k in supported_recursive_search_ids: + if k in SUPPORTED_IDS: new_usernames[v] = k results_info["ids_usernames"] = new_usernames diff --git a/maigret/maigret.py b/maigret/maigret.py index d064e8a..c3c3597 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -14,7 +14,7 @@ from socid_extractor import extract, parse, __version__ as socid_version from .checking import ( timeout_check, - supported_recursive_search_ids, + SUPPORTED_IDS, self_check, unsupported_characters, maigret, @@ -29,7 +29,6 @@ from .report import ( generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, - check_supported_json_format, save_json_report, ) from .sites import MaigretDatabase @@ -74,68 +73,19 @@ def setup_arguments_parser(): formatter_class=RawDescriptionHelpFormatter, description=f"Maigret v{__version__}", ) + parser.add_argument( + "username", + nargs='?', + metavar="USERNAMES", + action="append", + help="One or more usernames to check with social networks.", + ) parser.add_argument( "--version", action="version", version=version_string, help="Display version information and dependencies.", ) - parser.add_argument( - "--info", - "-vv", - action="store_true", - dest="info", - default=False, - help="Display service 
information.", - ) - parser.add_argument( - "--verbose", - "-v", - action="store_true", - dest="verbose", - default=False, - help="Display extra information and metrics.", - ) - parser.add_argument( - "-d", - "--debug", - "-vvv", - action="store_true", - dest="debug", - default=False, - help="Saving debugging information and sites responses in debug.txt.", - ) - parser.add_argument( - "--site", - action="append", - metavar='SITE_NAME', - dest="site_list", - default=[], - help="Limit analysis to just the listed sites (use several times to specify more than one)", - ) - parser.add_argument( - "--proxy", - "-p", - metavar='PROXY_URL', - action="store", - dest="proxy", - default=None, - help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080", - ) - parser.add_argument( - "--db", - metavar="DB_FILE", - dest="db_file", - default=None, - help="Load Maigret database from a JSON file or an online, valid, JSON file.", - ) - parser.add_argument( - "--cookies-jar-file", - metavar="COOKIE_FILE", - dest="cookie_file", - default=None, - help="File with cookies.", - ) parser.add_argument( "--timeout", action="store", @@ -143,7 +93,7 @@ def setup_arguments_parser(): dest="timeout", type=timeout_check, default=30, - help="Time (in seconds) to wait for response to requests. " + help="Time in seconds to wait for response to requests. " "Default timeout of 30.0s. " "A longer timeout will be more likely to get results from slow sites. " "On the other hand, this may cause a long delay to gather all results. 
", @@ -165,65 +115,6 @@ def setup_arguments_parser(): default=100, help="Allowed number of concurrent connections.", ) - parser.add_argument( - "-a", - "--all-sites", - action="store_true", - dest="all_sites", - default=False, - help="Use all sites for scan.", - ) - parser.add_argument( - "--top-sites", - action="store", - default=500, - type=int, - help="Count of sites for scan ranked by Alexa Top (default: 500).", - ) - parser.add_argument( - "--print-not-found", - action="store_true", - dest="print_not_found", - default=False, - help="Print sites where the username was not found.", - ) - parser.add_argument( - "--print-errors", - action="store_true", - dest="print_check_errors", - default=False, - help="Print errors messages: connection, captcha, site country ban, etc.", - ) - parser.add_argument( - "--submit", - metavar='EXISTING_USER_URL', - type=str, - dest="new_site_to_submit", - default=False, - help="URL of existing profile in new site to submit.", - ) - parser.add_argument( - "--no-color", - action="store_true", - dest="no_color", - default=False, - help="Don't color terminal output", - ) - parser.add_argument( - "--no-progressbar", - action="store_true", - dest="no_progressbar", - default=False, - help="Don't show progressbar.", - ) - parser.add_argument( - "--browse", - "-b", - action="store_true", - dest="browse", - default=False, - help="Browse to all results on default bowser.", - ) parser.add_argument( "--no-recursion", action="store_true", @@ -238,33 +129,27 @@ def setup_arguments_parser(): default=False, help="Disable parsing pages for additional data and other usernames.", ) - parser.add_argument( - "--self-check", - action="store_true", - default=False, - help="Do self check for sites and database and disable non-working ones.", - ) - parser.add_argument( - "--stats", action="store_true", default=False, help="Show database statistics." 
- ) - parser.add_argument( - "--use-disabled-sites", - action="store_true", - default=False, - help="Use disabled sites to search (may cause many false positives).", - ) - parser.add_argument( - "--parse", - dest="parse_url", - default='', - help="Parse page by URL and extract username and IDs to use for search.", - ) parser.add_argument( "--id-type", dest="id_type", default='username', + choices=("username", *SUPPORTED_IDS), help="Specify identifier(s) type (default: username).", ) + parser.add_argument( + "--db", + metavar="DB_FILE", + dest="db_file", + default=None, + help="Load Maigret database from a JSON file or an online, valid, JSON file.", + ) + parser.add_argument( + "--cookies-jar-file", + metavar="COOKIE_FILE", + dest="cookie_file", + default=None, + help="File with cookies.", + ) parser.add_argument( "--ignore-ids", action="append", @@ -273,25 +158,150 @@ def setup_arguments_parser(): default=[], help="Do not make search by the specified username or other ids.", ) - parser.add_argument( - "username", - nargs='+', - metavar='USERNAMES', - action="store", - help="One or more usernames to check with social networks.", - ) - parser.add_argument( - "--tags", dest="tags", default='', help="Specify tags of sites." - ) # reports options parser.add_argument( "--folderoutput", "-fo", dest="folderoutput", default="reports", + metavar="PATH", help="If using multiple usernames, the output of the results will be saved to this folder.", ) parser.add_argument( + "--proxy", + "-p", + metavar='PROXY_URL', + action="store", + dest="proxy", + default=None, + help="Make requests over a proxy. e.g. 
socks5://127.0.0.1:1080", + ) + + filter_group = parser.add_argument_group('Site filtering', 'Options to set site search scope') + filter_group.add_argument( + "-a", + "--all-sites", + action="store_true", + dest="all_sites", + default=False, + help="Use all sites for scan.", + ) + filter_group.add_argument( + "--top-sites", + action="store", + default=500, + metavar="N", + type=int, + help="Count of sites for scan ranked by Alexa Top (default: 500).", + ) + filter_group.add_argument( + "--tags", dest="tags", default='', help="Specify tags of sites (see `--stats`)." + ) + filter_group.add_argument( + "--site", + action="append", + metavar='SITE_NAME', + dest="site_list", + default=[], + help="Limit analysis to just the specified sites (multiple option).", + ) + filter_group.add_argument( + "--use-disabled-sites", + action="store_true", + default=False, + help="Use disabled sites to search (may cause many false positives).", + ) + + modes_group = parser.add_argument_group( + 'Operating modes', + 'Various functions except the default search by a username. ' + 'Modes are executed sequentially in the order of declaration.' + ) + modes_group.add_argument( + "--parse", + dest="parse_url", + default='', + metavar='URL', + help="Parse page by URL and extract username and IDs to use for search.", + ) + modes_group.add_argument( + "--submit", + metavar='URL', + type=str, + dest="new_site_to_submit", + default=False, + help="URL of existing profile in new site to submit.", + ) + modes_group.add_argument( + "--self-check", + action="store_true", + default=False, + help="Do self check for sites and database and disable non-working ones.", + ) + modes_group.add_argument( + "--stats", + action="store_true", + default=False, + help="Show database statistics (most frequent sites engines and tags)." 
+ ) + + output_group = parser.add_argument_group('Output options', 'Options to change verbosity and view of the console output') + output_group.add_argument( + "--print-not-found", + action="store_true", + dest="print_not_found", + default=False, + help="Print sites where the username was not found.", + ) + output_group.add_argument( + "--print-errors", + action="store_true", + dest="print_check_errors", + default=False, + help="Print errors messages: connection, captcha, site country ban, etc.", + ) + output_group.add_argument( + "--verbose", + "-v", + action="store_true", + dest="verbose", + default=False, + help="Display extra information and metrics.", + ) + output_group.add_argument( + "--info", + "-vv", + action="store_true", + dest="info", + default=False, + help="Display extra/service information and metrics.", + ) + output_group.add_argument( + "--debug", + "-vvv", + "-d", + action="store_true", + dest="debug", + default=False, + help="Display extra/service/debug information and metrics, save responses in debug.log.", + ) + output_group.add_argument( + "--no-color", + action="store_true", + dest="no_color", + default=False, + help="Don't color terminal output", + ) + output_group.add_argument( + "--no-progressbar", + action="store_true", + dest="no_progressbar", + default=False, + help="Don't show progressbar.", + ) + + report_group = parser.add_argument_group('Report formats', 'Supported formats of report files') + report_group.add_argument( "-T", "--txt", action="store_true", @@ -299,7 +309,7 @@ def setup_arguments_parser(): default=False, help="Create a TXT report (one report per username).", ) - parser.add_argument( + report_group.add_argument( "-C", "--csv", action="store_true", @@ -307,7 +317,7 @@ def setup_arguments_parser(): default=False, help="Create a CSV report (one report per username).", ) - parser.add_argument( + report_group.add_argument( "-H", "--html", action="store_true", @@ -315,7 +325,7 @@ def setup_arguments_parser(): default=False, 
help="Create an HTML report file (general report on all usernames).", ) - parser.add_argument( + report_group.add_argument( "-X", "--xmind", action="store_true", @@ -323,7 +333,7 @@ def setup_arguments_parser(): default=False, help="Generate an XMind 8 mindmap report (one report per username).", ) - parser.add_argument( + report_group.add_argument( "-P", "--pdf", action="store_true", @@ -331,14 +341,14 @@ def setup_arguments_parser(): default=False, help="Generate a PDF report (general report on all usernames).", ) - parser.add_argument( + report_group.add_argument( "-J", "--json", action="store", - metavar='REPORT_TYPE', + metavar='TYPE', dest="json", default='', - type=check_supported_json_format, + choices=SUPPORTED_JSON_REPORT_FORMATS, help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}" " (one report per username).", ) @@ -371,7 +381,7 @@ async def main(): usernames = { u: args.id_type for u in args.username - if u not in ['-'] and u not in args.ignore_ids_list + if u and u not in ['-'] and u not in args.ignore_ids_list } parsing_enabled = not args.disable_extracting @@ -405,7 +415,7 @@ async def main(): for k, v in info.items(): if 'username' in k: usernames[v] = 'username' - if k in supported_recursive_search_ids: + if k in SUPPORTED_IDS: usernames[v] = k if args.tags: diff --git a/maigret/report.py b/maigret/report.py index c24ddd1..b411ff6 100644 --- a/maigret/report.py +++ b/maigret/report.py @@ -359,12 +359,3 @@ def design_xmind_sheet(sheet, username, results): for k, v in filtered_supposed_data.items(): currentsublabel = undefinedsection.addSubTopic() currentsublabel.setTitle("%s: %s" % (k, v)) - - -def check_supported_json_format(value): - if value and value not in SUPPORTED_JSON_REPORT_FORMATS: - raise ArgumentTypeError( - "JSON report type must be one of the following types: " - + ", ".join(SUPPORTED_JSON_REPORT_FORMATS) - ) - return value diff --git a/maigret/resources/data.json b/maigret/resources/data.json 
index 4fbbf9f..f2185a6 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -982,13 +982,9 @@ "\u0412\u044b \u043d\u0435 \u043c\u043e\u0436\u0435\u0442\u0435 \u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0441\u0442\u0438 \u043f\u043e\u0438\u0441\u043a \u0441\u0440\u0430\u0437\u0443 \u043f\u043e\u0441\u043b\u0435 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0435\u0433\u043e": "Too many searhes per IP", "\u0414\u043e\u0441\u0442\u0443\u043f \u043a \u043a\u043e\u043d\u0444\u0435\u0440\u0435\u043d\u0446\u0438\u0438 \u0437\u0430\u043a\u0440\u044b\u0442 \u0434\u043b\u044f \u0432\u0430\u0448\u0435\u0433\u043e IP-\u0430\u0434\u0440\u0435\u0441\u0430.": "IP ban" }, - "checkType": "message", - "absenceStrs": [ - "\u041f\u043e\u0434\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0442\u0435\u043c \u0438\u043b\u0438 \u0441\u043e\u043e\u0431\u0449\u0435\u043d\u0438\u0439 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u043e." - ], + "engine": "phpBB/Search", "alexaRank": 284203, "urlMain": "https://antiwomen.ru", - "url": "https://antiwomen.ru/search.php?keywords=&terms=all&author={username}", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" }, @@ -12784,7 +12780,7 @@ "us" ], "headers": { - "authorization": "Bearer BQBxsP-d2_tKY0erevviPs9sqxt3qgBU-R1Hpjh-1VV3rCoMm4qVjckkDvPctosbWStF0myG4aJ-7xO2LRg" + "authorization": "Bearer BQAlQVJgjkpZgzYiYPT1DgdyrvwTwWkYAgu3lET0zKuXZK7E28z60A00m2y6ITwkVXskqtWkxbKdfHodCao" }, "errors": { "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" @@ -14171,7 +14167,7 @@ "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", - "x-guest-token": 
"1388922761482022917" + "x-guest-token": "1389716834983759872" }, "errors": { "Bad guest token": "x-guest-token update required" @@ -14568,7 +14564,7 @@ "video" ], "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk5NzQ4MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.LJFXICpOC7e-a67hz6kOUY1Mz9wP_60L8mCz2kZawHs" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjAxNzAyMjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.TbxzgFVMQsgYz4vTiFE-_P1qydzqP9ADUsPxl8U4bZE" }, "activation": { "url": "https://vimeo.com/_rv/viewer", @@ -15509,19 +15505,6 @@ "usernameClaimed": "yandex", "usernameUnclaimed": "noonewouldeverusethis7" }, - "YandexLocal": { - "tags": [ - "ru" - ], - "type": "yandex_public_id", - "checkType": "status_code", - "alexaRank": 49, - "urlMain": "https://local.yandex.ru/", - "url": "https://local.yandex.ru/users/{username}", - "source": "Yandex", - "usernameClaimed": "gp7v6ufryzw3m1nvdj4ycexa8g", - "usernameUnclaimed": "noonewouldeverusethis77777" - }, "YandexMarket": { "tags": [ "ru" diff --git a/maigret/sites.py b/maigret/sites.py index 9fd66bb..ac05372 100644 --- a/maigret/sites.py +++ b/maigret/sites.py @@ -436,13 +436,13 @@ class MaigretDatabase: tags[tag] = tags.get(tag, 0) + 1 output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n" - output += "Top sites' profile URLs:\n" + output += "Top profile URLs:\n" for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]: if count == 1: break output += f"{count}\t{url}\n" - output += "Top sites' tags:\n" + output += "Top tags:\n" for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:20]: mark = "" if tag not in SUPPORTED_TAGS: diff --git a/test.sh b/test.sh index c30a4a9..e20c419 100755 --- a/test.sh +++ b/test.sh @@ -1,2 +1,4 @@ #!/bin/sh -pytest tests +coverage run --source=./maigret -m pytest 
tests; status=$? +coverage report -m +coverage html; exit $status diff --git a/tests/conftest.py b/tests/conftest.py index 9df6f6b..1e7cbdd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,8 @@ import pytest from _pytest.mark import Mark from maigret.sites import MaigretDatabase +from maigret.maigret import setup_arguments_parser + CUR_PATH = os.path.dirname(os.path.realpath(__file__)) JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json') @@ -51,3 +53,8 @@ def reports_autoclean(): remove_test_reports() yield remove_test_reports() + + +@pytest.fixture(scope='session') +def argparser(): + return setup_arguments_parser() diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..9e2cce3 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,80 @@ +"""Maigret command-line arguments parsing tests""" +from argparse import Namespace +from typing import Dict, Any + +DEFAULT_ARGS: Dict[str, Any] = { + 'all_sites': False, + 'connections': 100, + 'cookie_file': None, + 'csv': False, + 'db_file': None, + 'debug': False, + 'disable_extracting': False, + 'disable_recursive_search': False, + 'folderoutput': 'reports', + 'html': False, + 'id_type': 'username', + 'ignore_ids_list': [], + 'info': False, + 'json': '', + 'new_site_to_submit': False, + 'no_color': False, + 'no_progressbar': False, + 'parse_url': '', + 'pdf': False, + 'print_check_errors': False, + 'print_not_found': False, + 'proxy': None, + 'retries': 1, + 'self_check': False, + 'site_list': [], + 'stats': False, + 'tags': '', + 'timeout': 30, + 'top_sites': 500, + 'txt': False, + 'use_disabled_sites': False, + 'username': [], + 'verbose': False, + 'xmind': False, +} + + +def test_args_search_mode(argparser): + args = argparser.parse_args('username'.split()) + + assert args.username == ['username'] + + want_args = dict(DEFAULT_ARGS) + want_args.update({'username': ['username']}) + + assert args == Namespace(**want_args) + + +def test_args_self_check_mode(argparser): + args = 
argparser.parse_args('--self-check --site GitHub'.split()) + + want_args = dict(DEFAULT_ARGS) + want_args.update( + { + 'self_check': True, + 'site_list': ['GitHub'], + 'username': [None], + } + ) + + assert args == Namespace(**want_args) + + +def test_args_multiple_sites(argparser): + args = argparser.parse_args('--site GitHub VK --site PornHub --site Taringa,Steam'.split()) + + want_args = dict(DEFAULT_ARGS) + want_args.update( + { + 'site_list': ['GitHub', 'PornHub', 'Taringa,Steam'], + 'username': ['VK'], + } + ) + + assert args == Namespace(**want_args) diff --git a/tests/test_utils.py b/tests/test_utils.py index bbd458c..4954bf0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -98,6 +98,7 @@ def test_get_dict_ascii_tree(): 'legacy_id': '26403415', 'username': 'alexaimephotographycars', 'name': 'Alex Aimé', + 'links': "['www.instagram.com/street.reality.photography/']", 'created_at': '2018-05-04T10:17:01.000+0000', 'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b', 'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201', @@ -107,20 +108,22 @@ def test_get_dict_ascii_tree(): 'twitter_username': 'Alexaimephotogr', } - ascii_tree = get_dict_ascii_tree(data.items()) + ascii_tree = get_dict_ascii_tree(data.items(), prepend=" ") assert ( ascii_tree == """ -┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ== -┣╸legacy_id: 26403415 -┣╸username: alexaimephotographycars -┣╸name: Alex Aimé -┣╸created_at: 2018-05-04T10:17:01.000+0000 -┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b -┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201 
-┣╸website: www.instagram.com/street.reality.photography/ -┣╸facebook_link: www.instagram.com/street.reality.photography/ -┣╸instagram_username: Street.Reality.Photography -┗╸twitter_username: Alexaimephotogr""" + ┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ== + ┣╸legacy_id: 26403415 + ┣╸username: alexaimephotographycars + ┣╸name: Alex Aimé + ┣╸links: + ┃ ┗╸ www.instagram.com/street.reality.photography/ + ┣╸created_at: 2018-05-04T10:17:01.000+0000 + ┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b + ┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201 + ┣╸website: www.instagram.com/street.reality.photography/ + ┣╸facebook_link: www.instagram.com/street.reality.photography/ + ┣╸instagram_username: Street.Reality.Photography + ┗╸twitter_username: Alexaimephotogr""" )