CLI arguments improvements, tests added

2026-05-06 14:08:59 +00:00 · 2021-05-05 15:27:56 +03:00
parent 9858e71349
commit 3b91a9cd31
10 changed files with 287 additions and 205 deletions
@@ -28,3 +28,9 @@ tests/.excluded_sites
 # MacOS Folder Metadata File
 .DS_Store
 /reports/
 # Testing
 .coverage
 dist/
 htmlcov/
 test_*
@@ -27,7 +27,7 @@ from .types import QueryOptions, QueryResultWrapper
 from .utils import get_random_user_agent
-supported_recursive_search_ids = (
+SUPPORTED_IDS = (
    "yandex_public_id",
    "gaia_id",
    "vk_id",
@@ -263,7 +263,7 @@ def process_site_result(
            for k, v in extracted_ids_data.items():
                if "username" in k:
                    new_usernames[v] = "username"
-                if k in supported_recursive_search_ids:
+                if k in SUPPORTED_IDS:
                    new_usernames[v] = k
            results_info["ids_usernames"] = new_usernames
@@ -14,7 +14,7 @@ from socid_extractor import extract, parse, __version__ as socid_version
 from .checking import (
    timeout_check,
-    supported_recursive_search_ids,
+    SUPPORTED_IDS,
    self_check,
    unsupported_characters,
    maigret,
@@ -29,7 +29,6 @@ from .report import (
    generate_report_context,
    save_txt_report,
    SUPPORTED_JSON_REPORT_FORMATS,
    check_supported_json_format,
    save_json_report,
 )
 from .sites import MaigretDatabase
@@ -74,68 +73,19 @@ def setup_arguments_parser():
        formatter_class=RawDescriptionHelpFormatter,
        description=f"Maigret v{__version__}",
    )
    parser.add_argument(
        "username",
        nargs='?',
        metavar="USERNAMES",
        action="append",
        help="One or more usernames to check with social networks.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=version_string,
        help="Display version information and dependencies.",
    )
    parser.add_argument(
        "--info",
        "-vv",
        action="store_true",
        dest="info",
        default=False,
        help="Display service information.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra information and metrics.",
    )
    parser.add_argument(
        "-d",
        "--debug",
        "-vvv",
        action="store_true",
        dest="debug",
        default=False,
        help="Saving debugging information and sites responses in debug.txt.",
    )
    parser.add_argument(
        "--site",
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
        default=[],
        help="Limit analysis to just the listed sites (use several times to specify more than one)",
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    )
    parser.add_argument(
        "--db",
        metavar="DB_FILE",
        dest="db_file",
        default=None,
        help="Load Maigret database from a JSON file or an online, valid, JSON file.",
    )
    parser.add_argument(
        "--cookies-jar-file",
        metavar="COOKIE_FILE",
        dest="cookie_file",
        default=None,
        help="File with cookies.",
    )
    parser.add_argument(
        "--timeout",
        action="store",
@@ -143,7 +93,7 @@ def setup_arguments_parser():
        dest="timeout",
        type=timeout_check,
        default=30,
-        help="Time (in seconds) to wait for response to requests. "
+        help="Time in seconds to wait for response to requests. "
        "Default timeout of 30.0s. "
        "A longer timeout will be more likely to get results from slow sites. "
        "On the other hand, this may cause a long delay to gather all results. ",
@@ -165,65 +115,6 @@ def setup_arguments_parser():
        default=100,
        help="Allowed number of concurrent connections.",
    )
    parser.add_argument(
        "-a",
        "--all-sites",
        action="store_true",
        dest="all_sites",
        default=False,
        help="Use all sites for scan.",
    )
    parser.add_argument(
        "--top-sites",
        action="store",
        default=500,
        type=int,
        help="Count of sites for scan ranked by Alexa Top (default: 500).",
    )
    parser.add_argument(
        "--print-not-found",
        action="store_true",
        dest="print_not_found",
        default=False,
        help="Print sites where the username was not found.",
    )
    parser.add_argument(
        "--print-errors",
        action="store_true",
        dest="print_check_errors",
        default=False,
        help="Print errors messages: connection, captcha, site country ban, etc.",
    )
    parser.add_argument(
        "--submit",
        metavar='EXISTING_USER_URL',
        type=str,
        dest="new_site_to_submit",
        default=False,
        help="URL of existing profile in new site to submit.",
    )
    parser.add_argument(
        "--no-color",
        action="store_true",
        dest="no_color",
        default=False,
        help="Don't color terminal output",
    )
    parser.add_argument(
        "--no-progressbar",
        action="store_true",
        dest="no_progressbar",
        default=False,
        help="Don't show progressbar.",
    )
    parser.add_argument(
        "--browse",
        "-b",
        action="store_true",
        dest="browse",
        default=False,
        help="Browse to all results on default bowser.",
    )
    parser.add_argument(
        "--no-recursion",
        action="store_true",
@@ -238,33 +129,27 @@ def setup_arguments_parser():
        default=False,
        help="Disable parsing pages for additional data and other usernames.",
    )
    parser.add_argument(
        "--self-check",
        action="store_true",
        default=False,
        help="Do self check for sites and database and disable non-working ones.",
    )
    parser.add_argument(
        "--stats", action="store_true", default=False, help="Show database statistics."
    )
    parser.add_argument(
        "--use-disabled-sites",
        action="store_true",
        default=False,
        help="Use disabled sites to search (may cause many false positives).",
    )
    parser.add_argument(
        "--parse",
        dest="parse_url",
        default='',
        help="Parse page by URL and extract username and IDs to use for search.",
    )
    parser.add_argument(
        "--id-type",
        dest="id_type",
        default='username',
        choices=SUPPORTED_IDS,
        help="Specify identifier(s) type (default: username).",
    )
    parser.add_argument(
        "--db",
        metavar="DB_FILE",
        dest="db_file",
        default=None,
        help="Load Maigret database from a JSON file or an online, valid, JSON file.",
    )
    parser.add_argument(
        "--cookies-jar-file",
        metavar="COOKIE_FILE",
        dest="cookie_file",
        default=None,
        help="File with cookies.",
    )
    parser.add_argument(
        "--ignore-ids",
        action="append",
@@ -273,25 +158,150 @@ def setup_arguments_parser():
        default=[],
        help="Do not make search by the specified username or other ids.",
    )
    parser.add_argument(
        "username",
        nargs='+',
        metavar='USERNAMES',
        action="store",
        help="One or more usernames to check with social networks.",
    )
    parser.add_argument(
        "--tags", dest="tags", default='', help="Specify tags of sites."
    )
    # reports options
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        default="reports",
        metavar="PATH",
        help="If using multiple usernames, the output of the results will be saved to this folder.",
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    )
    filter_group = parser.add_argument_group('Site filtering', 'Options to set site search scope')
    filter_group.add_argument(
        "-a",
        "--all-sites",
        action="store_true",
        dest="all_sites",
        default=False,
        help="Use all sites for scan.",
    )
    filter_group.add_argument(
        "--top-sites",
        action="store",
        default=500,
        metavar="N",
        type=int,
        help="Count of sites for scan ranked by Alexa Top (default: 500).",
    )
    filter_group.add_argument(
        "--tags", dest="tags", default='', help="Specify tags of sites (see `--stats`)."
    )
    filter_group.add_argument(
        "--site",
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
        default=[],
        help="Limit analysis to just the specified sites (multiple option).",
    )
    filter_group.add_argument(
        "--use-disabled-sites",
        action="store_true",
        default=False,
        help="Use disabled sites to search (may cause many false positives).",
    )
    modes_group = parser.add_argument_group(
        'Operating modes',
        'Various functions except the default search by a username. '
        'Modes are executed sequentially in the order of declaration.'
    )
    modes_group.add_argument(
        "--parse",
        dest="parse_url",
        default='',
        metavar='URL',
        help="Parse page by URL and extract username and IDs to use for search.",
    )
    modes_group.add_argument(
        "--submit",
        metavar='URL',
        type=str,
        dest="new_site_to_submit",
        default=False,
        help="URL of existing profile in new site to submit.",
    )
    modes_group.add_argument(
        "--self-check",
        action="store_true",
        default=False,
        help="Do self check for sites and database and disable non-working ones.",
    )
    modes_group.add_argument(
        "--stats",
        action="store_true",
        default=False,
        help="Show database statistics (most frequent sites engines and tags)."
    )
    output_group = parser.add_argument_group('Output options', 'Options to change verbosity and view of the console output')
    output_group.add_argument(
        "--print-not-found",
        action="store_true",
        dest="print_not_found",
        default=False,
        help="Print sites where the username was not found.",
    )
    output_group.add_argument(
        "--print-errors",
        action="store_true",
        dest="print_check_errors",
        default=False,
        help="Print errors messages: connection, captcha, site country ban, etc.",
    )
    output_group.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra information and metrics.",
    )
    output_group.add_argument(
        "--info",
        "-vv",
        action="store_true",
        dest="info",
        default=False,
        help="Display extra/service information and metrics.",
    )
    output_group.add_argument(
        "--debug",
        "-vvv",
        "-d",
        action="store_true",
        dest="debug",
        default=False,
        help="Display extra/service/debug information and metrics, save responses in debug.log.",
    )
    output_group.add_argument(
        "--no-color",
        action="store_true",
        dest="no_color",
        default=False,
        help="Don't color terminal output",
    )
    output_group.add_argument(
        "--no-progressbar",
        action="store_true",
        dest="no_progressbar",
        default=False,
        help="Don't show progressbar.",
    )
    report_group = parser.add_argument_group('Report formats', 'Supported formats of report files')
    report_group.add_argument(
        "-T",
        "--txt",
        action="store_true",
@@ -299,7 +309,7 @@ def setup_arguments_parser():
        default=False,
        help="Create a TXT report (one report per username).",
    )
-    parser.add_argument(
+    report_group.add_argument(
        "-C",
        "--csv",
        action="store_true",
@@ -307,7 +317,7 @@ def setup_arguments_parser():
        default=False,
        help="Create a CSV report (one report per username).",
    )
-    parser.add_argument(
+    report_group.add_argument(
        "-H",
        "--html",
        action="store_true",
@@ -315,7 +325,7 @@ def setup_arguments_parser():
        default=False,
        help="Create an HTML report file (general report on all usernames).",
    )
-    parser.add_argument(
+    report_group.add_argument(
        "-X",
        "--xmind",
        action="store_true",
@@ -323,7 +333,7 @@ def setup_arguments_parser():
        default=False,
        help="Generate an XMind 8 mindmap report (one report per username).",
    )
-    parser.add_argument(
+    report_group.add_argument(
        "-P",
        "--pdf",
        action="store_true",
@@ -331,14 +341,14 @@ def setup_arguments_parser():
        default=False,
        help="Generate a PDF report (general report on all usernames).",
    )
-    parser.add_argument(
+    report_group.add_argument(
        "-J",
        "--json",
        action="store",
-        metavar='REPORT_TYPE',
+        metavar='TYPE',
        dest="json",
        default='',
-        type=check_supported_json_format,
+        choices=SUPPORTED_JSON_REPORT_FORMATS,
        help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
        " (one report per username).",
    )
@@ -371,7 +381,7 @@ async def main():
    usernames = {
        u: args.id_type
        for u in args.username
-        if u not in ['-'] and u not in args.ignore_ids_list
+        if u and u not in ['-'] and u not in args.ignore_ids_list
    }
    parsing_enabled = not args.disable_extracting
@@ -405,7 +415,7 @@ async def main():
            for k, v in info.items():
                if 'username' in k:
                    usernames[v] = 'username'
-                if k in supported_recursive_search_ids:
+                if k in SUPPORTED_IDS:
                    usernames[v] = k
    if args.tags:
@@ -359,12 +359,3 @@ def design_xmind_sheet(sheet, username, results):
        for k, v in filtered_supposed_data.items():
            currentsublabel = undefinedsection.addSubTopic()
            currentsublabel.setTitle("%s: %s" % (k, v))
 def check_supported_json_format(value):
    """Validate a requested JSON report type and hand it back unchanged.

    Falsy values are returned as-is without any validation. Any other value
    must be a member of SUPPORTED_JSON_REPORT_FORMATS.

    Raises:
        ArgumentTypeError: if a non-empty value is not a supported format.
    """
    # Guard clause: nothing to reject for an empty or already-supported value.
    if not value or value in SUPPORTED_JSON_REPORT_FORMATS:
        return value

    allowed_formats = ", ".join(SUPPORTED_JSON_REPORT_FORMATS)
    raise ArgumentTypeError(
        "JSON report type must be one of the following types: " + allowed_formats
    )
@@ -982,13 +982,9 @@
                "\u0412\u044b \u043d\u0435 \u043c\u043e\u0436\u0435\u0442\u0435 \u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0441\u0442\u0438 \u043f\u043e\u0438\u0441\u043a \u0441\u0440\u0430\u0437\u0443 \u043f\u043e\u0441\u043b\u0435 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0435\u0433\u043e": "Too many searhes per IP",
                "\u0414\u043e\u0441\u0442\u0443\u043f \u043a \u043a\u043e\u043d\u0444\u0435\u0440\u0435\u043d\u0446\u0438\u0438 \u0437\u0430\u043a\u0440\u044b\u0442 \u0434\u043b\u044f \u0432\u0430\u0448\u0435\u0433\u043e IP-\u0430\u0434\u0440\u0435\u0441\u0430.": "IP ban"
            },
-            "checkType": "message",
+            "engine": "phpBB/Search",
            "absenceStrs": [
                "\u041f\u043e\u0434\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0442\u0435\u043c \u0438\u043b\u0438 \u0441\u043e\u043e\u0431\u0449\u0435\u043d\u0438\u0439 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u043e."
            ],
            "alexaRank": 284203,
            "urlMain": "https://antiwomen.ru",
            "url": "https://antiwomen.ru/search.php?keywords=&terms=all&author={username}",
            "usernameClaimed": "adam",
            "usernameUnclaimed": "noonewouldeverusethis7"
        },
@@ -12784,7 +12780,7 @@
                "us"
            ],
            "headers": {
-                "authorization": "Bearer BQBxsP-d2_tKY0erevviPs9sqxt3qgBU-R1Hpjh-1VV3rCoMm4qVjckkDvPctosbWStF0myG4aJ-7xO2LRg"
+                "authorization": "Bearer BQAlQVJgjkpZgzYiYPT1DgdyrvwTwWkYAgu3lET0zKuXZK7E28z60A00m2y6ITwkVXskqtWkxbKdfHodCao"
            },
            "errors": {
                "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14171,7 +14167,7 @@
                "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
                "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
-                "x-guest-token": "1388922761482022917"
+                "x-guest-token": "1389716834983759872"
            },
            "errors": {
                "Bad guest token": "x-guest-token update required"
@@ -14568,7 +14564,7 @@
                "video"
            ],
            "headers": {
-                "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTk5NzQ4MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.LJFXICpOC7e-a67hz6kOUY1Mz9wP_60L8mCz2kZawHs"
+                "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjAxNzAyMjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.TbxzgFVMQsgYz4vTiFE-_P1qydzqP9ADUsPxl8U4bZE"
            },
            "activation": {
                "url": "https://vimeo.com/_rv/viewer",
@@ -15509,19 +15505,6 @@
            "usernameClaimed": "yandex",
            "usernameUnclaimed": "noonewouldeverusethis7"
        },
        "YandexLocal": {
            "tags": [
                "ru"
            ],
            "type": "yandex_public_id",
            "checkType": "status_code",
            "alexaRank": 49,
            "urlMain": "https://local.yandex.ru/",
            "url": "https://local.yandex.ru/users/{username}",
            "source": "Yandex",
            "usernameClaimed": "gp7v6ufryzw3m1nvdj4ycexa8g",
            "usernameUnclaimed": "noonewouldeverusethis77777"
        },
        "YandexMarket": {
            "tags": [
                "ru"
@@ -436,13 +436,13 @@ class MaigretDatabase:
                tags[tag] = tags.get(tag, 0) + 1
        output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
-        output += "Top sites' profile URLs:\n"
+        output += "Top profile URLs:\n"
        for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
            if count == 1:
                break
            output += f"{count}\t{url}\n"
-        output += "Top sites' tags:\n"
+        output += "Top tags:\n"
        for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:20]:
            mark = ""
            if tag not in SUPPORTED_TAGS:
@@ -1,2 +1,4 @@
 #!/bin/sh
-pytest tests
+coverage run --source=./maigret -m pytest tests
 coverage report -m
 coverage html
@@ -6,6 +6,8 @@ import pytest
 from _pytest.mark import Mark
 from maigret.sites import MaigretDatabase
 from maigret.maigret import setup_arguments_parser
 CUR_PATH = os.path.dirname(os.path.realpath(__file__))
 JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
@@ -51,3 +53,8 @@ def reports_autoclean():
    remove_test_reports()
    yield
    remove_test_reports()
@pytest.fixture(scope='session')
 def argparser():
    """Return the parser built by ``setup_arguments_parser()``.

    The fixture is session-scoped, so a single parser instance is created
    once and reused by every test that requests it.
    """
    return setup_arguments_parser()
@@ -0,0 +1,80 @@
 """Maigret command-line arguments parsing tests"""
 from argparse import Namespace
 from typing import Dict, Any
 # Baseline of expected parsed-argument values. Each test in this module
 # copies the dict, overrides only the keys its command line affects, and
 # compares the result with the Namespace returned by the parser.
 DEFAULT_ARGS: Dict[str, Any] = {
    'all_sites': False,
    'connections': 100,
    'cookie_file': None,
    'csv': False,
    'db_file': None,
    'debug': False,
    'disable_extracting': False,
    'disable_recursive_search': False,
    'folderoutput': 'reports',
    'html': False,
    'id_type': 'username',
    'ignore_ids_list': [],
    'info': False,
    'json': '',
    'new_site_to_submit': False,
    'no_color': False,
    'no_progressbar': False,
    'parse_url': '',
    'pdf': False,
    'print_check_errors': False,
    'print_not_found': False,
    'proxy': None,
    'retries': 1,
    'self_check': False,
    'site_list': [],
    'stats': False,
    'tags': '',
    'timeout': 30,
    'top_sites': 500,
    'txt': False,
    'use_disabled_sites': False,
    'username': [],
    'verbose': False,
    'xmind': False,
 }
 def test_args_search_mode(argparser):
    """A lone positional argument becomes the only username; the rest stay default."""
    argv = 'username'.split()
    parsed = argparser.parse_args(argv)
    assert parsed.username == ['username']

    # Expected namespace: the defaults with just the username overridden.
    expected = {**DEFAULT_ARGS, 'username': ['username']}
    assert parsed == Namespace(**expected)
 def test_args_self_check_mode(argparser):
    """``--self-check`` with one ``--site`` and no positional username."""
    argv = '--self-check --site GitHub'.split()
    parsed = argparser.parse_args(argv)

    # With no positional argument given, the expected username list is [None].
    overrides = {
        'self_check': True,
        'site_list': ['GitHub'],
        'username': [None],
    }
    expected = {**DEFAULT_ARGS, **overrides}
    assert parsed == Namespace(**expected)
 def test_args_multiple_sites(argparser):
    """Repeated ``--site`` options accumulate; a stray token is taken as the username."""
    argv = '--site GitHub VK --site PornHub --site Taringa,Steam'.split()
    parsed = argparser.parse_args(argv)

    # Each --site takes exactly one value, so 'VK' is expected as the
    # positional username and 'Taringa,Steam' stays a single unsplit entry.
    overrides = {
        'site_list': ['GitHub', 'PornHub', 'Taringa,Steam'],
        'username': ['VK'],
    }
    expected = {**DEFAULT_ARGS, **overrides}
    assert parsed == Namespace(**expected)
@@ -98,6 +98,7 @@ def test_get_dict_ascii_tree():
        'legacy_id': '26403415',
        'username': 'alexaimephotographycars',
        'name': 'Alex Aimé',
        'links': "['www.instagram.com/street.reality.photography/']",
        'created_at': '2018-05-04T10:17:01.000+0000',
        'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b',
        'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201',
@@ -107,20 +108,22 @@ def test_get_dict_ascii_tree():
        'twitter_username': 'Alexaimephotogr',
    }
-    ascii_tree = get_dict_ascii_tree(data.items())
+    ascii_tree = get_dict_ascii_tree(data.items(), prepend=" ")
    assert (
        ascii_tree
        == """
-┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
+ ┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
-┣╸legacy_id: 26403415
+ ┣╸legacy_id: 26403415
-┣╸username: alexaimephotographycars
+ ┣╸username: alexaimephotographycars
-┣╸name: Alex Aimé
+ ┣╸name: Alex Aimé
-┣╸created_at: 2018-05-04T10:17:01.000+0000
+ ┣╸links: 
-┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
+ ┃ ┗╸ www.instagram.com/street.reality.photography/
-┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
+ ┣╸created_at: 2018-05-04T10:17:01.000+0000
-┣╸website: www.instagram.com/street.reality.photography/
+ ┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
-┣╸facebook_link:  www.instagram.com/street.reality.photography/
+ ┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
-┣╸instagram_username: Street.Reality.Photography
+ ┣╸website: www.instagram.com/street.reality.photography/
-┗╸twitter_username: Alexaimephotogr"""
+ ┣╸facebook_link:  www.instagram.com/street.reality.photography/
 ┣╸instagram_username: Street.Reality.Photography
 ┗╸twitter_username: Alexaimephotogr"""
    )