From 7a362406d5abaf3e83b5b74b83223e14ebe60de4 Mon Sep 17 00:00:00 2001 From: Soxoj Date: Sat, 6 Nov 2021 08:22:23 +0300 Subject: [PATCH] Documentation and settings improved --- README.md | 2 +- docs/source/command-line-options.rst | 5 +++++ docs/source/index.rst | 1 + docs/source/settings.rst | 26 +++++++++++++++++++++++ maigret/maigret.py | 31 ++++++++++++++-------------- maigret/resources/data.json | 4 ++-- maigret/resources/settings.json | 7 ++++++- maigret/settings.py | 5 +++++ tests/test_cli.py | 2 +- 9 files changed, 62 insertions(+), 21 deletions(-) create mode 100644 docs/source/settings.rst diff --git a/README.md b/README.md index 19e3606..0625514 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ **Maigret** collect a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock). -Currently supported more than 2000 sites ([full list](https://raw.githubusercontent.com/soxoj/maigret/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving). +Currently supported more than 2500 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving). ## Main features diff --git a/docs/source/command-line-options.rst b/docs/source/command-line-options.rst index 44dcd8f..dea15d3 100644 --- a/docs/source/command-line-options.rst +++ b/docs/source/command-line-options.rst @@ -23,6 +23,11 @@ extracted username and ids. :doc:`Examples `. Main options ------------ +Options are also configurable through settings files. 
See +:doc:`settings section <settings>` +`settings.json ` +for the list of currently supported options. + ``--tags`` - Filter sites for searching by tags: sites categories and two-letter country codes. E.g. photo, dating, sport; jp, us, global. Multiple tags can be associated with one site. **Warning: tags markup is diff --git a/docs/source/index.rst b/docs/source/index.rst index ec6bda6..273e4cf 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -27,3 +27,4 @@ You may be interested in: supported-identifier-types tags usage-examples + settings diff --git a/docs/source/settings.rst b/docs/source/settings.rst new file mode 100644 index 0000000..4ef826b --- /dev/null +++ b/docs/source/settings.rst @@ -0,0 +1,26 @@ +.. _settings: + +Settings +============== + +Options are also configurable through settings files. See +`settings.json ` +for the list of currently supported options. + +On startup, Maigret tries to load configuration from the following sources, in exactly this order: + +.. code-block:: console + + # relative path, based on installed package path + resources/settings.json + + # absolute path, configuration file in home directory + ~/.maigret/settings.json + + # relative path, based on current working directory + settings.json + +Missing any of these files is not an error. +If a later settings file contains an already known option, +this option will be overwritten. So it is possible to make +custom configurations for different users and directories. 
diff --git a/maigret/maigret.py b/maigret/maigret.py index 515fffa..0d04f4c 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -129,7 +129,9 @@ def setup_arguments_parser(settings: Settings): parser = ArgumentParser( formatter_class=RawDescriptionHelpFormatter, - description=f"Maigret v{__version__}", + description=f"Maigret v{__version__}\n" + "Documentation: https://maigret.readthedocs.io/\n" + "All settings are also configurable through files, see docs.", ) parser.add_argument( "username", @@ -149,9 +151,9 @@ def setup_arguments_parser(settings: Settings): metavar='TIMEOUT', dest="timeout", type=timeout_check, - default=30, - help="Time in seconds to wait for response to requests. " - "Default timeout of 30.0s. " + default=settings.timeout, + help="Time in seconds to wait for response to requests " + f"(default {settings.timeout}s). " "A longer timeout will be more likely to get results from slow sites. " "On the other hand, this may cause a long delay to gather all results. 
", ) @@ -169,21 +171,21 @@ def setup_arguments_parser(settings: Settings): action="store", type=int, dest="connections", - default=100, + default=settings.max_connections, help="Allowed number of concurrent connections.", ) parser.add_argument( "--no-recursion", action="store_true", dest="disable_recursive_search", - default=False, + default=(not settings.recursive_search), help="Disable recursive search by additional data extracted from pages.", ) parser.add_argument( "--no-extracting", action="store_true", dest="disable_extracting", - default=False, + default=(not settings.info_extracting), help="Disable parsing pages for additional data and other usernames.", ) parser.add_argument( @@ -197,7 +199,7 @@ def setup_arguments_parser(settings: Settings): "--db", metavar="DB_FILE", dest="db_file", - default=None, + default=settings.sites_db_path, help="Load Maigret database from a JSON file or HTTP web resource.", ) parser.add_argument( @@ -507,10 +509,7 @@ async def main(): if args.tags: args.tags = list(set(str(args.tags).split(','))) - if args.db_file is None: - args.db_file = path.join( - path.dirname(path.realpath(__file__)), "resources/data.json" - ) + db_file = path.join(path.dirname(path.realpath(__file__)), args.db_file) if args.top_sites == 0 or args.all_sites: args.top_sites = sys.maxsize @@ -525,7 +524,7 @@ async def main(): ) # Create object with all information about sites we are aware of. 
- db = MaigretDatabase().load_from_path(args.db_file) + db = MaigretDatabase().load_from_path(db_file) get_top_sites_for_id = lambda x: db.ranked_sites_dict( top=args.top_sites, tags=args.tags, @@ -540,7 +539,7 @@ async def main(): submitter = Submitter(db=db, logger=logger, settings=settings) is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file) if is_submitted: - db.save_to_file(args.db_file) + db.save_to_file(db_file) # Database self-checking if args.self_check: @@ -558,7 +557,7 @@ async def main(): 'y', '', ): - db.save_to_file(args.db_file) + db.save_to_file(db_file) print('Database was successfully updated.') else: print('Updates will be applied only for current search session.') @@ -708,7 +707,7 @@ async def main(): print(text_report) # update database - db.save_to_file(args.db_file) + db.save_to_file(db_file) def run(): diff --git a/maigret/resources/data.json b/maigret/resources/data.json index cfa2706..5a4b51a 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -13078,7 +13078,7 @@ "us" ], "headers": { - "authorization": "Bearer BQCJhizJUIWlnA6-hlMrHeTsb0jCi067H73LzQ1bWk8Iz1tQ6DIE6WyvbJ2x1bapJhZ_MLNiQbTp-dCxz7g" + "authorization": "Bearer BQB8QPkkvz_PhWGy4sSY4ijssYjumEHJgJJBFu3VX2Sm4XIoT9jp0eFZrYL3TayY4QZGHmMiz3BCPLcAth4" }, "errors": { "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" @@ -14913,7 +14913,7 @@ "video" ], "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzYxMDg1NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.kqIzH7O99ov6IrTfeQoA7NNaSZe_7czUgrwKxHnBs3o" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzU2OTI0NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.KZHo96wUe5__rTqZQqAWiJKPKOy2-sjyxRjhOuuhyEc" }, "activation": { "url": "https://vimeo.com/_rv/viewer", diff --git 
a/maigret/resources/settings.json b/maigret/resources/settings.json index e395855..300af33 100644 --- a/maigret/resources/settings.json +++ b/maigret/resources/settings.json @@ -15,5 +15,10 @@ "supposed_usernames": [ "alex", "god", "admin", "red", "blue", "john" ], - "retries_count": 1 + "retries_count": 1, + "sites_db_path": "resources/data.json", + "timeout": 30, + "max_connections": 100, + "recursive_search": true, + "info_extracting": true } \ No newline at end of file diff --git a/maigret/settings.py b/maigret/settings.py index 0f6fd77..b0fca08 100644 --- a/maigret/settings.py +++ b/maigret/settings.py @@ -12,6 +12,11 @@ SETTINGS_FILES_PATHS = [ class Settings: # main maigret setting retries_count: int + sites_db_path: str + timeout: int + max_connections: int + recursive_search: bool + info_extracting: bool # submit mode settings presence_strings: list diff --git a/tests/test_cli.py b/tests/test_cli.py index 5a3f53e..8d677f1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,7 +7,7 @@ DEFAULT_ARGS: Dict[str, Any] = { 'connections': 100, 'cookie_file': None, 'csv': False, - 'db_file': None, + 'db_file': 'resources/data.json', 'debug': False, 'disable_extracting': False, 'disable_recursive_search': False,