Documentation and settings improved

2026-05-07 06:24:35 +00:00 · 2021-11-06 08:22:23 +03:00
parent f81a500d72
commit 7a362406d5
9 changed files with 62 additions and 21 deletions
@@ -23,7 +23,7 @@
 **Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
-Currently supported more than 2000 sites ([full list](https://raw.githubusercontent.com/soxoj/maigret/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
+More than 2500 sites are currently supported ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)); by default, the search is launched against the 500 most popular sites, in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolving) is also supported.
 ## Main features
@@ -23,6 +23,11 @@ extracted username and ids. :doc:`Examples <extracting-information-from-pages>`.
 Main options
 ------------
 Options are also configurable through settings files. See
 :doc:`settings section <settings>`
 `settings.json <https://github.com/soxoj/maigret/blob/main/maigret/resources/settings.json>`
 for the list of currently supported options.
 ``--tags`` - Filter sites for searching by tags: sites categories and
 two-letter country codes. E.g. photo, dating, sport; jp, us, global.
 Multiple tags can be associated with one site. **Warning: tags markup is
@@ -27,3 +27,4 @@ You may be interested in:
   supported-identifier-types
   tags
   usage-examples
   settings
@@ -0,0 +1,26 @@
 .. _settings:
 Settings
 ==============
 Options are also configurable through settings files. See
 `settings.json <https://github.com/soxoj/maigret/blob/main/maigret/resources/settings.json>`_
 for the list of currently supported options.
 On startup, Maigret tries to load configuration from the following sources, in this order:
 .. code-block:: console
  # relative path, based on installed package path
  resources/settings.json
  # absolute path, configuration file in home directory
  ~/.maigret/settings.json
  # relative path, based on current working directory
  settings.json
 Missing any of these files is not an error.
 If a later settings file contains an option that is already set,
 the earlier value is overwritten. This makes it possible to keep
 custom configurations for different users and directories.
@@ -129,7 +129,9 @@ def setup_arguments_parser(settings: Settings):
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
-        description=f"Maigret v{__version__}",
+        description=f"Maigret v{__version__}\n"
        "Documentation: https://maigret.readthedocs.io/\n"
        "All settings are also configurable through files, see docs.",
    )
    parser.add_argument(
        "username",
@@ -149,9 +151,9 @@ def setup_arguments_parser(settings: Settings):
        metavar='TIMEOUT',
        dest="timeout",
        type=timeout_check,
-        default=30,
+        default=settings.timeout,
-        help="Time in seconds to wait for response to requests. "
+        help="Time in seconds to wait for response to requests "
-        "Default timeout of 30.0s. "
+        f"(default {settings.timeout}s). "
        "A longer timeout will be more likely to get results from slow sites. "
        "On the other hand, this may cause a long delay to gather all results. ",
    )
@@ -169,21 +171,21 @@ def setup_arguments_parser(settings: Settings):
        action="store",
        type=int,
        dest="connections",
-        default=100,
+        default=settings.max_connections,
        help="Allowed number of concurrent connections.",
    )
    parser.add_argument(
        "--no-recursion",
        action="store_true",
        dest="disable_recursive_search",
-        default=False,
+        default=(not settings.recursive_search),
        help="Disable recursive search by additional data extracted from pages.",
    )
    parser.add_argument(
        "--no-extracting",
        action="store_true",
        dest="disable_extracting",
-        default=False,
+        default=(not settings.info_extracting),
        help="Disable parsing pages for additional data and other usernames.",
    )
    parser.add_argument(
@@ -197,7 +199,7 @@ def setup_arguments_parser(settings: Settings):
        "--db",
        metavar="DB_FILE",
        dest="db_file",
-        default=None,
+        default=settings.sites_db_path,
        help="Load Maigret database from a JSON file or HTTP web resource.",
    )
    parser.add_argument(
@@ -507,10 +509,7 @@ async def main():
    if args.tags:
        args.tags = list(set(str(args.tags).split(',')))
-    if args.db_file is None:
+    db_file = path.join(path.dirname(path.realpath(__file__)), args.db_file)
        args.db_file = path.join(
            path.dirname(path.realpath(__file__)), "resources/data.json"
        )
    if args.top_sites == 0 or args.all_sites:
        args.top_sites = sys.maxsize
@@ -525,7 +524,7 @@ async def main():
    )
    # Create object with all information about sites we are aware of.
-    db = MaigretDatabase().load_from_path(args.db_file)
+    db = MaigretDatabase().load_from_path(db_file)
    get_top_sites_for_id = lambda x: db.ranked_sites_dict(
        top=args.top_sites,
        tags=args.tags,
@@ -540,7 +539,7 @@ async def main():
        submitter = Submitter(db=db, logger=logger, settings=settings)
        is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
        if is_submitted:
-            db.save_to_file(args.db_file)
+            db.save_to_file(db_file)
    # Database self-checking
    if args.self_check:
@@ -558,7 +557,7 @@ async def main():
                'y',
                '',
            ):
-                db.save_to_file(args.db_file)
+                db.save_to_file(db_file)
                print('Database was successfully updated.')
            else:
                print('Updates will be applied only for current search session.')
@@ -708,7 +707,7 @@ async def main():
            print(text_report)
    # update database
-    db.save_to_file(args.db_file)
+    db.save_to_file(db_file)
 def run():
@@ -13078,7 +13078,7 @@
                "us"
            ],
            "headers": {
-                "authorization": "Bearer BQCJhizJUIWlnA6-hlMrHeTsb0jCi067H73LzQ1bWk8Iz1tQ6DIE6WyvbJ2x1bapJhZ_MLNiQbTp-dCxz7g"
+                "authorization": "Bearer BQB8QPkkvz_PhWGy4sSY4ijssYjumEHJgJJBFu3VX2Sm4XIoT9jp0eFZrYL3TayY4QZGHmMiz3BCPLcAth4"
            },
            "errors": {
                "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14913,7 +14913,7 @@
                "video"
            ],
            "headers": {
-                "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzYxMDg1NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.kqIzH7O99ov6IrTfeQoA7NNaSZe_7czUgrwKxHnBs3o"
+                "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzU2OTI0NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.KZHo96wUe5__rTqZQqAWiJKPKOy2-sjyxRjhOuuhyEc"
            },
            "activation": {
                "url": "https://vimeo.com/_rv/viewer",
@@ -15,5 +15,10 @@
    "supposed_usernames": [
        "alex", "god", "admin", "red", "blue", "john"
    ],
-    "retries_count": 1
+    "retries_count": 1,
    "sites_db_path": "resources/data.json",
    "timeout": 30,
    "max_connections": 100,
    "recursive_search": true,
    "info_extracting": true
 }
@@ -12,6 +12,11 @@ SETTINGS_FILES_PATHS = [
 class Settings:
    # main maigret setting
    retries_count: int
    sites_db_path: str
    timeout: int
    max_connections: int
    recursive_search: bool
    info_extracting: bool
    # submit mode settings
    presence_strings: list
@@ -7,7 +7,7 @@ DEFAULT_ARGS: Dict[str, Any] = {
    'connections': 100,
    'cookie_file': None,
    'csv': False,
-    'db_file': None,
+    'db_file': 'resources/data.json',
    'debug': False,
    'disable_extracting': False,
    'disable_recursive_search': False,