mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Documentation and settings improved
This commit is contained in:
@@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||||
|
|
||||||
Currently supported more than 2000 sites ([full list](https://raw.githubusercontent.com/soxoj/maigret/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
|
Currently, more than 2500 sites are supported ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)); by default, the search is launched against the 500 most popular sites in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolving) is also supported.
|
||||||
|
|
||||||
## Main features
|
## Main features
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,11 @@ extracted username and ids. :doc:`Examples <extracting-information-from-pages>`.
|
|||||||
Main options
|
Main options
|
||||||
------------
|
------------
|
||||||
|
|
||||||
|
Options are also configurable through settings files. See
|
||||||
|
:doc:`settings section <settings>`
|
||||||
|
and `settings.json <https://github.com/soxoj/maigret/blob/main/maigret/resources/settings.json>`_
|
||||||
|
for the list of currently supported options.
|
||||||
|
|
||||||
``--tags`` - Filter sites for searching by tags: sites categories and
|
``--tags`` - Filter sites for searching by tags: sites categories and
|
||||||
two-letter country codes. E.g. photo, dating, sport; jp, us, global.
|
two-letter country codes. E.g. photo, dating, sport; jp, us, global.
|
||||||
Multiple tags can be associated with one site. **Warning: tags markup is
|
Multiple tags can be associated with one site. **Warning: tags markup is
|
||||||
|
|||||||
@@ -27,3 +27,4 @@ You may be interested in:
|
|||||||
supported-identifier-types
|
supported-identifier-types
|
||||||
tags
|
tags
|
||||||
usage-examples
|
usage-examples
|
||||||
|
settings
|
||||||
|
|||||||
@@ -0,0 +1,26 @@
|
|||||||
|
.. _settings:
|
||||||
|
|
||||||
|
Settings
|
||||||
|
==============
|
||||||
|
|
||||||
|
Options are also configurable through settings files. See
|
||||||
|
`settings.json <https://github.com/soxoj/maigret/blob/main/maigret/resources/settings.json>`_
|
||||||
|
for the list of currently supported options.
|
||||||
|
|
||||||
|
On startup, Maigret tries to load configuration from the following sources, in the order listed:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
# relative path, based on installed package path
|
||||||
|
resources/settings.json
|
||||||
|
|
||||||
|
# absolute path, configuration file in home directory
|
||||||
|
~/.maigret/settings.json
|
||||||
|
|
||||||
|
# relative path, based on current working directory
|
||||||
|
settings.json
|
||||||
|
|
||||||
|
Missing any of these files is not an error.
|
||||||
|
If a later settings file contains an already known option,
|
||||||
|
this option will be overwritten. So it is possible to make
|
||||||
|
custom configuration for different users and directories.
|
||||||
+15
-16
@@ -129,7 +129,9 @@ def setup_arguments_parser(settings: Settings):
|
|||||||
|
|
||||||
parser = ArgumentParser(
|
parser = ArgumentParser(
|
||||||
formatter_class=RawDescriptionHelpFormatter,
|
formatter_class=RawDescriptionHelpFormatter,
|
||||||
description=f"Maigret v{__version__}",
|
description=f"Maigret v{__version__}\n"
|
||||||
|
"Documentation: https://maigret.readthedocs.io/\n"
|
||||||
|
"All settings are also configurable through files, see docs.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"username",
|
"username",
|
||||||
@@ -149,9 +151,9 @@ def setup_arguments_parser(settings: Settings):
|
|||||||
metavar='TIMEOUT',
|
metavar='TIMEOUT',
|
||||||
dest="timeout",
|
dest="timeout",
|
||||||
type=timeout_check,
|
type=timeout_check,
|
||||||
default=30,
|
default=settings.timeout,
|
||||||
help="Time in seconds to wait for response to requests. "
|
help="Time in seconds to wait for response to requests "
|
||||||
"Default timeout of 30.0s. "
|
f"(default {settings.timeout}s). "
|
||||||
"A longer timeout will be more likely to get results from slow sites. "
|
"A longer timeout will be more likely to get results from slow sites. "
|
||||||
"On the other hand, this may cause a long delay to gather all results. ",
|
"On the other hand, this may cause a long delay to gather all results. ",
|
||||||
)
|
)
|
||||||
@@ -169,21 +171,21 @@ def setup_arguments_parser(settings: Settings):
|
|||||||
action="store",
|
action="store",
|
||||||
type=int,
|
type=int,
|
||||||
dest="connections",
|
dest="connections",
|
||||||
default=100,
|
default=settings.max_connections,
|
||||||
help="Allowed number of concurrent connections.",
|
help="Allowed number of concurrent connections.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-recursion",
|
"--no-recursion",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
dest="disable_recursive_search",
|
dest="disable_recursive_search",
|
||||||
default=False,
|
default=(not settings.recursive_search),
|
||||||
help="Disable recursive search by additional data extracted from pages.",
|
help="Disable recursive search by additional data extracted from pages.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-extracting",
|
"--no-extracting",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
dest="disable_extracting",
|
dest="disable_extracting",
|
||||||
default=False,
|
default=(not settings.info_extracting),
|
||||||
help="Disable parsing pages for additional data and other usernames.",
|
help="Disable parsing pages for additional data and other usernames.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -197,7 +199,7 @@ def setup_arguments_parser(settings: Settings):
|
|||||||
"--db",
|
"--db",
|
||||||
metavar="DB_FILE",
|
metavar="DB_FILE",
|
||||||
dest="db_file",
|
dest="db_file",
|
||||||
default=None,
|
default=settings.sites_db_path,
|
||||||
help="Load Maigret database from a JSON file or HTTP web resource.",
|
help="Load Maigret database from a JSON file or HTTP web resource.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -507,10 +509,7 @@ async def main():
|
|||||||
if args.tags:
|
if args.tags:
|
||||||
args.tags = list(set(str(args.tags).split(',')))
|
args.tags = list(set(str(args.tags).split(',')))
|
||||||
|
|
||||||
if args.db_file is None:
|
db_file = path.join(path.dirname(path.realpath(__file__)), args.db_file)
|
||||||
args.db_file = path.join(
|
|
||||||
path.dirname(path.realpath(__file__)), "resources/data.json"
|
|
||||||
)
|
|
||||||
|
|
||||||
if args.top_sites == 0 or args.all_sites:
|
if args.top_sites == 0 or args.all_sites:
|
||||||
args.top_sites = sys.maxsize
|
args.top_sites = sys.maxsize
|
||||||
@@ -525,7 +524,7 @@ async def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create object with all information about sites we are aware of.
|
# Create object with all information about sites we are aware of.
|
||||||
db = MaigretDatabase().load_from_path(args.db_file)
|
db = MaigretDatabase().load_from_path(db_file)
|
||||||
get_top_sites_for_id = lambda x: db.ranked_sites_dict(
|
get_top_sites_for_id = lambda x: db.ranked_sites_dict(
|
||||||
top=args.top_sites,
|
top=args.top_sites,
|
||||||
tags=args.tags,
|
tags=args.tags,
|
||||||
@@ -540,7 +539,7 @@ async def main():
|
|||||||
submitter = Submitter(db=db, logger=logger, settings=settings)
|
submitter = Submitter(db=db, logger=logger, settings=settings)
|
||||||
is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
|
is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
|
||||||
if is_submitted:
|
if is_submitted:
|
||||||
db.save_to_file(args.db_file)
|
db.save_to_file(db_file)
|
||||||
|
|
||||||
# Database self-checking
|
# Database self-checking
|
||||||
if args.self_check:
|
if args.self_check:
|
||||||
@@ -558,7 +557,7 @@ async def main():
|
|||||||
'y',
|
'y',
|
||||||
'',
|
'',
|
||||||
):
|
):
|
||||||
db.save_to_file(args.db_file)
|
db.save_to_file(db_file)
|
||||||
print('Database was successfully updated.')
|
print('Database was successfully updated.')
|
||||||
else:
|
else:
|
||||||
print('Updates will be applied only for current search session.')
|
print('Updates will be applied only for current search session.')
|
||||||
@@ -708,7 +707,7 @@ async def main():
|
|||||||
print(text_report)
|
print(text_report)
|
||||||
|
|
||||||
# update database
|
# update database
|
||||||
db.save_to_file(args.db_file)
|
db.save_to_file(db_file)
|
||||||
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
|
|||||||
@@ -13078,7 +13078,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"authorization": "Bearer BQCJhizJUIWlnA6-hlMrHeTsb0jCi067H73LzQ1bWk8Iz1tQ6DIE6WyvbJ2x1bapJhZ_MLNiQbTp-dCxz7g"
|
"authorization": "Bearer BQB8QPkkvz_PhWGy4sSY4ijssYjumEHJgJJBFu3VX2Sm4XIoT9jp0eFZrYL3TayY4QZGHmMiz3BCPLcAth4"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||||
@@ -14913,7 +14913,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzYxMDg1NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.kqIzH7O99ov6IrTfeQoA7NNaSZe_7czUgrwKxHnBs3o"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzU2OTI0NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.KZHo96wUe5__rTqZQqAWiJKPKOy2-sjyxRjhOuuhyEc"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
|
|||||||
@@ -15,5 +15,10 @@
|
|||||||
"supposed_usernames": [
|
"supposed_usernames": [
|
||||||
"alex", "god", "admin", "red", "blue", "john"
|
"alex", "god", "admin", "red", "blue", "john"
|
||||||
],
|
],
|
||||||
"retries_count": 1
|
"retries_count": 1,
|
||||||
|
"sites_db_path": "resources/data.json",
|
||||||
|
"timeout": 30,
|
||||||
|
"max_connections": 100,
|
||||||
|
"recursive_search": true,
|
||||||
|
"info_extracting": true
|
||||||
}
|
}
|
||||||
@@ -12,6 +12,11 @@ SETTINGS_FILES_PATHS = [
|
|||||||
class Settings:
|
class Settings:
|
||||||
# main maigret setting
|
# main maigret setting
|
||||||
retries_count: int
|
retries_count: int
|
||||||
|
sites_db_path: str
|
||||||
|
timeout: int
|
||||||
|
max_connections: int
|
||||||
|
recursive_search: bool
|
||||||
|
info_extracting: bool
|
||||||
|
|
||||||
# submit mode settings
|
# submit mode settings
|
||||||
presence_strings: list
|
presence_strings: list
|
||||||
|
|||||||
+1
-1
@@ -7,7 +7,7 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
|||||||
'connections': 100,
|
'connections': 100,
|
||||||
'cookie_file': None,
|
'cookie_file': None,
|
||||||
'csv': False,
|
'csv': False,
|
||||||
'db_file': None,
|
'db_file': 'resources/data.json',
|
||||||
'debug': False,
|
'debug': False,
|
||||||
'disable_extracting': False,
|
'disable_extracting': False,
|
||||||
'disable_recursive_search': False,
|
'disable_recursive_search': False,
|
||||||
|
|||||||
Reference in New Issue
Block a user