mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-13 18:05:39 +00:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 40d16e8da6 | |||
| 251fc11df3 | |||
| de9a6135dc | |||
| b4fb100387 | |||
| 37d6247d3b | |||
| 7bdd09a757 | |||
| e164d0c463 | |||
| ebdb3d4503 | |||
| 4af9aec8d8 | |||
| a72dd1070c | |||
| e3f9bae813 | |||
| 719891e12c | |||
| 57fa85b451 | |||
| afd1cac3e1 | |||
| a258dbd716 | |||
| 4a49d4108e |
@@ -2,6 +2,12 @@
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.1.13] - 2021-02-06
|
||||||
|
* improved sites list filtering
|
||||||
|
* pretty console messages
|
||||||
|
* Yandex services updates
|
||||||
|
* false positives fixes
|
||||||
|
|
||||||
## [0.1.12] - 2021-01-28
|
## [0.1.12] - 2021-01-28
|
||||||
* added support of custom cookies
|
* added support of custom cookies
|
||||||
* fixed lots of false positives
|
* fixed lots of false positives
|
||||||
|
|||||||
@@ -1,30 +1,7 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2019 Soxoj
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2019 Sherlock Project
|
Copyright (c) 2019 Sherlock Project
|
||||||
|
Copyright (c) 2020-2021 Soxoj
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
+37
-31
@@ -26,7 +26,7 @@ from .report import save_csv_report, save_xmind_report, save_html_report, save_p
|
|||||||
from .result import QueryResult, QueryStatus
|
from .result import QueryResult, QueryStatus
|
||||||
from .sites import MaigretDatabase, MaigretSite
|
from .sites import MaigretDatabase, MaigretSite
|
||||||
|
|
||||||
__version__ = '0.1.12'
|
__version__ = '0.1.13'
|
||||||
|
|
||||||
supported_recursive_search_ids = (
|
supported_recursive_search_ids = (
|
||||||
'yandex_public_id',
|
'yandex_public_id',
|
||||||
@@ -805,13 +805,20 @@ async def main():
|
|||||||
if args.top_sites == 0 or args.all_sites:
|
if args.top_sites == 0 or args.all_sites:
|
||||||
args.top_sites = sys.maxsize
|
args.top_sites = sys.maxsize
|
||||||
|
|
||||||
|
# Create notify object for query results.
|
||||||
|
query_notify = QueryNotifyPrint(result=None,
|
||||||
|
verbose=args.verbose,
|
||||||
|
print_found_only=not args.print_not_found,
|
||||||
|
skip_check_errors=not args.print_check_errors,
|
||||||
|
color=not args.no_color)
|
||||||
|
|
||||||
# Create object with all information about sites we are aware of.
|
# Create object with all information about sites we are aware of.
|
||||||
try:
|
db = MaigretDatabase().load_from_file(args.json_file)
|
||||||
db = MaigretDatabase().load_from_file(args.json_file)
|
get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites, tags=args.tags,
|
||||||
site_data = db.ranked_sites_dict(top=args.top_sites, tags=args.tags, names=args.site_list)
|
names=args.site_list,
|
||||||
except Exception as error:
|
disabled=False, id_type=x)
|
||||||
print(f"ERROR: {error}")
|
|
||||||
sys.exit(1)
|
site_data = get_top_sites_for_id(args.id_type)
|
||||||
|
|
||||||
# Database self-checking
|
# Database self-checking
|
||||||
if args.self_check:
|
if args.self_check:
|
||||||
@@ -832,28 +839,25 @@ async def main():
|
|||||||
# Define one report filename template
|
# Define one report filename template
|
||||||
report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
|
report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
|
||||||
|
|
||||||
# Database consistency
|
# Database stats
|
||||||
enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values())))
|
# TODO: verbose info about filtered sites
|
||||||
print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')
|
# enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values())))
|
||||||
|
# print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')
|
||||||
|
|
||||||
if not enabled_count:
|
if usernames == {}:
|
||||||
print('No sites to check, exiting!')
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
if usernames == ['-']:
|
|
||||||
# magic params to exit after init
|
# magic params to exit after init
|
||||||
print('No usernames to check, exiting.')
|
query_notify.warning('No usernames to check, exiting.')
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
# Create notify object for query results.
|
if not site_data:
|
||||||
query_notify = QueryNotifyPrint(result=None,
|
query_notify.warning('No sites to check, exiting!')
|
||||||
verbose=args.verbose,
|
sys.exit(2)
|
||||||
print_found_only=not args.print_not_found,
|
else:
|
||||||
skip_check_errors=not args.print_check_errors,
|
query_notify.warning(f'Starting a search on top {len(site_data)} sites from the Maigret database...')
|
||||||
color=not args.no_color)
|
if not args.all_sites:
|
||||||
|
query_notify.warning(f'You can run search by full list of sites with flag `-a`', '!')
|
||||||
|
|
||||||
already_checked = set()
|
already_checked = set()
|
||||||
|
|
||||||
general_results = []
|
general_results = []
|
||||||
|
|
||||||
while usernames:
|
while usernames:
|
||||||
@@ -870,11 +874,13 @@ async def main():
|
|||||||
|
|
||||||
if found_unsupported_chars:
|
if found_unsupported_chars:
|
||||||
pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
|
pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
|
||||||
print(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
|
query_notify.warning(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
sites_to_check = get_top_sites_for_id(id_type)
|
||||||
|
|
||||||
results = await maigret(username,
|
results = await maigret(username,
|
||||||
dict(site_data),
|
dict(sites_to_check),
|
||||||
query_notify,
|
query_notify,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
timeout=args.timeout,
|
timeout=args.timeout,
|
||||||
@@ -905,22 +911,22 @@ async def main():
|
|||||||
if args.xmind:
|
if args.xmind:
|
||||||
filename = report_filepath_tpl.format(username=username, postfix='.xmind')
|
filename = report_filepath_tpl.format(username=username, postfix='.xmind')
|
||||||
save_xmind_report(filename, username, results)
|
save_xmind_report(filename, username, results)
|
||||||
print(f'XMind report for {username} saved in {filename}')
|
query_notify.warning(f'XMind report for {username} saved in {filename}')
|
||||||
|
|
||||||
if args.csv:
|
if args.csv:
|
||||||
filename = report_filepath_tpl.format(username=username, postfix='.csv')
|
filename = report_filepath_tpl.format(username=username, postfix='.csv')
|
||||||
save_csv_report(filename, username, results)
|
save_csv_report(filename, username, results)
|
||||||
print(f'CSV report for {username} saved in {filename}')
|
query_notify.warning(f'CSV report for {username} saved in {filename}')
|
||||||
|
|
||||||
if args.txt:
|
if args.txt:
|
||||||
filename = report_filepath_tpl.format(username=username, postfix='.txt')
|
filename = report_filepath_tpl.format(username=username, postfix='.txt')
|
||||||
save_txt_report(filename, username, results)
|
save_txt_report(filename, username, results)
|
||||||
print(f'TXT report for {username} saved in {filename}')
|
query_notify.warning(f'TXT report for {username} saved in {filename}')
|
||||||
|
|
||||||
# reporting for all the result
|
# reporting for all the result
|
||||||
if general_results:
|
if general_results:
|
||||||
if args.html or args.pdf:
|
if args.html or args.pdf:
|
||||||
print('Generating report info...')
|
query_notify.warning('Generating report info...')
|
||||||
report_context = generate_report_context(general_results)
|
report_context = generate_report_context(general_results)
|
||||||
# determine main username
|
# determine main username
|
||||||
username = report_context['username']
|
username = report_context['username']
|
||||||
@@ -928,12 +934,12 @@ async def main():
|
|||||||
if args.html:
|
if args.html:
|
||||||
filename = report_filepath_tpl.format(username=username, postfix='.html')
|
filename = report_filepath_tpl.format(username=username, postfix='.html')
|
||||||
save_html_report(filename, report_context)
|
save_html_report(filename, report_context)
|
||||||
print(f'HTML report on all usernames saved in {filename}')
|
query_notify.warning(f'HTML report on all usernames saved in {filename}')
|
||||||
|
|
||||||
if args.pdf:
|
if args.pdf:
|
||||||
filename = report_filepath_tpl.format(username=username, postfix='.pdf')
|
filename = report_filepath_tpl.format(username=username, postfix='.pdf')
|
||||||
save_pdf_report(filename, report_context)
|
save_pdf_report(filename, report_context)
|
||||||
print(f'PDF report on all usernames saved in {filename}')
|
query_notify.warning(f'PDF report on all usernames saved in {filename}')
|
||||||
# update database
|
# update database
|
||||||
db.save_to_file(args.json_file)
|
db.save_to_file(args.json_file)
|
||||||
|
|
||||||
|
|||||||
+6
-1
@@ -168,7 +168,12 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
else:
|
else:
|
||||||
print(f"[*] {title} {message} on:")
|
print(f"[*] {title} {message} on:")
|
||||||
|
|
||||||
return
|
def warning(self, message, symbol='-'):
|
||||||
|
msg = f'[{symbol}] {message}'
|
||||||
|
if self.color:
|
||||||
|
print(Style.BRIGHT + Fore.YELLOW + msg)
|
||||||
|
else:
|
||||||
|
print(msg)
|
||||||
|
|
||||||
def get_additional_data_text(self, items, prepend=''):
|
def get_additional_data_text(self, items, prepend=''):
|
||||||
text = ''
|
text = ''
|
||||||
|
|||||||
+57
-15
@@ -311,7 +311,7 @@
|
|||||||
"INTERNAL_SERVER_ERROR": "Site error",
|
"INTERNAL_SERVER_ERROR": "Site error",
|
||||||
"Something just went wrong": "Site error"
|
"Something just went wrong": "Site error"
|
||||||
},
|
},
|
||||||
"urlProbe": "https://api.500px.com/graphql?operationName=ProfileRendererQuery&variables=%7B%22username%22%3A%22{username}%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%225a17a9af1830b58b94a912995b7947b24f27f1301c6ea8ab71a9eb1a6a86585b%22%7D%7D",
|
"urlProbe": "https://api.500px.com/graphql?operationName=ProfileRendererQuery&variables=%7B%22username%22%3A%22{username}%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22105058632482dd2786fd5775745908dc928f537b28e28356b076522757d65c19%22%7D%7D",
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": "No message available",
|
"absenceStrs": "No message available",
|
||||||
"alexaRank": 3175,
|
"alexaRank": 3175,
|
||||||
@@ -4310,6 +4310,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"FilmWeb": {
|
"FilmWeb": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"films",
|
"films",
|
||||||
"pl"
|
"pl"
|
||||||
@@ -6705,7 +6706,14 @@
|
|||||||
"tags": [
|
"tags": [
|
||||||
"in"
|
"in"
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"urlProbe": "https://issuu.com/query?format=json&_=3210224608766&profileUsername={username}&action=issuu.user.get_anonymous",
|
||||||
|
"checkType": "message",
|
||||||
|
"presenseStrs": [
|
||||||
|
"displayName"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"No such user"
|
||||||
|
],
|
||||||
"alexaRank": 452,
|
"alexaRank": 452,
|
||||||
"url": "https://issuu.com/{username}",
|
"url": "https://issuu.com/{username}",
|
||||||
"urlMain": "https://issuu.com/",
|
"urlMain": "https://issuu.com/",
|
||||||
@@ -7096,7 +7104,12 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": "<title>\u0412\u043d\u0438\u043c\u0430\u043d\u0438\u0435</title>",
|
"presenseStrs": [
|
||||||
|
"profile-content"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"\u0423\u043f\u0441! \u0412\u043e\u0442 \u044d\u0442\u043e \u043f\u043e\u0432\u043e\u0440\u043e\u0442!"
|
||||||
|
],
|
||||||
"alexaRank": 41066,
|
"alexaRank": 41066,
|
||||||
"url": "https://kosmetista.ru/profile/{username}/",
|
"url": "https://kosmetista.ru/profile/{username}/",
|
||||||
"urlMain": "https://kosmetista.ru",
|
"urlMain": "https://kosmetista.ru",
|
||||||
@@ -7374,7 +7387,12 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u0441 \u0442\u0430\u043a\u0438\u043c \u0438\u043c\u0435\u043d\u0435\u043c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d.",
|
"presenseStrs": [
|
||||||
|
"<table class=\"profil-tabl\">"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u0441 \u0442\u0430\u043a\u0438\u043c \u0438\u043c\u0435\u043d\u0435\u043c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d."
|
||||||
|
],
|
||||||
"alexaRank": 227594,
|
"alexaRank": 227594,
|
||||||
"url": "https://life-dom2.su/user/{username}",
|
"url": "https://life-dom2.su/user/{username}",
|
||||||
"urlMain": "https://life-dom2.su",
|
"urlMain": "https://life-dom2.su",
|
||||||
@@ -10354,7 +10372,12 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": "The page you are looking for doesn\u2019t exist",
|
"presenseStrs": [
|
||||||
|
"{\"username\""
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"We seem to have lost this page"
|
||||||
|
],
|
||||||
"alexaRank": 12727,
|
"alexaRank": 12727,
|
||||||
"url": "https://www.producthunt.com/@{username}",
|
"url": "https://www.producthunt.com/@{username}",
|
||||||
"urlMain": "https://www.producthunt.com/",
|
"urlMain": "https://www.producthunt.com/",
|
||||||
@@ -13544,7 +13567,7 @@
|
|||||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||||
"x-guest-token": "1354842807018663939"
|
"x-guest-token": "1358064134064140290"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Bad guest token": "x-guest-token update required"
|
"Bad guest token": "x-guest-token update required"
|
||||||
@@ -13911,7 +13934,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTE4NTU2MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.Gohkz2XqInremcVPgql1SCk4xRsmX2BbKLTeB_1aw4s"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTI2MjQ4NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.kgp8r380d1aDWcd-ROncr0Tqf8EdA-l35EeEY9is6TI"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
@@ -14778,7 +14801,6 @@
|
|||||||
},
|
},
|
||||||
"YandexBugbounty": {
|
"YandexBugbounty": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"global",
|
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"checkType": "status_code",
|
||||||
@@ -14790,7 +14812,6 @@
|
|||||||
},
|
},
|
||||||
"YandexCollections": {
|
"YandexCollections": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"global",
|
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
@@ -14805,14 +14826,14 @@
|
|||||||
],
|
],
|
||||||
"absenceStrs": "cl-not-found-content__title",
|
"absenceStrs": "cl-not-found-content__title",
|
||||||
"alexaRank": 47,
|
"alexaRank": 47,
|
||||||
"url": "https://yandex.ru/collections/user/{username}/",
|
"url": "https://yandex.ru/collections/api/users/{username}/",
|
||||||
"urlMain": "https://yandex.ru/collections/",
|
"urlMain": "https://yandex.ru/collections/",
|
||||||
"usernameClaimed": "yandex",
|
"usernameClaimed": "yandex",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"YandexLocal": {
|
"YandexLocal": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"global",
|
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"type": "yandex_public_id",
|
"type": "yandex_public_id",
|
||||||
@@ -14824,8 +14845,8 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis77777"
|
"usernameUnclaimed": "noonewouldeverusethis77777"
|
||||||
},
|
},
|
||||||
"YandexMarket": {
|
"YandexMarket": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"global",
|
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"type": "yandex_public_id",
|
"type": "yandex_public_id",
|
||||||
@@ -14839,7 +14860,6 @@
|
|||||||
},
|
},
|
||||||
"YandexMusic": {
|
"YandexMusic": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"global",
|
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
@@ -14853,7 +14873,7 @@
|
|||||||
"usernameClaimed": "YandexMusic",
|
"usernameClaimed": "YandexMusic",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis77777"
|
"usernameUnclaimed": "noonewouldeverusethis77777"
|
||||||
},
|
},
|
||||||
"YandexSoberu": {
|
"Soberu": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
@@ -14866,7 +14886,6 @@
|
|||||||
},
|
},
|
||||||
"YandexZnatoki": {
|
"YandexZnatoki": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"global",
|
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"type": "yandex_public_id",
|
"type": "yandex_public_id",
|
||||||
@@ -14877,6 +14896,29 @@
|
|||||||
"usernameClaimed": "blue",
|
"usernameClaimed": "blue",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis77777"
|
"usernameUnclaimed": "noonewouldeverusethis77777"
|
||||||
},
|
},
|
||||||
|
"YandexZenChannel": {
|
||||||
|
"tags": [
|
||||||
|
"ru"
|
||||||
|
],
|
||||||
|
"checkType": "status_code",
|
||||||
|
"alexaRank": 49,
|
||||||
|
"url": "https://zen.yandex.ru/{username}",
|
||||||
|
"urlMain": "https://zen.yandex.ru",
|
||||||
|
"usernameClaimed": "tema",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis77777"
|
||||||
|
},
|
||||||
|
"YandexZenUser": {
|
||||||
|
"tags": [
|
||||||
|
"ru"
|
||||||
|
],
|
||||||
|
"type": "yandex_public_id",
|
||||||
|
"checkType": "status_code",
|
||||||
|
"alexaRank": 49,
|
||||||
|
"url": "https://zen.yandex.ru/user/{username}",
|
||||||
|
"urlMain": "https://zen.yandex.ru",
|
||||||
|
"usernameClaimed": "20vpvmmwpnwyb0dpbnjvy3k14c",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis77777"
|
||||||
|
},
|
||||||
"Yapisal": {
|
"Yapisal": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"tr"
|
"tr"
|
||||||
|
|||||||
+15
-10
@@ -140,22 +140,27 @@ class MaigretDatabase:
|
|||||||
def sites_dict(self):
|
def sites_dict(self):
|
||||||
return {site.name: site for site in self._sites}
|
return {site.name: site for site in self._sites}
|
||||||
|
|
||||||
def ranked_sites_dict(self, reverse=False, top=sys.maxsize, tags=[], names=[]):
|
def ranked_sites_dict(self, reverse=False, top=sys.maxsize, tags=[], names=[],
|
||||||
|
disabled=True, id_type='username'):
|
||||||
|
"""
|
||||||
|
Ranking and filtering of the sites list
|
||||||
|
"""
|
||||||
normalized_names = list(map(str.lower, names))
|
normalized_names = list(map(str.lower, names))
|
||||||
normalized_tags = list(map(str.lower, tags))
|
normalized_tags = list(map(str.lower, tags))
|
||||||
|
|
||||||
def is_tags_ok(site):
|
|
||||||
intersected_tags = set(site.tags).intersection(set(normalized_tags))
|
|
||||||
is_disabled = 'disabled' in tags and site.disabled
|
|
||||||
return intersected_tags or is_disabled
|
|
||||||
|
|
||||||
is_name_ok = lambda x: x.name.lower() in normalized_names
|
is_name_ok = lambda x: x.name.lower() in normalized_names
|
||||||
is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
|
is_engine_ok = lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
|
||||||
|
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
|
||||||
|
is_disabled_needed = lambda x: not x.disabled or ('disabled' in tags or disabled)
|
||||||
|
is_id_type_ok = lambda x: x.type == id_type
|
||||||
|
|
||||||
if not tags and not names:
|
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
|
||||||
filtered_list = self.sites
|
filter_names_fun = lambda x: not names or is_name_ok(x)
|
||||||
else:
|
|
||||||
filtered_list = [s for s in self.sites if is_tags_ok(s) or is_name_ok(s) or is_engine_ok(s)]
|
filter_fun = lambda x: filter_tags_engines_fun(x) and filter_names_fun(x) \
|
||||||
|
and is_disabled_needed(x) and is_id_type_ok(x)
|
||||||
|
|
||||||
|
filtered_list = [s for s in self.sites if filter_fun(s)]
|
||||||
|
|
||||||
sorted_list = sorted(filtered_list, key=lambda x: x.alexa_rank, reverse=reverse)[:top]
|
sorted_list = sorted(filtered_list, key=lambda x: x.alexa_rank, reverse=reverse)[:top]
|
||||||
return {site.name: site for site in sorted_list}
|
return {site.name: site for site in sorted_list}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
|
|||||||
requires = rf.read().splitlines()
|
requires = rf.read().splitlines()
|
||||||
|
|
||||||
setup(name='maigret',
|
setup(name='maigret',
|
||||||
version='0.1.12',
|
version='0.1.13',
|
||||||
description='Collect a dossier on a person by username from a huge number of sites',
|
description='Collect a dossier on a person by username from a huge number of sites',
|
||||||
long_description=long_description,
|
long_description=long_description,
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
|||||||
+86
-50
@@ -1,48 +1,47 @@
|
|||||||
## Demo with page parsing and recursive username search
|
## Demo with page parsing and recursive username search
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3 maigret --ids --print-found --skip-errors alexaimephotographycars
|
$ maigret.py alexaimephotographycars
|
||||||
|
Sites in database, enabled/total: 492/500
|
||||||
[*] Checking username alexaimephotographycars on:
|
[*] Checking username alexaimephotographycars on:
|
||||||
[+] 500px: https://500px.com/p/alexaimephotographycars
|
[+] 500px: https://500px.com/p/alexaimephotographycars
|
||||||
┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
|
┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
|
||||||
┣╸legacy_id: 26403415
|
┣╸legacy_id: 26403415
|
||||||
┣╸username: alexaimephotographycars
|
┣╸username: alexaimephotographycars
|
||||||
┣╸name: Alex Aimé
|
┣╸name: Alex Aimé
|
||||||
|
┣╸created_at: 2018-05-04T10:17:01.000+0000
|
||||||
|
┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
|
||||||
|
┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
|
||||||
┣╸website: www.flickr.com/photos/alexaimephotography/
|
┣╸website: www.flickr.com/photos/alexaimephotography/
|
||||||
┣╸facebook_link: www.instagram.com/street.reality.photography/
|
┣╸facebook_link: www.instagram.com/street.reality.photography/
|
||||||
┣╸instagram_username: alexaimephotography
|
┣╸instagram_username: alexaimephotography
|
||||||
┗╸twitter_username: Alexaimephotogr
|
┗╸twitter_username: Alexaimephotogr
|
||||||
[*] Checking username alexaimephotography on:
|
[*] Checking username alexaimephotography on:
|
||||||
[+] DeviantART: https://alexaimephotography.deviantart.com
|
[+] Vimeo: https://vimeo.com/alexaimephotography
|
||||||
┣╸country: France
|
┣╸uid: 75857717
|
||||||
┣╸registered_for_seconds: 55040868
|
┣╸gender: m
|
||||||
┣╸gender: male
|
┣╸image: https://i.vimeocdn.com/portrait/22443952_360x360
|
||||||
┣╸username: Alexaimephotography
|
┣╸bio: Hello
|
||||||
┣╸twitter_username: alexaimephotogr
|
Passionate about photography for several years. I set the video recently.
|
||||||
┣╸website: www.instagram.com/alexaimephotography/
|
I use my Nikon d7200 and Nikkor 50mm 1.8d . Premiere Pro software.
|
||||||
┗╸links:
|
Follow me on :
|
||||||
┗╸ https://www.instagram.com/alexaimephotography/
|
https://www.instagram.com/alexaimephotography/
|
||||||
[+] EyeEm: https://www.eyeem.com/u/alexaimephotography
|
https://500px.com/alexaimephotography
|
||||||
┣╸eyeem_id: 21974802
|
|
||||||
┣╸eyeem_username: alexaimephotography
|
|
||||||
┣╸fullname: Alex
|
Bonjour
|
||||||
┣╸followers: 10
|
Passionné par la photographie depuis quelques années . Je me suis mis à la video depuis peu.
|
||||||
┣╸friends: 2
|
J'utilise mon Nikon d7200 et l'objectif Nikkor 50mm 1.8d .Comme logiciel Premiere pro cc.
|
||||||
┣╸liked_photos: 37
|
Suivez moi sur :
|
||||||
┣╸photos: 10
|
https://www.instagram.com/alexaimephotography/
|
||||||
┗╸facebook_uid: 1534915183474093
|
https://500px.com/alexaimephotography
|
||||||
[+] Facebook: https://www.facebook.com/alexaimephotography
|
┣╸location: France
|
||||||
[+] Gramho: https://gramho.com/explore-hashtag/alexaimephotography
|
┣╸username: AlexAimePhotography
|
||||||
[+] Instagram: https://www.instagram.com/alexaimephotography
|
┣╸is_verified: True
|
||||||
┣╸username: alexaimephotography
|
┣╸created_at: 2017-12-06T11:49:28+00:00
|
||||||
┣╸full_name: Alexaimephotography
|
┣╸videos: 14
|
||||||
┣╸id: 6828488620
|
┣╸is_looking_for_job: False
|
||||||
┣╸biography: 🇮🇹 🇲🇫 🇩🇪
|
┗╸is_working_remotely: False
|
||||||
Amateur photographer
|
|
||||||
Follow me @street.reality.photography
|
|
||||||
Sony A7ii
|
|
||||||
┗╸external_url: https://www.flickr.com/photos/alexaimephotography2020/
|
|
||||||
[+] Picuki: https://www.picuki.com/profile/alexaimephotography
|
|
||||||
[+] Pinterest: https://www.pinterest.com/alexaimephotography/
|
[+] Pinterest: https://www.pinterest.com/alexaimephotography/
|
||||||
┣╸pinterest_username: alexaimephotography
|
┣╸pinterest_username: alexaimephotography
|
||||||
┣╸fullname: alexaimephotography
|
┣╸fullname: alexaimephotography
|
||||||
@@ -56,35 +55,72 @@ Sony A7ii
|
|||||||
┣╸is_indexed: True
|
┣╸is_indexed: True
|
||||||
┣╸is_verified_merchant: False
|
┣╸is_verified_merchant: False
|
||||||
┗╸locale: fr
|
┗╸locale: fr
|
||||||
|
[+] VK: https://vk.com/alexaimephotography
|
||||||
|
[+] Facebook: https://www.facebook.com/alexaimephotography
|
||||||
|
[+] Tumblr: https://alexaimephotography.tumblr.com/
|
||||||
|
┣╸fullname: Alex Aimé Photography
|
||||||
|
┣╸title: My name is Alex Aimé, and i am a freelance photographer. Originally from Burgundy in France .I am a man of 29 years. Follow me on : www.facebook.com/AlexAimePhotography/
|
||||||
|
┗╸links:
|
||||||
|
┣╸ https://www.facebook.com/AlexAimePhotography/
|
||||||
|
┣╸ https://500px.com/alexaimephotography
|
||||||
|
┣╸ https://www.instagram.com/alexaimephotography/
|
||||||
|
┗╸ https://www.flickr.com/photos/photoambiance/
|
||||||
|
[+] Picuki: https://www.picuki.com/profile/alexaimephotography
|
||||||
|
[+] Instagram: https://www.instagram.com/alexaimephotography
|
||||||
|
┣╸instagram_username: alexaimephotography
|
||||||
|
┣╸fullname: Alexaimephotography
|
||||||
|
┣╸id: 6828488620
|
||||||
|
┣╸image: https://instagram.fhel6-1.fna.fbcdn.net/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=instagram.fhel6-1.fna.fbcdn.net&_nc_ohc=PuXb4vhtU1EAX-ln7aE&tp=1&oh=434faf2ef40e30e8416e63d10e1a5dbf&oe=6041F6EF
|
||||||
|
┣╸bio: Photographer
|
||||||
|
Child of fine street arts
|
||||||
|
┗╸external_url: https://www.flickr.com/photos/alexaimephotography2020/
|
||||||
|
[+] We Heart It: https://weheartit.com/alexaimephotography
|
||||||
[+] Reddit: https://www.reddit.com/user/alexaimephotography
|
[+] Reddit: https://www.reddit.com/user/alexaimephotography
|
||||||
┣╸reddit_id: t5_1nytpy
|
┣╸reddit_id: t5_1nytpy
|
||||||
┣╸reddit_username: alexaimephotography
|
┣╸reddit_username: alexaimephotography
|
||||||
┣╸display_name: alexaimephotography
|
┣╸fullname: alexaimephotography
|
||||||
|
┣╸image: https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e
|
||||||
┣╸is_employee: False
|
┣╸is_employee: False
|
||||||
┣╸is_nsfw: False
|
┣╸is_nsfw: False
|
||||||
┣╸is_mod: True
|
┣╸is_mod: True
|
||||||
┣╸is_following: True
|
┣╸is_following: True
|
||||||
┣╸has_user_profile: True
|
┣╸has_user_profile: True
|
||||||
┣╸hide_from_robots: False
|
┣╸hide_from_robots: False
|
||||||
┣╸created_utc: 1562750403
|
┣╸created_at: 2019-07-10 12:20:03
|
||||||
┣╸total_karma: 43075
|
┣╸total_karma: 54958
|
||||||
┗╸post_karma: 42574
|
┗╸post_karma: 53698
|
||||||
[+] Tumblr: https://alexaimephotography.tumblr.com/
|
[+] DeviantART: https://alexaimephotography.deviantart.com
|
||||||
[+] VK: https://vk.com/alexaimephotography
|
┣╸country: France
|
||||||
[+] Vimeo: https://vimeo.com/alexaimephotography
|
┣╸created_at: 2018-12-09 16:02:10
|
||||||
┣╸uid: 75857717
|
┣╸gender: male
|
||||||
┣╸name: AlexAimePhotography
|
┣╸username: Alexaimephotography
|
||||||
┣╸username: alexaimephotography
|
┣╸twitter_username: alexaimephotogr
|
||||||
┣╸location: France
|
┣╸website: www.instagram.com/alexaimephotography/
|
||||||
┣╸created_at: 2017-12-06 06:49:28
|
|
||||||
┣╸is_staff: False
|
|
||||||
┗╸links:
|
┗╸links:
|
||||||
┣╸ https://500px.com/alexaimephotography
|
┗╸ https://www.instagram.com/alexaimephotography/
|
||||||
┣╸ https://www.flickr.com/photos/photoambiance/
|
[+] EyeEm: https://www.eyeem.com/u/alexaimephotography
|
||||||
┣╸ https://www.instagram.com/alexaimephotography/
|
┣╸eyeem_id: 21974802
|
||||||
┣╸ https://www.youtube.com/channel/UC4NiYV3Yqih2WHcwKg4uPuQ
|
┣╸eyeem_username: alexaimephotography
|
||||||
┗╸ https://flii.by/alexaimephotography/
|
┣╸fullname: Alex
|
||||||
[+] We Heart It: https://weheartit.com/alexaimephotography
|
┣╸follower_count: 10
|
||||||
|
┣╸friends: 2
|
||||||
|
┣╸liked_photos: 37
|
||||||
|
┣╸photos: 10
|
||||||
|
┗╸facebook_uid: 1534915183474093
|
||||||
[*] Checking username Alexaimephotogr on:
|
[*] Checking username Alexaimephotogr on:
|
||||||
[+] Twitter: https://twitter.com/Alexaimephotogr
|
[+] Twitter: https://twitter.com/Alexaimephotogr
|
||||||
|
┣╸uid: VXNlcjo5NDYzODMzNTA3ODAxMDQ3MDQ=
|
||||||
|
┣╸fullname: AlexAimephotography
|
||||||
|
┣╸bio: Photographe amateur
|
||||||
|
New gear :
|
||||||
|
Sony A7 ii
|
||||||
|
Sony FE 85mm 1.8
|
||||||
|
┣╸created_at: 2017-12-28 14:12:28+00:00
|
||||||
|
┣╸image: https://pbs.twimg.com/profile_images/1089860309895049218/5DucgDw1.jpg
|
||||||
|
┣╸image_bg: https://pbs.twimg.com/profile_banners/946383350780104704/1548759346
|
||||||
|
┣╸is_protected: False
|
||||||
|
┣╸follower_count: 303
|
||||||
|
┣╸following_count: 76
|
||||||
|
┣╸location: France
|
||||||
|
┗╸favourites_count: 6705
|
||||||
```
|
```
|
||||||
+33
-6
@@ -131,13 +131,40 @@ def test_ranked_sites_dict():
|
|||||||
# filtering by engine
|
# filtering by engine
|
||||||
assert list(db.ranked_sites_dict(tags=['ucoz']).keys()) == ['3']
|
assert list(db.ranked_sites_dict(tags=['ucoz']).keys()) == ['3']
|
||||||
|
|
||||||
|
# disjunction
|
||||||
|
assert list(db.ranked_sites_dict(names=['2'], tags=['forum']).keys()) == ['2']
|
||||||
|
assert list(db.ranked_sites_dict(names=['2'], tags=['ucoz']).keys()) == []
|
||||||
|
assert list(db.ranked_sites_dict(names=['4'], tags=['ru']).keys()) == []
|
||||||
|
|
||||||
|
# reverse
|
||||||
|
assert list(db.ranked_sites_dict(reverse=True).keys()) == ['3', '2', '1']
|
||||||
|
|
||||||
|
|
||||||
|
def test_ranked_sites_dict_names():
|
||||||
|
db = MaigretDatabase()
|
||||||
|
db.update_site(MaigretSite('3', {'alexaRank': 30}))
|
||||||
|
db.update_site(MaigretSite('1', {'alexaRank': 2}))
|
||||||
|
db.update_site(MaigretSite('2', {'alexaRank': 10}))
|
||||||
|
|
||||||
# filtering by names
|
# filtering by names
|
||||||
assert list(db.ranked_sites_dict(names=['1', '2']).keys()) == ['1', '2']
|
assert list(db.ranked_sites_dict(names=['1', '2']).keys()) == ['1', '2']
|
||||||
assert list(db.ranked_sites_dict(names=['2', '3']).keys()) == ['2', '3']
|
assert list(db.ranked_sites_dict(names=['2', '3']).keys()) == ['2', '3']
|
||||||
|
|
||||||
# disjunction
|
|
||||||
assert list(db.ranked_sites_dict(names=['2'], tags=['forum']).keys()) == ['1', '2']
|
def test_ranked_sites_dict_disabled():
|
||||||
assert list(db.ranked_sites_dict(names=['2'], tags=['forum'], reverse=True).keys()) == ['2', '1']
|
db = MaigretDatabase()
|
||||||
assert list(db.ranked_sites_dict(names=['2'], tags=['ucoz']).keys()) == ['2', '3']
|
db.update_site(MaigretSite('1', {'disabled': True}))
|
||||||
assert list(db.ranked_sites_dict(names=['4'], tags=['ru']).keys()) == ['2']
|
db.update_site(MaigretSite('2', {}))
|
||||||
assert list(db.ranked_sites_dict(names=['4'], tags=['nosuchtag']).keys()) == []
|
|
||||||
|
assert len(db.ranked_sites_dict()) == 2
|
||||||
|
assert len(db.ranked_sites_dict(disabled=False)) == 1
|
||||||
|
|
||||||
|
def test_ranked_sites_dict_id_type():
|
||||||
|
db = MaigretDatabase()
|
||||||
|
db.update_site(MaigretSite('1', {}))
|
||||||
|
db.update_site(MaigretSite('2', {'type': 'username'}))
|
||||||
|
db.update_site(MaigretSite('3', {'type': 'gaia_id'}))
|
||||||
|
|
||||||
|
assert len(db.ranked_sites_dict()) == 2
|
||||||
|
assert len(db.ranked_sites_dict(id_type='username')) == 2
|
||||||
|
assert len(db.ranked_sites_dict(id_type='gaia_id')) == 1
|
||||||
|
|||||||
Reference in New Issue
Block a user