Added some domains for the new DNS checker; fixed report generation crashes

This commit is contained in:
Soxoj
2021-06-02 23:16:44 +03:00
parent 5179cb56eb
commit 13e1b6f4d1
8 changed files with 192 additions and 20 deletions
+12 -3
View File
@@ -738,7 +738,12 @@ def timeout_check(value):
async def site_self_check( async def site_self_check(
site: MaigretSite, logger, semaphore, db: MaigretDatabase, silent=False, tor_proxy=None site: MaigretSite,
logger,
semaphore,
db: MaigretDatabase,
silent=False,
tor_proxy=None,
): ):
changes = { changes = {
"disabled": False, "disabled": False,
@@ -812,8 +817,12 @@ async def site_self_check(
async def self_check( async def self_check(
db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10, db: MaigretDatabase,
tor_proxy=None site_data: dict,
logger,
silent=False,
max_connections=10,
tor_proxy=None,
) -> bool: ) -> bool:
sem = asyncio.Semaphore(max_connections) sem = asyncio.Semaphore(max_connections)
tasks = [] tasks = []
+5 -2
View File
@@ -529,8 +529,11 @@ async def main():
if args.self_check: if args.self_check:
print('Maigret sites database self-checking...') print('Maigret sites database self-checking...')
is_need_update = await self_check( is_need_update = await self_check(
db, site_data, logger, max_connections=args.connections, db,
tor_proxy=args.tor_proxy site_data,
logger,
max_connections=args.connections,
tor_proxy=args.tor_proxy,
) )
if is_need_update: if is_need_update:
if input('Do you want to save changes permanently? [Yn]\n').lower() in ( if input('Do you want to save changes permanently? [Yn]\n').lower() in (
+20 -7
View File
@@ -40,7 +40,9 @@ def sort_report_by_data_points(results):
return dict( return dict(
sorted( sorted(
results.items(), results.items(),
key=lambda x: len((x[1].get('status') and x[1]['status'].ids_data or {}).keys()), key=lambda x: len(
(x[1].get('status') and x[1]['status'].ids_data or {}).keys()
),
reverse=True, reverse=True,
) )
) )
@@ -253,14 +255,18 @@ def generate_csv_report(username: str, results: dict, csvfile):
["username", "name", "url_main", "url_user", "exists", "http_status"] ["username", "name", "url_main", "url_user", "exists", "http_status"]
) )
for site in results: for site in results:
# TODO: fix the reason
status = 'Unknown'
if "status" in results[site]:
status = str(results[site]["status"].status)
writer.writerow( writer.writerow(
[ [
username, username,
site, site,
results[site]["url_main"], results[site].get("url_main", ""),
results[site]["url_user"], results[site].get("url_user", ""),
str(results[site]["status"].status), status,
results[site]["http_status"], results[site].get("http_status", 0),
] ]
) )
@@ -272,7 +278,10 @@ def generate_txt_report(username: str, results: dict, file):
# TODO: fix no site data issue # TODO: fix no site data issue
if not dictionary: if not dictionary:
continue continue
if dictionary.get("status").status == QueryStatus.CLAIMED: if (
dictionary.get("status")
and dictionary["status"].status == QueryStatus.CLAIMED
):
exists_counter += 1 exists_counter += 1
file.write(dictionary["url_user"] + "\n") file.write(dictionary["url_user"] + "\n")
file.write(f"Total Websites Username Detected On : {exists_counter}") file.write(f"Total Websites Username Detected On : {exists_counter}")
@@ -285,7 +294,10 @@ def generate_json_report(username: str, results: dict, file, report_type):
for sitename in results: for sitename in results:
site_result = results[sitename] site_result = results[sitename]
# TODO: fix no site data issue # TODO: fix no site data issue
if not site_result or site_result.get("status").status != QueryStatus.CLAIMED: if not site_result or not site_result.get("status"):
continue
if site_result["status"].status != QueryStatus.CLAIMED:
continue continue
data = dict(site_result) data = dict(site_result)
@@ -345,6 +357,7 @@ def design_xmind_sheet(sheet, username, results):
if not dictionary: if not dictionary:
continue continue
result_status = dictionary.get("status") result_status = dictionary.get("status")
# TODO: fix the reason
if not result_status or result_status.status != QueryStatus.CLAIMED: if not result_status or result_status.status != QueryStatus.CLAIMED:
continue continue
+51 -3
View File
@@ -13024,7 +13024,7 @@
"us" "us"
], ],
"headers": { "headers": {
"authorization": "Bearer BQBKzy1QSQQO4wR2vRVROUOaj8T9gr0Vkjup9wUkLh0MZDtMEVZ0WEtyoZ_tTc4utIhyvvn9V7URwVWGeuU" "authorization": "Bearer BQDEpoSTjg2Ko86QUHZjJmZvp5AuI1ru6rJySe8_cD0bRqMZk6PfmdsmJBu3QeiNHgUPGQPDz2VeSvRr16w"
}, },
"errors": { "errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14450,7 +14450,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1397644352072163331" "x-guest-token": "1400174453577900043"
}, },
"errors": { "errors": {
"Bad guest token": "x-guest-token update required" "Bad guest token": "x-guest-token update required"
@@ -14857,7 +14857,7 @@
"video" "video"
], ],
"headers": { "headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjIwNjAyODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.RBYc81QRYfs9m7yzcGkUXhyA3rGPhQJaoAG8dnt61I4" "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjI2NjM1MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.bKcisdrE5nJZMvrbagUC8lZQOs9spg3IKMlK15IclM4"
}, },
"activation": { "activation": {
"url": "https://vimeo.com/_rv/viewer", "url": "https://vimeo.com/_rv/viewer",
@@ -27783,6 +27783,54 @@
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code" "checkType": "status_code"
}, },
".pro": {
"protocol": "dns",
"url": "{username}.pro",
"urlMain": "{username}.pro",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".me": {
"protocol": "dns",
"url": "{username}.me",
"urlMain": "{username}.me",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".biz": {
"protocol": "dns",
"url": "{username}.biz",
"urlMain": "{username}.biz",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".email": {
"protocol": "dns",
"url": "{username}.email",
"urlMain": "{username}.email",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".guru": {
"protocol": "dns",
"url": "{username}.guru",
"urlMain": "{username}.guru",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".ddns.net": {
"protocol": "dns",
"url": "{username}.ddns.net",
"urlMain": "{username}.ddns.net",
"usernameClaimed": "repack",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
"Ameblo": { "Ameblo": {
"absenceStrs": [ "absenceStrs": [
"THROW_NOT_FOUND_EXCEPTION" "THROW_NOT_FOUND_EXCEPTION"
+1 -1
View File
@@ -68,7 +68,7 @@
<div class="row-mb"> <div class="row-mb">
<div class="col-md"> <div class="col-md">
<div class="card flex-md-row mb-4 box-shadow h-md-250"> <div class="card flex-md-row mb-4 box-shadow h-md-250">
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true"> <img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status and v.status.ids_data and v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;"> <div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
<h3 class="mb-0" style="padding-top: 1rem;"> <h3 class="mb-0" style="padding-top: 1rem;">
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a> <a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
+2 -1
View File
@@ -304,12 +304,13 @@ class MaigretDatabase:
lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
) )
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags)) is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
is_protocol_in_tags = lambda x: x.protocol and x.protocol in normalized_tags
is_disabled_needed = lambda x: not x.disabled or ( is_disabled_needed = lambda x: not x.disabled or (
"disabled" in tags or disabled "disabled" in tags or disabled
) )
is_id_type_ok = lambda x: x.type == id_type is_id_type_ok = lambda x: x.type == id_type
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x) filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x) or is_protocol_in_tags(x)
filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x) filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
filter_fun = ( filter_fun = (
+3 -1
View File
@@ -209,7 +209,9 @@ async def check_features_manually(
): ):
custom_headers = {} custom_headers = {}
while True: while True:
header_key = input('Specify custom header if you need or just press Enter to skip. Header name: ') header_key = input(
'Specify custom header if you need or just press Enter to skip. Header name: '
)
if not header_key: if not header_key:
break break
header_value = input('Header value: ') header_value = input('Header value: ')
+98 -2
View File
@@ -45,6 +45,19 @@ EXAMPLE_RESULTS = {
} }
} }
# Site results whose only entry has no 'status' key; the *_broken tests in
# this module feed it to the report generators.
BROKEN_RESULTS = {
'GitHub': {
'username': 'test',
'parsing_enabled': True,
'url_main': 'https://www.github.com/',
'url_user': 'https://www.github.com/test',
'http_status': 200,
'is_similar': False,
'rank': 78,
'site': MaigretSite('test', {}),
}
}
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT) GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global'] GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
GOOD_500PX_RESULT.ids_data = { GOOD_500PX_RESULT.ids_data = {
@@ -239,10 +252,13 @@ TEST = [
] ]
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts.""" SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
SUPPOSED_BROKEN_BRIEF = """Search by username alexaimephotographycars returned 0 accounts. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 2 accounts."""
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>" SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
SUPPOSED_BROKEN_GEO = "Geo: us <span class=\"text-muted\">(2)</span>"
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
SUPPOSED_BROKEN_INTERESTS = "Interests: news <span class=\"text-muted\">(1)</span>, photo <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
def test_generate_report_template(): def test_generate_report_template():
@@ -270,6 +286,19 @@ def test_generate_csv_report():
] ]
def test_generate_csv_report_broken():
    """Check the CSV report for a site entry that has no 'status' key.

    The row must still be written, with 'Unknown' in the ``exists`` column.
    """
    header_row = 'username,name,url_main,url_user,exists,http_status\r\n'
    site_row = (
        'test,GitHub,https://www.github.com/,'
        'https://www.github.com/test,Unknown,200\r\n'
    )

    buffer = StringIO()
    generate_csv_report('test', BROKEN_RESULTS, buffer)

    # Rewind so the lines just written can be read back.
    buffer.seek(0)
    written_lines = buffer.readlines()

    assert written_lines == [header_row, site_row]
def test_generate_txt_report(): def test_generate_txt_report():
txtfile = StringIO() txtfile = StringIO()
generate_txt_report('test', EXAMPLE_RESULTS, txtfile) generate_txt_report('test', EXAMPLE_RESULTS, txtfile)
@@ -283,6 +312,18 @@ def test_generate_txt_report():
] ]
def test_generate_txt_report_broken():
    """Check the TXT report for a site entry that has no 'status' key.

    No URL line is expected, only the summary line with a zero count.
    """
    buffer = StringIO()
    generate_txt_report('test', BROKEN_RESULTS, buffer)

    # Rewind so the lines just written can be read back.
    buffer.seek(0)
    written_lines = buffer.readlines()

    expected_lines = ['Total Websites Username Detected On : 0']
    assert written_lines == expected_lines
def test_generate_json_simple_report(): def test_generate_json_simple_report():
jsonfile = StringIO() jsonfile = StringIO()
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS) MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
@@ -296,6 +337,19 @@ def test_generate_json_simple_report():
assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2'] assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2']
def test_generate_json_simple_report_broken():
    """Check the simple JSON report when site entries have no 'status' key.

    Two site names sharing the same status-less entry are reported; the
    output must be a single line that parses to a JSON object with no keys.
    """
    jsonfile = StringIO()
    MODIFIED_RESULTS = dict(BROKEN_RESULTS)
    MODIFIED_RESULTS['GitHub2'] = BROKEN_RESULTS['GitHub']
    # Report the extended copy so both status-less entries are exercised.
    generate_json_report('test', MODIFIED_RESULTS, jsonfile, 'simple')

    jsonfile.seek(0)
    data = jsonfile.readlines()

    assert len(data) == 1
    assert list(json.loads(data[0]).keys()) == []
def test_generate_json_ndjson_report(): def test_generate_json_ndjson_report():
jsonfile = StringIO() jsonfile = StringIO()
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS) MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
@@ -329,6 +383,20 @@ def test_save_xmind_report():
) )
def test_save_xmind_report_broken():
    """Check the XMind report for a site entry that has no 'status' key.

    The workbook is saved, loaded back, and must contain the root topic
    with exactly one subtopic titled 'Undefined'.
    """
    report_path = 'report_test.xmind'
    save_xmind_report(report_path, 'test', BROKEN_RESULTS)

    sheet_data = xmind.load(report_path).getPrimarySheet().getData()

    assert sheet_data['title'] == 'test Analysis'
    root_topic = sheet_data['topic']
    assert root_topic['title'] == 'test'
    subtopics = root_topic['topics']
    assert len(subtopics) == 1
    assert subtopics[0]['title'] == 'Undefined'
def test_html_report(): def test_html_report():
report_name = 'report_test.html' report_name = 'report_test.html'
context = generate_report_context(TEST) context = generate_report_context(TEST)
@@ -341,6 +409,21 @@ def test_html_report():
assert SUPPOSED_INTERESTS in report_text assert SUPPOSED_INTERESTS in report_text
def test_html_report_broken():
    """Check the HTML report when one site's 'status' is None.

    A deep copy of TEST gets the '500px' status of its first record set to
    None; the saved report must still contain the expected brief, geo and
    interests summaries for the reduced data.
    """
    report_name = 'report_test_broken.html'

    # Deep copy so the shared TEST fixture is not mutated for other tests.
    BROKEN_DATA = copy.deepcopy(TEST)
    BROKEN_DATA[0][2]['500px']['status'] = None

    context = generate_report_context(BROKEN_DATA)
    save_html_report(report_name, context)

    # Read through a context manager so the file handle is always closed.
    with open(report_name) as report_file:
        report_text = report_file.read()

    assert SUPPOSED_BROKEN_BRIEF in report_text
    assert SUPPOSED_BROKEN_GEO in report_text
    assert SUPPOSED_BROKEN_INTERESTS in report_text
def test_pdf_report(): def test_pdf_report():
report_name = 'report_test.pdf' report_name = 'report_test.pdf'
context = generate_report_context(TEST) context = generate_report_context(TEST)
@@ -357,3 +440,16 @@ def test_text_report():
assert brief_part in report_text assert brief_part in report_text
assert 'us' in report_text assert 'us' in report_text
assert 'photo' in report_text assert 'photo' in report_text
def test_text_report_broken():
    """Check the plain-text report when one site's 'status' is None.

    Every word of the expected broken brief, plus the 'us' and 'photo'
    tags, must appear in the generated text.
    """
    # Deep copy so the shared TEST fixture is not mutated for other tests.
    damaged_data = copy.deepcopy(TEST)
    damaged_data[0][2]['500px']['status'] = None

    report_text = get_plaintext_report(generate_report_context(damaged_data))

    expected_fragments = [*SUPPOSED_BROKEN_BRIEF.split(), 'us', 'photo']
    for fragment in expected_fragments:
        assert fragment in report_text