Added some domains for the new DNS checker; fixed report-generation crashes

This commit is contained in:
Soxoj
2021-06-02 23:16:44 +03:00
parent 5179cb56eb
commit 13e1b6f4d1
8 changed files with 192 additions and 20 deletions
+12 -3
View File
@@ -738,7 +738,12 @@ def timeout_check(value):
async def site_self_check(
site: MaigretSite, logger, semaphore, db: MaigretDatabase, silent=False, tor_proxy=None
site: MaigretSite,
logger,
semaphore,
db: MaigretDatabase,
silent=False,
tor_proxy=None,
):
changes = {
"disabled": False,
@@ -812,8 +817,12 @@ async def site_self_check(
async def self_check(
db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10,
tor_proxy=None
db: MaigretDatabase,
site_data: dict,
logger,
silent=False,
max_connections=10,
tor_proxy=None,
) -> bool:
sem = asyncio.Semaphore(max_connections)
tasks = []
+5 -2
View File
@@ -529,8 +529,11 @@ async def main():
if args.self_check:
print('Maigret sites database self-checking...')
is_need_update = await self_check(
db, site_data, logger, max_connections=args.connections,
tor_proxy=args.tor_proxy
db,
site_data,
logger,
max_connections=args.connections,
tor_proxy=args.tor_proxy,
)
if is_need_update:
if input('Do you want to save changes permanently? [Yn]\n').lower() in (
+20 -7
View File
@@ -40,7 +40,9 @@ def sort_report_by_data_points(results):
return dict(
sorted(
results.items(),
key=lambda x: len((x[1].get('status') and x[1]['status'].ids_data or {}).keys()),
key=lambda x: len(
(x[1].get('status') and x[1]['status'].ids_data or {}).keys()
),
reverse=True,
)
)
@@ -253,14 +255,18 @@ def generate_csv_report(username: str, results: dict, csvfile):
["username", "name", "url_main", "url_user", "exists", "http_status"]
)
for site in results:
# TODO: fix the reason
status = 'Unknown'
if "status" in results[site]:
status = str(results[site]["status"].status)
writer.writerow(
[
username,
site,
results[site]["url_main"],
results[site]["url_user"],
str(results[site]["status"].status),
results[site]["http_status"],
results[site].get("url_main", ""),
results[site].get("url_user", ""),
status,
results[site].get("http_status", 0),
]
)
@@ -272,7 +278,10 @@ def generate_txt_report(username: str, results: dict, file):
# TODO: fix no site data issue
if not dictionary:
continue
if dictionary.get("status").status == QueryStatus.CLAIMED:
if (
dictionary.get("status")
and dictionary["status"].status == QueryStatus.CLAIMED
):
exists_counter += 1
file.write(dictionary["url_user"] + "\n")
file.write(f"Total Websites Username Detected On : {exists_counter}")
@@ -285,7 +294,10 @@ def generate_json_report(username: str, results: dict, file, report_type):
for sitename in results:
site_result = results[sitename]
# TODO: fix no site data issue
if not site_result or site_result.get("status").status != QueryStatus.CLAIMED:
if not site_result or not site_result.get("status"):
continue
if site_result["status"].status != QueryStatus.CLAIMED:
continue
data = dict(site_result)
@@ -345,6 +357,7 @@ def design_xmind_sheet(sheet, username, results):
if not dictionary:
continue
result_status = dictionary.get("status")
# TODO: fix the reason
if not result_status or result_status.status != QueryStatus.CLAIMED:
continue
+51 -3
View File
@@ -13024,7 +13024,7 @@
"us"
],
"headers": {
"authorization": "Bearer BQBKzy1QSQQO4wR2vRVROUOaj8T9gr0Vkjup9wUkLh0MZDtMEVZ0WEtyoZ_tTc4utIhyvvn9V7URwVWGeuU"
"authorization": "Bearer BQDEpoSTjg2Ko86QUHZjJmZvp5AuI1ru6rJySe8_cD0bRqMZk6PfmdsmJBu3QeiNHgUPGQPDz2VeSvRr16w"
},
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14450,7 +14450,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1397644352072163331"
"x-guest-token": "1400174453577900043"
},
"errors": {
"Bad guest token": "x-guest-token update required"
@@ -14857,7 +14857,7 @@
"video"
],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjIwNjAyODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.RBYc81QRYfs9m7yzcGkUXhyA3rGPhQJaoAG8dnt61I4"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjI2NjM1MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.bKcisdrE5nJZMvrbagUC8lZQOs9spg3IKMlK15IclM4"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -27783,6 +27783,54 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".pro": {
"protocol": "dns",
"url": "{username}.pro",
"urlMain": "{username}.pro",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".me": {
"protocol": "dns",
"url": "{username}.me",
"urlMain": "{username}.me",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".biz": {
"protocol": "dns",
"url": "{username}.biz",
"urlMain": "{username}.biz",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".email": {
"protocol": "dns",
"url": "{username}.email",
"urlMain": "{username}.email",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".guru": {
"protocol": "dns",
"url": "{username}.guru",
"urlMain": "{username}.guru",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".ddns.net": {
"protocol": "dns",
"url": "{username}.ddns.net",
"urlMain": "{username}.ddns.net",
"usernameClaimed": "repack",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
"Ameblo": {
"absenceStrs": [
"THROW_NOT_FOUND_EXCEPTION"
+1 -1
View File
@@ -68,7 +68,7 @@
<div class="row-mb">
<div class="col-md">
<div class="card flex-md-row mb-4 box-shadow h-md-250">
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status and v.status.ids_data and v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
<h3 class="mb-0" style="padding-top: 1rem;">
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
+2 -1
View File
@@ -304,12 +304,13 @@ class MaigretDatabase:
lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
)
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
is_protocol_in_tags = lambda x: x.protocol and x.protocol in normalized_tags
is_disabled_needed = lambda x: not x.disabled or (
"disabled" in tags or disabled
)
is_id_type_ok = lambda x: x.type == id_type
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x) or is_protocol_in_tags(x)
filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
filter_fun = (
+3 -1
View File
@@ -209,7 +209,9 @@ async def check_features_manually(
):
custom_headers = {}
while True:
header_key = input('Specify custom header if you need or just press Enter to skip. Header name: ')
header_key = input(
'Specify custom header if you need or just press Enter to skip. Header name: '
)
if not header_key:
break
header_value = input('Header value: ')
+98 -2
View File
@@ -45,6 +45,19 @@ EXAMPLE_RESULTS = {
}
}
# Fixture with a deliberately incomplete result entry: it has no 'status'
# key, mimicking a site check that crashed or returned no parsed status.
# Used by the *_broken tests below to verify that every report generator
# degrades gracefully instead of raising KeyError/AttributeError.
# NOTE(review): MaigretSite is a project type — assumes MaigretSite(name, {})
# yields a minimal valid site object; confirm against its constructor.
BROKEN_RESULTS = {
'GitHub': {
'username': 'test',
'parsing_enabled': True,
'url_main': 'https://www.github.com/',
'url_user': 'https://www.github.com/test',
'http_status': 200,
'is_similar': False,
'rank': 78,
'site': MaigretSite('test', {}),
}
}
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
GOOD_500PX_RESULT.ids_data = {
@@ -239,10 +252,13 @@ TEST = [
]
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
SUPPOSED_BROKEN_BRIEF = """Search by username alexaimephotographycars returned 0 accounts. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 2 accounts."""
SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
SUPPOSED_BROKEN_GEO = "Geo: us <span class=\"text-muted\">(2)</span>"
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
SUPPOSED_BROKEN_INTERESTS = "Interests: news <span class=\"text-muted\">(1)</span>, photo <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
def test_generate_report_template():
@@ -270,6 +286,19 @@ def test_generate_csv_report():
]
def test_generate_csv_report_broken():
    """CSV generation must not crash on a result entry lacking 'status'.

    The missing status must surface as the literal string 'Unknown'
    in the 'exists' column rather than raising.
    """
    output = StringIO()
    generate_csv_report('test', BROKEN_RESULTS, output)
    output.seek(0)
    expected = [
        'username,name,url_main,url_user,exists,http_status\r\n',
        'test,GitHub,https://www.github.com/,https://www.github.com/test,Unknown,200\r\n',
    ]
    assert output.readlines() == expected
def test_generate_txt_report():
txtfile = StringIO()
generate_txt_report('test', EXAMPLE_RESULTS, txtfile)
@@ -283,6 +312,18 @@ def test_generate_txt_report():
]
def test_generate_txt_report_broken():
    """TXT generation must not crash when no result carries a 'status'.

    With every entry statusless, the detected-sites counter stays at zero.
    """
    output = StringIO()
    generate_txt_report('test', BROKEN_RESULTS, output)
    output.seek(0)
    lines = output.readlines()
    assert lines == ['Total Websites Username Detected On : 0']
def test_generate_json_simple_report():
jsonfile = StringIO()
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
@@ -296,6 +337,19 @@ def test_generate_json_simple_report():
assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2']
def test_generate_json_simple_report_broken():
    """Simple JSON report must silently skip entries that lack a 'status'.

    A second statusless entry is added under a different site name so we
    exercise skipping more than one broken record.
    """
    jsonfile = StringIO()
    modified_results = dict(BROKEN_RESULTS)
    modified_results['GitHub2'] = BROKEN_RESULTS['GitHub']
    # Bug fix: generate the report from the *modified* dict — the original
    # code built modified_results and then passed BROKEN_RESULTS, so the
    # added 'GitHub2' entry was never exercised.
    generate_json_report('test', modified_results, jsonfile, 'simple')
    jsonfile.seek(0)
    data = jsonfile.readlines()
    # Entire report fits on one line, and every broken entry was dropped.
    assert len(data) == 1
    assert list(json.loads(data[0]).keys()) == []
def test_generate_json_ndjson_report():
jsonfile = StringIO()
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
@@ -329,6 +383,20 @@ def test_save_xmind_report():
)
def test_save_xmind_report_broken():
    """XMind report generation must survive results without a 'status'.

    The statusless site ends up under a single 'Undefined' topic.
    """
    filename = 'report_test.xmind'
    save_xmind_report(filename, 'test', BROKEN_RESULTS)
    primary = xmind.load(filename).getPrimarySheet()
    data = primary.getData()
    assert data['title'] == 'test Analysis'
    root = data['topic']
    assert root['title'] == 'test'
    topics = root['topics']
    assert len(topics) == 1
    assert topics[0]['title'] == 'Undefined'
def test_html_report():
report_name = 'report_test.html'
context = generate_report_context(TEST)
@@ -341,6 +409,21 @@ def test_html_report():
assert SUPPOSED_INTERESTS in report_text
def test_html_report_broken():
    """HTML report generation must handle an entry whose status is None.

    The broken entry is excluded from the brief/geo/interests summaries,
    which therefore match the *_BROKEN_* expectations.
    """
    report_name = 'report_test_broken.html'
    broken_data = copy.deepcopy(TEST)
    # Simulate a crashed check: the 500px entry loses its status object.
    broken_data[0][2]['500px']['status'] = None
    save_html_report(report_name, generate_report_context(broken_data))
    with open(report_name) as report_file:
        report_text = report_file.read()
    assert SUPPOSED_BROKEN_BRIEF in report_text
    assert SUPPOSED_BROKEN_GEO in report_text
    assert SUPPOSED_BROKEN_INTERESTS in report_text
def test_pdf_report():
report_name = 'report_test.pdf'
context = generate_report_context(TEST)
@@ -357,3 +440,16 @@ def test_text_report():
assert brief_part in report_text
assert 'us' in report_text
assert 'photo' in report_text
def test_text_report_broken():
    """Plaintext report generation must handle an entry with status None.

    Every word of the broken-data brief, plus the geo and interest tags
    contributed by the intact entries, must still appear in the output.
    """
    broken_data = copy.deepcopy(TEST)
    # Simulate a crashed check: the 500px entry loses its status object.
    broken_data[0][2]['500px']['status'] = None
    report_text = get_plaintext_report(generate_report_context(broken_data))
    for brief_part in SUPPOSED_BROKEN_BRIEF.split():
        assert brief_part in report_text
    assert 'us' in report_text
    assert 'photo' in report_text