diff --git a/maigret/checking.py b/maigret/checking.py index 2ba70a8..bfabf6c 100644 --- a/maigret/checking.py +++ b/maigret/checking.py @@ -738,7 +738,12 @@ def timeout_check(value): async def site_self_check( - site: MaigretSite, logger, semaphore, db: MaigretDatabase, silent=False, tor_proxy=None + site: MaigretSite, + logger, + semaphore, + db: MaigretDatabase, + silent=False, + tor_proxy=None, ): changes = { "disabled": False, @@ -812,8 +817,12 @@ async def site_self_check( async def self_check( - db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10, - tor_proxy=None + db: MaigretDatabase, + site_data: dict, + logger, + silent=False, + max_connections=10, + tor_proxy=None, ) -> bool: sem = asyncio.Semaphore(max_connections) tasks = [] diff --git a/maigret/maigret.py b/maigret/maigret.py index f345152..9334b90 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -529,8 +529,11 @@ async def main(): if args.self_check: print('Maigret sites database self-checking...') is_need_update = await self_check( - db, site_data, logger, max_connections=args.connections, - tor_proxy=args.tor_proxy + db, + site_data, + logger, + max_connections=args.connections, + tor_proxy=args.tor_proxy, ) if is_need_update: if input('Do you want to save changes permanently? [Yn]\n').lower() in ( diff --git a/maigret/report.py b/maigret/report.py index 89ff335..2aa1800 100644 --- a/maigret/report.py +++ b/maigret/report.py @@ -40,7 +40,9 @@ def sort_report_by_data_points(results): return dict( sorted( results.items(), - key=lambda x: len((x[1].get('status') and x[1]['status'].ids_data or {}).keys()), + key=lambda x: len( + (x[1].get('status') and x[1]['status'].ids_data or {}).keys() + ), reverse=True, ) ) @@ -253,14 +255,18 @@ def generate_csv_report(username: str, results: dict, csvfile): ["username", "name", "url_main", "url_user", "exists", "http_status"] ) for site in results: + # TODO: fix the reason + status = 'Unknown' + if "status" in results[site]: + status = str(results[site]["status"].status) writer.writerow( [ username, site, - results[site]["url_main"], - results[site]["url_user"], - str(results[site]["status"].status), - results[site]["http_status"], + results[site].get("url_main", ""), + results[site].get("url_user", ""), + status, + results[site].get("http_status", 0), ] ) @@ -272,7 +278,10 @@ def generate_txt_report(username: str, results: dict, file): # TODO: fix no site data issue if not dictionary: continue - if dictionary.get("status").status == QueryStatus.CLAIMED: + if ( + dictionary.get("status") + and dictionary["status"].status == QueryStatus.CLAIMED + ): exists_counter += 1 file.write(dictionary["url_user"] + "\n") file.write(f"Total Websites Username Detected On : {exists_counter}") @@ -285,7 +294,10 @@ def generate_json_report(username: str, results: dict, file, report_type): for sitename in results: site_result = results[sitename] # TODO: fix no site data issue - if not site_result or site_result.get("status").status != QueryStatus.CLAIMED: + if not site_result or not site_result.get("status"): + continue + + if site_result["status"].status != QueryStatus.CLAIMED: continue data = dict(site_result) @@ -345,6 +357,7 @@ def design_xmind_sheet(sheet, username, results): if not dictionary: continue result_status = dictionary.get("status") + # TODO: fix the reason if not result_status or result_status.status != QueryStatus.CLAIMED: continue diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 918434d..550da92 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -13024,7 +13024,7 @@ "us" ], "headers": { - "authorization": "Bearer BQBKzy1QSQQO4wR2vRVROUOaj8T9gr0Vkjup9wUkLh0MZDtMEVZ0WEtyoZ_tTc4utIhyvvn9V7URwVWGeuU" + "authorization": "Bearer BQDEpoSTjg2Ko86QUHZjJmZvp5AuI1ru6rJySe8_cD0bRqMZk6PfmdsmJBu3QeiNHgUPGQPDz2VeSvRr16w" }, "errors": { "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" @@ -14450,7 +14450,7 @@ "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", - "x-guest-token": "1397644352072163331" + "x-guest-token": "1400174453577900043" }, "errors": { "Bad guest token": "x-guest-token update required" @@ -14857,7 +14857,7 @@ "video" ], "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjIwNjAyODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.RBYc81QRYfs9m7yzcGkUXhyA3rGPhQJaoAG8dnt61I4" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjI2NjM1MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.bKcisdrE5nJZMvrbagUC8lZQOs9spg3IKMlK15IclM4" }, "activation": { "url": "https://vimeo.com/_rv/viewer", @@ -27783,6 +27783,54 @@ "usernameUnclaimed": "noonewouldeverusethis7", "checkType": "status_code" }, + ".pro": { + "protocol": "dns", + "url": "{username}.pro", + "urlMain": "{username}.pro", + "usernameClaimed": "alex", + "usernameUnclaimed": "noonewouldeverusethis7", + "checkType": "status_code" + }, + ".me": { + "protocol": "dns", + "url": "{username}.me", + "urlMain": "{username}.me", + "usernameClaimed": "alex", + "usernameUnclaimed": "noonewouldeverusethis7", + "checkType": "status_code" + }, + ".biz": { + "protocol": "dns", + "url": "{username}.biz", + "urlMain": "{username}.biz", + "usernameClaimed": "alex", + "usernameUnclaimed": "noonewouldeverusethis7", + "checkType": "status_code" + }, + ".email": { + "protocol": "dns", + "url": "{username}.email", + "urlMain": "{username}.email", + "usernameClaimed": "alex", + "usernameUnclaimed": "noonewouldeverusethis7", + "checkType": "status_code" + }, + ".guru": { + "protocol": "dns", + "url": "{username}.guru", + "urlMain": "{username}.guru", + "usernameClaimed": "alex", + "usernameUnclaimed": "noonewouldeverusethis7", + "checkType": "status_code" + }, + ".ddns.net": { + "protocol": "dns", + "url": "{username}.ddns.net", + "urlMain": "{username}.ddns.net", + "usernameClaimed": "repack", + "usernameUnclaimed": "noonewouldeverusethis7", + "checkType": "status_code" + }, "Ameblo": { "absenceStrs": [ "THROW_NOT_FOUND_EXCEPTION" diff --git a/maigret/resources/simple_report.tpl b/maigret/resources/simple_report.tpl index 2cee141..c2e3322 100644 --- a/maigret/resources/simple_report.tpl +++ b/maigret/resources/simple_report.tpl @@ -68,7 +68,7 @@
- Photo + Photo

{{ k }} diff --git a/maigret/sites.py b/maigret/sites.py index c059de4..008842f 100644 --- a/maigret/sites.py +++ b/maigret/sites.py @@ -304,12 +304,13 @@ class MaigretDatabase: lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags ) is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags)) + is_protocol_in_tags = lambda x: x.protocol and x.protocol in normalized_tags is_disabled_needed = lambda x: not x.disabled or ( "disabled" in tags or disabled ) is_id_type_ok = lambda x: x.type == id_type - filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x) + filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x) or is_protocol_in_tags(x) filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x) filter_fun = ( diff --git a/maigret/submit.py b/maigret/submit.py index d7ed031..9c4c99e 100644 --- a/maigret/submit.py +++ b/maigret/submit.py @@ -209,7 +209,9 @@ async def check_features_manually( ): custom_headers = {} while True: - header_key = input('Specify custom header if you need or just press Enter to skip. Header name: ') + header_key = input( + 'Specify custom header if you need or just press Enter to skip. Header name: ' + ) if not header_key: break header_value = input('Header value: ') diff --git a/tests/test_report.py b/tests/test_report.py index b4f12b8..7c86013 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -45,6 +45,19 @@ EXAMPLE_RESULTS = { } } +BROKEN_RESULTS = { + 'GitHub': { + 'username': 'test', + 'parsing_enabled': True, + 'url_main': 'https://www.github.com/', + 'url_user': 'https://www.github.com/test', + 'http_status': 200, + 'is_similar': False, + 'rank': 78, + 'site': MaigretSite('test', {}), + } +} + GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT) GOOD_500PX_RESULT.tags = ['photo', 'us', 'global'] GOOD_500PX_RESULT.ids_data = { @@ -239,10 +252,13 @@ TEST = [ ] SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts.""" - -SUPPOSED_INTERESTS = "Interests: photo (2), news (1), social (1)" +SUPPOSED_BROKEN_BRIEF = """Search by username alexaimephotographycars returned 0 accounts. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 2 accounts.""" SUPPOSED_GEO = "Geo: us (3)" +SUPPOSED_BROKEN_GEO = "Geo: us (2)" + +SUPPOSED_INTERESTS = "Interests: photo (2), news (1), social (1)" +SUPPOSED_BROKEN_INTERESTS = "Interests: news (1), photo (1), social (1)" def test_generate_report_template(): @@ -270,6 +286,19 @@ def test_generate_csv_report(): ] +def test_generate_csv_report_broken(): + csvfile = StringIO() + generate_csv_report('test', BROKEN_RESULTS, csvfile) + + csvfile.seek(0) + data = csvfile.readlines() + + assert data == [ + 'username,name,url_main,url_user,exists,http_status\r\n', + 'test,GitHub,https://www.github.com/,https://www.github.com/test,Unknown,200\r\n', + ] + + def test_generate_txt_report(): txtfile = StringIO() generate_txt_report('test', EXAMPLE_RESULTS, txtfile) @@ -283,6 +312,18 @@ def test_generate_txt_report(): ] +def test_generate_txt_report_broken(): + txtfile = StringIO() + generate_txt_report('test', BROKEN_RESULTS, txtfile) + + txtfile.seek(0) + data = txtfile.readlines() + + assert data == [ + 'Total Websites Username Detected On : 0', + ] + + def test_generate_json_simple_report(): jsonfile = StringIO() MODIFIED_RESULTS = dict(EXAMPLE_RESULTS) @@ -296,6 +337,19 @@ def test_generate_json_simple_report(): assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2'] +def test_generate_json_simple_report_broken(): + jsonfile = StringIO() + MODIFIED_RESULTS = dict(BROKEN_RESULTS) + MODIFIED_RESULTS['GitHub2'] = BROKEN_RESULTS['GitHub'] + generate_json_report('test', BROKEN_RESULTS, jsonfile, 'simple') + + jsonfile.seek(0) + data = jsonfile.readlines() + + assert len(data) == 1 + assert list(json.loads(data[0]).keys()) == [] + + def test_generate_json_ndjson_report(): jsonfile = StringIO() MODIFIED_RESULTS = dict(EXAMPLE_RESULTS) @@ -329,6 +383,20 @@ def test_save_xmind_report(): ) +def test_save_xmind_report_broken(): + filename = 'report_test.xmind' + save_xmind_report(filename, 'test', BROKEN_RESULTS) + + workbook = xmind.load(filename) + sheet = workbook.getPrimarySheet() + data = sheet.getData() + + assert data['title'] == 'test Analysis' + assert data['topic']['title'] == 'test' + assert len(data['topic']['topics']) == 1 + assert data['topic']['topics'][0]['title'] == 'Undefined' + + def test_html_report(): report_name = 'report_test.html' context = generate_report_context(TEST) @@ -341,6 +409,21 @@ def test_html_report(): assert SUPPOSED_INTERESTS in report_text +def test_html_report_broken(): + report_name = 'report_test_broken.html' + BROKEN_DATA = copy.deepcopy(TEST) + BROKEN_DATA[0][2]['500px']['status'] = None + + context = generate_report_context(BROKEN_DATA) + save_html_report(report_name, context) + + report_text = open(report_name).read() + + assert SUPPOSED_BROKEN_BRIEF in report_text + assert SUPPOSED_BROKEN_GEO in report_text + assert SUPPOSED_BROKEN_INTERESTS in report_text + + def test_pdf_report(): report_name = 'report_test.pdf' context = generate_report_context(TEST) @@ -357,3 +440,16 @@ def test_text_report(): assert brief_part in report_text assert 'us' in report_text assert 'photo' in report_text + + +def test_text_report_broken(): + BROKEN_DATA = copy.deepcopy(TEST) + BROKEN_DATA[0][2]['500px']['status'] = None + + context = generate_report_context(BROKEN_DATA) + report_text = get_plaintext_report(context) + + for brief_part in SUPPOSED_BROKEN_BRIEF.split(): + assert brief_part in report_text + assert 'us' in report_text + assert 'photo' in report_text