Added some domains for the new DNS checker; fixed report-generation crashes

This commit is contained in:
Soxoj
2021-06-02 23:16:44 +03:00
parent 5179cb56eb
commit 13e1b6f4d1
8 changed files with 192 additions and 20 deletions
+12 -3
View File
@@ -738,7 +738,12 @@ def timeout_check(value):
async def site_self_check(
site: MaigretSite, logger, semaphore, db: MaigretDatabase, silent=False, tor_proxy=None
site: MaigretSite,
logger,
semaphore,
db: MaigretDatabase,
silent=False,
tor_proxy=None,
):
changes = {
"disabled": False,
@@ -812,8 +817,12 @@ async def site_self_check(
async def self_check(
db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10,
tor_proxy=None
db: MaigretDatabase,
site_data: dict,
logger,
silent=False,
max_connections=10,
tor_proxy=None,
) -> bool:
sem = asyncio.Semaphore(max_connections)
tasks = []
+5 -2
View File
@@ -529,8 +529,11 @@ async def main():
if args.self_check:
print('Maigret sites database self-checking...')
is_need_update = await self_check(
db, site_data, logger, max_connections=args.connections,
tor_proxy=args.tor_proxy
db,
site_data,
logger,
max_connections=args.connections,
tor_proxy=args.tor_proxy,
)
if is_need_update:
if input('Do you want to save changes permanently? [Yn]\n').lower() in (
+20 -7
View File
@@ -40,7 +40,9 @@ def sort_report_by_data_points(results):
return dict(
sorted(
results.items(),
key=lambda x: len((x[1].get('status') and x[1]['status'].ids_data or {}).keys()),
key=lambda x: len(
(x[1].get('status') and x[1]['status'].ids_data or {}).keys()
),
reverse=True,
)
)
@@ -253,14 +255,18 @@ def generate_csv_report(username: str, results: dict, csvfile):
["username", "name", "url_main", "url_user", "exists", "http_status"]
)
for site in results:
# TODO: fix the reason
status = 'Unknown'
if "status" in results[site]:
status = str(results[site]["status"].status)
writer.writerow(
[
username,
site,
results[site]["url_main"],
results[site]["url_user"],
str(results[site]["status"].status),
results[site]["http_status"],
results[site].get("url_main", ""),
results[site].get("url_user", ""),
status,
results[site].get("http_status", 0),
]
)
@@ -272,7 +278,10 @@ def generate_txt_report(username: str, results: dict, file):
# TODO: fix no site data issue
if not dictionary:
continue
if dictionary.get("status").status == QueryStatus.CLAIMED:
if (
dictionary.get("status")
and dictionary["status"].status == QueryStatus.CLAIMED
):
exists_counter += 1
file.write(dictionary["url_user"] + "\n")
file.write(f"Total Websites Username Detected On : {exists_counter}")
@@ -285,7 +294,10 @@ def generate_json_report(username: str, results: dict, file, report_type):
for sitename in results:
site_result = results[sitename]
# TODO: fix no site data issue
if not site_result or site_result.get("status").status != QueryStatus.CLAIMED:
if not site_result or not site_result.get("status"):
continue
if site_result["status"].status != QueryStatus.CLAIMED:
continue
data = dict(site_result)
@@ -345,6 +357,7 @@ def design_xmind_sheet(sheet, username, results):
if not dictionary:
continue
result_status = dictionary.get("status")
# TODO: fix the reason
if not result_status or result_status.status != QueryStatus.CLAIMED:
continue
+51 -3
View File
@@ -13024,7 +13024,7 @@
"us"
],
"headers": {
"authorization": "Bearer BQBKzy1QSQQO4wR2vRVROUOaj8T9gr0Vkjup9wUkLh0MZDtMEVZ0WEtyoZ_tTc4utIhyvvn9V7URwVWGeuU"
"authorization": "Bearer BQDEpoSTjg2Ko86QUHZjJmZvp5AuI1ru6rJySe8_cD0bRqMZk6PfmdsmJBu3QeiNHgUPGQPDz2VeSvRr16w"
},
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14450,7 +14450,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1397644352072163331"
"x-guest-token": "1400174453577900043"
},
"errors": {
"Bad guest token": "x-guest-token update required"
@@ -14857,7 +14857,7 @@
"video"
],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjIwNjAyODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.RBYc81QRYfs9m7yzcGkUXhyA3rGPhQJaoAG8dnt61I4"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjI2NjM1MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.bKcisdrE5nJZMvrbagUC8lZQOs9spg3IKMlK15IclM4"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -27783,6 +27783,54 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".pro": {
"protocol": "dns",
"url": "{username}.pro",
"urlMain": "{username}.pro",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".me": {
"protocol": "dns",
"url": "{username}.me",
"urlMain": "{username}.me",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".biz": {
"protocol": "dns",
"url": "{username}.biz",
"urlMain": "{username}.biz",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".email": {
"protocol": "dns",
"url": "{username}.email",
"urlMain": "{username}.email",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".guru": {
"protocol": "dns",
"url": "{username}.guru",
"urlMain": "{username}.guru",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
".ddns.net": {
"protocol": "dns",
"url": "{username}.ddns.net",
"urlMain": "{username}.ddns.net",
"usernameClaimed": "repack",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "status_code"
},
"Ameblo": {
"absenceStrs": [
"THROW_NOT_FOUND_EXCEPTION"
+1 -1
View File
@@ -68,7 +68,7 @@
<div class="row-mb">
<div class="col-md">
<div class="card flex-md-row mb-4 box-shadow h-md-250">
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status and v.status.ids_data and v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
<h3 class="mb-0" style="padding-top: 1rem;">
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
+2 -1
View File
@@ -304,12 +304,13 @@ class MaigretDatabase:
lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
)
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
is_protocol_in_tags = lambda x: x.protocol and x.protocol in normalized_tags
is_disabled_needed = lambda x: not x.disabled or (
"disabled" in tags or disabled
)
is_id_type_ok = lambda x: x.type == id_type
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x) or is_protocol_in_tags(x)
filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
filter_fun = (
+3 -1
View File
@@ -209,7 +209,9 @@ async def check_features_manually(
):
custom_headers = {}
while True:
header_key = input('Specify custom header if you need or just press Enter to skip. Header name: ')
header_key = input(
'Specify custom header if you need or just press Enter to skip. Header name: '
)
if not header_key:
break
header_value = input('Header value: ')
+98 -2
View File
@@ -45,6 +45,19 @@ EXAMPLE_RESULTS = {
}
}
# Fixture with a deliberately incomplete result entry: it has no 'status'
# key, mimicking a site check that crashed or returned no parsed status.
# Used by the *_broken tests below to verify that every report generator
# degrades gracefully instead of raising KeyError/AttributeError.
# NOTE(review): MaigretSite is a project type — assumes MaigretSite(name, {})
# yields a minimal valid site object; confirm against its constructor.
BROKEN_RESULTS = {
'GitHub': {
'username': 'test',
'parsing_enabled': True,
'url_main': 'https://www.github.com/',
'url_user': 'https://www.github.com/test',
'http_status': 200,
'is_similar': False,
'rank': 78,
'site': MaigretSite('test', {}),
}
}
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
GOOD_500PX_RESULT.ids_data = {
@@ -239,10 +252,13 @@ TEST = [
]
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
SUPPOSED_BROKEN_BRIEF = """Search by username alexaimephotographycars returned 0 accounts. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 2 accounts."""
SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
SUPPOSED_BROKEN_GEO = "Geo: us <span class=\"text-muted\">(2)</span>"
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
SUPPOSED_BROKEN_INTERESTS = "Interests: news <span class=\"text-muted\">(1)</span>, photo <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
def test_generate_report_template():
@@ -270,6 +286,19 @@ def test_generate_csv_report():
]
def test_generate_csv_report_broken():
    """CSV generation must not crash on a result entry lacking 'status'.

    The missing status must surface as the literal string 'Unknown'
    in the 'exists' column rather than raising.
    """
    output = StringIO()
    generate_csv_report('test', BROKEN_RESULTS, output)
    output.seek(0)
    expected = [
        'username,name,url_main,url_user,exists,http_status\r\n',
        'test,GitHub,https://www.github.com/,https://www.github.com/test,Unknown,200\r\n',
    ]
    assert output.readlines() == expected
def test_generate_txt_report():
txtfile = StringIO()
generate_txt_report('test', EXAMPLE_RESULTS, txtfile)
@@ -283,6 +312,18 @@ def test_generate_txt_report():
]
def test_generate_txt_report_broken():
    """TXT generation must not crash when no result carries a 'status'.

    With every entry statusless, the detected-sites counter stays at zero.
    """
    output = StringIO()
    generate_txt_report('test', BROKEN_RESULTS, output)
    output.seek(0)
    lines = output.readlines()
    assert lines == ['Total Websites Username Detected On : 0']
def test_generate_json_simple_report():
jsonfile = StringIO()
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
@@ -296,6 +337,19 @@ def test_generate_json_simple_report():
assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2']
def test_generate_json_simple_report_broken():
    """Simple JSON report must silently skip entries that lack a 'status'.

    A second statusless entry is added under a different site name so we
    exercise skipping more than one broken record.
    """
    jsonfile = StringIO()
    modified_results = dict(BROKEN_RESULTS)
    modified_results['GitHub2'] = BROKEN_RESULTS['GitHub']
    # Bug fix: generate the report from the *modified* dict — the original
    # code built modified_results and then passed BROKEN_RESULTS, so the
    # added 'GitHub2' entry was never exercised.
    generate_json_report('test', modified_results, jsonfile, 'simple')
    jsonfile.seek(0)
    data = jsonfile.readlines()
    # Entire report fits on one line, and every broken entry was dropped.
    assert len(data) == 1
    assert list(json.loads(data[0]).keys()) == []
def test_generate_json_ndjson_report():
jsonfile = StringIO()
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
@@ -329,6 +383,20 @@ def test_save_xmind_report():
)
def test_save_xmind_report_broken():
    """XMind report generation must survive results without a 'status'.

    The statusless site ends up under a single 'Undefined' topic.
    """
    filename = 'report_test.xmind'
    save_xmind_report(filename, 'test', BROKEN_RESULTS)
    primary = xmind.load(filename).getPrimarySheet()
    data = primary.getData()
    assert data['title'] == 'test Analysis'
    root = data['topic']
    assert root['title'] == 'test'
    topics = root['topics']
    assert len(topics) == 1
    assert topics[0]['title'] == 'Undefined'
def test_html_report():
report_name = 'report_test.html'
context = generate_report_context(TEST)
@@ -341,6 +409,21 @@ def test_html_report():
assert SUPPOSED_INTERESTS in report_text
def test_html_report_broken():
    """HTML report generation must handle an entry whose status is None.

    The broken entry is excluded from the brief/geo/interests summaries,
    which therefore match the *_BROKEN_* expectations.
    """
    report_name = 'report_test_broken.html'
    broken_data = copy.deepcopy(TEST)
    # Simulate a crashed check: the 500px entry loses its status object.
    broken_data[0][2]['500px']['status'] = None
    save_html_report(report_name, generate_report_context(broken_data))
    with open(report_name) as report_file:
        report_text = report_file.read()
    assert SUPPOSED_BROKEN_BRIEF in report_text
    assert SUPPOSED_BROKEN_GEO in report_text
    assert SUPPOSED_BROKEN_INTERESTS in report_text
def test_pdf_report():
report_name = 'report_test.pdf'
context = generate_report_context(TEST)
@@ -357,3 +440,16 @@ def test_text_report():
assert brief_part in report_text
assert 'us' in report_text
assert 'photo' in report_text
def test_text_report_broken():
    """Plaintext report generation must handle an entry with status None.

    Every word of the broken-data brief, plus the geo and interest tags
    contributed by the intact entries, must still appear in the output.
    """
    broken_data = copy.deepcopy(TEST)
    # Simulate a crashed check: the 500px entry loses its status object.
    broken_data[0][2]['500px']['status'] = None
    report_text = get_plaintext_report(generate_report_context(broken_data))
    for brief_part in SUPPOSED_BROKEN_BRIEF.split():
        assert brief_part in report_text
    assert 'us' in report_text
    assert 'photo' in report_text