Updated sites, improved submit dialog, bump to 0.2.2

2026-05-06 22:19:01 +00:00 · 2021-05-07 12:27:24 +03:00
parent d59867b0d9
commit b6a207d0e3
10 changed files with 3044 additions and 2907 deletions
@@ -2,6 +2,11 @@

 ## [Unreleased]

+## [0.2.2] - 2021-05-07
+* improved ids extractors
+* updated sites and engines
+* updated CLI options
+
 ## [0.2.1] - 2021-05-02
 * fixed json reports generation bug, added tests

@@ -37,7 +37,7 @@ from .submit import submit_dialog
 from .types import QueryResultWrapper
 from .utils import get_dict_ascii_tree

-__version__ = '0.2.1'
+__version__ = '0.2.2'


 def notify_about_errors(search_results: QueryResultWrapper, query_notify):
@@ -511,12 +511,15 @@ async def main():
            db, site_data, logger, max_connections=args.connections
        )
        if is_need_update:
-            if input('Do you want to save changes permanently? [Yn]\n').lower() == 'y':
+            if input('Do you want to save changes permanently? [Yn]\n').lower() in (
+                'y',
+                '',
+            ):
                db.save_to_file(args.db_file)
                print('Database was successfully updated.')
            else:
                print('Updates will be applied only for current search session.')
-        print(db.get_scan_stats(site_data))
+        print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))

    # Database statistics
    if args.stats:
@@ -291,7 +291,13 @@ async def submit_dialog(db, url_exists, cookie_file, logger):

    url_mainpage = extract_mainpage_url(url_exists)

+    print('Detecting site engine, please wait...')
+    sites = []
+    try:
        sites = await detect_known_engine(db, url_exists, url_mainpage, logger)
+    except KeyboardInterrupt:
+        print('Engine detect process is interrupted.')
+
    if not sites:
        print("Unable to detect site engine, lets generate checking features")
        sites = [
@@ -304,7 +310,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):

    sem = asyncio.Semaphore(1)

-    print("Checking...")
+    print("Checking, please wait...")
    found = False
    chosen_site = None
    for s in sites:
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
    requires = rf.read().splitlines()

 setup(name='maigret',
-      version='0.2.1',
+      version='0.2.2',
      description='Collect a dossier on a person by username from a huge number of sites',
      long_description=long_description,
      long_description_content_type="text/markdown",
@@ -78,7 +78,9 @@ def test_args_self_check_mode(argparser):


 def test_args_multiple_sites(argparser):
-    args = argparser.parse_args('--site GitHub VK --site PornHub --site Taringa,Steam'.split())
+    args = argparser.parse_args(
+        '--site GitHub VK --site PornHub --site Taringa,Steam'.split()
+    )

    want_args = dict(DEFAULT_ARGS)
    want_args.update(
@@ -6,7 +6,11 @@ import pytest
 from mock import Mock

 from maigret.maigret import self_check, maigret
-from maigret.maigret import extract_ids_from_page, extract_ids_from_results, extract_ids_from_url
+from maigret.maigret import (
+    extract_ids_from_page,
+    extract_ids_from_results,
+    extract_ids_from_url,
+)
 from maigret.sites import MaigretSite
 from maigret.result import QueryResult, QueryStatus

@@ -139,17 +143,27 @@ def test_maigret_results(test_db):


 def test_extract_ids_from_url(default_db):
-    assert extract_ids_from_url('https://www.reddit.com/user/test', default_db) == {'test': 'username'}
+    assert extract_ids_from_url('https://www.reddit.com/user/test', default_db) == {
+        'test': 'username'
+    }
    assert extract_ids_from_url('https://vk.com/id123', default_db) == {'123': 'vk_id'}
-    assert extract_ids_from_url('https://vk.com/ida123', default_db) == {'ida123': 'username'}
-    assert extract_ids_from_url('https://my.mail.ru/yandex.ru/dipres8904/', default_db) == {'dipres8904': 'username'}
-    assert extract_ids_from_url('https://reviews.yandex.ru/user/adbced123', default_db) == {'adbced123': 'yandex_public_id'}
+    assert extract_ids_from_url('https://vk.com/ida123', default_db) == {
+        'ida123': 'username'
+    }
+    assert extract_ids_from_url(
+        'https://my.mail.ru/yandex.ru/dipres8904/', default_db
+    ) == {'dipres8904': 'username'}
+    assert extract_ids_from_url(
+        'https://reviews.yandex.ru/user/adbced123', default_db
+    ) == {'adbced123': 'yandex_public_id'}


@pytest.mark.slow
 def test_extract_ids_from_page(test_db):
    logger = Mock()
-    extract_ids_from_page('https://www.reddit.com/user/test', logger) == {'test': 'username'}
+    extract_ids_from_page('https://www.reddit.com/user/test', logger) == {
+        'test': 'username'
+    }


 def test_extract_ids_from_results(test_db):
@@ -157,4 +171,7 @@ def test_extract_ids_from_results(test_db):
    TEST_EXAMPLE['Reddit']['ids_usernames'] = {'test1': 'yandex_public_id'}
    TEST_EXAMPLE['Reddit']['ids_links'] = ['https://www.reddit.com/user/test2']

-    extract_ids_from_results(TEST_EXAMPLE, test_db) == {'test1': 'yandex_public_id', 'test2': 'username'}
+    extract_ids_from_results(TEST_EXAMPLE, test_db) == {
+        'test1': 'yandex_public_id',
+        'test2': 'username',
+    }
@@ -6,34 +6,49 @@ from maigret.result import QueryStatus, QueryResult
 def test_notify_illegal():
    n = QueryNotifyPrint(color=False)

-    assert n.update(QueryResult(
+    assert (
+        n.update(
+            QueryResult(
                username="test",
                status=QueryStatus.ILLEGAL,
                site_name="TEST_SITE",
-        site_url_user="http://example.com/test"
-    )) == "[-] TEST_SITE: Illegal Username Format For This Site!"
+                site_url_user="http://example.com/test",
+            )
+        )
+        == "[-] TEST_SITE: Illegal Username Format For This Site!"
+    )


 def test_notify_claimed():
    n = QueryNotifyPrint(color=False)

-    assert n.update(QueryResult(
+    assert (
+        n.update(
+            QueryResult(
                username="test",
                status=QueryStatus.CLAIMED,
                site_name="TEST_SITE",
-        site_url_user="http://example.com/test"
-    )) == "[+] TEST_SITE: http://example.com/test"
+                site_url_user="http://example.com/test",
+            )
+        )
+        == "[+] TEST_SITE: http://example.com/test"
+    )


 def test_notify_available():
    n = QueryNotifyPrint(color=False)

-    assert n.update(QueryResult(
+    assert (
+        n.update(
+            QueryResult(
                username="test",
                status=QueryStatus.AVAILABLE,
                site_name="TEST_SITE",
-        site_url_user="http://example.com/test"
-    )) == "[-] TEST_SITE: Not found!"
+                site_url_user="http://example.com/test",
+            )
+        )
+        == "[-] TEST_SITE: Not found!"
+    )


 def test_notify_unknown():
@@ -42,7 +57,7 @@ def test_notify_unknown():
        username="test",
        status=QueryStatus.UNKNOWN,
        site_name="TEST_SITE",
-        site_url_user="http://example.com/test"
+        site_url_user="http://example.com/test",
    )
    result.error = CheckError('Type', 'Reason')

@@ -87,7 +87,7 @@ if __name__ == '__main__':

    with open("sites.md", "w") as site_file:
        site_file.write(f"""
-## List of supported sites: total {len(sites_subset)}\n
+## List of supported sites (search methods): total {len(sites_subset)}\n
 Rank data fetched from Alexa by domains.

 """)