Updated sites, improved submit dialog, bump to 0.2.2

This commit is contained in:
Soxoj
2021-05-07 12:27:24 +03:00
parent d59867b0d9
commit b6a207d0e3
10 changed files with 3044 additions and 2907 deletions
+5
View File
@@ -2,6 +2,11 @@
## [Unreleased] ## [Unreleased]
## [0.2.2] - 2021-05-07
* improved ids extractors
* updated sites and engines
* updated CLI options
## [0.2.1] - 2021-05-02 ## [0.2.1] - 2021-05-02
* fixed json reports generation bug, added tests * fixed json reports generation bug, added tests
+6 -3
View File
@@ -37,7 +37,7 @@ from .submit import submit_dialog
from .types import QueryResultWrapper from .types import QueryResultWrapper
from .utils import get_dict_ascii_tree from .utils import get_dict_ascii_tree
__version__ = '0.2.1' __version__ = '0.2.2'
def notify_about_errors(search_results: QueryResultWrapper, query_notify): def notify_about_errors(search_results: QueryResultWrapper, query_notify):
@@ -511,12 +511,15 @@ async def main():
db, site_data, logger, max_connections=args.connections db, site_data, logger, max_connections=args.connections
) )
if is_need_update: if is_need_update:
if input('Do you want to save changes permanently? [Yn]\n').lower() == 'y': if input('Do you want to save changes permanently? [Yn]\n').lower() in (
'y',
'',
):
db.save_to_file(args.db_file) db.save_to_file(args.db_file)
print('Database was successfully updated.') print('Database was successfully updated.')
else: else:
print('Updates will be applied only for current search session.') print('Updates will be applied only for current search session.')
print(db.get_scan_stats(site_data)) print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
# Database statistics # Database statistics
if args.stats: if args.stats:
+1870 -1799
View File
File diff suppressed because it is too large Load Diff
+8 -2
View File
@@ -291,7 +291,13 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
url_mainpage = extract_mainpage_url(url_exists) url_mainpage = extract_mainpage_url(url_exists)
sites = await detect_known_engine(db, url_exists, url_mainpage, logger) print('Detecting site engine, please wait...')
sites = []
try:
sites = await detect_known_engine(db, url_exists, url_mainpage, logger)
except KeyboardInterrupt:
print('Engine detect process is interrupted.')
if not sites: if not sites:
print("Unable to detect site engine, lets generate checking features") print("Unable to detect site engine, lets generate checking features")
sites = [ sites = [
@@ -304,7 +310,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
sem = asyncio.Semaphore(1) sem = asyncio.Semaphore(1)
print("Checking...") print("Checking, please wait...")
found = False found = False
chosen_site = None chosen_site = None
for s in sites: for s in sites:
+1 -1
View File
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
requires = rf.read().splitlines() requires = rf.read().splitlines()
setup(name='maigret', setup(name='maigret',
version='0.2.1', version='0.2.2',
description='Collect a dossier on a person by username from a huge number of sites', description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description, long_description=long_description,
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
+1092 -1074
View File
File diff suppressed because it is too large Load Diff
+3 -1
View File
@@ -78,7 +78,9 @@ def test_args_self_check_mode(argparser):
def test_args_multiple_sites(argparser): def test_args_multiple_sites(argparser):
args = argparser.parse_args('--site GitHub VK --site PornHub --site Taringa,Steam'.split()) args = argparser.parse_args(
'--site GitHub VK --site PornHub --site Taringa,Steam'.split()
)
want_args = dict(DEFAULT_ARGS) want_args = dict(DEFAULT_ARGS)
want_args.update( want_args.update(
+24 -7
View File
@@ -6,7 +6,11 @@ import pytest
from mock import Mock from mock import Mock
from maigret.maigret import self_check, maigret from maigret.maigret import self_check, maigret
from maigret.maigret import extract_ids_from_page, extract_ids_from_results, extract_ids_from_url from maigret.maigret import (
extract_ids_from_page,
extract_ids_from_results,
extract_ids_from_url,
)
from maigret.sites import MaigretSite from maigret.sites import MaigretSite
from maigret.result import QueryResult, QueryStatus from maigret.result import QueryResult, QueryStatus
@@ -139,17 +143,27 @@ def test_maigret_results(test_db):
def test_extract_ids_from_url(default_db): def test_extract_ids_from_url(default_db):
assert extract_ids_from_url('https://www.reddit.com/user/test', default_db) == {'test': 'username'} assert extract_ids_from_url('https://www.reddit.com/user/test', default_db) == {
'test': 'username'
}
assert extract_ids_from_url('https://vk.com/id123', default_db) == {'123': 'vk_id'} assert extract_ids_from_url('https://vk.com/id123', default_db) == {'123': 'vk_id'}
assert extract_ids_from_url('https://vk.com/ida123', default_db) == {'ida123': 'username'} assert extract_ids_from_url('https://vk.com/ida123', default_db) == {
assert extract_ids_from_url('https://my.mail.ru/yandex.ru/dipres8904/', default_db) == {'dipres8904': 'username'} 'ida123': 'username'
assert extract_ids_from_url('https://reviews.yandex.ru/user/adbced123', default_db) == {'adbced123': 'yandex_public_id'} }
assert extract_ids_from_url(
'https://my.mail.ru/yandex.ru/dipres8904/', default_db
) == {'dipres8904': 'username'}
assert extract_ids_from_url(
'https://reviews.yandex.ru/user/adbced123', default_db
) == {'adbced123': 'yandex_public_id'}
@pytest.mark.slow @pytest.mark.slow
def test_extract_ids_from_page(test_db): def test_extract_ids_from_page(test_db):
logger = Mock() logger = Mock()
extract_ids_from_page('https://www.reddit.com/user/test', logger) == {'test': 'username'} extract_ids_from_page('https://www.reddit.com/user/test', logger) == {
'test': 'username'
}
def test_extract_ids_from_results(test_db): def test_extract_ids_from_results(test_db):
@@ -157,4 +171,7 @@ def test_extract_ids_from_results(test_db):
TEST_EXAMPLE['Reddit']['ids_usernames'] = {'test1': 'yandex_public_id'} TEST_EXAMPLE['Reddit']['ids_usernames'] = {'test1': 'yandex_public_id'}
TEST_EXAMPLE['Reddit']['ids_links'] = ['https://www.reddit.com/user/test2'] TEST_EXAMPLE['Reddit']['ids_links'] = ['https://www.reddit.com/user/test2']
extract_ids_from_results(TEST_EXAMPLE, test_db) == {'test1': 'yandex_public_id', 'test2': 'username'} extract_ids_from_results(TEST_EXAMPLE, test_db) == {
'test1': 'yandex_public_id',
'test2': 'username',
}
+34 -19
View File
@@ -6,34 +6,49 @@ from maigret.result import QueryStatus, QueryResult
def test_notify_illegal(): def test_notify_illegal():
n = QueryNotifyPrint(color=False) n = QueryNotifyPrint(color=False)
assert n.update(QueryResult( assert (
username="test", n.update(
status=QueryStatus.ILLEGAL, QueryResult(
site_name="TEST_SITE", username="test",
site_url_user="http://example.com/test" status=QueryStatus.ILLEGAL,
)) == "[-] TEST_SITE: Illegal Username Format For This Site!" site_name="TEST_SITE",
site_url_user="http://example.com/test",
)
)
== "[-] TEST_SITE: Illegal Username Format For This Site!"
)
def test_notify_claimed(): def test_notify_claimed():
n = QueryNotifyPrint(color=False) n = QueryNotifyPrint(color=False)
assert n.update(QueryResult( assert (
username="test", n.update(
status=QueryStatus.CLAIMED, QueryResult(
site_name="TEST_SITE", username="test",
site_url_user="http://example.com/test" status=QueryStatus.CLAIMED,
)) == "[+] TEST_SITE: http://example.com/test" site_name="TEST_SITE",
site_url_user="http://example.com/test",
)
)
== "[+] TEST_SITE: http://example.com/test"
)
def test_notify_available(): def test_notify_available():
n = QueryNotifyPrint(color=False) n = QueryNotifyPrint(color=False)
assert n.update(QueryResult( assert (
username="test", n.update(
status=QueryStatus.AVAILABLE, QueryResult(
site_name="TEST_SITE", username="test",
site_url_user="http://example.com/test" status=QueryStatus.AVAILABLE,
)) == "[-] TEST_SITE: Not found!" site_name="TEST_SITE",
site_url_user="http://example.com/test",
)
)
== "[-] TEST_SITE: Not found!"
)
def test_notify_unknown(): def test_notify_unknown():
@@ -42,7 +57,7 @@ def test_notify_unknown():
username="test", username="test",
status=QueryStatus.UNKNOWN, status=QueryStatus.UNKNOWN,
site_name="TEST_SITE", site_name="TEST_SITE",
site_url_user="http://example.com/test" site_url_user="http://example.com/test",
) )
result.error = CheckError('Type', 'Reason') result.error = CheckError('Type', 'Reason')
+1 -1
View File
@@ -87,7 +87,7 @@ if __name__ == '__main__':
with open("sites.md", "w") as site_file: with open("sites.md", "w") as site_file:
site_file.write(f""" site_file.write(f"""
## List of supported sites: total {len(sites_subset)}\n ## List of supported sites (search methods): total {len(sites_subset)}\n
Rank data fetched from Alexa by domains. Rank data fetched from Alexa by domains.
""") """)