Compare commits


12 Commits

Author  SHA1        Date                        Message
soxoj   bea900dda0  2021-05-18 01:20:00 +03:00  Merge pull request #155 from soxoj/0.2.4 (Bump to 0.2.4)
Soxoj   bb1bde833d  2021-05-18 01:17:35 +03:00  Bump to 0.2.4
soxoj   5b405c6abb  2021-05-18 00:57:31 +03:00  Merge pull request #154 from soxoj/tests-improving (Improved tests)
Soxoj   99fa58ceed  2021-05-18 00:55:18 +03:00  Disabled Twitter activation test
Soxoj   c71e404f63  2021-05-18 00:49:13 +03:00  Added test dependencies
Soxoj   2c04ccce57  2021-05-18 00:43:56 +03:00  Improved tests
soxoj   435db7cdc9  2021-05-17 00:35:56 +03:00  Merge pull request #153 from soxoj/sites-update-16-05-21 (Several sites added, updated site list)
Soxoj   413a0502a4  2021-05-16 17:02:41 +03:00  Several sites added, updated site list
soxoj   2aedcc3166  2021-05-15 16:57:22 +03:00  Merge pull request #152 from soxoj/cli-plaintext-report (Added text report to CLI output)
Soxoj   28835204f5  2021-05-15 16:55:05 +03:00  Added text report to CLI output
soxoj   b11a247dfd  2021-05-15 14:55:01 +03:00  Merge pull request #151 from soxoj/tags-socid-extractor (Tags updated, added tests for tags)
Soxoj   c9219d91ec  2021-05-15 14:51:30 +03:00  Tags updated, added tests for tags; added several sites; updated socid_extractor version to avoid bug #150
22 changed files with 3363 additions and 3256 deletions
+1 -1  .github/workflows/python-package.yml
@@ -26,7 +26,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install flake8 pytest pytest-rerunfailures
+          python -m pip install -r test-requirements.txt
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
       - name: Test with pytest
         run: |
+4  CHANGELOG.md
@@ -2,6 +2,10 @@
 ## [Unreleased]
 
+## [0.2.4] - 2021-05-18
+* cli output report
+* various improvements
+
 ## [0.2.3] - 2021-05-12
 * added Yelp and yelp_userid support
 * tags markup stabilization
+1 -1  maigret/__version__.py
@@ -1,3 +1,3 @@
 """Maigret version file"""
 
-__version__ = '0.2.3'
+__version__ = '0.2.4'
+6 -6  maigret/checking.py
@@ -26,7 +26,7 @@ from .executors import (
 from .result import QueryResult, QueryStatus
 from .sites import MaigretDatabase, MaigretSite
 from .types import QueryOptions, QueryResultWrapper
-from .utils import get_random_user_agent
+from .utils import get_random_user_agent, ascii_data_display
 
 
 SUPPORTED_IDS = (
@@ -233,9 +233,9 @@ def process_site_result(
             result = build_result(QueryStatus.CLAIMED)
         else:
            result = build_result(QueryStatus.AVAILABLE)
-    elif check_type == "status_code":
+    elif check_type in "status_code":
         # Checks if the status code of the response is 2XX
-        if is_presense_detected and (not status_code >= 300 or status_code < 200):
+        if 200 <= status_code < 300:
             result = build_result(QueryStatus.CLAIMED)
         else:
             result = build_result(QueryStatus.AVAILABLE)
@@ -272,7 +272,7 @@ def process_site_result(
                 new_usernames[v] = k
 
             results_info["ids_usernames"] = new_usernames
-            links = eval(extracted_ids_data.get("links", "[]"))
+            links = ascii_data_display(extracted_ids_data.get("links", "[]"))
             if "website" in extracted_ids_data:
                 links.append(extracted_ids_data["website"])
             results_info["ids_links"] = links
@@ -456,7 +456,7 @@ async def maigret(
     logger,
     query_notify=None,
     proxy=None,
-    timeout=None,
+    timeout=3,
     is_parsing_enabled=False,
     id_type="username",
     debug=False,
@@ -478,7 +478,7 @@ async def maigret(
                           query results.
     logger             -- Standard Python logger object.
     timeout            -- Time in seconds to wait before timing out request.
-                          Default is no timeout.
+                          Default is 3 seconds.
     is_parsing_enabled -- Extract additional info from account pages.
     id_type            -- Type of username to search.
                           Default is 'username', see all supported here:
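
A note on the status-code hunk above: the old condition was a genuine logic bug, not just style. `not status_code >= 300 or status_code < 200` parses as `(not (status_code >= 300)) or (status_code < 200)`, which is true for every code below 300, including 1XX, and it also required `is_presense_detected` even though plain status-code sites define no presence strings. A standalone Python sketch of the difference (illustration only, not Maigret code):

    # Compare the old and new 2XX checks on a few representative status codes.
    for status_code in (101, 200, 204, 301, 404):
        old = not status_code >= 300 or status_code < 200  # effectively status_code < 300
        new = 200 <= status_code < 300
        print(status_code, old, new)
    # 101  True  False   <- the old check wrongly counts 1XX as "claimed"
    # 200  True  True
    # 204  True  True
    # 301  False False
    # 404  False False
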
+7  maigret/maigret.py
@@ -32,6 +32,7 @@ from .report import (
     save_txt_report,
     SUPPORTED_JSON_REPORT_FORMATS,
     save_json_report,
+    get_plaintext_report,
 )
 from .sites import MaigretDatabase
 from .submit import submit_dialog
@@ -646,6 +647,12 @@ async def main():
         filename = report_filepath_tpl.format(username=username, postfix='.pdf')
         save_pdf_report(filename, report_context)
         query_notify.warning(f'PDF report on all usernames saved in {filename}')
+
+    text_report = get_plaintext_report(report_context)
+    if text_report:
+        query_notify.info('Short text report:')
+        print(text_report)
 
     # update database
     db.save_to_file(args.db_file)
+10 -3  maigret/notify.py
@@ -205,13 +205,20 @@ class QueryNotifyPrint(QueryNotify):
         else:
             print(f"[*] {title} {message} on:")
 
-    def warning(self, message, symbol="-"):
-        msg = f"[{symbol}] {message}"
+    def _colored_print(self, fore_color, msg):
         if self.color:
-            print(Style.BRIGHT + Fore.YELLOW + msg)
+            print(Style.BRIGHT + fore_color + msg)
         else:
             print(msg)
 
+    def warning(self, message, symbol="-"):
+        msg = f"[{symbol}] {message}"
+        self._colored_print(Fore.YELLOW, msg)
+
+    def info(self, message, symbol="*"):
+        msg = f"[{symbol}] {message}"
+        self._colored_print(Fore.BLUE, msg)
+
     def update(self, result, is_similar=False):
         """Notify Update.
+12  maigret/report.py
@@ -70,6 +70,17 @@ def save_json_report(filename: str, username: str, results: dict, report_type: s
         generate_json_report(username, results, f, report_type=report_type)
 
 
+def get_plaintext_report(context: dict) -> str:
+    output = (context['brief'] + " ").replace('. ', '.\n')
+    interests = list(map(lambda x: x[0], context.get('interests_tuple_list', [])))
+    countries = list(map(lambda x: x[0], context.get('countries_tuple_list', [])))
+    if countries:
+        output += f'Countries: {", ".join(countries)}\n'
+    if interests:
+        output += f'Interests (tags): {", ".join(interests)}\n'
+    return output.strip()
+
+
 """
 REPORTS GENERATING
 """
@@ -215,6 +226,7 @@ def generate_report_context(username_results: list):
     return {
         "username": first_username,
+        # TODO: return brief list
         "brief": brief,
         "results": username_results,
         "first_seen": first_seen,
+1810 -1890  maigret/resources/data.json
File diff suppressed because it is too large.
+2 -1  maigret/sites.py
@@ -61,9 +61,10 @@ SUPPORTED_TAGS = [
     "military",
     "auto",
     "gambling",
+    "business",
     "cybercriminal",
     "review",
-    "bookmarks",
+    "design",
 ]
+3 -3  maigret/submit.py
@@ -255,7 +255,7 @@ async def check_features_manually(
     features = input("If features was not detected correctly, write it manually: ")
     if features:
-        presence_list = features.split(",")
+        presence_list = list(map(str.strip, features.split(",")))
 
     absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
         :top_features_count
@@ -264,7 +264,7 @@ async def check_features_manually(
     features = input("If features was not detected correctly, write it manually: ")
     if features:
-        absence_list = features.split(",")
+        absence_list = list(map(str.strip, features.split(",")))
 
     site_data = {
         "absenceStrs": absence_list,
@@ -355,7 +355,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
         return False
 
     chosen_site.name = input("Change site name if you want: ") or chosen_site.name
-    chosen_site.tags = input("Site tags: ").split(',')
+    chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
 
     rank = get_alexa_rank(chosen_site.url_main)
     if rank:
         print(f'New alexa rank: {rank}')
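
All three hunks apply the same fix: comma-separated input typed by a user keeps its surrounding whitespace after a bare split(','), so a tag like ' dating ' would never match an entry in SUPPORTED_TAGS. A quick illustration:

    raw = "photo, dating , blog"
    print(raw.split(","))                        # ['photo', ' dating ', ' blog']
    print(list(map(str.strip, raw.split(","))))  # ['photo', 'dating', 'blog']
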
+8 -1  maigret/utils.py
@@ -1,5 +1,7 @@
+import ast
 import re
 import random
+from typing import Any
 
 
 DEFAULT_USER_AGENTS = [
@@ -65,6 +67,10 @@ class URLMatcher:
         return re.compile(regexp_str)
 
 
+def ascii_data_display(data: str) -> Any:
+    return ast.literal_eval(data)
+
+
 def get_dict_ascii_tree(items, prepend="", new_line=True):
     text = ""
     for num, item in enumerate(items):
@@ -75,7 +81,8 @@ def get_dict_ascii_tree(items, prepend="", new_line=True):
             if field_value.startswith("['"):
                 is_last_item = num == len(items) - 1
                 prepend_symbols = " " * 3 if is_last_item else ""
-                field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
+                data = ascii_data_display(field_value)
+                field_value = get_dict_ascii_tree(data, prepend_symbols)
             text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
         else:
             text += f"\n{prepend}{box_symbol} {item}"
+1 -1  requirements.txt
@@ -26,7 +26,7 @@ python-socks==1.1.2
 requests>=2.24.0
 requests-futures==1.0.0
 six==1.15.0
-socid-extractor>=0.0.19
+socid-extractor>=0.0.20
 soupsieve==2.1
 stem==1.8.0
 torrequest==0.1.0
+1 -1  setup.py
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
     requires = rf.read().splitlines()
 
 setup(name='maigret',
-      version='0.2.3',
+      version='0.2.4',
       description='Collect a dossier on a person by username from a huge number of sites',
       long_description=long_description,
       long_description_content_type="text/markdown",
+1361 -1343  sites.md
File diff suppressed because it is too large.
+6  test-requirements.txt (new file)
@@ -0,0 +1,6 @@
+flake8==3.8.4
+pytest==6.2.4
+pytest-asyncio==0.14.0
+pytest-cov==2.10.1
+pytest-httpserver==1.0.0
+pytest-rerunfailures==9.1.1
+12 -5  tests/conftest.py
@@ -12,6 +12,7 @@ from maigret.maigret import setup_arguments_parser
 CUR_PATH = os.path.dirname(os.path.realpath(__file__))
 JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
 TEST_JSON_FILE = os.path.join(CUR_PATH, 'db.json')
+LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
 
 empty_mark = Mark('', (), {})
@@ -36,16 +37,17 @@ def remove_test_reports():
 
 @pytest.fixture(scope='session')
 def default_db():
-    db = MaigretDatabase().load_from_file(JSON_FILE)
-    return db
+    return MaigretDatabase().load_from_file(JSON_FILE)
 
 
 @pytest.fixture(scope='function')
 def test_db():
-    db = MaigretDatabase().load_from_file(TEST_JSON_FILE)
-    return db
+    return MaigretDatabase().load_from_file(TEST_JSON_FILE)
 
 
+@pytest.fixture(scope='function')
+def local_test_db():
+    return MaigretDatabase().load_from_file(LOCAL_TEST_JSON_FILE)
+
+
 @pytest.fixture(autouse=True)
@@ -58,3 +60,8 @@ def reports_autoclean():
 @pytest.fixture(scope='session')
 def argparser():
     return setup_arguments_parser()
+
+
+@pytest.fixture(scope="session")
+def httpserver_listen_address():
+    return ("localhost", 8989)
+21  tests/local.json (new file)
@@ -0,0 +1,21 @@
+{
+    "engines": {},
+    "sites": {
+        "StatusCode": {
+            "checkType": "status_code",
+            "url": "http://localhost:8989/url?id={username}",
+            "urlMain": "http://localhost:8989/",
+            "usernameClaimed": "claimed",
+            "usernameUnclaimed": "unclaimed"
+        },
+        "Message": {
+            "checkType": "message",
+            "url": "http://localhost:8989/url?id={username}",
+            "urlMain": "http://localhost:8989/",
+            "presenseStrs": ["user", "profile"],
+            "absenseStrs": ["not found", "404"],
+            "usernameClaimed": "claimed",
+            "usernameUnclaimed": "unclaimed"
+        }
+    }
+}
+1  tests/test_activation.py
@@ -22,6 +22,7 @@ httpbin.org FALSE / FALSE 0 a b
 """
 
+@pytest.mark.skip(reason="periodically fails")
 @pytest.mark.slow
 def test_twitter_activation(default_db):
     twitter_site = default_db.sites_dict['Twitter']
+65  tests/test_checking.py (new file)
@@ -0,0 +1,65 @@
+from mock import Mock
+import pytest
+
+from maigret import search
+
+
+def site_result_except(server, username, **kwargs):
+    query = f'id={username}'
+    server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
+
+
+@pytest.mark.asyncio
+async def test_checking_by_status_code(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', status=200)
+    site_result_except(httpserver, 'unclaimed', status=404)
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['StatusCode']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['StatusCode']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_positive_full(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="user profile")
+    site_result_except(httpserver, 'unclaimed', response_data="404 not found")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_positive_part(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="profile")
+    site_result_except(httpserver, 'unclaimed', response_data="404")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_negative(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="")
+    site_result_except(httpserver, 'unclaimed', response_data="user 404")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+15  tests/test_data.py (new file)
@@ -0,0 +1,15 @@
+"""Maigret data test functions"""
+from maigret.utils import is_country_tag
+from maigret.sites import SUPPORTED_TAGS
+
+
+def test_tags_validity(default_db):
+    unknown_tags = set()
+
+    for site in default_db.sites:
+        for tag in filter(lambda x: not is_country_tag(x), site.tags):
+            if tag not in SUPPORTED_TAGS:
+                unknown_tags.add(tag)
+
+    assert unknown_tags == set()
+11  tests/test_report.py
@@ -16,6 +16,7 @@ from maigret.report import (
     generate_report_template,
     generate_report_context,
     generate_json_report,
+    get_plaintext_report,
 )
 from maigret.result import QueryResult, QueryStatus
 from maigret.sites import MaigretSite
@@ -346,3 +347,13 @@ def test_pdf_report():
     save_pdf_report(report_name, context)
 
     assert os.path.exists(report_name)
+
+
+def test_text_report():
+    context = generate_report_context(TEST)
+    report_text = get_plaintext_report(context)
+
+    for brief_part in SUPPOSED_BRIEF.split():
+        assert brief_part in report_text
+    assert 'us' in report_text
+    assert 'photo' in report_text
+5  tests/test_utils.py
@@ -57,6 +57,11 @@ def test_enrich_link_str():
     )
 
 
+def test_url_extract_main_part_negative():
+    url_main_part = 'None'
+    assert URLMatcher.extract_main_part(url_main_part) == ''
+
+
 def test_url_extract_main_part():
     url_main_part = 'flickr.com/photos/alexaimephotography'