Merge pull request #155 from soxoj/0.2.4

Bump to 0.2.4
2026-05-09 08:04:32 +00:00 · 2021-05-18 01:20:00 +03:00 · 2021-05-18 01:17:35 +03:00 · 2021-05-18 00:57:31 +03:00 · 2021-05-18 00:55:18 +03:00 · 2021-05-18 00:49:13 +03:00
22 changed files with 3363 additions and 3256 deletions
@@ -26,7 +26,7 @@ jobs:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
-        python -m pip install flake8 pytest pytest-rerunfailures
+        python -m pip install -r test-requirements.txt
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Test with pytest
      run: |
@@ -2,6 +2,10 @@

 ## [Unreleased]

+## [0.2.4] - 2021-05-18
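+* added a short plain-text report to the CLI output
+* various improvements: `ast.literal_eval` instead of `eval`, 3-second default timeout, tag list and test updates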
+
 ## [0.2.3] - 2021-05-12
 * added Yelp and yelp_userid support
 * tags markup stabilization
@@ -1,3 +1,3 @@
 """Maigret version file"""

-__version__ = '0.2.3'
+__version__ = '0.2.4'
@@ -26,7 +26,7 @@ from .executors import (
 from .result import QueryResult, QueryStatus
 from .sites import MaigretDatabase, MaigretSite
 from .types import QueryOptions, QueryResultWrapper
-from .utils import get_random_user_agent
+from .utils import get_random_user_agent, ascii_data_display


 SUPPORTED_IDS = (
@@ -233,9 +233,9 @@ def process_site_result(
            result = build_result(QueryStatus.CLAIMED)
        else:
            result = build_result(QueryStatus.AVAILABLE)
-    elif check_type == "status_code":
+    elif check_type in "status_code":
        # Checks if the status code of the response is 2XX
-        if is_presense_detected and (not status_code >= 300 or status_code < 200):
+        if 200 <= status_code < 300:
            result = build_result(QueryStatus.CLAIMED)
        else:
            result = build_result(QueryStatus.AVAILABLE)
@@ -272,7 +272,7 @@ def process_site_result(
                    new_usernames[v] = k

            results_info["ids_usernames"] = new_usernames
-            links = eval(extracted_ids_data.get("links", "[]"))
+            links = ascii_data_display(extracted_ids_data.get("links", "[]"))
            if "website" in extracted_ids_data:
                links.append(extracted_ids_data["website"])
            results_info["ids_links"] = links
@@ -456,7 +456,7 @@ async def maigret(
    logger,
    query_notify=None,
    proxy=None,
-    timeout=None,
+    timeout=3,
    is_parsing_enabled=False,
    id_type="username",
    debug=False,
@@ -478,7 +478,7 @@ async def maigret(
                              query results.
    logger                 -- Standard Python logger object.
    timeout                -- Time in seconds to wait before timing out request.
-                              Default is no timeout.
+                              Default is 3 seconds.
    is_parsing_enabled     -- Extract additional info from account pages.
    id_type                -- Type of username to search.
                              Default is 'username', see all supported here:
@@ -32,6 +32,7 @@ from .report import (
    save_txt_report,
    SUPPORTED_JSON_REPORT_FORMATS,
    save_json_report,
+    get_plaintext_report,
 )
 from .sites import MaigretDatabase
 from .submit import submit_dialog
@@ -646,6 +647,12 @@ async def main():
            filename = report_filepath_tpl.format(username=username, postfix='.pdf')
            save_pdf_report(filename, report_context)
            query_notify.warning(f'PDF report on all usernames saved in {filename}')
+
+        text_report = get_plaintext_report(report_context)
+        if text_report:
+            query_notify.info('Short text report:')
+            print(text_report)
+
    # update database
    db.save_to_file(args.db_file)

@@ -205,13 +205,20 @@ class QueryNotifyPrint(QueryNotify):
        else:
            print(f"[*] {title} {message} on:")

-    def warning(self, message, symbol="-"):
-        msg = f"[{symbol}] {message}"
+    def _colored_print(self, fore_color, msg):
        if self.color:
-            print(Style.BRIGHT + Fore.YELLOW + msg)
+            print(Style.BRIGHT + fore_color + msg)
        else:
            print(msg)

+    def warning(self, message, symbol="-"):
+        msg = f"[{symbol}] {message}"
+        self._colored_print(Fore.YELLOW, msg)
+
+    def info(self, message, symbol="*"):
+        msg = f"[{symbol}] {message}"
+        self._colored_print(Fore.BLUE, msg)
+
    def update(self, result, is_similar=False):
        """Notify Update.

@@ -70,6 +70,17 @@ def save_json_report(filename: str, username: str, results: dict, report_type: s
        generate_json_report(username, results, f, report_type=report_type)


+def get_plaintext_report(context: dict) -> str:
+    output = (context['brief'] + " ").replace('. ', '.\n')
+    interests = list(map(lambda x: x[0], context.get('interests_tuple_list', [])))
+    countries = list(map(lambda x: x[0], context.get('countries_tuple_list', [])))
+    if countries:
+        output += f'Countries: {", ".join(countries)}\n'
+    if interests:
+        output += f'Interests (tags): {", ".join(interests)}\n'
+    return output.strip()
+
+
 """
 REPORTS GENERATING
 """
@@ -215,6 +226,7 @@ def generate_report_context(username_results: list):

    return {
        "username": first_username,
+        # TODO: return brief list
        "brief": brief,
        "results": username_results,
        "first_seen": first_seen,
@@ -61,9 +61,10 @@ SUPPORTED_TAGS = [
    "military",
    "auto",
    "gambling",
-    "business",
    "cybercriminal",
    "review",
+    "bookmarks",
+    "design",
 ]


@@ -255,7 +255,7 @@ async def check_features_manually(
    features = input("If features was not detected correctly, write it manually: ")

    if features:
-        presence_list = features.split(",")
+        presence_list = list(map(str.strip, features.split(",")))

    absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
        :top_features_count
@@ -264,7 +264,7 @@ async def check_features_manually(
    features = input("If features was not detected correctly, write it manually: ")

    if features:
-        absence_list = features.split(",")
+        absence_list = list(map(str.strip, features.split(",")))

    site_data = {
        "absenceStrs": absence_list,
@@ -355,7 +355,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
            return False

    chosen_site.name = input("Change site name if you want: ") or chosen_site.name
-    chosen_site.tags = input("Site tags: ").split(',')
+    chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
    rank = get_alexa_rank(chosen_site.url_main)
    if rank:
        print(f'New alexa rank: {rank}')
@@ -1,5 +1,7 @@
+import ast
 import re
 import random
+from typing import Any


 DEFAULT_USER_AGENTS = [
@@ -65,6 +67,10 @@ class URLMatcher:
        return re.compile(regexp_str)


+def ascii_data_display(data: str) -> Any:
+    return ast.literal_eval(data)
+
+
 def get_dict_ascii_tree(items, prepend="", new_line=True):
    text = ""
    for num, item in enumerate(items):
@@ -75,7 +81,8 @@ def get_dict_ascii_tree(items, prepend="", new_line=True):
            if field_value.startswith("['"):
                is_last_item = num == len(items) - 1
                prepend_symbols = " " * 3 if is_last_item else " ┃ "
-                field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
+                data = ascii_data_display(field_value)
+                field_value = get_dict_ascii_tree(data, prepend_symbols)
            text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
        else:
            text += f"\n{prepend}{box_symbol} {item}"
@@ -26,7 +26,7 @@ python-socks==1.1.2
 requests>=2.24.0
 requests-futures==1.0.0
 six==1.15.0
-socid-extractor>=0.0.19
+socid-extractor>=0.0.20
 soupsieve==2.1
 stem==1.8.0
 torrequest==0.1.0
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
    requires = rf.read().splitlines()

 setup(name='maigret',
-      version='0.2.3',
+      version='0.2.4',
      description='Collect a dossier on a person by username from a huge number of sites',
      long_description=long_description,
      long_description_content_type="text/markdown",
@@ -0,0 +1,6 @@
+flake8==3.8.4
+pytest==6.2.4
+pytest-asyncio==0.14.0
+pytest-cov==2.10.1
+pytest-httpserver==1.0.0
+pytest-rerunfailures==9.1.1
@@ -12,6 +12,7 @@ from maigret.maigret import setup_arguments_parser
 CUR_PATH = os.path.dirname(os.path.realpath(__file__))
 JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
 TEST_JSON_FILE = os.path.join(CUR_PATH, 'db.json')
+LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
 empty_mark = Mark('', (), {})


@@ -36,16 +37,17 @@ def remove_test_reports():

@pytest.fixture(scope='session')
 def default_db():
-    db = MaigretDatabase().load_from_file(JSON_FILE)
-
-    return db
+    return MaigretDatabase().load_from_file(JSON_FILE)


@pytest.fixture(scope='function')
 def test_db():
-    db = MaigretDatabase().load_from_file(TEST_JSON_FILE)
+    return MaigretDatabase().load_from_file(TEST_JSON_FILE)

-    return db
+
+@pytest.fixture(scope='function')
+def local_test_db():
+    return MaigretDatabase().load_from_file(LOCAL_TEST_JSON_FILE)


@pytest.fixture(autouse=True)
@@ -58,3 +60,8 @@ def reports_autoclean():
@pytest.fixture(scope='session')
 def argparser():
    return setup_arguments_parser()
+
+
+@pytest.fixture(scope="session")
+def httpserver_listen_address():
+    return ("localhost", 8989)
@@ -0,0 +1,21 @@
+{
+    "engines": {},
+    "sites": {
+        "StatusCode": {
+            "checkType": "status_code",
+            "url": "http://localhost:8989/url?id={username}",
+            "urlMain": "http://localhost:8989/",
+            "usernameClaimed": "claimed",
+            "usernameUnclaimed": "unclaimed"
+        },
+        "Message": {
+            "checkType": "message",
+            "url": "http://localhost:8989/url?id={username}",
+            "urlMain": "http://localhost:8989/",
+            "presenseStrs": ["user", "profile"],
+            "absenseStrs": ["not found", "404"],
+            "usernameClaimed": "claimed",
+            "usernameUnclaimed": "unclaimed"
+        }
+    }
+}
@@ -22,6 +22,7 @@ httpbin.org	FALSE	/	FALSE	0	a	b
 """


+@pytest.mark.skip(reason="periodically fails")
@pytest.mark.slow
 def test_twitter_activation(default_db):
    twitter_site = default_db.sites_dict['Twitter']
@@ -0,0 +1,65 @@
+from mock import Mock
+import pytest
+
+from maigret import search
+
+
+def site_result_except(server, username, **kwargs):
+    query = f'id={username}'
+    server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
+
+
+@pytest.mark.asyncio
+async def test_checking_by_status_code(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', status=200)
+    site_result_except(httpserver, 'unclaimed', status=404)
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['StatusCode']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['StatusCode']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_positive_full(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="user profile")
+    site_result_except(httpserver, 'unclaimed', response_data="404 not found")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_positive_part(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="profile")
+    site_result_except(httpserver, 'unclaimed', response_data="404")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_negative(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="")
+    site_result_except(httpserver, 'unclaimed', response_data="user 404")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
@@ -0,0 +1,15 @@
+"""Maigret data test functions"""
+
+from maigret.utils import is_country_tag
+from maigret.sites import SUPPORTED_TAGS
+
+
+def test_tags_validity(default_db):
+    unknown_tags = set()
+
+    for site in default_db.sites:
+        for tag in filter(lambda x: not is_country_tag(x), site.tags):
+            if tag not in SUPPORTED_TAGS:
+                unknown_tags.add(tag)
+
+    assert unknown_tags == set()
@@ -16,6 +16,7 @@ from maigret.report import (
    generate_report_template,
    generate_report_context,
    generate_json_report,
+    get_plaintext_report,
 )
 from maigret.result import QueryResult, QueryStatus
 from maigret.sites import MaigretSite
@@ -346,3 +347,13 @@ def test_pdf_report():
    save_pdf_report(report_name, context)

    assert os.path.exists(report_name)
+
+
+def test_text_report():
+    context = generate_report_context(TEST)
+    report_text = get_plaintext_report(context)
+
+    for brief_part in SUPPOSED_BRIEF.split():
+        assert brief_part in report_text
+    assert 'us' in report_text
+    assert 'photo' in report_text
@@ -57,6 +57,11 @@ def test_enrich_link_str():
    )


+def test_url_extract_main_part_negative():
+    url_main_part = 'None'
+    assert URLMatcher.extract_main_part(url_main_part) == ''
+
+
 def test_url_extract_main_part():
    url_main_part = 'flickr.com/photos/alexaimephotography'
Author	SHA1	Message	Date
soxoj	bea900dda0	Merge pull request #155 from soxoj/0.2.4 Bump to 0.2.4	2021-05-18 01:20:00 +03:00
Soxoj	bb1bde833d	Bump to 0.2.4	2021-05-18 01:17:35 +03:00
soxoj	5b405c6abb	Merge pull request #154 from soxoj/tests-improving Improved tests	2021-05-18 00:57:31 +03:00
Soxoj	99fa58ceed	Disabled Twitter activation test	2021-05-18 00:55:18 +03:00
Soxoj	c71e404f63	Added test dependencies	2021-05-18 00:49:13 +03:00
Soxoj	2c04ccce57	Improved tests	2021-05-18 00:43:56 +03:00
soxoj	435db7cdc9	Merge pull request #153 from soxoj/sites-update-16-05-21 Several sites added, updated site list	2021-05-17 00:35:56 +03:00
Soxoj	413a0502a4	Several sites added, updated site list	2021-05-16 17:02:41 +03:00
soxoj	2aedcc3166	Merge pull request #152 from soxoj/cli-plaintext-report Added text report to CLI output	2021-05-15 16:57:22 +03:00
Soxoj	28835204f5	Added text report to CLI output	2021-05-15 16:55:05 +03:00
soxoj	b11a247dfd	Merge pull request #151 from soxoj/tags-socid-extractor Tags updated, added tests for tags	2021-05-15 14:55:01 +03:00
Soxoj	c9219d91ec	Tags updated, added tests for tags Added several sites Updated socid_extractor version to avoid bug #150	2021-05-15 14:51:30 +03:00