Compare commits


12 Commits

Author  SHA1        Date                        Message
soxoj   bea900dda0  2021-05-18 01:20:00 +03:00  Merge pull request #155 from soxoj/0.2.4 (Bump to 0.2.4)
Soxoj   bb1bde833d  2021-05-18 01:17:35 +03:00  Bump to 0.2.4
soxoj   5b405c6abb  2021-05-18 00:57:31 +03:00  Merge pull request #154 from soxoj/tests-improving (Improved tests)
Soxoj   99fa58ceed  2021-05-18 00:55:18 +03:00  Disabled Twitter activation test
Soxoj   c71e404f63  2021-05-18 00:49:13 +03:00  Added test dependencies
Soxoj   2c04ccce57  2021-05-18 00:43:56 +03:00  Improved tests
soxoj   435db7cdc9  2021-05-17 00:35:56 +03:00  Merge pull request #153 from soxoj/sites-update-16-05-21 (Several sites added, updated site list)
Soxoj   413a0502a4  2021-05-16 17:02:41 +03:00  Several sites added, updated site list
soxoj   2aedcc3166  2021-05-15 16:57:22 +03:00  Merge pull request #152 from soxoj/cli-plaintext-report (Added text report to CLI output)
Soxoj   28835204f5  2021-05-15 16:55:05 +03:00  Added text report to CLI output
soxoj   b11a247dfd  2021-05-15 14:55:01 +03:00  Merge pull request #151 from soxoj/tags-socid-extractor (Tags updated, added tests for tags)
Soxoj   c9219d91ec  2021-05-15 14:51:30 +03:00  Tags updated, added tests for tags; added several sites; updated socid_extractor version to avoid bug #150
22 changed files with 3363 additions and 3256 deletions
+1 -1  .github/workflows/python-package.yml
@@ -26,7 +26,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install flake8 pytest pytest-rerunfailures
+          python -m pip install -r test-requirements.txt
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
       - name: Test with pytest
         run: |
+4  CHANGELOG.md
@@ -2,6 +2,10 @@
 ## [Unreleased]
 
+## [0.2.4] - 2021-05-18
+* cli output report
+* various improvements
+
 ## [0.2.3] - 2021-05-12
 * added Yelp and yelp_userid support
 * tags markup stabilization
+1 -1  maigret/__version__.py
@@ -1,3 +1,3 @@
 """Maigret version file"""
 
-__version__ = '0.2.3'
+__version__ = '0.2.4'
+6 -6  maigret/checking.py
@@ -26,7 +26,7 @@ from .executors import (
 from .result import QueryResult, QueryStatus
 from .sites import MaigretDatabase, MaigretSite
 from .types import QueryOptions, QueryResultWrapper
-from .utils import get_random_user_agent
+from .utils import get_random_user_agent, ascii_data_display
 
 
 SUPPORTED_IDS = (
@@ -233,9 +233,9 @@ def process_site_result(
             result = build_result(QueryStatus.CLAIMED)
         else:
            result = build_result(QueryStatus.AVAILABLE)
-    elif check_type == "status_code":
+    elif check_type in "status_code":
         # Checks if the status code of the response is 2XX
-        if is_presense_detected and (not status_code >= 300 or status_code < 200):
+        if 200 <= status_code < 300:
             result = build_result(QueryStatus.CLAIMED)
         else:
             result = build_result(QueryStatus.AVAILABLE)
@@ -272,7 +272,7 @@ def process_site_result(
                 new_usernames[v] = k
 
             results_info["ids_usernames"] = new_usernames
-            links = eval(extracted_ids_data.get("links", "[]"))
+            links = ascii_data_display(extracted_ids_data.get("links", "[]"))
             if "website" in extracted_ids_data:
                 links.append(extracted_ids_data["website"])
             results_info["ids_links"] = links
@@ -456,7 +456,7 @@ async def maigret(
     logger,
     query_notify=None,
     proxy=None,
-    timeout=None,
+    timeout=3,
     is_parsing_enabled=False,
     id_type="username",
     debug=False,
@@ -478,7 +478,7 @@ async def maigret(
                           query results.
     logger             -- Standard Python logger object.
     timeout            -- Time in seconds to wait before timing out request.
-                          Default is no timeout.
+                          Default is 3 seconds.
     is_parsing_enabled -- Extract additional info from account pages.
     id_type            -- Type of username to search.
                           Default is 'username', see all supported here:
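
A note on the status-code hunk above: the old condition was a genuine logic bug, not just style. `not status_code >= 300 or status_code < 200` parses as `(not (status_code >= 300)) or (status_code < 200)`, which is true for every code below 300, including 1XX, and it also required `is_presense_detected` even though plain status-code sites define no presence strings. A standalone Python sketch of the difference (illustration only, not Maigret code):

    # Compare the old and new 2XX checks on a few representative status codes.
    for status_code in (101, 200, 204, 301, 404):
        old = not status_code >= 300 or status_code < 200  # effectively status_code < 300
        new = 200 <= status_code < 300
        print(status_code, old, new)
    # 101  True  False   <- the old check wrongly counts 1XX as "claimed"
    # 200  True  True
    # 204  True  True
    # 301  False False
    # 404  False False
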
+7  maigret/maigret.py
@@ -32,6 +32,7 @@ from .report import (
     save_txt_report,
     SUPPORTED_JSON_REPORT_FORMATS,
     save_json_report,
+    get_plaintext_report,
 )
 from .sites import MaigretDatabase
 from .submit import submit_dialog
@@ -646,6 +647,12 @@ async def main():
         filename = report_filepath_tpl.format(username=username, postfix='.pdf')
         save_pdf_report(filename, report_context)
         query_notify.warning(f'PDF report on all usernames saved in {filename}')
+
+    text_report = get_plaintext_report(report_context)
+    if text_report:
+        query_notify.info('Short text report:')
+        print(text_report)
 
     # update database
     db.save_to_file(args.db_file)
+10 -3  maigret/notify.py
@@ -205,13 +205,20 @@ class QueryNotifyPrint(QueryNotify):
         else:
             print(f"[*] {title} {message} on:")
 
-    def warning(self, message, symbol="-"):
-        msg = f"[{symbol}] {message}"
+    def _colored_print(self, fore_color, msg):
         if self.color:
-            print(Style.BRIGHT + Fore.YELLOW + msg)
+            print(Style.BRIGHT + fore_color + msg)
         else:
             print(msg)
 
+    def warning(self, message, symbol="-"):
+        msg = f"[{symbol}] {message}"
+        self._colored_print(Fore.YELLOW, msg)
+
+    def info(self, message, symbol="*"):
+        msg = f"[{symbol}] {message}"
+        self._colored_print(Fore.BLUE, msg)
+
     def update(self, result, is_similar=False):
         """Notify Update.
+12  maigret/report.py
@@ -70,6 +70,17 @@ def save_json_report(filename: str, username: str, results: dict, report_type: s
         generate_json_report(username, results, f, report_type=report_type)
 
 
+def get_plaintext_report(context: dict) -> str:
+    output = (context['brief'] + " ").replace('. ', '.\n')
+    interests = list(map(lambda x: x[0], context.get('interests_tuple_list', [])))
+    countries = list(map(lambda x: x[0], context.get('countries_tuple_list', [])))
+    if countries:
+        output += f'Countries: {", ".join(countries)}\n'
+    if interests:
+        output += f'Interests (tags): {", ".join(interests)}\n'
+    return output.strip()
+
+
 """
 REPORTS GENERATING
 """
@@ -215,6 +226,7 @@ def generate_report_context(username_results: list):
     return {
         "username": first_username,
+        # TODO: return brief list
         "brief": brief,
         "results": username_results,
         "first_seen": first_seen,
+1810 -1890  maigret/resources/data.json
File diff suppressed because it is too large.
+2 -1  maigret/sites.py
@@ -61,9 +61,10 @@ SUPPORTED_TAGS = [
     "military",
     "auto",
     "gambling",
+    "business",
     "cybercriminal",
     "review",
-    "bookmarks",
+    "design",
 ]
+3 -3  maigret/submit.py
@@ -255,7 +255,7 @@ async def check_features_manually(
     features = input("If features was not detected correctly, write it manually: ")
     if features:
-        presence_list = features.split(",")
+        presence_list = list(map(str.strip, features.split(",")))
 
     absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
         :top_features_count
@@ -264,7 +264,7 @@ async def check_features_manually(
     features = input("If features was not detected correctly, write it manually: ")
     if features:
-        absence_list = features.split(",")
+        absence_list = list(map(str.strip, features.split(",")))
 
     site_data = {
         "absenceStrs": absence_list,
@@ -355,7 +355,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
         return False
 
     chosen_site.name = input("Change site name if you want: ") or chosen_site.name
-    chosen_site.tags = input("Site tags: ").split(',')
+    chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
 
     rank = get_alexa_rank(chosen_site.url_main)
     if rank:
         print(f'New alexa rank: {rank}')
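
All three hunks apply the same fix: comma-separated input typed by a user keeps its surrounding whitespace after a bare split(','), so a tag like ' dating ' would never match an entry in SUPPORTED_TAGS. A quick illustration:

    raw = "photo, dating , blog"
    print(raw.split(","))                        # ['photo', ' dating ', ' blog']
    print(list(map(str.strip, raw.split(","))))  # ['photo', 'dating', 'blog']
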
+8 -1  maigret/utils.py
@@ -1,5 +1,7 @@
+import ast
 import re
 import random
+from typing import Any
 
 
 DEFAULT_USER_AGENTS = [
@@ -65,6 +67,10 @@ class URLMatcher:
         return re.compile(regexp_str)
 
 
+def ascii_data_display(data: str) -> Any:
+    return ast.literal_eval(data)
+
+
 def get_dict_ascii_tree(items, prepend="", new_line=True):
     text = ""
     for num, item in enumerate(items):
@@ -75,7 +81,8 @@ def get_dict_ascii_tree(items, prepend="", new_line=True):
             if field_value.startswith("['"):
                 is_last_item = num == len(items) - 1
                 prepend_symbols = " " * 3 if is_last_item else ""
-                field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
+                data = ascii_data_display(field_value)
+                field_value = get_dict_ascii_tree(data, prepend_symbols)
             text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
         else:
             text += f"\n{prepend}{box_symbol} {item}"
+1 -1  requirements.txt
@@ -26,7 +26,7 @@ python-socks==1.1.2
 requests>=2.24.0
 requests-futures==1.0.0
 six==1.15.0
-socid-extractor>=0.0.19
+socid-extractor>=0.0.20
 soupsieve==2.1
 stem==1.8.0
 torrequest==0.1.0
+1 -1  setup.py
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
     requires = rf.read().splitlines()
 
 setup(name='maigret',
-      version='0.2.3',
+      version='0.2.4',
       description='Collect a dossier on a person by username from a huge number of sites',
       long_description=long_description,
       long_description_content_type="text/markdown",
+1361 -1343  sites.md
File diff suppressed because it is too large.
+6  test-requirements.txt (new file)
@@ -0,0 +1,6 @@
+flake8==3.8.4
+pytest==6.2.4
+pytest-asyncio==0.14.0
+pytest-cov==2.10.1
+pytest-httpserver==1.0.0
+pytest-rerunfailures==9.1.1
+12 -5  tests/conftest.py
@@ -12,6 +12,7 @@ from maigret.maigret import setup_arguments_parser
 CUR_PATH = os.path.dirname(os.path.realpath(__file__))
 JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
 TEST_JSON_FILE = os.path.join(CUR_PATH, 'db.json')
+LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
 
 empty_mark = Mark('', (), {})
@@ -36,16 +37,17 @@ def remove_test_reports():
 
 @pytest.fixture(scope='session')
 def default_db():
-    db = MaigretDatabase().load_from_file(JSON_FILE)
-    return db
+    return MaigretDatabase().load_from_file(JSON_FILE)
 
 
 @pytest.fixture(scope='function')
 def test_db():
-    db = MaigretDatabase().load_from_file(TEST_JSON_FILE)
-    return db
+    return MaigretDatabase().load_from_file(TEST_JSON_FILE)
 
 
+@pytest.fixture(scope='function')
+def local_test_db():
+    return MaigretDatabase().load_from_file(LOCAL_TEST_JSON_FILE)
+
+
 @pytest.fixture(autouse=True)
@@ -58,3 +60,8 @@ def reports_autoclean():
 @pytest.fixture(scope='session')
 def argparser():
     return setup_arguments_parser()
+
+
+@pytest.fixture(scope="session")
+def httpserver_listen_address():
+    return ("localhost", 8989)
+21  tests/local.json (new file)
@@ -0,0 +1,21 @@
+{
+    "engines": {},
+    "sites": {
+        "StatusCode": {
+            "checkType": "status_code",
+            "url": "http://localhost:8989/url?id={username}",
+            "urlMain": "http://localhost:8989/",
+            "usernameClaimed": "claimed",
+            "usernameUnclaimed": "unclaimed"
+        },
+        "Message": {
+            "checkType": "message",
+            "url": "http://localhost:8989/url?id={username}",
+            "urlMain": "http://localhost:8989/",
+            "presenseStrs": ["user", "profile"],
+            "absenseStrs": ["not found", "404"],
+            "usernameClaimed": "claimed",
+            "usernameUnclaimed": "unclaimed"
+        }
+    }
+}
+1  tests/test_activation.py
@@ -22,6 +22,7 @@ httpbin.org FALSE / FALSE 0 a b
 """
 
+@pytest.mark.skip(reason="periodically fails")
 @pytest.mark.slow
 def test_twitter_activation(default_db):
     twitter_site = default_db.sites_dict['Twitter']
+65  tests/test_checking.py (new file)
@@ -0,0 +1,65 @@
+from mock import Mock
+import pytest
+
+from maigret import search
+
+
+def site_result_except(server, username, **kwargs):
+    query = f'id={username}'
+    server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
+
+
+@pytest.mark.asyncio
+async def test_checking_by_status_code(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', status=200)
+    site_result_except(httpserver, 'unclaimed', status=404)
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['StatusCode']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['StatusCode']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_positive_full(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="user profile")
+    site_result_except(httpserver, 'unclaimed', response_data="404 not found")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_positive_part(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="profile")
+    site_result_except(httpserver, 'unclaimed', response_data="404")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_negative(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="")
+    site_result_except(httpserver, 'unclaimed', response_data="user 404")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+15  tests/test_data.py (new file)
@@ -0,0 +1,15 @@
+"""Maigret data test functions"""
+from maigret.utils import is_country_tag
+from maigret.sites import SUPPORTED_TAGS
+
+
+def test_tags_validity(default_db):
+    unknown_tags = set()
+
+    for site in default_db.sites:
+        for tag in filter(lambda x: not is_country_tag(x), site.tags):
+            if tag not in SUPPORTED_TAGS:
+                unknown_tags.add(tag)
+
+    assert unknown_tags == set()
+11  tests/test_report.py
@@ -16,6 +16,7 @@ from maigret.report import (
     generate_report_template,
     generate_report_context,
     generate_json_report,
+    get_plaintext_report,
 )
 from maigret.result import QueryResult, QueryStatus
 from maigret.sites import MaigretSite
@@ -346,3 +347,13 @@ def test_pdf_report():
     save_pdf_report(report_name, context)
 
     assert os.path.exists(report_name)
+
+
+def test_text_report():
+    context = generate_report_context(TEST)
+    report_text = get_plaintext_report(context)
+
+    for brief_part in SUPPOSED_BRIEF.split():
+        assert brief_part in report_text
+    assert 'us' in report_text
+    assert 'photo' in report_text
+5  tests/test_utils.py
@@ -57,6 +57,11 @@ def test_enrich_link_str():
     )
 
 
+def test_url_extract_main_part_negative():
+    url_main_part = 'None'
+    assert URLMatcher.extract_main_part(url_main_part) == ''
+
+
 def test_url_extract_main_part():
     url_main_part = 'flickr.com/photos/alexaimephotography'