Compare commits

..

12 Commits

Author SHA1 Message Date
soxoj bea900dda0 Merge pull request #155 from soxoj/0.2.4
Bump to 0.2.4
2021-05-18 01:20:00 +03:00
Soxoj bb1bde833d Bump to 0.2.4 2021-05-18 01:17:35 +03:00
soxoj 5b405c6abb Merge pull request #154 from soxoj/tests-improving
Improved tests
2021-05-18 00:57:31 +03:00
Soxoj 99fa58ceed Disabled Twitter activation test 2021-05-18 00:55:18 +03:00
Soxoj c71e404f63 Added test dependencies 2021-05-18 00:49:13 +03:00
Soxoj 2c04ccce57 Improved tests 2021-05-18 00:43:56 +03:00
soxoj 435db7cdc9 Merge pull request #153 from soxoj/sites-update-16-05-21
Several sites added, updated site list
2021-05-17 00:35:56 +03:00
Soxoj 413a0502a4 Several sites added, updated site list 2021-05-16 17:02:41 +03:00
soxoj 2aedcc3166 Merge pull request #152 from soxoj/cli-plaintext-report
Added text report to CLI output
2021-05-15 16:57:22 +03:00
Soxoj 28835204f5 Added text report to CLI output 2021-05-15 16:55:05 +03:00
soxoj b11a247dfd Merge pull request #151 from soxoj/tags-socid-extractor
Tags updated, added tests for tags
2021-05-15 14:55:01 +03:00
Soxoj c9219d91ec Tags updated, added tests for tags
Added several sites
Updated socid_extractor version to avoid bug #150
2021-05-15 14:51:30 +03:00
22 changed files with 3363 additions and 3256 deletions
+1 -1
View File
@@ -26,7 +26,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest pytest-rerunfailures
python -m pip install -r test-requirements.txt
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest
run: |
+4
View File
@@ -2,6 +2,10 @@
## [Unreleased]
## [0.2.4] - 2021-05-18
* added short plain-text report to CLI output
* various improvements
## [0.2.3] - 2021-05-12
* added Yelp and yelp_userid support
* tags markup stabilization
+1 -1
View File
@@ -1,3 +1,3 @@
"""Maigret version file"""
__version__ = '0.2.3'
__version__ = '0.2.4'
+6 -6
View File
@@ -26,7 +26,7 @@ from .executors import (
from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite
from .types import QueryOptions, QueryResultWrapper
from .utils import get_random_user_agent
from .utils import get_random_user_agent, ascii_data_display
SUPPORTED_IDS = (
@@ -233,9 +233,9 @@ def process_site_result(
result = build_result(QueryStatus.CLAIMED)
else:
result = build_result(QueryStatus.AVAILABLE)
elif check_type == "status_code":
elif check_type in "status_code":
# Checks if the status code of the response is 2XX
if is_presense_detected and (not status_code >= 300 or status_code < 200):
if 200 <= status_code < 300:
result = build_result(QueryStatus.CLAIMED)
else:
result = build_result(QueryStatus.AVAILABLE)
@@ -272,7 +272,7 @@ def process_site_result(
new_usernames[v] = k
results_info["ids_usernames"] = new_usernames
links = eval(extracted_ids_data.get("links", "[]"))
links = ascii_data_display(extracted_ids_data.get("links", "[]"))
if "website" in extracted_ids_data:
links.append(extracted_ids_data["website"])
results_info["ids_links"] = links
@@ -456,7 +456,7 @@ async def maigret(
logger,
query_notify=None,
proxy=None,
timeout=None,
timeout=3,
is_parsing_enabled=False,
id_type="username",
debug=False,
@@ -478,7 +478,7 @@ async def maigret(
query results.
logger -- Standard Python logger object.
timeout -- Time in seconds to wait before timing out request.
Default is no timeout.
Default is 3 seconds.
is_parsing_enabled -- Extract additional info from account pages.
id_type -- Type of username to search.
Default is 'username', see all supported here:
+7
View File
@@ -32,6 +32,7 @@ from .report import (
save_txt_report,
SUPPORTED_JSON_REPORT_FORMATS,
save_json_report,
get_plaintext_report,
)
from .sites import MaigretDatabase
from .submit import submit_dialog
@@ -646,6 +647,12 @@ async def main():
filename = report_filepath_tpl.format(username=username, postfix='.pdf')
save_pdf_report(filename, report_context)
query_notify.warning(f'PDF report on all usernames saved in {filename}')
text_report = get_plaintext_report(report_context)
if text_report:
query_notify.info('Short text report:')
print(text_report)
# update database
db.save_to_file(args.db_file)
+10 -3
View File
@@ -205,13 +205,20 @@ class QueryNotifyPrint(QueryNotify):
else:
print(f"[*] {title} {message} on:")
def warning(self, message, symbol="-"):
msg = f"[{symbol}] {message}"
def _colored_print(self, fore_color, msg):
if self.color:
print(Style.BRIGHT + Fore.YELLOW + msg)
print(Style.BRIGHT + fore_color + msg)
else:
print(msg)
def warning(self, message, symbol="-"):
msg = f"[{symbol}] {message}"
self._colored_print(Fore.YELLOW, msg)
def info(self, message, symbol="*"):
msg = f"[{symbol}] {message}"
self._colored_print(Fore.BLUE, msg)
def update(self, result, is_similar=False):
"""Notify Update.
+12
View File
@@ -70,6 +70,17 @@ def save_json_report(filename: str, username: str, results: dict, report_type: s
generate_json_report(username, results, f, report_type=report_type)
def get_plaintext_report(context: dict) -> str:
    """Build a short plain-text summary from a report context.

    The summary starts with ``context['brief']``, split so that every
    sentence terminated by ". " ends its own line. When the context carries
    non-empty ``countries_tuple_list`` / ``interests_tuple_list`` entries,
    the first element of each entry is listed on a ``Countries:`` and an
    ``Interests (tags):`` line respectively.

    Args:
        context: Report context; must contain the ``'brief'`` key. The two
            ``*_tuple_list`` keys are optional.

    Returns:
        The assembled text with leading/trailing whitespace removed.

    Raises:
        KeyError: If ``context`` has no ``'brief'`` entry.
    """
    # The appended space lets the final sentence match the '. ' pattern too.
    output = (context['brief'] + " ").replace('. ', '.\n')

    interests = [entry[0] for entry in context.get('interests_tuple_list', [])]
    countries = [entry[0] for entry in context.get('countries_tuple_list', [])]

    if countries:
        output += f'Countries: {", ".join(countries)}\n'
    if interests:
        output += f'Interests (tags): {", ".join(interests)}\n'

    return output.strip()
"""
REPORTS GENERATING
"""
@@ -215,6 +226,7 @@ def generate_report_context(username_results: list):
return {
"username": first_username,
# TODO: return brief list
"brief": brief,
"results": username_results,
"first_seen": first_seen,
+1810 -1890
View File
File diff suppressed because it is too large Load Diff
+2 -1
View File
@@ -61,9 +61,10 @@ SUPPORTED_TAGS = [
"military",
"auto",
"gambling",
"business",
"cybercriminal",
"review",
"bookmarks",
"design",
]
+3 -3
View File
@@ -255,7 +255,7 @@ async def check_features_manually(
features = input("If features was not detected correctly, write it manually: ")
if features:
presence_list = features.split(",")
presence_list = list(map(str.strip, features.split(",")))
absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
:top_features_count
@@ -264,7 +264,7 @@ async def check_features_manually(
features = input("If features was not detected correctly, write it manually: ")
if features:
absence_list = features.split(",")
absence_list = list(map(str.strip, features.split(",")))
site_data = {
"absenceStrs": absence_list,
@@ -355,7 +355,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
return False
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
chosen_site.tags = input("Site tags: ").split(',')
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
rank = get_alexa_rank(chosen_site.url_main)
if rank:
print(f'New alexa rank: {rank}')
+8 -1
View File
@@ -1,5 +1,7 @@
import ast
import re
import random
from typing import Any
DEFAULT_USER_AGENTS = [
@@ -65,6 +67,10 @@ class URLMatcher:
return re.compile(regexp_str)
def ascii_data_display(data: str) -> Any:
    """Convert the text form of a Python literal into the object it denotes.

    Parsing is delegated to ``ast.literal_eval``, which accepts literal
    syntax only and does not execute arbitrary expressions.

    Args:
        data: Source text of a Python literal, e.g. ``"['a', 'b']"``.

    Returns:
        The object described by ``data``.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when
            ``data`` is not a valid literal.
    """
    return ast.literal_eval(data)
def get_dict_ascii_tree(items, prepend="", new_line=True):
text = ""
for num, item in enumerate(items):
@@ -75,7 +81,8 @@ def get_dict_ascii_tree(items, prepend="", new_line=True):
if field_value.startswith("['"):
is_last_item = num == len(items) - 1
prepend_symbols = " " * 3 if is_last_item else ""
field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
data = ascii_data_display(field_value)
field_value = get_dict_ascii_tree(data, prepend_symbols)
text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
else:
text += f"\n{prepend}{box_symbol} {item}"
+1 -1
View File
@@ -26,7 +26,7 @@ python-socks==1.1.2
requests>=2.24.0
requests-futures==1.0.0
six==1.15.0
socid-extractor>=0.0.19
socid-extractor>=0.0.20
soupsieve==2.1
stem==1.8.0
torrequest==0.1.0
+1 -1
View File
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
requires = rf.read().splitlines()
setup(name='maigret',
version='0.2.3',
version='0.2.4',
description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description,
long_description_content_type="text/markdown",
+1361 -1343
View File
File diff suppressed because it is too large Load Diff
+6
View File
@@ -0,0 +1,6 @@
flake8==3.8.4
pytest==6.2.4
pytest-asyncio==0.14.0
pytest-cov==2.10.1
pytest-httpserver==1.0.0
pytest-rerunfailures==9.1.1
+12 -5
View File
@@ -12,6 +12,7 @@ from maigret.maigret import setup_arguments_parser
CUR_PATH = os.path.dirname(os.path.realpath(__file__))
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
TEST_JSON_FILE = os.path.join(CUR_PATH, 'db.json')
LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
empty_mark = Mark('', (), {})
@@ -36,16 +37,17 @@ def remove_test_reports():
@pytest.fixture(scope='session')
def default_db():
db = MaigretDatabase().load_from_file(JSON_FILE)
return db
return MaigretDatabase().load_from_file(JSON_FILE)
@pytest.fixture(scope='function')
def test_db():
db = MaigretDatabase().load_from_file(TEST_JSON_FILE)
return MaigretDatabase().load_from_file(TEST_JSON_FILE)
return db
@pytest.fixture(scope='function')
def local_test_db():
    """Load the database stored at LOCAL_TEST_JSON_FILE for a single test."""
    database = MaigretDatabase()
    return database.load_from_file(LOCAL_TEST_JSON_FILE)
@pytest.fixture(autouse=True)
@@ -58,3 +60,8 @@ def reports_autoclean():
@pytest.fixture(scope='session')
def argparser():
return setup_arguments_parser()
@pytest.fixture(scope="session")
def httpserver_listen_address():
    """Return the (host, port) pair for the local test HTTP server.

    The values match the ``localhost:8989`` URLs used by the sites in the
    local test database.
    NOTE(review): presumably consumed by pytest-httpserver's ``httpserver``
    fixture -- confirm against that plugin's documentation.
    """
    host, port = "localhost", 8989
    return (host, port)
+21
View File
@@ -0,0 +1,21 @@
{
"engines": {},
"sites": {
"StatusCode": {
"checkType": "status_code",
"url": "http://localhost:8989/url?id={username}",
"urlMain": "http://localhost:8989/",
"usernameClaimed": "claimed",
"usernameUnclaimed": "unclaimed"
},
"Message": {
"checkType": "message",
"url": "http://localhost:8989/url?id={username}",
"urlMain": "http://localhost:8989/",
"presenseStrs": ["user", "profile"],
"absenseStrs": ["not found", "404"],
"usernameClaimed": "claimed",
"usernameUnclaimed": "unclaimed"
}
}
}
+1
View File
@@ -22,6 +22,7 @@ httpbin.org FALSE / FALSE 0 a b
"""
@pytest.mark.skip(reason="periodically fails")
@pytest.mark.slow
def test_twitter_activation(default_db):
twitter_site = default_db.sites_dict['Twitter']
+65
View File
@@ -0,0 +1,65 @@
from mock import Mock
import pytest
from maigret import search
def site_result_except(server, username, **kwargs):
    """Register a canned response on ``server`` for ``/url?id=<username>``.

    Any extra keyword arguments are forwarded unchanged to
    ``respond_with_data`` of the request expectation.
    """
    expectation = server.expect_request('/url', query_string=f'id={username}')
    expectation.respond_with_data(**kwargs)
@pytest.mark.asyncio
async def test_checking_by_status_code(httpserver, local_test_db):
    """Status-code site: HTTP 200 is reported as found, HTTP 404 as not found."""
    sites = local_test_db.sites_dict

    for name, code in (('claimed', 200), ('unclaimed', 404)):
        site_result_except(httpserver, name, status=code)

    claimed = await search('claimed', site_dict=sites, logger=Mock())
    assert claimed['StatusCode']['status'].is_found() is True

    unclaimed = await search('unclaimed', site_dict=sites, logger=Mock())
    assert unclaimed['StatusCode']['status'].is_found() is False
@pytest.mark.asyncio
async def test_checking_by_message_positive_full(httpserver, local_test_db):
    """Message site: a "user profile" body is found, a "404 not found" body is not."""
    sites = local_test_db.sites_dict

    for name, body in (('claimed', "user profile"), ('unclaimed', "404 not found")):
        site_result_except(httpserver, name, response_data=body)

    claimed = await search('claimed', site_dict=sites, logger=Mock())
    assert claimed['Message']['status'].is_found() is True

    unclaimed = await search('unclaimed', site_dict=sites, logger=Mock())
    assert unclaimed['Message']['status'].is_found() is False
@pytest.mark.asyncio
async def test_checking_by_message_positive_part(httpserver, local_test_db):
    """Message site: a "profile" body is found, a bare "404" body is not."""
    sites = local_test_db.sites_dict

    for name, body in (('claimed', "profile"), ('unclaimed', "404")):
        site_result_except(httpserver, name, response_data=body)

    claimed = await search('claimed', site_dict=sites, logger=Mock())
    assert claimed['Message']['status'].is_found() is True

    unclaimed = await search('unclaimed', site_dict=sites, logger=Mock())
    assert unclaimed['Message']['status'].is_found() is False
@pytest.mark.asyncio
async def test_checking_by_message_negative(httpserver, local_test_db):
    """Message site: an empty body is not found, while a "user 404" body is found."""
    sites = local_test_db.sites_dict

    for name, body in (('claimed', ""), ('unclaimed', "user 404")):
        site_result_except(httpserver, name, response_data=body)

    claimed = await search('claimed', site_dict=sites, logger=Mock())
    assert claimed['Message']['status'].is_found() is False

    unclaimed = await search('unclaimed', site_dict=sites, logger=Mock())
    assert unclaimed['Message']['status'].is_found() is True
+15
View File
@@ -0,0 +1,15 @@
"""Maigret data test functions"""
from maigret.utils import is_country_tag
from maigret.sites import SUPPORTED_TAGS
def test_tags_validity(default_db):
    """Every non-country tag on a site in the default DB must be in SUPPORTED_TAGS."""
    unknown_tags = {
        tag
        for site in default_db.sites
        for tag in site.tags
        if not is_country_tag(tag) and tag not in SUPPORTED_TAGS
    }

    assert unknown_tags == set()
+11
View File
@@ -16,6 +16,7 @@ from maigret.report import (
generate_report_template,
generate_report_context,
generate_json_report,
get_plaintext_report,
)
from maigret.result import QueryResult, QueryStatus
from maigret.sites import MaigretSite
@@ -346,3 +347,13 @@ def test_pdf_report():
save_pdf_report(report_name, context)
assert os.path.exists(report_name)
def test_text_report():
    """The plain-text report contains each word of the brief plus 'us' and 'photo'."""
    report_text = get_plaintext_report(generate_report_context(TEST))

    expected_fragments = [*SUPPOSED_BRIEF.split(), 'us', 'photo']
    for fragment in expected_fragments:
        assert fragment in report_text
+5
View File
@@ -57,6 +57,11 @@ def test_enrich_link_str():
)
def test_url_extract_main_part_negative():
    """extract_main_part yields an empty string for the literal text 'None'."""
    assert URLMatcher.extract_main_part('None') == ''
def test_url_extract_main_part():
url_main_part = 'flickr.com/photos/alexaimephotography'