Mirror of https://github.com/soxoj/maigret.git, synced 2026-05-09 08:04:32 +00:00
Compare commits

12 Commits:

| SHA1 |
|---|
| bea900dda0 |
| bb1bde833d |
| 5b405c6abb |
| 99fa58ceed |
| c71e404f63 |
| 2c04ccce57 |
| 435db7cdc9 |
| 413a0502a4 |
| 2aedcc3166 |
| 28835204f5 |
| b11a247dfd |
| c9219d91ec |
```diff
@@ -26,7 +26,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest pytest-rerunfailures
+        python -m pip install -r test-requirements.txt
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Test with pytest
       run: |
```
```diff
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+## [0.2.4] - 2021-05-18
+* cli output report
+* various improvements
+
 ## [0.2.3] - 2021-05-12
 * added Yelp and yelp_userid support
 * tags markup stabilization
```
```diff
@@ -1,3 +1,3 @@
 """Maigret version file"""
 
-__version__ = '0.2.3'
+__version__ = '0.2.4'
```
+6 -6
```diff
@@ -26,7 +26,7 @@ from .executors import (
 from .result import QueryResult, QueryStatus
 from .sites import MaigretDatabase, MaigretSite
 from .types import QueryOptions, QueryResultWrapper
-from .utils import get_random_user_agent
+from .utils import get_random_user_agent, ascii_data_display
 
 
 SUPPORTED_IDS = (
@@ -233,9 +233,9 @@ def process_site_result(
             result = build_result(QueryStatus.CLAIMED)
         else:
             result = build_result(QueryStatus.AVAILABLE)
-    elif check_type == "status_code":
+    elif check_type in "status_code":
         # Checks if the status code of the response is 2XX
-        if is_presense_detected and (not status_code >= 300 or status_code < 200):
+        if 200 <= status_code < 300:
             result = build_result(QueryStatus.CLAIMED)
         else:
             result = build_result(QueryStatus.AVAILABLE)
@@ -272,7 +272,7 @@ def process_site_result(
             new_usernames[v] = k
 
     results_info["ids_usernames"] = new_usernames
-    links = eval(extracted_ids_data.get("links", "[]"))
+    links = ascii_data_display(extracted_ids_data.get("links", "[]"))
     if "website" in extracted_ids_data:
         links.append(extracted_ids_data["website"])
     results_info["ids_links"] = links
@@ -456,7 +456,7 @@ async def maigret(
     logger,
     query_notify=None,
     proxy=None,
-    timeout=None,
+    timeout=3,
     is_parsing_enabled=False,
     id_type="username",
     debug=False,
@@ -478,7 +478,7 @@ async def maigret(
                               query results.
     logger                 -- Standard Python logger object.
     timeout                -- Time in seconds to wait before timing out request.
-                              Default is no timeout.
+                              Default is 3 seconds.
     is_parsing_enabled     -- Extract additional info from account pages.
     id_type                -- Type of username to search.
                               Default is 'username', see all supported here:
```
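The status-code hunk is the substantive fix in this file: the old guard `not status_code >= 300 or status_code < 200` parses as `(not (status_code >= 300)) or (status_code < 200)`, which simplifies to `status_code < 300`, so 1XX responses also counted as claimed. The other changes give `maigret()` a 3-second default timeout and route extractor output through `ascii_data_display` instead of `eval` (see the utils diff below). A small sketch comparing both predicates:

```python
# Old guard (inside the is_presense_detected branch) vs. the new range check.
# The old expression simplifies to status_code < 300, so 1XX also passed.
for code in (101, 200, 204, 301, 404):
    old = not code >= 300 or code < 200
    new = 200 <= code < 300
    print(f"{code}: old={old}, new={new}")
# 101: old=True, new=False   <- only the new check rejects non-2XX here
# 200: old=True, new=True
# 204: old=True, new=True
# 301: old=False, new=False
# 404: old=False, new=False
```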
```diff
@@ -32,6 +32,7 @@ from .report import (
     save_txt_report,
     SUPPORTED_JSON_REPORT_FORMATS,
     save_json_report,
+    get_plaintext_report,
 )
 from .sites import MaigretDatabase
 from .submit import submit_dialog
@@ -646,6 +647,12 @@ async def main():
         filename = report_filepath_tpl.format(username=username, postfix='.pdf')
         save_pdf_report(filename, report_context)
         query_notify.warning(f'PDF report on all usernames saved in {filename}')
+
+    text_report = get_plaintext_report(report_context)
+    if text_report:
+        query_notify.info('Short text report:')
+        print(text_report)
 
     # update database
     db.save_to_file(args.db_file)
```
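With `get_plaintext_report` imported, the CLI now finishes by printing a short digest built from the same report context that feeds the HTML/PDF/JSON writers. A sketch of that flow using only names visible in this diff (`username_results` stands in for the per-username search output):

```python
from maigret.report import generate_report_context, get_plaintext_report

def print_short_report(username_results: list) -> None:
    """Sketch of the new CLI step: same context object, extra plaintext sink."""
    report_context = generate_report_context(username_results)
    text_report = get_plaintext_report(report_context)
    if text_report:
        print(text_report)
```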
+10 -3
```diff
@@ -205,13 +205,20 @@ class QueryNotifyPrint(QueryNotify):
         else:
             print(f"[*] {title} {message} on:")
 
-    def warning(self, message, symbol="-"):
-        msg = f"[{symbol}] {message}"
+    def _colored_print(self, fore_color, msg):
         if self.color:
-            print(Style.BRIGHT + Fore.YELLOW + msg)
+            print(Style.BRIGHT + fore_color + msg)
         else:
             print(msg)
 
+    def warning(self, message, symbol="-"):
+        msg = f"[{symbol}] {message}"
+        self._colored_print(Fore.YELLOW, msg)
+
+    def info(self, message, symbol="*"):
+        msg = f"[{symbol}] {message}"
+        self._colored_print(Fore.BLUE, msg)
+
     def update(self, result, is_similar=False):
         """Notify Update.
```
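Extracting `_colored_print` deduplicates the colorama branching and makes further severity levels one-liners; for example, a hypothetical `success` level (not part of this change) would be:

```python
from colorama import Fore

# Hypothetical addition (not in the diff): with _colored_print factored out,
# another severity level on QueryNotifyPrint is a two-liner.
def success(self, message, symbol="+"):
    msg = f"[{symbol}] {message}"
    self._colored_print(Fore.GREEN, msg)
```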
```diff
@@ -70,6 +70,17 @@ def save_json_report(filename: str, username: str, results: dict, report_type: str):
         generate_json_report(username, results, f, report_type=report_type)
 
 
+def get_plaintext_report(context: dict) -> str:
+    output = (context['brief'] + " ").replace('. ', '.\n')
+    interests = list(map(lambda x: x[0], context.get('interests_tuple_list', [])))
+    countries = list(map(lambda x: x[0], context.get('countries_tuple_list', [])))
+    if countries:
+        output += f'Countries: {", ".join(countries)}\n'
+    if interests:
+        output += f'Interests (tags): {", ".join(interests)}\n'
+    return output.strip()
+
+
 """
 REPORTS GENERATING
 """
@@ -215,6 +226,7 @@ def generate_report_context(username_results: list):
 
     return {
         "username": first_username,
+        # TODO: return brief list
        "brief": brief,
         "results": username_results,
         "first_seen": first_seen,
```
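`get_plaintext_report` splits the brief into one sentence per line and appends the extracted countries and interests. A worked example with a minimal assumed context (real contexts come from `generate_report_context` and carry `(value, count)` tuples, of which only the first element is used):

```python
from maigret.report import get_plaintext_report

# Assumed minimal context, just the keys the function reads:
context = {
    'brief': 'Found 2 accounts. Gender: male.',
    'countries_tuple_list': [('us', 3)],
    'interests_tuple_list': [('photo', 10)],
}
print(get_plaintext_report(context))
# Found 2 accounts.
# Gender: male.
# Countries: us
# Interests (tags): photo
```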
+1810 -1890
File diff suppressed because it is too large
+2 -1
```diff
@@ -61,9 +61,10 @@ SUPPORTED_TAGS = [
     "military",
     "auto",
     "gambling",
     "business",
     "cybercriminal",
     "review",
     "bookmarks",
     "design",
 ]
```
+3 -3
```diff
@@ -255,7 +255,7 @@ async def check_features_manually(
     features = input("If features was not detected correctly, write it manually: ")
 
     if features:
-        presence_list = features.split(",")
+        presence_list = list(map(str.strip, features.split(",")))
 
     absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
         :top_features_count
@@ -264,7 +264,7 @@ async def check_features_manually(
     features = input("If features was not detected correctly, write it manually: ")
 
     if features:
-        absence_list = features.split(",")
+        absence_list = list(map(str.strip, features.split(",")))
 
     site_data = {
         "absenceStrs": absence_list,
@@ -355,7 +355,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
         return False
 
     chosen_site.name = input("Change site name if you want: ") or chosen_site.name
-    chosen_site.tags = input("Site tags: ").split(',')
+    chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
     rank = get_alexa_rank(chosen_site.url_main)
     if rank:
         print(f'New alexa rank: {rank}')
```
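All three hunks apply the same fix: `str.split` keeps the whitespace around commas, so tags and feature strings typed by a user ended up with stray spaces. For example:

```python
raw = "photo, blog , forum"
print(raw.split(','))                        # ['photo', ' blog ', ' forum']
print(list(map(str.strip, raw.split(','))))  # ['photo', 'blog', 'forum']
```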
+8 -1
```diff
@@ -1,5 +1,7 @@
+import ast
 import re
 import random
+from typing import Any
 
 
 DEFAULT_USER_AGENTS = [
@@ -65,6 +67,10 @@ class URLMatcher:
         return re.compile(regexp_str)
 
 
+def ascii_data_display(data: str) -> Any:
+    return ast.literal_eval(data)
+
+
 def get_dict_ascii_tree(items, prepend="", new_line=True):
     text = ""
     for num, item in enumerate(items):
@@ -75,7 +81,8 @@ def get_dict_ascii_tree(items, prepend="", new_line=True):
             if field_value.startswith("['"):
                 is_last_item = num == len(items) - 1
                 prepend_symbols = " " * 3 if is_last_item else " ┃ "
-                field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
+                data = ascii_data_display(field_value)
+                field_value = get_dict_ascii_tree(data, prepend_symbols)
             text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
         else:
             text += f"\n{prepend}{box_symbol} {item}"
```
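`ascii_data_display` is the security-relevant piece of this change set: it replaces bare `eval` on strings extracted from account pages with `ast.literal_eval`, which parses literals only:

```python
import ast

# Parses stringified literals, e.g. the link lists the extractor returns:
ast.literal_eval("['https://a.example', 'https://b.example']")

# ...but refuses anything executable, which bare eval() would have run:
ast.literal_eval("__import__('os').system('id')")  # raises ValueError
```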
+1 -1
```diff
@@ -26,7 +26,7 @@ python-socks==1.1.2
 requests>=2.24.0
 requests-futures==1.0.0
 six==1.15.0
-socid-extractor>=0.0.19
+socid-extractor>=0.0.20
 soupsieve==2.1
 stem==1.8.0
 torrequest==0.1.0
```
```diff
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
     requires = rf.read().splitlines()
 
 setup(name='maigret',
-      version='0.2.3',
+      version='0.2.4',
      description='Collect a dossier on a person by username from a huge number of sites',
      long_description=long_description,
      long_description_content_type="text/markdown",
```
```diff
@@ -0,0 +1,6 @@
+flake8==3.8.4
+pytest==6.2.4
+pytest-asyncio==0.14.0
+pytest-cov==2.10.1
+pytest-httpserver==1.0.0
+pytest-rerunfailures==9.1.1
```
+12 -5
```diff
@@ -12,6 +12,7 @@ from maigret.maigret import setup_arguments_parser
 CUR_PATH = os.path.dirname(os.path.realpath(__file__))
 JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
 TEST_JSON_FILE = os.path.join(CUR_PATH, 'db.json')
+LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
 empty_mark = Mark('', (), {})
 
 
@@ -36,16 +37,17 @@ def remove_test_reports():
 
 @pytest.fixture(scope='session')
 def default_db():
-    db = MaigretDatabase().load_from_file(JSON_FILE)
-
-    return db
+    return MaigretDatabase().load_from_file(JSON_FILE)
 
 
 @pytest.fixture(scope='function')
 def test_db():
-    db = MaigretDatabase().load_from_file(TEST_JSON_FILE)
-    return db
+    return MaigretDatabase().load_from_file(TEST_JSON_FILE)
 
 
+@pytest.fixture(scope='function')
+def local_test_db():
+    return MaigretDatabase().load_from_file(LOCAL_TEST_JSON_FILE)
+
 
 @pytest.fixture(autouse=True)
@@ -58,3 +60,8 @@ def reports_autoclean():
 @pytest.fixture(scope='session')
 def argparser():
     return setup_arguments_parser()
+
+
+@pytest.fixture(scope="session")
+def httpserver_listen_address():
+    return ("localhost", 8989)
```
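The `httpserver_listen_address` fixture is how pytest-httpserver chooses its bind address, which is what lets the checked-in test database (next diff) hard-code `http://localhost:8989/`. A minimal sketch of a test built on these fixtures:

```python
# Minimal sketch: pytest-httpserver binds to the address returned by
# httpserver_listen_address, so stubbed URLs are stable across test runs.
def test_stub_profile(httpserver):
    httpserver.expect_request('/url', query_string='id=claimed').respond_with_data('user profile')
    assert httpserver.url_for('/url') == 'http://localhost:8989/url'
```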
```diff
@@ -0,0 +1,21 @@
+{
+    "engines": {},
+    "sites": {
+        "StatusCode": {
+            "checkType": "status_code",
+            "url": "http://localhost:8989/url?id={username}",
+            "urlMain": "http://localhost:8989/",
+            "usernameClaimed": "claimed",
+            "usernameUnclaimed": "unclaimed"
+        },
+        "Message": {
+            "checkType": "message",
+            "url": "http://localhost:8989/url?id={username}",
+            "urlMain": "http://localhost:8989/",
+            "presenseStrs": ["user", "profile"],
+            "absenseStrs": ["not found", "404"],
+            "usernameClaimed": "claimed",
+            "usernameUnclaimed": "unclaimed"
+        }
+    }
+}
```
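Both stub sites share one URL template; the `{username}` placeholder is filled in per query, str.format-style:

```python
# The {username} placeholder in a site's "url" is substituted per query:
url_template = "http://localhost:8989/url?id={username}"
print(url_template.format(username="claimed"))
# http://localhost:8989/url?id=claimed
```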
```diff
@@ -22,6 +22,7 @@ httpbin.org FALSE / FALSE 0 a b
 """
 
 
+@pytest.mark.skip(reason="periodically fails")
 @pytest.mark.slow
 def test_twitter_activation(default_db):
     twitter_site = default_db.sites_dict['Twitter']
```
```diff
@@ -0,0 +1,65 @@
+from mock import Mock
+import pytest
+
+from maigret import search
+
+
+def site_result_except(server, username, **kwargs):
+    query = f'id={username}'
+    server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
+
+
+@pytest.mark.asyncio
+async def test_checking_by_status_code(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', status=200)
+    site_result_except(httpserver, 'unclaimed', status=404)
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['StatusCode']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['StatusCode']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_positive_full(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="user profile")
+    site_result_except(httpserver, 'unclaimed', response_data="404 not found")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_positive_part(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="profile")
+    site_result_except(httpserver, 'unclaimed', response_data="404")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+
+@pytest.mark.asyncio
+async def test_checking_by_message_negative(httpserver, local_test_db):
+    sites_dict = local_test_db.sites_dict
+
+    site_result_except(httpserver, 'claimed', response_data="")
+    site_result_except(httpserver, 'unclaimed', response_data="user 404")
+
+    result = await search('claimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is False
+
+    result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
+    assert result['Message']['status'].is_found() is True
```
```diff
@@ -0,0 +1,15 @@
+"""Maigret data test functions"""
+
+from maigret.utils import is_country_tag
+from maigret.sites import SUPPORTED_TAGS
+
+
+def test_tags_validity(default_db):
+    unknown_tags = set()
+
+    for site in default_db.sites:
+        for tag in filter(lambda x: not is_country_tag(x), site.tags):
+            if tag not in SUPPORTED_TAGS:
+                unknown_tags.add(tag)
+
+    assert unknown_tags == set()
```
```diff
@@ -16,6 +16,7 @@ from maigret.report import (
     generate_report_template,
     generate_report_context,
     generate_json_report,
+    get_plaintext_report,
 )
 from maigret.result import QueryResult, QueryStatus
 from maigret.sites import MaigretSite
@@ -346,3 +347,13 @@ def test_pdf_report():
     save_pdf_report(report_name, context)
 
     assert os.path.exists(report_name)
+
+
+def test_text_report():
+    context = generate_report_context(TEST)
+    report_text = get_plaintext_report(context)
+
+    for brief_part in SUPPOSED_BRIEF.split():
+        assert brief_part in report_text
+    assert 'us' in report_text
+    assert 'photo' in report_text
```
```diff
@@ -57,6 +57,11 @@ def test_enrich_link_str():
     )
 
 
+def test_url_extract_main_part_negative():
+    url_main_part = 'None'
+    assert URLMatcher.extract_main_part(url_main_part) == ''
+
+
 def test_url_extract_main_part():
     url_main_part = 'flickr.com/photos/alexaimephotography'
```
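The new negative test pins down the fallback: input that is not a recognizable profile URL yields an empty main part.

```python
from maigret.utils import URLMatcher

# Fallback behavior pinned by the new test:
assert URLMatcher.extract_main_part('None') == ''
```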