diff --git a/Dockerfile b/Dockerfile index 52c0d55..7a69db5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,28 +1,24 @@ -FROM python:3.7-alpine as build -WORKDIR /wheels -RUN apk add --no-cache \ - g++ \ - gcc \ - git \ - libxml2 \ - libxml2-dev \ - libxslt-dev \ - linux-headers -COPY requirements.txt /opt/maigret/ -RUN pip3 wheel -r /opt/maigret/requirements.txt - - FROM python:3.7-alpine -WORKDIR /opt/maigret -ARG VCS_REF -ARG VCS_URL="https://gitlab.com/soxoj/maigret" -LABEL org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.vcs-url=$VCS_URL -COPY --from=build /wheels /wheels -COPY . /opt/maigret/ -RUN apk add git -RUN pip3 install -r requirements.txt -f /wheels \ - && rm -rf /wheels \ - && rm -rf /root/.cache/pip/* +LABEL maintainer="Soxoj <soxoj@protonmail.com>" -ENTRYPOINT ["python", "maigret"] +WORKDIR /app + +ADD requirements.txt . + +RUN pip install --upgrade pip \ +&& apk add --update --virtual .build-dependencies \ + build-base \ + gcc \ + musl-dev \ + libxml2 \ + libxml2-dev \ + libxslt-dev \ +&& YARL_NO_EXTENSIONS=1 python3 -m pip install maigret \ +&& apk del .build-dependencies \ +&& rm -rf /var/cache/apk/* \ + /tmp/* \ + /var/tmp/* + +ADD . . + +ENTRYPOINT ["maigret"] diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..65cbbbe --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include LICENSE +include README.md +include requirements.txt +include maigret/resources/data.json diff --git a/README.md b/README.md index 5fb952b..ede6bf0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Maigret

- +

The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions. @@ -13,7 +13,7 @@ Purpose of Maigret - **collect a dossier on a person by username only**, checkin This is a [sherlock](https://github.com/sherlock-project/) fork with cool features under heavy development. *Don't forget to regularly update source code from repo*. -Currently supported >1300 sites ([full list](/sites.md)). +Currently supported >1300 sites ([full list](./sites.md)). ## Main features @@ -30,19 +30,20 @@ Currently supported >1300 sites ([full list](/sites.md)). **Python 3.8 is recommended.** ```bash -# clone the repo and change directory -$ git clone https://git.rip/soxoj/maigret && cd maigret +# install from pypi +$ pip3 install maigret -# install the requirements -$ python3 -m pip install -r requirements.txt +# or clone and install manually +$ git clone https://github.com/soxoj/maigret && cd maigret +$ pip3 install . ``` ## Using examples ```bash -python3 maigret user +maigret user -python3 maigret user1 user2 user3 +maigret user1 user2 user3 --print-not-found ``` With Docker: @@ -55,7 +56,7 @@ docker run maigret user ## Demo with page parsing and recursive username search ```bash -python3 maigret alexaimephotographycars +maigret alexaimephotographycars ``` ![animation of recursive search](./static/recursive_search.svg) diff --git a/maigret/__init__.py b/maigret/__init__.py index b0894c8..6a49466 100644 --- a/maigret/__init__.py +++ b/maigret/__init__.py @@ -1,5 +1 @@ -"""Sherlock Module - -This module contains the main logic to search for usernames at social -networks. -""" +"""Maigret""" diff --git a/maigret/__main__.py b/maigret/__main__.py index 749a4e1..c24c5cf 100644 --- a/maigret/__main__.py +++ b/maigret/__main__.py @@ -1,10 +1,7 @@ #! 
/usr/bin/env python3 """ -Maigret (Sherlock fork): Find Usernames Across Social Networks Module - -This module contains the main logic to search for usernames at social -networks. +Maigret entrypoint """ import asyncio diff --git a/maigret/maigret.py b/maigret/maigret.py index 3591afd..eb2e340 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -20,13 +20,14 @@ from http.cookies import SimpleCookie import aiohttp import requests from mock import Mock -from notify import QueryNotifyPrint -from result import QueryResult, QueryStatus -from sites import SitesInformation from socid_extractor import parse, extract -module_name = "Maigret OSINT tool" -__version__ = "0.1.0" +from .notify import QueryNotifyPrint +from .result import QueryResult, QueryStatus +from .sites import SitesInformation + + +__version__ = '0.1.6' supported_recursive_search_ids = ( 'yandex_public_id', @@ -560,7 +561,7 @@ async def main(): f"Python: {platform.python_version()}" parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, - description=f"{module_name} (Version {__version__})" + description=f"Maigret v{__version__}" ) parser.add_argument("--version", action="version", version=version_string, @@ -859,9 +860,13 @@ async def main(): ) -if __name__ == "__main__": +def run(): try: asyncio.run(main()) except KeyboardInterrupt: print('Maigret is interrupted.') sys.exit(1) + + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/maigret/notify.py b/maigret/notify.py index 2ac0301..97325c9 100644 --- a/maigret/notify.py +++ b/maigret/notify.py @@ -4,7 +4,7 @@ This module defines the objects for notifying the caller about the results of queries. 
""" from colorama import Fore, Style, init -from result import QueryStatus +from .result import QueryStatus class QueryNotify(): diff --git a/maigret/tests/__init__.py b/maigret/tests/__init__.py deleted file mode 100644 index 944e27c..0000000 --- a/maigret/tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Sherlock Tests - -This package contains various submodules used to run tests. -""" diff --git a/maigret/tests/all.py b/maigret/tests/all.py deleted file mode 100644 index 4a9e535..0000000 --- a/maigret/tests/all.py +++ /dev/null @@ -1,297 +0,0 @@ -"""Sherlock Tests - -This module contains various tests. -""" -from tests.base import SherlockBaseTest -import unittest - - -class SherlockDetectTests(SherlockBaseTest): - def test_detect_true_via_message(self): - """Test Username Does Exist (Via Message). - - This test ensures that the "message" detection mechanism of - ensuring that a Username does exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = 'Instructables' - site_data = self.site_data_all[site] - - #Ensure that the site's detection method has not changed. - self.assertEqual("message", site_data["errorType"]) - - self.username_check([site_data["username_claimed"]], - [site], - exist_check=True - ) - - return - - def test_detect_false_via_message(self): - """Test Username Does Not Exist (Via Message). - - This test ensures that the "message" detection mechanism of - ensuring that a Username does *not* exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = 'Instructables' - site_data = self.site_data_all[site] - - #Ensure that the site's detection method has not changed. 
- self.assertEqual("message", site_data["errorType"]) - - self.username_check([site_data["username_unclaimed"]], - [site], - exist_check=False - ) - - return - - def test_detect_true_via_status_code(self): - """Test Username Does Exist (Via Status Code). - - This test ensures that the "status code" detection mechanism of - ensuring that a Username does exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = 'Facebook' - site_data = self.site_data_all[site] - - #Ensure that the site's detection method has not changed. - self.assertEqual("status_code", site_data["errorType"]) - - self.username_check([site_data["username_claimed"]], - [site], - exist_check=True - ) - - return - - def test_detect_false_via_status_code(self): - """Test Username Does Not Exist (Via Status Code). - - This test ensures that the "status code" detection mechanism of - ensuring that a Username does *not* exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = 'Facebook' - site_data = self.site_data_all[site] - - #Ensure that the site's detection method has not changed. - self.assertEqual("status_code", site_data["errorType"]) - - self.username_check([site_data["username_unclaimed"]], - [site], - exist_check=False - ) - - return - - def test_detect_true_via_response_url(self): - """Test Username Does Exist (Via Response URL). - - This test ensures that the "response URL" detection mechanism of - ensuring that a Username does exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = 'Quora' - site_data = self.site_data_all[site] - - #Ensure that the site's detection method has not changed. 
- self.assertEqual("response_url", site_data["errorType"]) - - self.username_check([site_data["username_claimed"]], - [site], - exist_check=True - ) - - return - - def test_detect_false_via_response_url(self): - """Test Username Does Not Exist (Via Response URL). - - This test ensures that the "response URL" detection mechanism of - ensuring that a Username does *not* exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = 'Quora' - site_data = self.site_data_all[site] - - #Ensure that the site's detection method has not changed. - self.assertEqual("response_url", site_data["errorType"]) - - self.username_check([site_data["username_unclaimed"]], - [site], - exist_check=False - ) - - return - - -class SherlockSiteCoverageTests(SherlockBaseTest): - def test_coverage_false_via_response_url(self): - """Test Username Does Not Exist Site Coverage (Via Response URL). - - This test checks all sites with the "response URL" detection mechanism - to ensure that a Username that does not exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("response_url", exist_check=False) - - return - - def test_coverage_true_via_response_url(self): - """Test Username Does Exist Site Coverage (Via Response URL). - - This test checks all sites with the "response URL" detection mechanism - to ensure that a Username that does exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("response_url", exist_check=True) - - return - - def test_coverage_false_via_status(self): - """Test Username Does Not Exist Site Coverage (Via HTTP Status). 
- - This test checks all sites with the "HTTP Status" detection mechanism - to ensure that a Username that does not exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("status_code", exist_check=False) - - return - - def test_coverage_true_via_status(self): - """Test Username Does Exist Site Coverage (Via HTTP Status). - - This test checks all sites with the "HTTP Status" detection mechanism - to ensure that a Username that does exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("status_code", exist_check=True) - - return - - def test_coverage_false_via_message(self): - """Test Username Does Not Exist Site Coverage (Via Error Message). - - This test checks all sites with the "Error Message" detection mechanism - to ensure that a Username that does not exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("message", exist_check=False) - - return - - def test_coverage_true_via_message(self): - """Test Username Does Exist Site Coverage (Via Error Message). - - This test checks all sites with the "Error Message" detection mechanism - to ensure that a Username that does exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("message", exist_check=True) - - return - - def test_coverage_total(self): - """Test Site Coverage Is Total. - - This test checks that all sites have test data available. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. 
- Will trigger an assert if we do not have total coverage. - """ - - self.coverage_total_check() - - return diff --git a/maigret/tests/base.py b/maigret/tests/base.py deleted file mode 100644 index 02c5f57..0000000 --- a/maigret/tests/base.py +++ /dev/null @@ -1,228 +0,0 @@ -"""Sherlock Base Tests - -This module contains various utilities for running tests. -""" -import os -import os.path -import unittest -import maigret -from result import QueryStatus -from result import QueryResult -from notify import QueryNotify -from sites import SitesInformation -import warnings - - -class SherlockBaseTest(unittest.TestCase): - def setUp(self): - """Sherlock Base Test Setup. - - Does common setup tasks for base Sherlock tests. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - """ - - #This ignores the ResourceWarning from an unclosed SSLSocket. - #TODO: Figure out how to fix the code so this is not needed. - warnings.simplefilter("ignore", ResourceWarning) - - #Create object with all information about sites we are aware of. - sites = SitesInformation() - - #Create original dictionary from SitesInformation() object. - #Eventually, the rest of the code will be updated to use the new object - #directly, but this will glue the two pieces together. - site_data_all = {} - for site in sites: - site_data_all[site.name] = site.information - self.site_data_all = site_data_all - - # Load excluded sites list, if any - excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(maigret.__file__)), "tests/.excluded_sites") - try: - with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file: - self.excluded_sites = excluded_sites_file.read().splitlines() - except FileNotFoundError: - self.excluded_sites = [] - - #Create notify object for query results. 
- self.query_notify = QueryNotify() - - self.tor=False - self.unique_tor=False - self.timeout=None - self.skip_error_sites=True - - return - - def site_data_filter(self, site_list): - """Filter Site Data. - - Keyword Arguments: - self -- This object. - site_list -- List of strings corresponding to sites which - should be filtered. - - Return Value: - Dictionary containing sub-set of site data specified by 'site_list'. - """ - - # Create new dictionary that has filtered site data based on input. - # Note that any site specified which is not understood will generate - # an error. - site_data = {} - for site in site_list: - with self.subTest(f"Checking test vector Site '{site}' " - f"exists in total site data." - ): - site_data[site] = self.site_data_all[site] - - return site_data - - def username_check(self, username_list, site_list, exist_check=True): - """Username Exist Check. - - Keyword Arguments: - self -- This object. - username_list -- List of strings corresponding to usernames - which should exist on *all* of the sites. - site_list -- List of strings corresponding to sites which - should be filtered. - exist_check -- Boolean which indicates if this should be - a check for Username existence, - or non-existence. - - Return Value: - N/A. - Will trigger an assert if Username does not have the expected - existence state. - """ - - #Filter all site data down to just what is needed for this test. 
- site_data = self.site_data_filter(site_list) - - if exist_check: - check_type_text = "claimed" - exist_result_desired = QueryStatus.CLAIMED - else: - check_type_text = "available" - exist_result_desired = QueryStatus.AVAILABLE - - for username in username_list: - results = maigret.sherlock(username, - site_data, - self.query_notify, - tor=self.tor, - unique_tor=self.unique_tor, - timeout=self.timeout - ) - for site, result in results.items(): - with self.subTest(f"Checking Username '{username}' " - f"{check_type_text} on Site '{site}'" - ): - if ( - (self.skip_error_sites == True) and - (result['status'].status == QueryStatus.UNKNOWN) - ): - #Some error connecting to site. - self.skipTest(f"Skipping Username '{username}' " - f"{check_type_text} on Site '{site}': " - f"Site returned error status." - ) - - self.assertEqual(exist_result_desired, - result['status'].status) - - return - - def detect_type_check(self, detect_type, exist_check=True): - """Username Exist Check. - - Keyword Arguments: - self -- This object. - detect_type -- String corresponding to detection algorithm - which is desired to be tested. - Note that only sites which have documented - usernames which exist and do not exist - will be tested. - exist_check -- Boolean which indicates if this should be - a check for Username existence, - or non-existence. - - Return Value: - N/A. - Runs tests on all sites using the indicated detection algorithm - and which also has test vectors specified. - Will trigger an assert if Username does not have the expected - existence state. - """ - - #Dictionary of sites that should be tested for having a username. - #This will allow us to test sites with a common username in parallel. 
- sites_by_username = {} - - for site, site_data in self.site_data_all.items(): - if ( - (site in self.excluded_sites) or - (site_data["errorType"] != detect_type) or - (site_data.get("username_claimed") is None) or - (site_data.get("username_unclaimed") is None) - ): - # This is either not a site we are interested in, or the - # site does not contain the required information to do - # the tests. - pass - else: - # We should run a test on this site. - - # Figure out which type of user - if exist_check: - username = site_data.get("username_claimed") - else: - username = site_data.get("username_unclaimed") - - # Add this site to the list of sites corresponding to this - # username. - if username in sites_by_username: - sites_by_username[username].append(site) - else: - sites_by_username[username] = [site] - - # Check on the username availability against all of the sites. - for username, site_list in sites_by_username.items(): - self.username_check([username], - site_list, - exist_check=exist_check - ) - - return - - def coverage_total_check(self): - """Total Coverage Check. - - Keyword Arguments: - self -- This object. - - Return Value: - N/A. - Counts up all Sites with full test data available. - Will trigger an assert if any Site does not have test coverage. - """ - - site_no_tests_list = [] - - for site, site_data in self.site_data_all.items(): - if ( - (site_data.get("username_claimed") is None) or - (site_data.get("username_unclaimed") is None) - ): - # Test information not available on this site. 
- site_no_tests_list.append(site) - - self.assertEqual("", ", ".join(site_no_tests_list)) - - return diff --git a/setup.py b/setup.py index 2825a3c..973ee1b 100644 --- a/setup.py +++ b/setup.py @@ -5,17 +5,22 @@ from setuptools import ( with open('README.md') as fh: - long_description = fh.read() - long_description.replace('./', 'https://raw.githubusercontent.com/soxoj/maigret/main/') + readme = fh.read() + long_description = readme.replace('./', 'https://raw.githubusercontent.com/soxoj/maigret/main/') + +with open('requirements.txt') as rf: + requires = rf.read().splitlines() setup(name='maigret', - version='0.1.0', + version='0.1.6', description='Collect a dossier on a person by username from a huge number of sites', long_description=long_description, long_description_content_type="text/markdown", url='https://github.com/soxoj/maigret', - entry_points={'console_scripts': ['maigret = maigret.cli:run']}, + install_requires=requires, + entry_points={'console_scripts': ['maigret = maigret.maigret:run']}, packages=find_packages(), + include_package_data=True, author='Soxoj', author_email='soxoj@protonmail.com', license='MIT',