Static links fixes, fixes for Pypi

This commit is contained in:
Soxoj
2020-12-05 01:24:12 +03:00
parent 285f72a285
commit 7997ade3c5
11 changed files with 60 additions and 585 deletions
+18 -22
View File
@@ -1,28 +1,24 @@
FROM python:3.7-alpine as build FROM python:3.7-alpine
WORKDIR /wheels LABEL maintainer="Soxoj <soxoj@protonmail.com>"
RUN apk add --no-cache \
g++ \ WORKDIR /app
ADD requirements.txt .
RUN pip install --upgrade pip \
&& apk add --update --virtual .build-dependencies \
build-base \
gcc \ gcc \
git \ musl-dev \
libxml2 \ libxml2 \
libxml2-dev \ libxml2-dev \
libxslt-dev \ libxslt-dev \
linux-headers && YARL_NO_EXTENSIONS=1 python3 -m pip install maigret \
COPY requirements.txt /opt/maigret/ && apk del .build-dependencies \
RUN pip3 wheel -r /opt/maigret/requirements.txt && rm -rf /var/cache/apk/* \
/tmp/* \
/var/tmp/*
ADD . .
FROM python:3.7-alpine ENTRYPOINT ["maigret"]
WORKDIR /opt/maigret
ARG VCS_REF
ARG VCS_URL="https://gitlab.com/soxoj/maigret"
LABEL org.label-schema.vcs-ref=$VCS_REF \
org.label-schema.vcs-url=$VCS_URL
COPY --from=build /wheels /wheels
COPY . /opt/maigret/
RUN apk add git
RUN pip3 install -r requirements.txt -f /wheels \
&& rm -rf /wheels \
&& rm -rf /root/.cache/pip/*
ENTRYPOINT ["python", "maigret"]
+4
View File
@@ -0,0 +1,4 @@
include LICENSE
include README.md
include requirements.txt
include maigret/resources/data.json
+10 -9
View File
@@ -1,7 +1,7 @@
# Maigret # Maigret
<p align="center"> <p align="center">
<img src="static/maigret.png" /> <img src="./static/maigret.png" />
</p> </p>
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i> <i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
@@ -13,7 +13,7 @@ Purpose of Maigret - **collect a dossier on a person by username only**, checkin
This is a [sherlock](https://github.com/sherlock-project/) fork with cool features under heavy development. This is a [sherlock](https://github.com/sherlock-project/) fork with cool features under heavy development.
*Don't forget to regularly update source code from repo*. *Don't forget to regularly update source code from repo*.
Currently supports >1300 sites ([full list](/sites.md)). Currently supports >1300 sites ([full list](./sites.md)).
## Main features ## Main features
@@ -30,19 +30,20 @@ Currently supported >1300 sites ([full list](/sites.md)).
**Python 3.8 is recommended.** **Python 3.8 is recommended.**
```bash ```bash
# clone the repo and change directory # install from pypi
$ git clone https://git.rip/soxoj/maigret && cd maigret $ pip3 install maigret
# install the requirements # or clone and install manually
$ python3 -m pip install -r requirements.txt $ git clone https://github.com/soxoj/maigret && cd maigret
$ pip3 install .
``` ```
## Usage examples ## Usage examples
```bash ```bash
python3 maigret user maigret user
python3 maigret user1 user2 user3 maigret user1 user2 user3 --print-not-found
``` ```
With Docker: With Docker:
@@ -55,7 +56,7 @@ docker run maigret user
## Demo with page parsing and recursive username search ## Demo with page parsing and recursive username search
```bash ```bash
python3 maigret alexaimephotographycars maigret alexaimephotographycars
``` ```
![animation of recursive search](./static/recursive_search.svg) ![animation of recursive search](./static/recursive_search.svg)
+1 -5
View File
@@ -1,5 +1 @@
"""Sherlock Module """Maigret"""
This module contains the main logic to search for usernames at social
networks.
"""
+1 -4
View File
@@ -1,10 +1,7 @@
#! /usr/bin/env python3 #! /usr/bin/env python3
""" """
Maigret (Sherlock fork): Find Usernames Across Social Networks Module Maigret entrypoint
This module contains the main logic to search for usernames at social
networks.
""" """
import asyncio import asyncio
+12 -7
View File
@@ -20,13 +20,14 @@ from http.cookies import SimpleCookie
import aiohttp import aiohttp
import requests import requests
from mock import Mock from mock import Mock
from notify import QueryNotifyPrint
from result import QueryResult, QueryStatus
from sites import SitesInformation
from socid_extractor import parse, extract from socid_extractor import parse, extract
module_name = "Maigret OSINT tool" from .notify import QueryNotifyPrint
__version__ = "0.1.0" from .result import QueryResult, QueryStatus
from .sites import SitesInformation
__version__ = '0.1.6'
supported_recursive_search_ids = ( supported_recursive_search_ids = (
'yandex_public_id', 'yandex_public_id',
@@ -560,7 +561,7 @@ async def main():
f"Python: {platform.python_version()}" f"Python: {platform.python_version()}"
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description=f"{module_name} (Version {__version__})" description=f"Maigret v{__version__})"
) )
parser.add_argument("--version", parser.add_argument("--version",
action="version", version=version_string, action="version", version=version_string,
@@ -859,9 +860,13 @@ async def main():
) )
if __name__ == "__main__": def run():
try: try:
asyncio.run(main()) asyncio.run(main())
except KeyboardInterrupt: except KeyboardInterrupt:
print('Maigret is interrupted.') print('Maigret is interrupted.')
sys.exit(1) sys.exit(1)
if __name__ == "__main__":
run()
+1 -1
View File
@@ -4,7 +4,7 @@ This module defines the objects for notifying the caller about the
results of queries. results of queries.
""" """
from colorama import Fore, Style, init from colorama import Fore, Style, init
from result import QueryStatus from .result import QueryStatus
class QueryNotify(): class QueryNotify():
-4
View File
@@ -1,4 +0,0 @@
"""Sherlock Tests
This package contains various submodules used to run tests.
"""
-297
View File
@@ -1,297 +0,0 @@
"""Sherlock Tests
This module contains various tests.
"""
from tests.base import SherlockBaseTest
import unittest
class SherlockDetectTests(SherlockBaseTest):
    """Spot checks of each detection mechanism against one fixed site.

    Every test verifies that the chosen site is still configured with the
    expected "errorType" and then runs a username query against it through
    the base-class ``username_check`` helper.
    """

    def _check_detection(self, site, error_type, exist_check):
        """Run one detection check for a single site.

        Keyword Arguments:
        site                   -- Name of the site, used as a key into
                                  self.site_data_all.
        error_type             -- Value the site's "errorType" entry is
                                  expected to have.
        exist_check            -- Boolean; True queries the site's
                                  "username_claimed" entry, False queries
                                  its "username_unclaimed" entry.

        Return Value:
        N/A.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        site_data = self.site_data_all[site]

        # Ensure that the site's detection method has not changed.
        self.assertEqual(error_type, site_data["errorType"])

        if exist_check:
            username = site_data["username_claimed"]
        else:
            username = site_data["username_unclaimed"]

        self.username_check([username],
                            [site],
                            exist_check=exist_check
                            )

    def test_detect_true_via_message(self):
        """Test Username Does Exist (Via Message).

        This test ensures that the "message" detection mechanism of
        ensuring that a Username does exist works properly.
        """
        self._check_detection('Instructables', "message", exist_check=True)

    def test_detect_false_via_message(self):
        """Test Username Does Not Exist (Via Message).

        This test ensures that the "message" detection mechanism of
        ensuring that a Username does *not* exist works properly.
        """
        self._check_detection('Instructables', "message", exist_check=False)

    def test_detect_true_via_status_code(self):
        """Test Username Does Exist (Via Status Code).

        This test ensures that the "status code" detection mechanism of
        ensuring that a Username does exist works properly.
        """
        self._check_detection('Facebook', "status_code", exist_check=True)

    def test_detect_false_via_status_code(self):
        """Test Username Does Not Exist (Via Status Code).

        This test ensures that the "status code" detection mechanism of
        ensuring that a Username does *not* exist works properly.
        """
        self._check_detection('Facebook', "status_code", exist_check=False)

    def test_detect_true_via_response_url(self):
        """Test Username Does Exist (Via Response URL).

        This test ensures that the "response URL" detection mechanism of
        ensuring that a Username does exist works properly.
        """
        self._check_detection('Quora', "response_url", exist_check=True)

    def test_detect_false_via_response_url(self):
        """Test Username Does Not Exist (Via Response URL).

        This test ensures that the "response URL" detection mechanism of
        ensuring that a Username does *not* exist works properly.
        """
        self._check_detection('Quora', "response_url", exist_check=False)
class SherlockSiteCoverageTests(SherlockBaseTest):
    """Coverage tests that sweep every site of a given detection type.

    Each test delegates to the base-class ``detect_type_check`` (or
    ``coverage_total_check``), which raises an assertion failure when a
    site does not report the expected state.
    """

    def test_coverage_false_via_response_url(self):
        """Test Username Does Not Exist Site Coverage (Via Response URL).

        Checks all sites with the "response URL" detection mechanism to
        ensure that a Username that does not exist is reported that way.
        """
        self.detect_type_check(detect_type="response_url", exist_check=False)

    def test_coverage_true_via_response_url(self):
        """Test Username Does Exist Site Coverage (Via Response URL).

        Checks all sites with the "response URL" detection mechanism to
        ensure that a Username that does exist is reported that way.
        """
        self.detect_type_check(detect_type="response_url", exist_check=True)

    def test_coverage_false_via_status(self):
        """Test Username Does Not Exist Site Coverage (Via HTTP Status).

        Checks all sites with the "HTTP Status" detection mechanism to
        ensure that a Username that does not exist is reported that way.
        """
        self.detect_type_check(detect_type="status_code", exist_check=False)

    def test_coverage_true_via_status(self):
        """Test Username Does Exist Site Coverage (Via HTTP Status).

        Checks all sites with the "HTTP Status" detection mechanism to
        ensure that a Username that does exist is reported that way.
        """
        self.detect_type_check(detect_type="status_code", exist_check=True)

    def test_coverage_false_via_message(self):
        """Test Username Does Not Exist Site Coverage (Via Error Message).

        Checks all sites with the "Error Message" detection mechanism to
        ensure that a Username that does not exist is reported that way.
        """
        self.detect_type_check(detect_type="message", exist_check=False)

    def test_coverage_true_via_message(self):
        """Test Username Does Exist Site Coverage (Via Error Message).

        Checks all sites with the "Error Message" detection mechanism to
        ensure that a Username that does exist is reported that way.
        """
        self.detect_type_check(detect_type="message", exist_check=True)

    def test_coverage_total(self):
        """Test Site Coverage Is Total.

        Checks that all sites have test data available; an assert is
        triggered if we do not have total coverage.
        """
        self.coverage_total_check()
-228
View File
@@ -1,228 +0,0 @@
"""Sherlock Base Tests
This module contains various utilities for running tests.
"""
import os
import os.path
import unittest
import maigret
from result import QueryStatus
from result import QueryResult
from notify import QueryNotify
from sites import SitesInformation
import warnings
class SherlockBaseTest(unittest.TestCase):
    """Common fixture and helper checks shared by the site tests."""

    def setUp(self):
        """Sherlock Base Test Setup.

        Does common setup tasks for base Sherlock tests: builds the mapping
        of site name to site information, loads the optional excluded-sites
        list and sets the default query options.

        Return Value:
        N/A.
        """
        # This ignores the ResourceWarning from an unclosed SSLSocket.
        # TODO: Figure out how to fix the code so this is not needed.
        warnings.simplefilter("ignore", ResourceWarning)

        # Create original dictionary from SitesInformation() object.
        # Eventually, the rest of the code will be updated to use the new
        # object directly, but this will glue the two pieces together.
        self.site_data_all = {
            site.name: site.information for site in SitesInformation()
        }

        # Load excluded sites list, if any (one site name per line).
        excluded_sites_path = os.path.join(
            os.path.dirname(os.path.realpath(maigret.__file__)),
            "tests/.excluded_sites"
        )
        try:
            with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file:
                self.excluded_sites = excluded_sites_file.read().splitlines()
        except FileNotFoundError:
            # A missing file simply means that no site is excluded.
            self.excluded_sites = []

        # Create notify object for query results.
        self.query_notify = QueryNotify()

        self.tor = False
        self.unique_tor = False
        self.timeout = None
        self.skip_error_sites = True

    def site_data_filter(self, site_list):
        """Filter Site Data.

        Keyword Arguments:
        site_list              -- List of strings corresponding to sites which
                                  should be filtered.

        Return Value:
        Dictionary containing sub-set of site data specified by 'site_list'.
        """
        # Create new dictionary that has filtered site data based on input.
        # Note that any site specified which is not understood will generate
        # an error (KeyError raised inside the sub-test).
        site_data = {}
        for site in site_list:
            with self.subTest(f"Checking test vector Site '{site}' "
                              f"exists in total site data."
                              ):
                site_data[site] = self.site_data_all[site]

        return site_data

    def username_check(self, username_list, site_list, exist_check=True):
        """Username Exist Check.

        Keyword Arguments:
        username_list          -- List of strings corresponding to usernames
                                  which should exist on *all* of the sites.
        site_list              -- List of strings corresponding to sites which
                                  should be filtered.
        exist_check            -- Boolean which indicates if this should be
                                  a check for Username existence,
                                  or non-existence.

        Return Value:
        N/A.
        Will trigger an assert if Username does not have the expected
        existence state.
        """
        # Filter all site data down to just what is needed for this test.
        site_data = self.site_data_filter(site_list)

        if exist_check:
            check_type_text = "claimed"
            exist_result_desired = QueryStatus.CLAIMED
        else:
            check_type_text = "available"
            exist_result_desired = QueryStatus.AVAILABLE

        for username in username_list:
            results = maigret.sherlock(username,
                                       site_data,
                                       self.query_notify,
                                       tor=self.tor,
                                       unique_tor=self.unique_tor,
                                       timeout=self.timeout
                                       )
            for site, result in results.items():
                with self.subTest(f"Checking Username '{username}' "
                                  f"{check_type_text} on Site '{site}'"
                                  ):
                    status = result['status'].status
                    if self.skip_error_sites and status == QueryStatus.UNKNOWN:
                        # Some error connecting to site.
                        self.skipTest(f"Skipping Username '{username}' "
                                      f"{check_type_text} on Site '{site}': "
                                      f"Site returned error status."
                                      )

                    self.assertEqual(exist_result_desired, status)

    def detect_type_check(self, detect_type, exist_check=True):
        """Username Exist Check.

        Keyword Arguments:
        detect_type            -- String corresponding to detection algorithm
                                  which is desired to be tested.
                                  Note that only sites which have documented
                                  usernames which exist and do not exist
                                  will be tested.
        exist_check            -- Boolean which indicates if this should be
                                  a check for Username existence,
                                  or non-existence.

        Return Value:
        N/A.
        Runs tests on all sites using the indicated detection algorithm
        and which also has test vectors specified.
        Will trigger an assert if Username does not have the expected
        existence state.
        """
        username_key = "username_claimed" if exist_check else "username_unclaimed"

        # Dictionary of sites that should be tested for having a username.
        # This will allow us to test sites with a common username in parallel.
        sites_by_username = {}

        for site, site_data in self.site_data_all.items():
            if (
                site in self.excluded_sites
                or site_data["errorType"] != detect_type
                or site_data.get("username_claimed") is None
                or site_data.get("username_unclaimed") is None
            ):
                # This is either not a site we are interested in, or the
                # site does not contain the required information to do
                # the tests.
                continue

            # Add this site to the list of sites corresponding to this
            # username.
            username = site_data.get(username_key)
            sites_by_username.setdefault(username, []).append(site)

        # Check on the username availability against all of the sites.
        for username, site_list in sites_by_username.items():
            self.username_check([username],
                                site_list,
                                exist_check=exist_check
                                )

    def coverage_total_check(self):
        """Total Coverage Check.

        Return Value:
        N/A.
        Counts up all Sites with full test data available.
        Will trigger an assert if any Site does not have test coverage.
        """
        # Sites for which test information is not available.
        site_no_tests_list = [
            site
            for site, site_data in self.site_data_all.items()
            if site_data.get("username_claimed") is None
            or site_data.get("username_unclaimed") is None
        ]

        # Comparing against the joined names makes the failure message list
        # every uncovered site.
        self.assertEqual("", ", ".join(site_no_tests_list))
+9 -4
View File
@@ -5,17 +5,22 @@ from setuptools import (
with open('README.md') as fh: with open('README.md') as fh:
long_description = fh.read() readme = fh.read()
long_description.replace('./', 'https://raw.githubusercontent.com/soxoj/maigret/main/') long_description = readme.replace('./', 'https://raw.githubusercontent.com/soxoj/maigret/main/')
with open('requirements.txt') as rf:
requires = rf.read().splitlines()
setup(name='maigret', setup(name='maigret',
version='0.1.0', version='0.1.6',
description='Collect a dossier on a person by username from a huge number of sites', description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description, long_description=long_description,
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
url='https://github.com/soxoj/maigret', url='https://github.com/soxoj/maigret',
entry_points={'console_scripts': ['maigret = maigret.cli:run']}, install_requires=requires,
entry_points={'console_scripts': ['maigret = maigret.maigret:run']},
packages=find_packages(), packages=find_packages(),
include_package_data=True,
author='Soxoj', author='Soxoj',
author_email='soxoj@protonmail.com', author_email='soxoj@protonmail.com',
license='MIT', license='MIT',