mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 14:08:59 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
.git/
|
||||
.vscode/
|
||||
screenshot/
|
||||
tests/
|
||||
*.txt
|
||||
!/requirements.txt
|
||||
venv/
|
||||
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
# Virtual Environment
|
||||
venv/
|
||||
|
||||
# Editor Configurations
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
|
||||
# Pip
|
||||
src/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
*.ipynb
|
||||
|
||||
# Output files, except requirements.txt
|
||||
*.txt
|
||||
!requirements.txt
|
||||
|
||||
# Comma-Separated Values (CSV) Reports
|
||||
*.csv
|
||||
|
||||
# Excluded sites list
|
||||
tests/.excluded_sites
|
||||
|
||||
# MacOS Folder Metadata File
|
||||
.DS_Store
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
FROM python:3.7-alpine as build
|
||||
WORKDIR /wheels
|
||||
RUN apk add --no-cache \
|
||||
g++ \
|
||||
gcc \
|
||||
git \
|
||||
libxml2 \
|
||||
libxml2-dev \
|
||||
libxslt-dev \
|
||||
linux-headers
|
||||
COPY requirements.txt /opt/maigret/
|
||||
RUN pip3 wheel -r /opt/maigret/requirements.txt
|
||||
|
||||
|
||||
FROM python:3.7-alpine
|
||||
WORKDIR /opt/maigret
|
||||
ARG VCS_REF
|
||||
ARG VCS_URL="https://github.com/soxoj/maigret"
|
||||
LABEL org.label-schema.vcs-ref=$VCS_REF \
|
||||
org.label-schema.vcs-url=$VCS_URL
|
||||
COPY --from=build /wheels /wheels
|
||||
COPY . /opt/maigret/
|
||||
RUN pip3 install -r requirements.txt -f /wheels \
|
||||
&& rm -rf /wheels \
|
||||
&& rm -rf /root/.cache/pip/*
|
||||
|
||||
ENTRYPOINT ["python", "maigret.py"]
|
||||
@@ -0,0 +1,45 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Soxoj
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Sherlock Project
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,54 @@
|
||||
# Maigret
|
||||
|
||||
<p align="center">
|
||||
<img src="static/maigret.png" />
|
||||
</p>
|
||||
|
||||
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
||||
|
||||
## About
|
||||
|
||||
The purpose of Maigret is to **collect a dossier on a person by username only**, checking for accounts on a huge number of sites.
|
||||
|
||||
This is a [sherlock](https://github.com/sherlock-project/) fork with cool features under heavy development.
|
||||
*Don't forget to regularly update source code from repo*.
|
||||
|
||||
More than 1300 sites are currently supported ([full list](/sites.md)).
|
||||
|
||||
## Main features
|
||||
|
||||
* Profile pages parsing, [extracting](https://github.com/soxoj/socid_extractor) personal info, links to other profiles, etc.
|
||||
* Recursive search by new usernames found
|
||||
* Search by tags (site categories, countries)
|
||||
* Censorship and captcha detection
|
||||
* Very few false positives
|
||||
|
||||
## Installation
|
||||
|
||||
**NOTE**: Python 3.7 or higher and pip are required.
|
||||
|
||||
**Python 3.8 is recommended.**
|
||||
|
||||
```bash
|
||||
# clone the repo and change directory
|
||||
$ git clone https://github.com/soxoj/maigret && cd maigret
|
||||
|
||||
# install the requirements
|
||||
$ python3 -m pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Demo with page parsing and recursive username search
|
||||
|
||||
```bash
|
||||
python3 maigret alexaimephotographycars
|
||||
```
|
||||
|
||||

|
||||
|
||||
[Full output](./static/recursive_search.md)
|
||||
|
||||
## License
|
||||
|
||||
MIT © [Maigret](https://github.com/soxoj/maigret)<br/>
|
||||
MIT © [Sherlock Project](https://github.com/sherlock-project/)<br/>
|
||||
Original Creator of Sherlock Project - [Siddharth Dushantha](https://github.com/sdushantha)
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Sherlock Module
|
||||
|
||||
This module contains the main logic to search for usernames at social
|
||||
networks.
|
||||
"""
|
||||
@@ -0,0 +1,15 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
"""
|
||||
Maigret (Sherlock fork): Find Usernames Across Social Networks Module
|
||||
|
||||
This module contains the main logic to search for usernames at social
|
||||
networks.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import maigret
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(maigret.main())
|
||||
Executable
+867
@@ -0,0 +1,867 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
"""
|
||||
Maigret main module
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import csv
|
||||
import http.cookiejar as cookielib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
from http.cookies import SimpleCookie
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
from mock import Mock
|
||||
from notify import QueryNotifyPrint
|
||||
from result import QueryResult, QueryStatus
|
||||
from sites import SitesInformation
|
||||
from socid_extractor import parse, extract
|
||||
|
||||
module_name = "Maigret OSINT tool"
|
||||
__version__ = "0.1.0"
|
||||
|
||||
supported_recursive_search_ids = (
|
||||
'yandex_public_id',
|
||||
'gaia_id',
|
||||
'vk_id',
|
||||
'ok_id',
|
||||
'wikimapia_uid',
|
||||
)
|
||||
|
||||
common_errors = {
|
||||
'<title>Attention Required! | Cloudflare</title>': 'Cloudflare captcha',
|
||||
'<title>Доступ ограничен</title>': 'Rostelecom censorship',
|
||||
'document.getElementById(\'validate_form_submit\').disabled=true': 'Mail.ru captcha',
|
||||
'Verifying your browser, please wait...<br>DDoS Protection by</font> Blazingfast.io': 'Blazingfast protection',
|
||||
'404</h1><p class="error-card__description">Мы не нашли страницу': 'MegaFon 404 page',
|
||||
}
|
||||
|
||||
unsupported_characters = '#'
|
||||
|
||||
cookies_file = 'cookies.txt'
|
||||
|
||||
|
||||
async def get_response(request_future, error_type, social_network, logger):
|
||||
html_text = None
|
||||
status_code = 0
|
||||
|
||||
error_text = "General Unknown Error"
|
||||
expection_text = None
|
||||
|
||||
try:
|
||||
response = await request_future
|
||||
|
||||
status_code = response.status
|
||||
response_content = await response.content.read()
|
||||
charset = response.charset or 'utf-8'
|
||||
decoded_content = response_content.decode(charset, 'ignore')
|
||||
html_text = decoded_content
|
||||
|
||||
if status_code > 0:
|
||||
error_text = None
|
||||
|
||||
logger.debug(html_text)
|
||||
|
||||
except asyncio.TimeoutError as errt:
|
||||
error_text = "Timeout Error"
|
||||
expection_text = str(errt)
|
||||
except (ssl.SSLCertVerificationError, ssl.SSLError) as err:
|
||||
error_text = "SSL Error"
|
||||
expection_text = str(err)
|
||||
except aiohttp.client_exceptions.ClientConnectorError as err:
|
||||
error_text = "Error Connecting"
|
||||
expection_text = str(err)
|
||||
except aiohttp.http_exceptions.BadHttpMessage as err:
|
||||
error_text = "HTTP Error"
|
||||
expection_text = str(err)
|
||||
except Exception as err:
|
||||
logger.warning(f'Unhandled error while requesting {social_network}: {err}')
|
||||
logger.debug(err, exc_info=True)
|
||||
error_text = "Some Error"
|
||||
expection_text = str(err)
|
||||
|
||||
# TODO: return only needed information
|
||||
return html_text, status_code, error_text, expection_text
|
||||
|
||||
|
||||
async def update_site_data_from_response(site, site_data, site_info, semaphore, logger):
|
||||
async with semaphore:
|
||||
future = site_info.get('request_future')
|
||||
if not future:
|
||||
# ignore: search by incompatible id type
|
||||
return
|
||||
|
||||
error_type = site_info['errorType']
|
||||
site_data[site]['resp'] = await get_response(request_future=future,
|
||||
error_type=error_type,
|
||||
social_network=site,
|
||||
logger=logger)
|
||||
|
||||
|
||||
# TODO: move info separate module
|
||||
def detect_error_page(html_text, status_code, fail_flags, ignore_403):
|
||||
# Detect service restrictions such as a country restriction
|
||||
for flag, msg in fail_flags.items():
|
||||
if flag in html_text:
|
||||
return 'Some site error', msg
|
||||
|
||||
# Detect common restrictions such as provider censorship and bot protection
|
||||
for flag, msg in common_errors.items():
|
||||
if flag in html_text:
|
||||
return 'Error', msg
|
||||
|
||||
# Detect common site errors
|
||||
if status_code == 403 and not ignore_403:
|
||||
return 'Access denied', 'Access denied, use proxy/vpn'
|
||||
elif status_code >= 500:
|
||||
return f'Error {status_code}', f'Site error {status_code}'
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
async def maigret(username, site_data, query_notify, logger,
|
||||
proxy=None, timeout=None, recursive_search=False,
|
||||
id_type='username', tags=None, debug=False, forced=False,
|
||||
max_connections=100):
|
||||
"""Main search func
|
||||
|
||||
Checks for existence of username on various social media sites.
|
||||
|
||||
Keyword Arguments:
|
||||
username -- String indicating username that report
|
||||
should be created against.
|
||||
site_data -- Dictionary containing all of the site data.
|
||||
query_notify -- Object with base type of QueryNotify().
|
||||
This will be used to notify the caller about
|
||||
query results.
|
||||
proxy -- String indicating the proxy URL
|
||||
timeout -- Time in seconds to wait before timing out request.
|
||||
Default is no timeout.
|
||||
recursive_search -- Search for other usernames in website pages & recursive search by them.
|
||||
|
||||
Return Value:
|
||||
Dictionary containing results from report. Key of dictionary is the name
|
||||
of the social network site, and the value is another dictionary with
|
||||
the following keys:
|
||||
url_main: URL of main site.
|
||||
url_user: URL of user on site (if account exists).
|
||||
status: QueryResult() object indicating results of test for
|
||||
account existence.
|
||||
http_status: HTTP status code of query which checked for existence on
|
||||
site.
|
||||
response_text: Text that came back from request. May be None if
|
||||
there was an HTTP error when checking for existence.
|
||||
"""
|
||||
|
||||
# Notify caller that we are starting the query.
|
||||
if tags is None:
|
||||
tags = set()
|
||||
query_notify.start(username, id_type)
|
||||
|
||||
# TODO: connector
|
||||
connector = aiohttp.TCPConnector(ssl=False)
|
||||
session = aiohttp.ClientSession(connector=connector)
|
||||
|
||||
# Results from analysis of all sites
|
||||
results_total = {}
|
||||
|
||||
# First create futures for all requests. This allows for the requests to run in parallel
|
||||
for social_network, net_info in site_data.items():
|
||||
if net_info.get('type', 'username') != id_type:
|
||||
continue
|
||||
|
||||
site_tags = set(net_info.get('tags', []))
|
||||
if tags:
|
||||
if not set(tags).intersection(site_tags):
|
||||
continue
|
||||
|
||||
if 'disabled' in net_info and net_info['disabled'] and not forced:
|
||||
continue
|
||||
|
||||
# Results from analysis of this specific site
|
||||
results_site = {}
|
||||
|
||||
# Record URL of main site
|
||||
results_site['url_main'] = net_info.get("urlMain")
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
|
||||
}
|
||||
|
||||
if "headers" in net_info:
|
||||
# Override/append any extra headers required by a given site.
|
||||
headers.update(net_info["headers"])
|
||||
|
||||
# URL of user on site (if it exists)
|
||||
url = net_info.get('url').format(username)
|
||||
|
||||
# Don't make request if username is invalid for the site
|
||||
regex_check = net_info.get("regexCheck")
|
||||
if regex_check and re.search(regex_check, username) is None:
|
||||
# No need to do the check at the site: this user name is not allowed.
|
||||
results_site['status'] = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.ILLEGAL)
|
||||
results_site["url_user"] = ""
|
||||
results_site['http_status'] = ""
|
||||
results_site['response_text'] = ""
|
||||
query_notify.update(results_site['status'])
|
||||
else:
|
||||
# URL of user on site (if it exists)
|
||||
results_site["url_user"] = url
|
||||
url_probe = net_info.get("urlProbe")
|
||||
if url_probe is None:
|
||||
# Probe URL is normal one seen by people out on the web.
|
||||
url_probe = url
|
||||
else:
|
||||
# There is a special URL for probing existence separate
|
||||
# from where the user profile normally can be found.
|
||||
url_probe = url_probe.format(username)
|
||||
|
||||
if net_info["errorType"] == 'status_code' and net_info.get("request_head_only", True):
|
||||
# In most cases when we are detecting by status code,
|
||||
# it is not necessary to get the entire body: we can
|
||||
# detect fine with just the HEAD response.
|
||||
request_method = session.head
|
||||
else:
|
||||
# Either this detect method needs the content associated
|
||||
# with the GET response, or this specific website will
|
||||
# not respond properly unless we request the whole page.
|
||||
request_method = session.get
|
||||
|
||||
if net_info["errorType"] == "response_url":
|
||||
# Site forwards request to a different URL if username not
|
||||
# found. Disallow the redirect so we can capture the
|
||||
# http status from the original URL request.
|
||||
allow_redirects = False
|
||||
else:
|
||||
# Allow whatever redirect that the site wants to do.
|
||||
# The final result of the request will be what is available.
|
||||
allow_redirects = True
|
||||
|
||||
# TODO: cookies using
|
||||
def parse_cookies(cookies_str):
|
||||
cookies = SimpleCookie()
|
||||
cookies.load(cookies_str)
|
||||
return {key: morsel.value for key, morsel in cookies.items()}
|
||||
|
||||
if os.path.exists(cookies_file):
|
||||
cookies_obj = cookielib.MozillaCookieJar(cookies_file)
|
||||
cookies_obj.load(ignore_discard=True, ignore_expires=True)
|
||||
else:
|
||||
cookies_obj = []
|
||||
|
||||
# This future starts running the request in a new thread, doesn't block the main thread
|
||||
if proxy is not None:
|
||||
proxies = {"http": proxy, "https": proxy}
|
||||
future = request_method(url=url_probe, headers=headers,
|
||||
proxies=proxies,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
)
|
||||
else:
|
||||
future = request_method(url=url_probe, headers=headers,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
# Store future in data for access later
|
||||
net_info["request_future"] = future
|
||||
|
||||
# Add this site's results into final dictionary with all of the other results.
|
||||
results_total[social_network] = results_site
|
||||
|
||||
# TODO: move into top-level function
|
||||
|
||||
sem = asyncio.Semaphore(max_connections)
|
||||
|
||||
tasks = []
|
||||
for social_network, net_info in site_data.items():
|
||||
future = asyncio.ensure_future(update_site_data_from_response(social_network, site_data, net_info, sem, logger))
|
||||
tasks.append(future)
|
||||
await asyncio.gather(*tasks)
|
||||
await session.close()
|
||||
|
||||
# TODO: split to separate functions
|
||||
for social_network, net_info in site_data.items():
|
||||
|
||||
# Retrieve results again
|
||||
results_site = results_total.get(social_network)
|
||||
if not results_site:
|
||||
continue
|
||||
|
||||
# Retrieve other site information again
|
||||
url = results_site.get("url_user")
|
||||
logger.debug(url)
|
||||
|
||||
status = results_site.get("status")
|
||||
if status is not None:
|
||||
# We have already determined the user doesn't exist here
|
||||
continue
|
||||
|
||||
# Get the expected error type
|
||||
error_type = net_info["errorType"]
|
||||
|
||||
# Get the failure messages and comments
|
||||
failure_errors = net_info.get("errors", {})
|
||||
|
||||
# TODO: refactor
|
||||
resp = net_info.get('resp')
|
||||
if not resp:
|
||||
logger.error(f'No response for {social_network}')
|
||||
continue
|
||||
|
||||
html_text, status_code, error_text, expection_text = resp
|
||||
|
||||
# TODO: add elapsed request time counting
|
||||
response_time = None
|
||||
|
||||
if debug:
|
||||
with open('debug.txt', 'a') as f:
|
||||
status = status_code or 'No response'
|
||||
f.write(f'url: {url}\nerror: {str(error_text)}\nr: {status}\n')
|
||||
if html_text:
|
||||
f.write(f'code: {status}\nresponse: {str(html_text)}\n')
|
||||
|
||||
if status_code and not error_text:
|
||||
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
|
||||
'ignore_403' in net_info)
|
||||
|
||||
# presense flags
|
||||
# True by default
|
||||
presense_flags = net_info.get("presenseStrs", [])
|
||||
is_presense_detected = html_text and all(
|
||||
[(presense_flag in html_text) for presense_flag in presense_flags]) or not presense_flags
|
||||
|
||||
if error_text is not None:
|
||||
logger.debug(error_text)
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.UNKNOWN,
|
||||
query_time=response_time,
|
||||
context=error_text)
|
||||
elif error_type == "message":
|
||||
absence_flags = net_info.get("errorMsg")
|
||||
is_absence_flags_list = isinstance(absence_flags, list)
|
||||
absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
|
||||
# Checks if the error message is in the HTML
|
||||
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
|
||||
if not is_absence_detected and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time)
|
||||
elif error_type == "status_code":
|
||||
# Checks if the status code of the response is 2XX
|
||||
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time)
|
||||
elif error_type == "response_url":
|
||||
# For this detection method, we have turned off the redirect.
|
||||
# So, there is no need to check the response URL: it will always
|
||||
# match the request. Instead, we will ensure that the response
|
||||
# code indicates that the request was successful (i.e. no 404, or
|
||||
# forward to some odd redirect).
|
||||
if 200 <= status_code < 300 and is_presense_detected:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.CLAIMED,
|
||||
query_time=response_time)
|
||||
else:
|
||||
result = QueryResult(username,
|
||||
social_network,
|
||||
url,
|
||||
QueryStatus.AVAILABLE,
|
||||
query_time=response_time)
|
||||
else:
|
||||
# It should be impossible to ever get here...
|
||||
raise ValueError(f"Unknown Error Type '{error_type}' for "
|
||||
f"site '{social_network}'")
|
||||
|
||||
extracted_ids_data = {}
|
||||
|
||||
if recursive_search and result.status == QueryStatus.CLAIMED:
|
||||
try:
|
||||
extracted_ids_data = extract(html_text)
|
||||
except Exception as e:
|
||||
logger.warning(f'Error while parsing {social_network}: {e}', exc_info=True)
|
||||
|
||||
if extracted_ids_data:
|
||||
new_usernames = {}
|
||||
for k, v in extracted_ids_data.items():
|
||||
if 'username' in k:
|
||||
new_usernames[v] = 'username'
|
||||
if k in supported_recursive_search_ids:
|
||||
new_usernames[v] = k
|
||||
|
||||
results_site['ids_usernames'] = new_usernames
|
||||
result.ids_data = extracted_ids_data
|
||||
|
||||
is_similar = net_info.get('similarSearch', False)
|
||||
# Notify caller about results of query.
|
||||
query_notify.update(result, is_similar)
|
||||
|
||||
# Save status of request
|
||||
results_site['status'] = result
|
||||
|
||||
# Save results from request
|
||||
results_site['http_status'] = status_code
|
||||
results_site['is_similar'] = is_similar
|
||||
# results_site['response_text'] = html_text
|
||||
results_site['rank'] = net_info.get('rank', 0)
|
||||
|
||||
# Add this site's results into final dictionary with all of the other results.
|
||||
results_total[social_network] = results_site
|
||||
|
||||
# Notify caller that all queries are finished.
|
||||
query_notify.finish()
|
||||
|
||||
return results_total
|
||||
|
||||
|
||||
def timeout_check(value):
|
||||
"""Check Timeout Argument.
|
||||
|
||||
Checks timeout for validity.
|
||||
|
||||
Keyword Arguments:
|
||||
value -- Time in seconds to wait before timing out request.
|
||||
|
||||
Return Value:
|
||||
Floating point number representing the time (in seconds) that should be
|
||||
used for the timeout.
|
||||
|
||||
NOTE: Will raise an exception if the timeout in invalid.
|
||||
"""
|
||||
from argparse import ArgumentTypeError
|
||||
|
||||
try:
|
||||
timeout = float(value)
|
||||
except ValueError:
|
||||
raise ArgumentTypeError(f"Timeout '{value}' must be a number.")
|
||||
if timeout <= 0:
|
||||
raise ArgumentTypeError(f"Timeout '{value}' must be greater than 0.0s.")
|
||||
return timeout
|
||||
|
||||
|
||||
async def site_self_check(site_name, site_data, logger):
|
||||
query_notify = Mock()
|
||||
changes = {
|
||||
'disabled': False,
|
||||
}
|
||||
|
||||
check_data = [
|
||||
(site_data['username_claimed'], QueryStatus.CLAIMED),
|
||||
(site_data['username_unclaimed'], QueryStatus.AVAILABLE),
|
||||
]
|
||||
|
||||
logger.info(f'Checking {site_name}...')
|
||||
|
||||
for username, status in check_data:
|
||||
results = await maigret(
|
||||
username,
|
||||
{site_name: site_data},
|
||||
query_notify,
|
||||
logger,
|
||||
timeout=30,
|
||||
forced=True,
|
||||
)
|
||||
# don't disable entries with other ids types
|
||||
if site_name not in results:
|
||||
logger.info(results)
|
||||
changes['disabled'] = True
|
||||
continue
|
||||
site_status = results[site_name]['status'].status
|
||||
if site_status != status:
|
||||
if site_status == QueryStatus.UNKNOWN:
|
||||
msg = site_data.get('errorMsg')
|
||||
etype = site_data.get('errorType')
|
||||
logger.info(f'Error while searching {username} in {site_name}: {msg}, type {etype}')
|
||||
# don't disable in case of available username
|
||||
if status == QueryStatus.CLAIMED:
|
||||
changes['disabled'] = True
|
||||
elif status == QueryStatus.CLAIMED:
|
||||
logger.info(f'Not found `{username}` in {site_name}, must be claimed')
|
||||
changes['disabled'] = True
|
||||
else:
|
||||
logger.info(f'Found `{username}` in {site_name}, must be available')
|
||||
changes['disabled'] = True
|
||||
|
||||
logger.info(f'Site {site_name} is okay')
|
||||
return changes
|
||||
|
||||
|
||||
async def self_check(json_file, logger):
|
||||
sites = SitesInformation(json_file)
|
||||
all_sites = {}
|
||||
|
||||
def disabled_count(data):
|
||||
return len(list(filter(lambda x: x.get('disabled', False), data)))
|
||||
|
||||
async def update_site_data(site_name, site_data, all_sites, logger):
|
||||
updates = await site_self_check(site_name, dict(site_data), logger)
|
||||
all_sites[site_name].update(updates)
|
||||
|
||||
for site in sites:
|
||||
all_sites[site.name] = site.information
|
||||
|
||||
disabled_old_count = disabled_count(all_sites.values())
|
||||
|
||||
tasks = []
|
||||
for site_name, site_data in all_sites.items():
|
||||
future = asyncio.ensure_future(update_site_data(site_name, site_data, all_sites, logger))
|
||||
tasks.append(future)
|
||||
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
disabled_new_count = disabled_count(all_sites.values())
|
||||
total_disabled = disabled_new_count - disabled_old_count
|
||||
if total_disabled > 0:
|
||||
message = 'Disabled'
|
||||
else:
|
||||
message = 'Enabled'
|
||||
total_disabled *= -1
|
||||
print(f'{message} {total_disabled} checked sites. Run with `--info` flag to get more information')
|
||||
|
||||
with open(json_file, 'w') as f:
|
||||
json.dump(all_sites, f, indent=4)
|
||||
|
||||
|
||||
async def main():
    """Command-line entry point.

    Parses arguments, configures logging, builds the site database
    (optionally self-checking it), then runs maigret() for every requested
    username — including usernames discovered recursively from result
    pages — and writes plain-text and optional CSV reports.
    """
    version_string = f"%(prog)s {__version__}\n" + \
                     f"{requests.__description__}: {requests.__version__}\n" + \
                     f"Python: {platform.python_version()}"

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=f"{module_name} (Version {__version__})"
                            )
    parser.add_argument("--version",
                        action="version", version=version_string,
                        help="Display version information and dependencies."
                        )
    parser.add_argument("--info",
                        action="store_true", dest="info", default=False,
                        help="Display service information."
                        )
    parser.add_argument("--verbose", "-v",
                        action="store_true", dest="verbose", default=False,
                        help="Display extra information and metrics."
                        )
    parser.add_argument("-d", "--debug",
                        action="store_true", dest="debug", default=False,
                        help="Saving debugging information and sites responses in debug.txt."
                        )
    parser.add_argument("--rank", "-r",
                        action="store_true", dest="rank", default=False,
                        help="Present websites ordered by their Alexa.com global rank in popularity.")
    parser.add_argument("--folderoutput", "-fo", dest="folderoutput",
                        help="If using multiple usernames, the output of the results will be saved to this folder."
                        )
    parser.add_argument("--output", "-o", dest="output",
                        help="If using single username, the output of the result will be saved to this file."
                        )
    parser.add_argument("--csv",
                        action="store_true", dest="csv", default=False,
                        help="Create Comma-Separated Values (CSV) File."
                        )
    parser.add_argument("--site",
                        action="append", metavar='SITE_NAME',
                        dest="site_list", default=None,
                        help="Limit analysis to just the listed sites (use several times to specify more than one)"
                        )
    parser.add_argument("--proxy", "-p", metavar='PROXY_URL',
                        action="store", dest="proxy", default=None,
                        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
                        )
    parser.add_argument("--json", "-j", metavar="JSON_FILE",
                        dest="json_file", default=None,
                        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument("--timeout",
                        action="store", metavar='TIMEOUT',
                        dest="timeout", type=timeout_check, default=10,
                        help="Time (in seconds) to wait for response to requests."
                             "Default timeout of 10.0s."
                             "A longer timeout will be more likely to get results from slow sites."
                             "On the other hand, this may cause a long delay to gather all results."
                        )
    parser.add_argument("--print-not-found",
                        action="store_true", dest="print_not_found", default=False,
                        help="Print sites where the username was not found."
                        )
    parser.add_argument("--print-errors",
                        action="store_true", dest="print_check_errors", default=False,
                        help="Print errors messages: connection, captcha, site country ban, etc."
                        )
    parser.add_argument("--no-color",
                        action="store_true", dest="no_color", default=False,
                        help="Don't color terminal output"
                        )
    # NOTE(review): "bowser" typo in the user-facing help string below.
    parser.add_argument("--browse", "-b",
                        action="store_true", dest="browse", default=False,
                        help="Browse to all results on default bowser."
                        )
    parser.add_argument("--no-recursion",
                        action="store_true", dest="disable_recursive_search", default=False,
                        help="Disable parsing pages for other usernames and recursive search by them."
                        )
    parser.add_argument("--self-check",
                        action="store_true", default=False,
                        help="Do self check for sites and database and disable non-working ones."
                        )
    parser.add_argument("--use-disabled-sites",
                        action="store_true", default=False,
                        help="Use disabled sites to search (may cause many false positives)."
                        )
    parser.add_argument("--parse",
                        dest="parse_url", default='',
                        help="Parse page by URL and extract username and IDs to use for search."
                        )
    parser.add_argument("username",
                        nargs='+', metavar='USERNAMES',
                        action="store",
                        help="One or more usernames to check with social networks."
                        )
    parser.add_argument("--tags",
                        dest="tags", default='',
                        help="Specify tags of sites."
                        )
    args = parser.parse_args()

    # Logging
    log_level = logging.ERROR
    logging.basicConfig(
        format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=logging.ERROR
    )

    # Verbosity flags escalate the level: --debug > --info > --verbose.
    if args.debug:
        log_level = logging.DEBUG
    elif args.info:
        log_level = logging.INFO
    elif args.verbose:
        log_level = logging.WARNING

    logger = logging.getLogger('maigret')
    logger.setLevel(log_level)

    # Usernames initial list: maps username -> id type; '-' placeholders
    # are dropped.
    usernames = {
        u: 'username'
        for u in args.username
        if u not in ['-']
    }

    recursive_search_enabled = not args.disable_recursive_search

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    # Seed the search with usernames/ids extracted from a given page.
    if args.parse_url:
        page, _ = parse(args.parse_url, cookies_str='')
        info = extract(page)
        text = 'Extracted ID data from webpage: ' + ', '.join([f'{a}: {b}' for a, b in info.items()])
        print(text)
        for k, v in info.items():
            if 'username' in k:
                usernames[v] = 'username'
            if k in supported_recursive_search_ids:
                usernames[v] = k

    if args.tags:
        args.tags = set(str(args.tags).split(','))

    # Default to the bundled database when no --json file is given.
    if args.json_file is None:
        args.json_file = \
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         "resources/data.json"
                         )

    # Database self-checking
    if args.self_check:
        print('Maigret sites database self-checking...')
        await self_check(args.json_file, logger)

    # Create object with all information about sites we are aware of.
    try:
        sites = SitesInformation(args.json_file)
    except Exception as error:
        print(f"ERROR: {error}")
        sys.exit(1)

    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {}
    for site in sites:
        site_data_all[site.name] = site.information

    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.

        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
            # NOTE(review): this tests whether *any* site has matched so far,
            # not whether *this* name matched — once one requested site is
            # found, later unknown names are silently accepted. Verify intent.
            if not site_data:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(
                f"Error: Desired sites not found: {', '.join(site_missing)}.")
            sys.exit(1)

    if args.rank:
        # Sort data by rank
        site_dataCpy = dict(site_data)
        ranked_sites = sorted(site_data, key=lambda k: ("rank" not in k, site_data[k].get("rank", sys.maxsize)))
        site_data = {}
        for site in ranked_sites:
            site_data[site] = site_dataCpy.get(site)

    # Database consistency
    enabled_count = len(list(filter(lambda x: not x.get('disabled', False), site_data.values())))
    print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(result=None,
                                    verbose=args.verbose,
                                    print_found_only=not args.print_not_found,
                                    skip_check_errors=not args.print_check_errors,
                                    color=not args.no_color)

    already_checked = set()

    # Process the username queue; recursive search may append new entries
    # while we iterate, so pop one item per pass.
    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]

        if username.lower() in already_checked:
            continue
        else:
            already_checked.add(username.lower())

        # check for characters do not supported by sites generally
        found_unsupported_chars = set(unsupported_characters).intersection(set(username))

        if found_unsupported_chars:
            pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
            print(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
            continue

        # NOTE(review): debug=args.verbose passes the --verbose flag, not
        # --debug — confirm this is intentional.
        results = await maigret(username,
                                site_data,
                                query_notify,
                                proxy=args.proxy,
                                timeout=args.timeout,
                                recursive_search=recursive_search_enabled,
                                id_type=id_type,
                                tags=args.tags,
                                debug=args.verbose,
                                logger=logger,
                                forced=args.use_disabled_sites,
                                )

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The usernames results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        with open(result_file, "w", encoding="utf-8") as file:
            exists_counter = 0
            for website_name in results:
                dictionary = results[website_name]

                # Feed usernames discovered on result pages back into the
                # search queue for recursive checking.
                new_usernames = dictionary.get('ids_usernames')
                if new_usernames:
                    for u, utype in new_usernames.items():
                        usernames[u] = utype

                if dictionary.get("status").status == QueryStatus.CLAIMED:
                    exists_counter += 1
                    file.write(dictionary["url_user"] + "\n")
            file.write(f"Total Websites Username Detected On : {exists_counter}")

        if args.csv:
            with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow(['username',
                                 'name',
                                 'url_main',
                                 'url_user',
                                 'exists',
                                 'http_status',
                                 'response_time_s'
                                 ]
                                )
                for site in results:
                    response_time_s = results[site]['status'].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow([username,
                                     site,
                                     results[site]['url_main'],
                                     results[site]['url_user'],
                                     str(results[site]['status'].status),
                                     results[site]['http_status'],
                                     response_time_s
                                     ]
                                    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: run the asynchronous main() and exit with a non-zero
    # status when the user interrupts the search with Ctrl+C.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print('Maigret is interrupted.')
        sys.exit(1)
|
||||
@@ -0,0 +1,283 @@
|
||||
"""Sherlock Notify Module
|
||||
|
||||
This module defines the objects for notifying the caller about the
|
||||
results of queries.
|
||||
"""
|
||||
from colorama import Fore, Style, init
|
||||
from result import QueryStatus
|
||||
|
||||
|
||||
class QueryNotify():
    """Base interface for reporting query results.

    Defines the notification hooks invoked around a series of queries.
    Subclasses override these hooks to implement concrete behavior
    (e.g. printing to the terminal); the base implementations only
    remember the last result.
    """

    def __init__(self, result=None):
        """Initialize the notifier.

        Keyword Arguments:
        result                 -- Optional QueryResult() to seed the
                                  notifier with. Default is None.
        """
        self.result = result

    def start(self, message=None, id_type='username'):
        """Hook called once before any queries run.

        Keyword Arguments:
        message                -- Optional object giving context for the
                                  start of the query series.
        id_type                -- Kind of identifier being checked.
                                  Default is 'username'.

        No-op in the base class; subclasses may override.
        """

    def update(self, result):
        """Hook called with each query result.

        Keyword Arguments:
        result                 -- QueryResult() for the query just finished.

        The base class simply remembers the most recent result.
        """
        self.result = result

    def finish(self, message=None):
        """Hook called once after all queries have completed.

        Keyword Arguments:
        message                -- Optional object giving context for the
                                  end of the query series.

        No-op in the base class; subclasses may override.
        """

    def __str__(self):
        """Return the string form of the most recent result."""
        return str(self.result)
|
||||
|
||||
|
||||
class QueryNotifyPrint(QueryNotify):
    """Query Notify Print Object.

    Query notify class that prints results to the terminal, optionally
    with ANSI colors via colorama.
    """

    def __init__(self, result=None, verbose=False, print_found_only=False,
                 skip_check_errors=False, color=True):
        """Create Query Notify Print Object.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        verbose                -- Boolean indicating whether to give verbose output.
        print_found_only       -- Boolean indicating whether to only print found sites.
        skip_check_errors      -- Boolean indicating whether to suppress
                                  error-status results.
        color                  -- Boolean indicating whether to color terminal output

        Return Value:
        Nothing.
        """

        # Colorama module's initialization.
        init(autoreset=True)

        super().__init__(result)
        self.verbose = verbose
        self.print_found_only = print_found_only
        self.skip_check_errors = skip_check_errors
        self.color = color

    def start(self, message, id_type):
        """Notify Start.

        Will print the title to the standard output.

        Keyword Arguments:
        self                   -- This object.
        message                -- String containing username that the series
                                  of queries are about.
        id_type                -- Kind of identifier being checked
                                  (e.g. 'username').

        Return Value:
        Nothing.
        """

        title = f"Checking {id_type}"
        if self.color:
            print(Style.BRIGHT + Fore.GREEN + "[" +
                  Fore.YELLOW + "*" +
                  Fore.GREEN + f"] {title}" +
                  Fore.WHITE + f" {message}" +
                  Fore.GREEN + " on:")
        else:
            print(f"[*] {title} {message} on:")

    def get_additional_data_text(self, items, prepend=''):
        """Render extracted ids/usernames as a box-drawing tree.

        Keyword Arguments:
        items                  -- Iterable of plain values or
                                  (field_name, field_value) tuples; a value
                                  that looks like a stringified list is
                                  parsed and rendered recursively.
        prepend                -- Indentation prefix for nested levels.

        Return Value:
        Multi-line string, each item on its own line.
        """
        # Local import: only needed for safely parsing nested list literals.
        import ast

        text = ''
        for num, item in enumerate(items):
            # Last item closes the tree branch; earlier items continue it.
            box_symbol = '┣╸' if num != len(items) - 1 else '┗╸'

            if isinstance(item, tuple):
                field_name, field_value = item
                if field_value.startswith('[\''):
                    is_last_item = num == len(items) - 1
                    prepend_symbols = ' ' * 3 if is_last_item else ' ┃ '
                    # SECURITY FIX: this value originates from scraped page
                    # content; eval() here allowed arbitrary code execution.
                    # ast.literal_eval() parses the list literal safely.
                    field_value = self.get_additional_data_text(
                        ast.literal_eval(field_value), prepend_symbols)
                text += f'\n{prepend}{box_symbol}{field_name}: {field_value}'
            else:
                text += f'\n{prepend}{box_symbol} {item}'

        return text

    def update(self, result, is_similar=False):
        """Notify Update.

        Will print the query result to the standard output.

        Keyword Arguments:
        self                   -- This object.
        result                 -- Object of type QueryResult() containing
                                  results for this query.
        is_similar             -- Boolean: result only resembles the target
                                  username (printed with '?' instead of '+').

        Return Value:
        Nothing.
        """
        self.result = result

        if not self.result.ids_data:
            ids_data_text = ""
        else:
            ids_data_text = self.get_additional_data_text(self.result.ids_data.items(), ' ')

        def make_colored_terminal_notify(status, text, status_color, text_color, appendix):
            # One-line ANSI-colored notification.
            text = [
                f'{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]' +
                f'{text_color} {text}: {Style.RESET_ALL}' +
                f'{appendix}'
            ]
            return ''.join(text)

        def make_simple_terminal_notify(status, text, appendix):
            # Plain-text notification for --no-color mode.
            return f'[{status}] {text}: {appendix}'

        def make_terminal_notify(is_colored=True, *args):
            if is_colored:
                return make_colored_terminal_notify(*args)
            else:
                return make_simple_terminal_notify(*args)

        notify = None

        # Output to the terminal is desired.
        if result.status == QueryStatus.CLAIMED:
            color = Fore.BLUE if is_similar else Fore.GREEN
            status = '?' if is_similar else '+'
            notify = make_terminal_notify(
                self.color,
                status, result.site_name,
                color, color,
                result.site_url_user + ids_data_text
            )
        elif result.status == QueryStatus.AVAILABLE:
            if not self.print_found_only:
                notify = make_terminal_notify(
                    self.color,
                    '-', result.site_name,
                    Fore.RED, Fore.YELLOW,
                    'Not found!' + ids_data_text
                )
        elif result.status == QueryStatus.UNKNOWN:
            if not self.skip_check_errors:
                notify = make_terminal_notify(
                    self.color,
                    '?', result.site_name,
                    Fore.RED, Fore.RED,
                    self.result.context + ids_data_text
                )
        elif result.status == QueryStatus.ILLEGAL:
            if not self.print_found_only:
                text = 'Illegal Username Format For This Site!'
                notify = make_terminal_notify(
                    self.color,
                    '-', result.site_name,
                    Fore.RED, Fore.YELLOW,
                    text + ids_data_text
                )
        else:
            # It should be impossible to ever get here...
            raise ValueError(f"Unknown Query Status '{str(result.status)}' for "
                             f"site '{self.result.site_name}'")

        if notify:
            print(notify)

    def __str__(self):
        """Convert Object To String.

        Keyword Arguments:
        self                   -- This object.

        Return Value:
        Nicely formatted string to get information about this object.
        """
        return str(self.result)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,93 @@
|
||||
"""Sherlock Result Module
|
||||
|
||||
This module defines various objects for recording the results of queries.
|
||||
"""
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class QueryStatus(Enum):
    """Outcome of a username query against a single site."""

    CLAIMED = "Claimed"      # username detected on the site
    AVAILABLE = "Available"  # username not detected
    UNKNOWN = "Unknown"      # error occurred while trying to detect username
    ILLEGAL = "Illegal"      # username not allowable for this site

    def __str__(self):
        """Return the human-readable status value."""
        return self.value
|
||||
|
||||
|
||||
class QueryResult():
    """Result of a query about one username on one site.

    Plain record holding the identity of the query (username, site, user
    URL), its QueryStatus, and optional timing/context/extracted-id data.
    """

    def __init__(self, username, site_name, site_url_user, status, ids_data=None,
                 query_time=None, context=None):
        """Create a query result record.

        Keyword Arguments:
        username               -- Username the query was about.
        site_name              -- Name of the site queried.
        site_url_user          -- URL the username would have on the site
                                  (whether or not it actually exists).
        status                 -- QueryStatus() describing the outcome.
        ids_data               -- Other usernames / inner ids extracted from
                                  the page, if any. Default None.
        query_time             -- Seconds the query took. Default None.
        context                -- Extra context string (e.g. error type).
                                  Default None.

        Return Value:
        Nothing.
        """
        self.username = username
        self.site_name = site_name
        self.site_url_user = site_url_user
        self.status = status
        self.query_time = query_time
        self.context = context
        self.ids_data = ids_data

    def __str__(self):
        """Return the status string, with ' (context)' appended when set."""
        base = str(self.status)
        return base if self.context is None else f"{base} ({self.context})"
|
||||
@@ -0,0 +1,246 @@
|
||||
"""Sherlock Sites Information Module
|
||||
|
||||
This module supports storing information about web sites.
|
||||
This is the raw data that will be used to search for usernames.
|
||||
"""
|
||||
import json
|
||||
import operator
|
||||
import sys
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class SiteInformation():
    """Static description of a single supported web site.

    Bundles the site's name, home URL, username-URL template (with a
    "{}" placeholder for the username), popularity rank, known
    claimed/unclaimed test usernames, and the raw detection dictionary.
    """

    def __init__(self, name, url_home, url_username_format, popularity_rank,
                 username_claimed, username_unclaimed,
                 information):
        """Initialize the site description.

        Keyword Arguments:
        name                   -- String which identifies the site.
        url_home               -- URL of the site's home page.
        url_username_format    -- URL template containing "{}" where the
                                  username is substituted, e.g.
                                  "https://somesite.com/users/{}".
        popularity_rank        -- Integer popularity (smaller = more
                                  popular); None or 0 means unknown.
        username_claimed       -- Username known to exist on the site.
        username_unclaimed     -- Username known not to exist on the site.
        information            -- Raw dictionary with everything known about
                                  the site, including detection settings
                                  consumed later by the checker.

        Return Value:
        Nothing.
        """
        self.name = name
        self.url_home = url_home
        self.url_username_format = url_username_format

        # Unknown popularity sorts the site to the bottom of ranked lists.
        if popularity_rank in (None, 0):
            popularity_rank = sys.maxsize
        self.popularity_rank = popularity_rank

        self.username_claimed = username_claimed
        self.username_unclaimed = username_unclaimed
        self.information = information

    def __str__(self):
        """Return "<name> (<home URL>)"."""
        return f"{self.name} ({self.url_home})"
|
||||
|
||||
|
||||
class SitesInformation():
    def __init__(self, data_file_path=None):
        """Create Sites Information Object.

        Loads the site database from a local JSON file or an http(s) URL
        and builds a SiteInformation() object per site.

        Keyword Arguments:
        self                   -- This object.
        data_file_path         -- String which indicates path to data file.
                                  The file name must end in ".json".

                                  There are 3 possible formats:
                                  * Absolute File Format
                                    For example, "c:/stuff/data.json".
                                  * Relative File Format
                                    The current working directory is used
                                    as the context.
                                    For example, "data.json".
                                  * URL Format
                                    For example,
                                    "https://example.com/data.json", or
                                    "http://example.com/data.json".

                                  An exception will be thrown if the path
                                  to the data file is not in the expected
                                  format, or if there was any problem loading
                                  the file.

        Return Value:
        Nothing.
        """

        # NOTE(review): data_file_path=None raises TypeError on the slice
        # below, despite the parameter default suggesting a fallback —
        # confirm all callers always pass an explicit path.
        # Ensure that specified data file has correct extension.
        if ".json" != data_file_path[-5:].lower():
            raise FileNotFoundError(f"Incorrect JSON file extension for "
                                    f"data file '{data_file_path}'."
                                    )

        if (("http://" == data_file_path[:7].lower()) or
            ("https://" == data_file_path[:8].lower())
           ):
            # Reference is to a URL.
            try:
                response = requests.get(url=data_file_path)
            except Exception as error:
                raise FileNotFoundError(f"Problem while attempting to access "
                                        f"data file URL '{data_file_path}': "
                                        f"{str(error)}"
                                        )
            if response.status_code == 200:
                try:
                    site_data = response.json()
                except Exception as error:
                    raise ValueError(f"Problem parsing json contents at "
                                     f"'{data_file_path}': {str(error)}."
                                     )
            else:
                raise FileNotFoundError(f"Bad response while accessing "
                                        f"data file URL '{data_file_path}'."
                                        )
            # NOTE(review): in this URL branch engines_data is never
            # assigned, so a site entry containing 'engine' below would
            # raise NameError. Also, the URL branch treats the whole
            # document as site data, while the file branch expects
            # "sites"/"engines" keys — confirm the two formats.
        else:
            # Reference is to a file.
            try:
                with open(data_file_path, "r", encoding="utf-8") as file:
                    try:
                        data = json.load(file)
                        site_data = data.get("sites")
                        engines_data = data.get("engines")
                    except Exception as error:
                        raise ValueError(f"Problem parsing json contents at "
                                         f"'{data_file_path}': {str(error)}."
                                         )
            except FileNotFoundError as error:
                raise FileNotFoundError(f"Problem while attempting to access "
                                        f"data file '{data_file_path}'."
                                        )

        self.sites = {}

        # Add all of site information from the json file to internal site list.
        for site_name in site_data:
            try:
                site = site_data[site_name]
                # If popularity unknown, make site be at bottom of list.
                popularity_rank = site.get("rank", sys.maxsize)

                if 'engine' in site:
                    # Merge shared engine defaults into the site record so
                    # each entry is self-sufficient for detection.
                    engine_data = engines_data[site['engine']]['site']
                    site.update(engine_data)

                self.sites[site_name] = \
                    SiteInformation(site_name,
                                    site["urlMain"],
                                    site["url"],
                                    popularity_rank,
                                    site["username_claimed"],
                                    site["username_unclaimed"],
                                    site
                                    )
            except KeyError as error:
                raise ValueError(f"Problem parsing json contents at "
                                 f"'{data_file_path}': "
                                 f"Missing attribute {str(error)}."
                                 )

        return

    def site_name_list(self, popularity_rank=False):
        """Get Site Name List.

        Keyword Arguments:
        self                   -- This object.
        popularity_rank        -- Boolean indicating if list should be sorted
                                  by popularity rank.
                                  Default value is False.
                                  NOTE: List is sorted in ascending
                                  alphabetical order if popularity rank
                                  is not requested.

        Return Value:
        List of strings containing names of sites.
        """

        if popularity_rank:
            # Sort in ascending popularity rank order.
            site_rank_name = \
                sorted([(site.popularity_rank, site.name) for site in self],
                       key=operator.itemgetter(0)
                       )
            site_names = [name for _, name in site_rank_name]
        else:
            # Sort in ascending alphabetical order.
            site_names = sorted([site.name for site in self], key=str.lower)

        return site_names

    def __iter__(self):
        """Yield each SiteInformation() object in insertion order."""

        for site_name in self.sites:
            yield self.sites[site_name]

    def __len__(self):
        """Return the number of sites in the collection."""
        return len(self.sites)
|
||||
@@ -0,0 +1,4 @@
|
||||
"""Sherlock Tests
|
||||
|
||||
This package contains various submodules used to run tests.
|
||||
"""
|
||||
@@ -0,0 +1,297 @@
|
||||
"""Sherlock Tests
|
||||
|
||||
This module contains various tests.
|
||||
"""
|
||||
from tests.base import SherlockBaseTest
|
||||
import unittest
|
||||
|
||||
|
||||
class SherlockDetectTests(SherlockBaseTest):
    """Tests exercising each username-detection mechanism on a known site.

    Each mechanism ("message", "status_code", "response_url") is checked
    twice: once with a username known to exist on the probe site and once
    with a username known not to exist.
    """

    def _run_detection_case(self, site, expected_error_type, exist_check):
        """Drive a single detection test against one site.

        Keyword Arguments:
        self                -- This object.
        site                -- Name of the site to test against.
        expected_error_type -- Detection mechanism the site is expected to
                               use ("message", "status_code", or
                               "response_url").
        exist_check         -- True to check the claimed username,
                               False to check the unclaimed one.

        Return Value:
        N/A.
        Will trigger an assert if detection mechanism did not work as expected.
        """
        site_data = self.site_data_all[site]

        # Ensure that the site's detection method has not changed.
        self.assertEqual(expected_error_type, site_data["errorType"])

        username_key = "username_claimed" if exist_check else "username_unclaimed"
        self.username_check([site_data[username_key]],
                            [site],
                            exist_check=exist_check
                            )

    def test_detect_true_via_message(self):
        """Username exists and is detected via an error message."""
        self._run_detection_case('Instructables', "message", True)

    def test_detect_false_via_message(self):
        """Username does not exist and is detected via an error message."""
        self._run_detection_case('Instructables', "message", False)

    def test_detect_true_via_status_code(self):
        """Username exists and is detected via the HTTP status code."""
        self._run_detection_case('Facebook', "status_code", True)

    def test_detect_false_via_status_code(self):
        """Username does not exist and is detected via the HTTP status code."""
        self._run_detection_case('Facebook', "status_code", False)

    def test_detect_true_via_response_url(self):
        """Username exists and is detected via the response URL."""
        self._run_detection_case('Quora', "response_url", True)

    def test_detect_false_via_response_url(self):
        """Username does not exist and is detected via the response URL."""
        self._run_detection_case('Quora', "response_url", False)
|
||||
|
||||
|
||||
class SherlockSiteCoverageTests(SherlockBaseTest):
    """Coverage tests: run every site that uses a given detection algorithm.

    Each pair of tests sweeps all sites sharing one detection mechanism,
    first for a known-missing username, then for a known-existing one.
    A final test verifies that no site lacks test data entirely.
    """

    def test_coverage_false_via_response_url(self):
        """All "response URL" sites report a non-existent username as absent."""
        self.detect_type_check("response_url", exist_check=False)

    def test_coverage_true_via_response_url(self):
        """All "response URL" sites report an existing username as present."""
        self.detect_type_check("response_url", exist_check=True)

    def test_coverage_false_via_status(self):
        """All "HTTP status" sites report a non-existent username as absent."""
        self.detect_type_check("status_code", exist_check=False)

    def test_coverage_true_via_status(self):
        """All "HTTP status" sites report an existing username as present."""
        self.detect_type_check("status_code", exist_check=True)

    def test_coverage_false_via_message(self):
        """All "error message" sites report a non-existent username as absent."""
        self.detect_type_check("message", exist_check=False)

    def test_coverage_true_via_message(self):
        """All "error message" sites report an existing username as present."""
        self.detect_type_check("message", exist_check=True)

    def test_coverage_total(self):
        """Every known site has the test data needed for full coverage."""
        self.coverage_total_check()
|
||||
@@ -0,0 +1,228 @@
|
||||
"""Sherlock Base Tests
|
||||
|
||||
This module contains various utilities for running tests.
|
||||
"""
|
||||
import os
|
||||
import os.path
|
||||
import unittest
|
||||
import maigret
|
||||
from result import QueryStatus
|
||||
from result import QueryResult
|
||||
from notify import QueryNotify
|
||||
from sites import SitesInformation
|
||||
import warnings
|
||||
|
||||
|
||||
class SherlockBaseTest(unittest.TestCase):
    """Common fixture and helpers shared by the Sherlock/Maigret tests."""

    def setUp(self):
        """Sherlock Base Test Setup.

        Does common setup tasks for base Sherlock tests: loads site data,
        the optional excluded-sites list, and default query options.

        Keyword Arguments:
        self -- This object.

        Return Value:
        N/A.
        """
        # This ignores the ResourceWarning from an unclosed SSLSocket.
        # TODO: Figure out how to fix the code so this is not needed.
        warnings.simplefilter("ignore", ResourceWarning)

        # Create object with all information about sites we are aware of.
        sites = SitesInformation()

        # Create original dictionary from SitesInformation() object.
        # Eventually, the rest of the code will be updated to use the new
        # object directly, but this will glue the two pieces together.
        self.site_data_all = {site.name: site.information for site in sites}

        # Load excluded sites list, if any.
        excluded_sites_path = os.path.join(
            os.path.dirname(os.path.realpath(maigret.__file__)),
            "tests/.excluded_sites"
        )
        try:
            with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file:
                self.excluded_sites = excluded_sites_file.read().splitlines()
        except FileNotFoundError:
            # No exclusion list present: test every site.
            self.excluded_sites = []

        # Create notify object for query results.
        self.query_notify = QueryNotify()

        # Default query options used by username_check().
        self.tor = False
        self.unique_tor = False
        self.timeout = None
        # Skip (rather than fail) sites that return an error status.
        self.skip_error_sites = True

        return

    def site_data_filter(self, site_list):
        """Filter Site Data.

        Keyword Arguments:
        self       -- This object.
        site_list  -- List of strings corresponding to sites which
                      should be filtered.

        Return Value:
        Dictionary containing sub-set of site data specified by 'site_list'.
        """
        # Create new dictionary that has filtered site data based on input.
        # Note that any site specified which is not understood will generate
        # an error (surfaced per-site via subTest).
        site_data = {}
        for site in site_list:
            with self.subTest(f"Checking test vector Site '{site}' "
                              f"exists in total site data."
                              ):
                site_data[site] = self.site_data_all[site]

        return site_data

    def username_check(self, username_list, site_list, exist_check=True):
        """Username Exist Check.

        Keyword Arguments:
        self           -- This object.
        username_list  -- List of strings corresponding to usernames
                          which should exist on *all* of the sites.
        site_list      -- List of strings corresponding to sites which
                          should be filtered.
        exist_check    -- Boolean which indicates if this should be
                          a check for Username existence,
                          or non-existence.

        Return Value:
        N/A.
        Will trigger an assert if Username does not have the expected
        existence state.
        """
        # Filter all site data down to just what is needed for this test.
        site_data = self.site_data_filter(site_list)

        if exist_check:
            check_type_text = "claimed"
            exist_result_desired = QueryStatus.CLAIMED
        else:
            check_type_text = "available"
            exist_result_desired = QueryStatus.AVAILABLE

        for username in username_list:
            results = maigret.sherlock(username,
                                       site_data,
                                       self.query_notify,
                                       tor=self.tor,
                                       unique_tor=self.unique_tor,
                                       timeout=self.timeout
                                       )
            for site, result in results.items():
                with self.subTest(f"Checking Username '{username}' "
                                  f"{check_type_text} on Site '{site}'"
                                  ):
                    if (self.skip_error_sites
                            and result['status'].status == QueryStatus.UNKNOWN):
                        # Some error connecting to site.
                        self.skipTest(f"Skipping Username '{username}' "
                                      f"{check_type_text} on Site '{site}': "
                                      f"Site returned error status."
                                      )

                    self.assertEqual(exist_result_desired,
                                     result['status'].status)

        return

    def detect_type_check(self, detect_type, exist_check=True):
        """Username Exist Check.

        Keyword Arguments:
        self         -- This object.
        detect_type  -- String corresponding to detection algorithm
                        which is desired to be tested.
                        Note that only sites which have documented
                        usernames which exist and do not exist
                        will be tested.
        exist_check  -- Boolean which indicates if this should be
                        a check for Username existence,
                        or non-existence.

        Return Value:
        N/A.
        Runs tests on all sites using the indicated detection algorithm
        and which also has test vectors specified.
        Will trigger an assert if Username does not have the expected
        existence state.
        """
        # Dictionary of sites that should be tested for having a username.
        # This will allow us to test sites with a common username in parallel.
        sites_by_username = {}

        for site, site_data in self.site_data_all.items():
            if (
                 (site in self.excluded_sites) or
                 (site_data["errorType"] != detect_type) or
                 (site_data.get("username_claimed") is None) or
                 (site_data.get("username_unclaimed") is None)
               ):
                # This is either not a site we are interested in, or the
                # site does not contain the required information to do
                # the tests.
                continue

            # We should run a test on this site.
            # Figure out which type of username to query for.
            if exist_check:
                username = site_data.get("username_claimed")
            else:
                username = site_data.get("username_unclaimed")

            # Group sites by the username that will be queried so that
            # sites sharing a username can be tested in one pass.
            sites_by_username.setdefault(username, []).append(site)

        # Check on the username availability against all of the sites.
        for username, site_list in sites_by_username.items():
            self.username_check([username],
                                site_list,
                                exist_check=exist_check
                                )

        return

    def coverage_total_check(self):
        """Total Coverage Check.

        Keyword Arguments:
        self -- This object.

        Return Value:
        N/A.
        Counts up all Sites with full test data available.
        Will trigger an assert if any Site does not have test coverage.
        """
        site_no_tests_list = []

        for site, site_data in self.site_data_all.items():
            if (
                 (site_data.get("username_claimed") is None) or
                 (site_data.get("username_unclaimed") is None)
               ):
                # Test information not available on this site.
                site_no_tests_list.append(site)

        # The joined list doubles as the failure message: any site name
        # that appears here lacks test vectors.
        self.assertEqual("", ", ".join(site_no_tests_list))

        return
|
||||
@@ -0,0 +1,14 @@
|
||||
beautifulsoup4>=4.8.0
|
||||
bs4>=0.0.1
|
||||
certifi>=2019.6.16
|
||||
colorama>=0.4.1
|
||||
lxml>=4.4.0
|
||||
PySocks>=1.7.0
|
||||
requests>=2.22.0
|
||||
requests-futures>=1.0.0
|
||||
soupsieve>=1.9.2
|
||||
stem>=1.8.0
|
||||
torrequest>=0.1.0
|
||||
git+https://github.com/soxoj/socid_extractor
|
||||
aiohttp==3.5.4
|
||||
mock==4.0.2
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
@@ -0,0 +1,90 @@
|
||||
## Demo with page parsing and recursive username search
|
||||
|
||||
```bash
|
||||
python3 maigret.py --ids --print-found --skip-errors alexaimephotographycars
|
||||
[*] Checking username alexaimephotographycars on:
|
||||
[+] 500px: https://500px.com/p/alexaimephotographycars
|
||||
┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
|
||||
┣╸legacy_id: 26403415
|
||||
┣╸username: alexaimephotographycars
|
||||
┣╸name: Alex Aimé
|
||||
┣╸website: www.flickr.com/photos/alexaimephotography/
|
||||
┣╸facebook_link: www.instagram.com/street.reality.photography/
|
||||
┣╸instagram_username: alexaimephotography
|
||||
┗╸twitter_username: Alexaimephotogr
|
||||
[*] Checking username alexaimephotography on:
|
||||
[+] DeviantART: https://alexaimephotography.deviantart.com
|
||||
┣╸country: France
|
||||
┣╸registered_for_seconds: 55040868
|
||||
┣╸gender: male
|
||||
┣╸username: Alexaimephotography
|
||||
┣╸twitter_username: alexaimephotogr
|
||||
┣╸website: www.instagram.com/alexaimephotography/
|
||||
┗╸links:
|
||||
┗╸ https://www.instagram.com/alexaimephotography/
|
||||
[+] EyeEm: https://www.eyeem.com/u/alexaimephotography
|
||||
┣╸eyeem_id: 21974802
|
||||
┣╸eyeem_username: alexaimephotography
|
||||
┣╸fullname: Alex
|
||||
┣╸followers: 10
|
||||
┣╸friends: 2
|
||||
┣╸liked_photos: 37
|
||||
┣╸photos: 10
|
||||
┗╸facebook_uid: 1534915183474093
|
||||
[+] Facebook: https://www.facebook.com/alexaimephotography
|
||||
[+] Gramho: https://gramho.com/explore-hashtag/alexaimephotography
|
||||
[+] Instagram: https://www.instagram.com/alexaimephotography
|
||||
┣╸username: alexaimephotography
|
||||
┣╸full_name: Alexaimephotography
|
||||
┣╸id: 6828488620
|
||||
┣╸biography: 🇮🇹 🇲🇫 🇩🇪
|
||||
Amateur photographer
|
||||
Follow me @street.reality.photography
|
||||
Sony A7ii
|
||||
┗╸external_url: https://www.flickr.com/photos/alexaimephotography2020/
|
||||
[+] Picuki: https://www.picuki.com/profile/alexaimephotography
|
||||
[+] Pinterest: https://www.pinterest.com/alexaimephotography/
|
||||
┣╸pinterest_username: alexaimephotography
|
||||
┣╸fullname: alexaimephotography
|
||||
┣╸image: https://s.pinimg.com/images/user/default_280.png
|
||||
┣╸board_count: 3
|
||||
┣╸pin_count: 4
|
||||
┣╸country: FR
|
||||
┣╸follower_count: 0
|
||||
┣╸following_count: 1
|
||||
┣╸is_website_verified: False
|
||||
┣╸is_indexed: True
|
||||
┣╸is_verified_merchant: False
|
||||
┗╸locale: fr
|
||||
[+] Reddit: https://www.reddit.com/user/alexaimephotography
|
||||
┣╸reddit_id: t5_1nytpy
|
||||
┣╸reddit_username: alexaimephotography
|
||||
┣╸display_name: alexaimephotography
|
||||
┣╸is_employee: False
|
||||
┣╸is_nsfw: False
|
||||
┣╸is_mod: True
|
||||
┣╸is_following: True
|
||||
┣╸has_user_profile: True
|
||||
┣╸hide_from_robots: False
|
||||
┣╸created_utc: 1562750403
|
||||
┣╸total_karma: 43075
|
||||
┗╸post_karma: 42574
|
||||
[+] Tumblr: https://alexaimephotography.tumblr.com/
|
||||
[+] VK: https://vk.com/alexaimephotography
|
||||
[+] Vimeo: https://vimeo.com/alexaimephotography
|
||||
┣╸uid: 75857717
|
||||
┣╸name: AlexAimePhotography
|
||||
┣╸username: alexaimephotography
|
||||
┣╸location: France
|
||||
┣╸created_at: 2017-12-06 06:49:28
|
||||
┣╸is_staff: False
|
||||
┗╸links:
|
||||
┣╸ https://500px.com/alexaimephotography
|
||||
┣╸ https://www.flickr.com/photos/photoambiance/
|
||||
┣╸ https://www.instagram.com/alexaimephotography/
|
||||
┣╸ https://www.youtube.com/channel/UC4NiYV3Yqih2WHcwKg4uPuQ
|
||||
┗╸ https://flii.by/alexaimephotography/
|
||||
[+] We Heart It: https://weheartit.com/alexaimephotography
|
||||
[*] Checking username Alexaimephotogr on:
|
||||
[+] Twitter: https://twitter.com/Alexaimephotogr
|
||||
```
|
||||
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 44 KiB |
Executable
+126
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Maigret: Supported Site Listing with Alexa ranking and country tags
|
||||
This module generates the listing of supported sites in file `SITES.md`
|
||||
and pretty prints file with sites data.
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import requests
|
||||
import logging
|
||||
import threading
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
# Alexa-rank bucket labels used by get_step_rank(): a raw rank is rounded
# up to the nearest key and displayed as the mapped label.
RANKS = {str(i): str(i) for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 100, 500]}
RANKS.update({
    '1000': '1K',
    '5000': '5K',
    '10000': '10K',
    '100000': '100K',
    # Fixed bucket labels: the original mapping mislabelled 10,000,000 as
    # '1M' and 50,000,000 as '10M', and had no 1,000,000 bucket at all.
    '1000000': '1M',
    '10000000': '10M',
    '50000000': '50M',
})
|
||||
|
||||
def get_rank(domain_to_query, dest, print_errors=True):
    """Fetch the Alexa rank for a domain and store it into *dest* in place.

    Keyword Arguments:
    domain_to_query -- Domain/URL whose Alexa rank should be looked up.
    dest            -- Site-data dictionary updated in place: 'rank' is
                       set, the Alexa country code (if any) is merged into
                       'tags', and non-username sites get 'disabled' reset.
    print_errors    -- If True, log and print lookup failures.

    Return Value:
    N/A. All results are written into *dest*.
    """
    # Retrieve ranking data via the Alexa API.
    url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
    # Bounded timeout so one hung lookup cannot stall its worker thread
    # forever (this function is run from many threads in parallel).
    xml_data = requests.get(url, timeout=30).text
    root = ET.fromstring(xml_data)

    try:
        # Get ranking for this site; missing REACH node raises and is
        # reported below.
        dest['rank'] = int(root.find('.//REACH').attrib['RANK'])
        country = root.find('.//COUNTRY')
        if country is not None and country.attrib:
            country_code = country.attrib['CODE']
            tags = set(dest.get('tags', []))
            if country_code:
                tags.add(country_code.lower())
            dest['tags'] = sorted(tags)
        if 'type' in dest and dest['type'] != 'username':
            dest['disabled'] = False
    except Exception as e:
        if print_errors:
            logging.error(e)
            # We did not find the rank for some reason.
            print(f"Error retrieving rank information for '{domain_to_query}'")
            print(f" Returned XML is |{xml_data}|")

    return
|
||||
|
||||
|
||||
def get_step_rank(rank):
    """Map a raw Alexa rank onto the nearest human-readable bucket label.

    A rank of 0 (no data available) maps to the largest bucket so the
    site sorts to the bottom of the listing; otherwise the smallest
    bucket boundary that is >= the rank is used.
    """
    step_values = sorted(map(int, RANKS.keys()))

    if rank == 0:
        # Unranked site: use the biggest (worst) bucket.
        chosen_step = step_values[-1]
    else:
        # Smallest bucket boundary not below the actual rank.
        chosen_step = [step for step in step_values if step >= rank][0]

    return RANKS[str(chosen_step)]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("--base", "-b", metavar="BASE_FILE",
                        dest="base_file", default="maigret/resources/data.json",
                        help="JSON file with sites data to update.")

    args = parser.parse_args()

    # (site name, main URL, worker thread) triples for the rank lookups.
    pool = []

    with open(args.base_file, "r", encoding="utf-8") as data_file:
        sites_info = json.load(data_file)
        data = sites_info['sites']
        engines = sites_info['engines']

    with open("sites.md", "w") as site_file:
        data_length = len(data)
        site_file.write(f"""
## List of supported sites: total {data_length}\n
Rank data fetched from Alexa by domains.

""")

        # Fire off one lookup thread per site; get_rank() mutates the
        # site's own dictionary in place.
        for social_network in data:
            url_main = data.get(social_network).get("urlMain")
            data.get(social_network)["rank"] = 0
            th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network)))
            pool.append((social_network, url_main, th))
            th.start()

        # Wait for every lookup, reporting progress on one console line.
        for index, (social_network, url_main, th) in enumerate(pool, start=1):
            th.join()
            sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
            sys.stdout.flush()

        # Sort by rank ascending, then move unranked (rank == 0) sites to
        # the end. A stable partition is used instead of the original
        # "rotate while the head is zero" loop, which would spin forever
        # if every Alexa lookup failed (all ranks 0).
        sites_full_list = sorted(
            ((site, site_data['rank']) for site, site_data in data.items()),
            key=lambda entry: entry[1]
        )
        ranked = [entry for entry in sites_full_list if entry[1] != 0]
        unranked = [entry for entry in sites_full_list if entry[1] == 0]
        sites_full_list = ranked + unranked

        for site, rank in sites_full_list:
            url_main = data[site]['urlMain']
            valid_rank = get_step_rank(rank)
            all_tags = data[site].get('tags', [])
            tags = ', ' + ', '.join(all_tags) if all_tags else ''
            note = ''
            if data[site].get('disabled'):
                note = ', search is disabled'
            site_file.write(f'1. [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')

        site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')

    # Persist the (possibly updated) site data back, sorted for stable diffs.
    sorted_json_data = json.dumps({'sites': data, 'engines': engines}, indent=2, sort_keys=True)

    with open(args.base_file, "w") as data_file:
        data_file.write(sorted_json_data)

    print("\nFinished updating supported site listing!")
|
||||
Reference in New Issue
Block a user