mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 14:34:33 +00:00
Compare commits
63 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bea900dda0 | |||
| bb1bde833d | |||
| 5b405c6abb | |||
| 99fa58ceed | |||
| c71e404f63 | |||
| 2c04ccce57 | |||
| 435db7cdc9 | |||
| 413a0502a4 | |||
| 2aedcc3166 | |||
| 28835204f5 | |||
| b11a247dfd | |||
| c9219d91ec | |||
| aa6cd0eca9 | |||
| 38e5d5c664 | |||
| 8a562d06ae | |||
| aa50ee9672 | |||
| 51327f9647 | |||
| 4a368c9bb6 | |||
| 6fd5f6e33a | |||
| fa3db9c39c | |||
| 5912ad4fbc | |||
| ee36dc0187 | |||
| 9eb62e4e22 | |||
| ead048af93 | |||
| acc751ff98 | |||
| b7bdd71cf0 | |||
| 43f189f774 | |||
| 5bda7fb339 | |||
| 414523a8ac | |||
| 6d4e268706 | |||
| b696b982f4 | |||
| d4234036c0 | |||
| b57c70091c | |||
| e90df3560b | |||
| bc6ee48b8c | |||
| e70bdf3789 | |||
| 84f9d417cf | |||
| 4333c40be7 | |||
| 9e504c0094 | |||
| 2f752a0368 | |||
| 53e9dab677 | |||
| 11b70a2a48 | |||
| 960708ef2e | |||
| e6f6d8735d | |||
| f77d7d307a | |||
| 158f739a59 | |||
| b6a207d0e3 | |||
| d59867b0d9 | |||
| 2145027196 | |||
| 386e9eba4f | |||
| 0e9655c46a | |||
| 009d51c380 | |||
| 78e9688ece | |||
| 3cbb9df7b3 | |||
| 2fb1f19948 | |||
| 3b91a9cd31 | |||
| 9858e71349 | |||
| c88e194d07 | |||
| ad5c7fbc7d | |||
| 66d6c7a93c | |||
| bdfb4911ce | |||
| 951be44452 | |||
| 188edc1b7f |
@@ -0,0 +1,32 @@
|
|||||||
|
name: Build docker image and push to DockerHub
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ main ]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
docker:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
-
|
||||||
|
name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v1
|
||||||
|
-
|
||||||
|
name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v1
|
||||||
|
-
|
||||||
|
name: Login to DockerHub
|
||||||
|
uses: docker/login-action@v1
|
||||||
|
with:
|
||||||
|
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||||
|
-
|
||||||
|
name: Build and push
|
||||||
|
id: docker_build
|
||||||
|
uses: docker/build-push-action@v2
|
||||||
|
with:
|
||||||
|
push: true
|
||||||
|
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
|
||||||
|
-
|
||||||
|
name: Image digest
|
||||||
|
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||||
@@ -26,7 +26,7 @@ jobs:
|
|||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
python -m pip install flake8 pytest pytest-rerunfailures
|
python -m pip install -r test-requirements.txt
|
||||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||||
- name: Test with pytest
|
- name: Test with pytest
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
+6
-3
@@ -22,9 +22,12 @@ src/
|
|||||||
# Comma-Separated Values (CSV) Reports
|
# Comma-Separated Values (CSV) Reports
|
||||||
*.csv
|
*.csv
|
||||||
|
|
||||||
# Excluded sites list
|
|
||||||
tests/.excluded_sites
|
|
||||||
|
|
||||||
# MacOS Folder Metadata File
|
# MacOS Folder Metadata File
|
||||||
.DS_Store
|
.DS_Store
|
||||||
/reports/
|
/reports/
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
.coverage
|
||||||
|
dist/
|
||||||
|
htmlcov/
|
||||||
|
/test_*
|
||||||
@@ -2,6 +2,23 @@
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.2.4] - 2021-05-18
|
||||||
|
* cli output report
|
||||||
|
* various improvements
|
||||||
|
|
||||||
|
## [0.2.3] - 2021-05-12
|
||||||
|
* added Yelp and yelp_userid support
|
||||||
|
* tags markup stabilization
|
||||||
|
* improved errors detection
|
||||||
|
|
||||||
|
## [0.2.2] - 2021-05-07
|
||||||
|
* improved ids extractors
|
||||||
|
* updated sites and engines
|
||||||
|
* updated CLI options
|
||||||
|
|
||||||
|
## [0.2.1] - 2021-05-02
|
||||||
|
* fixed json reports generation bug, added tests
|
||||||
|
|
||||||
## [0.2.0] - 2021-05-02
|
## [0.2.0] - 2021-05-02
|
||||||
* added `--retries` option
|
* added `--retries` option
|
||||||
* added `source` feature for sites' mirrors
|
* added `source` feature for sites' mirrors
|
||||||
|
|||||||
@@ -1,40 +1,55 @@
|
|||||||
# Maigret
|
# Maigret
|
||||||
|
|
||||||

|
|
||||||

|
|
||||||
[](https://gitter.im/maigret-osint/community)
|
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img src="./static/maigret.png" />
|
<p align="center">
|
||||||
|
<a href="https://pypi.org/project/maigret/">
|
||||||
|
<img alt="PyPI" src="https://img.shields.io/pypi/v/maigret?style=flat-square">
|
||||||
|
</a>
|
||||||
|
<a href="https://pypi.org/project/maigret/">
|
||||||
|
<img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dw/maigret?style=flat-square">
|
||||||
|
</a>
|
||||||
|
<a href="https://gitter.im/maigret-osint/community">
|
||||||
|
<img alt="Chat - Gitter" src="./static/chat_gitter.svg" />
|
||||||
|
</a>
|
||||||
|
<a href="https://twitter.com/intent/follow?screen_name=sox0j">
|
||||||
|
<img src="https://img.shields.io/twitter/follow/sox0j?label=Follow%20sox0j&style=social&color=blue" alt="Follow @sox0j" />
|
||||||
|
</a>
|
||||||
|
</p>
|
||||||
|
<p align="center">
|
||||||
|
<img src="./static/maigret.png" height="200"/>
|
||||||
|
</p>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
||||||
|
|
||||||
## About
|
## About
|
||||||
|
|
||||||
Purpose of Maigret - **collect a dossier on a person by username only**, checking for accounts on a huge number of sites.
|
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||||
|
|
||||||
This is a [sherlock](https://github.com/sherlock-project/) fork with cool features under heavy development.
|
More than 2000 sites are currently supported ([full list](./sites.md)); by default, the search is launched against the 500 most popular sites in descending order of popularity.
|
||||||
*Don't forget to regularly update source code from repo*.
|
|
||||||
|
|
||||||
Currently supported more than 2000 sites ([full list](./sites.md)), by default search is launched against 500 popular sites in descending order of popularity.
|
|
||||||
|
|
||||||
## Main features
|
## Main features
|
||||||
|
|
||||||
* Profile pages parsing, [extracting](https://github.com/soxoj/socid_extractor) personal info, links to other profiles, etc.
|
* Profile pages parsing, [extraction](https://github.com/soxoj/socid_extractor) of personal info, links to other profiles, etc.
|
||||||
* Recursive search by new usernames found
|
* Recursive search by new usernames and other ids found
|
||||||
* Search by tags (site categories, countries)
|
* Search by tags (site categories, countries)
|
||||||
* Censorship and captcha detection
|
* Censorship and captcha detection
|
||||||
* Very few false positives
|
* Requests retries
|
||||||
* Failed requests' restarts
|
|
||||||
|
See full description of Maigret features [in the Wiki](https://github.com/soxoj/maigret/wiki/Features).
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
**NOTE**: Python 3.6 or higher and pip is required.
|
Maigret can be installed using pip or Docker, or simply launched from the cloned repo.
|
||||||
|
You can also run Maigret in cloud shells (see the buttons below).
|
||||||
|
|
||||||
**Python 3.8 is recommended.**
|
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md) [](https://repl.it/github/soxoj/maigret)
|
||||||
|
<a href="https://colab.research.google.com/gist//soxoj/879b51bc3b2f8b695abb054090645000/maigret.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="40"></a>
|
||||||
|
|
||||||
### Package installing
|
### Package installing
|
||||||
|
|
||||||
|
**NOTE**: Python 3.6 or higher and pip are required; **Python 3.8 is recommended.**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# install from pypi
|
# install from pypi
|
||||||
pip3 install maigret
|
pip3 install maigret
|
||||||
@@ -42,34 +57,36 @@ pip3 install maigret
|
|||||||
# or clone and install manually
|
# or clone and install manually
|
||||||
git clone https://github.com/soxoj/maigret && cd maigret
|
git clone https://github.com/soxoj/maigret && cd maigret
|
||||||
pip3 install .
|
pip3 install .
|
||||||
|
|
||||||
|
# usage
|
||||||
|
maigret username
|
||||||
```
|
```
|
||||||
|
|
||||||
### Cloning a repository
|
### Cloning a repository
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/soxoj/maigret && cd maigret
|
git clone https://github.com/soxoj/maigret && cd maigret
|
||||||
```
|
|
||||||
|
|
||||||
You can use a free virtual machine, the repo will be automatically cloned:
|
|
||||||
|
|
||||||
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md) [](https://repl.it/github/soxoj/maigret)
|
|
||||||
<a href="https://colab.research.google.com/gist//soxoj/879b51bc3b2f8b695abb054090645000/maigret.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="40"></a>
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip3 install -r requirements.txt
|
pip3 install -r requirements.txt
|
||||||
|
|
||||||
|
# usage
|
||||||
|
./maigret.py username
|
||||||
```
|
```
|
||||||
|
|
||||||
## Using examples
|
### Docker
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# for a cloned repo
|
# official image
|
||||||
./maigret.py user
|
docker pull soxoj/maigret
|
||||||
|
|
||||||
# for a package
|
# usage
|
||||||
maigret user
|
docker run soxoj/maigret:latest username
|
||||||
|
|
||||||
|
# manual build
|
||||||
|
docker build -t maigret .
|
||||||
```
|
```
|
||||||
|
|
||||||
Features:
|
## Usage examples
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# make HTML and PDF reports
|
# make HTML and PDF reports
|
||||||
maigret user --html --pdf
|
maigret user --html --pdf
|
||||||
@@ -77,22 +94,12 @@ maigret user --html --pdf
|
|||||||
# search on sites marked with tags photo & dating
|
# search on sites marked with tags photo & dating
|
||||||
maigret user --tags photo,dating
|
maigret user --tags photo,dating
|
||||||
|
|
||||||
|
|
||||||
# search for three usernames on all available sites
|
# search for three usernames on all available sites
|
||||||
maigret user1 user2 user3 -a
|
maigret user1 user2 user3 -a
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Run `maigret --help` to get arguments description. Also options are documented in [the Maigret Wiki](https://github.com/soxoj/maigret/wiki/Command-line-options).
|
Use `maigret --help` to get the full description of the options. The options are also documented in [the Maigret Wiki](https://github.com/soxoj/maigret/wiki/Command-line-options).
|
||||||
|
|
||||||
With Docker:
|
|
||||||
```
|
|
||||||
# manual build
|
|
||||||
docker build -t maigret . && docker run maigret user
|
|
||||||
|
|
||||||
# official image
|
|
||||||
docker run soxoj/maigret:latest user
|
|
||||||
```
|
|
||||||
|
|
||||||
## Demo with page parsing and recursive username search
|
## Demo with page parsing and recursive username search
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,12 @@
|
|||||||
"""Maigret"""
|
"""Maigret"""
|
||||||
|
|
||||||
|
__title__ = 'Maigret'
|
||||||
|
__package__ = 'maigret'
|
||||||
|
__author__ = 'Soxoj'
|
||||||
|
__author_email__ = 'soxoj@protonmail.com'
|
||||||
|
|
||||||
|
|
||||||
|
from .__version__ import __version__
|
||||||
from .checking import maigret as search
|
from .checking import maigret as search
|
||||||
from .sites import MaigretEngine, MaigretSite, MaigretDatabase
|
from .sites import MaigretEngine, MaigretSite, MaigretDatabase
|
||||||
from .notify import QueryNotifyPrint as Notifier
|
from .notify import QueryNotifyPrint as Notifier
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
"""Maigret version file"""
|
||||||
|
|
||||||
|
__version__ = '0.2.4'
|
||||||
@@ -34,24 +34,6 @@ class ParsingActivator:
|
|||||||
bearer_token = r.json()["accessToken"]
|
bearer_token = r.json()["accessToken"]
|
||||||
site.headers["authorization"] = f"Bearer {bearer_token}"
|
site.headers["authorization"] = f"Bearer {bearer_token}"
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def xssis(site, logger, cookies={}):
|
|
||||||
if not cookies:
|
|
||||||
logger.debug("You must have cookies to activate xss.is parsing!")
|
|
||||||
return
|
|
||||||
|
|
||||||
headers = dict(site.headers)
|
|
||||||
post_data = {
|
|
||||||
"_xfResponseType": "json",
|
|
||||||
"_xfToken": "1611177919,a2710362e45dad9aa1da381e21941a38",
|
|
||||||
}
|
|
||||||
headers["content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"
|
|
||||||
r = requests.post(
|
|
||||||
site.activation["url"], headers=headers, cookies=cookies, data=post_data
|
|
||||||
)
|
|
||||||
csrf = r.json()["csrf"]
|
|
||||||
site.get_params["_xfToken"] = csrf
|
|
||||||
|
|
||||||
|
|
||||||
async def import_aiohttp_cookies(cookiestxt_filename):
|
async def import_aiohttp_cookies(cookiestxt_filename):
|
||||||
cookies_obj = MozillaCookieJar(cookiestxt_filename)
|
cookies_obj = MozillaCookieJar(cookiestxt_filename)
|
||||||
|
|||||||
+46
-36
@@ -6,12 +6,14 @@ import ssl
|
|||||||
import sys
|
import sys
|
||||||
import tqdm
|
import tqdm
|
||||||
from typing import Tuple, Optional, Dict, List
|
from typing import Tuple, Optional, Dict, List
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import tqdm.asyncio
|
import tqdm.asyncio
|
||||||
from aiohttp_socks import ProxyConnector
|
from aiohttp_socks import ProxyConnector
|
||||||
from python_socks import _errors as proxy_errors
|
from python_socks import _errors as proxy_errors
|
||||||
from socid_extractor import extract
|
from socid_extractor import extract
|
||||||
|
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
|
||||||
|
|
||||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||||
from . import errors
|
from . import errors
|
||||||
@@ -24,10 +26,10 @@ from .executors import (
|
|||||||
from .result import QueryResult, QueryStatus
|
from .result import QueryResult, QueryStatus
|
||||||
from .sites import MaigretDatabase, MaigretSite
|
from .sites import MaigretDatabase, MaigretSite
|
||||||
from .types import QueryOptions, QueryResultWrapper
|
from .types import QueryOptions, QueryResultWrapper
|
||||||
from .utils import get_random_user_agent
|
from .utils import get_random_user_agent, ascii_data_display
|
||||||
|
|
||||||
|
|
||||||
supported_recursive_search_ids = (
|
SUPPORTED_IDS = (
|
||||||
"yandex_public_id",
|
"yandex_public_id",
|
||||||
"gaia_id",
|
"gaia_id",
|
||||||
"vk_id",
|
"vk_id",
|
||||||
@@ -35,9 +37,10 @@ supported_recursive_search_ids = (
|
|||||||
"wikimapia_uid",
|
"wikimapia_uid",
|
||||||
"steam_id",
|
"steam_id",
|
||||||
"uidme_uguid",
|
"uidme_uguid",
|
||||||
|
"yelp_userid",
|
||||||
)
|
)
|
||||||
|
|
||||||
unsupported_characters = "#"
|
BAD_CHARS = "#"
|
||||||
|
|
||||||
|
|
||||||
async def get_response(request_future, logger) -> Tuple[str, int, Optional[CheckError]]:
|
async def get_response(request_future, logger) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
@@ -54,17 +57,18 @@ async def get_response(request_future, logger) -> Tuple[str, int, Optional[Check
|
|||||||
decoded_content = response_content.decode(charset, "ignore")
|
decoded_content = response_content.decode(charset, "ignore")
|
||||||
html_text = decoded_content
|
html_text = decoded_content
|
||||||
|
|
||||||
|
error = None
|
||||||
if status_code == 0:
|
if status_code == 0:
|
||||||
error = CheckError("Connection lost")
|
error = CheckError("Connection lost")
|
||||||
else:
|
|
||||||
error = None
|
|
||||||
|
|
||||||
logger.debug(html_text)
|
logger.debug(html_text)
|
||||||
|
|
||||||
except asyncio.TimeoutError as e:
|
except asyncio.TimeoutError as e:
|
||||||
error = CheckError("Request timeout", str(e))
|
error = CheckError("Request timeout", str(e))
|
||||||
except aiohttp.client_exceptions.ClientConnectorError as e:
|
except ClientConnectorError as e:
|
||||||
error = CheckError("Connecting failure", str(e))
|
error = CheckError("Connecting failure", str(e))
|
||||||
|
except ServerDisconnectedError as e:
|
||||||
|
error = CheckError("Server disconnected", str(e))
|
||||||
except aiohttp.http_exceptions.BadHttpMessage as e:
|
except aiohttp.http_exceptions.BadHttpMessage as e:
|
||||||
error = CheckError("HTTP", str(e))
|
error = CheckError("HTTP", str(e))
|
||||||
except proxy_errors.ProxyError as e:
|
except proxy_errors.ProxyError as e:
|
||||||
@@ -73,9 +77,8 @@ async def get_response(request_future, logger) -> Tuple[str, int, Optional[Check
|
|||||||
error = CheckError("Interrupted")
|
error = CheckError("Interrupted")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# python-specific exceptions
|
# python-specific exceptions
|
||||||
if sys.version_info.minor > 6:
|
if sys.version_info.minor > 6 and (
|
||||||
if isinstance(e, ssl.SSLCertVerificationError) or isinstance(
|
isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError)
|
||||||
e, ssl.SSLError
|
|
||||||
):
|
):
|
||||||
error = CheckError("SSL", str(e))
|
error = CheckError("SSL", str(e))
|
||||||
else:
|
else:
|
||||||
@@ -109,6 +112,14 @@ def detect_error_page(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def debug_response_logging(url, html_text, status_code, check_error):
    """Append a record describing one site response to ``debug.log``.

    The file is opened in append mode relative to the current working
    directory, so records from successive calls accumulate.

    Args:
        url: URL that was requested.
        html_text: Response body; the ``code``/``response`` lines are
            written only when it is truthy.
        status_code: Response status; a falsy value is logged as
            ``"No response"``.
        check_error: Error detected for the request (or ``None``); written
            using its string representation.
    """
    # Explicit UTF-8: response bodies may contain non-ASCII text, which would
    # raise UnicodeEncodeError under a non-UTF-8 locale default encoding.
    with open("debug.log", "a", encoding="utf-8") as f:
        status = status_code or "No response"
        f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
        if html_text:
            f.write(f"code: {status}\nresponse: {html_text}\n")
|
||||||
|
|
||||||
|
|
||||||
def process_site_result(
|
def process_site_result(
|
||||||
response, query_notify, logger, results_info: QueryResultWrapper, site: MaigretSite
|
response, query_notify, logger, results_info: QueryResultWrapper, site: MaigretSite
|
||||||
):
|
):
|
||||||
@@ -121,7 +132,7 @@ def process_site_result(
|
|||||||
username = results_info["username"]
|
username = results_info["username"]
|
||||||
is_parsing_enabled = results_info["parsing_enabled"]
|
is_parsing_enabled = results_info["parsing_enabled"]
|
||||||
url = results_info.get("url_user")
|
url = results_info.get("url_user")
|
||||||
logger.debug(url)
|
logger.info(url)
|
||||||
|
|
||||||
status = results_info.get("status")
|
status = results_info.get("status")
|
||||||
if status is not None:
|
if status is not None:
|
||||||
@@ -142,23 +153,20 @@ def process_site_result(
|
|||||||
response_time = None
|
response_time = None
|
||||||
|
|
||||||
if logger.level == logging.DEBUG:
|
if logger.level == logging.DEBUG:
|
||||||
with open("debug.txt", "a") as f:
|
debug_response_logging(url, html_text, status_code, check_error)
|
||||||
status = status_code or "No response"
|
|
||||||
f.write(f"url: {url}\nerror: {check_error}\nr: {status}\n")
|
|
||||||
if html_text:
|
|
||||||
f.write(f"code: {status}\nresponse: {str(html_text)}\n")
|
|
||||||
|
|
||||||
# additional check for errors
|
# additional check for errors
|
||||||
if status_code and not check_error:
|
if status_code and not check_error:
|
||||||
check_error = detect_error_page(
|
check_error = detect_error_page(
|
||||||
html_text, status_code, site.errors, site.ignore403
|
html_text, status_code, site.errors_dict, site.ignore403
|
||||||
)
|
)
|
||||||
|
|
||||||
if site.activation and html_text:
|
# parsing activation
|
||||||
is_need_activation = any(
|
is_need_activation = any(
|
||||||
[s for s in site.activation["marks"] if s in html_text]
|
[s for s in site.activation.get("marks", []) if s in html_text]
|
||||||
)
|
)
|
||||||
if is_need_activation:
|
|
||||||
|
if site.activation and html_text and is_need_activation:
|
||||||
method = site.activation["method"]
|
method = site.activation["method"]
|
||||||
try:
|
try:
|
||||||
activate_fun = getattr(ParsingActivator(), method)
|
activate_fun = getattr(ParsingActivator(), method)
|
||||||
@@ -169,13 +177,18 @@ def process_site_result(
|
|||||||
f"Activation method {method} for site {site.name} not found!"
|
f"Activation method {method} for site {site.name} not found!"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed activation {method} for site {site.name}: {e}")
|
logger.warning(
|
||||||
|
f"Failed activation {method} for site {site.name}: {str(e)}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
# TODO: temporary check error
|
||||||
|
|
||||||
site_name = site.pretty_name
|
site_name = site.pretty_name
|
||||||
# presense flags
|
# presense flags
|
||||||
# True by default
|
# True by default
|
||||||
presense_flags = site.presense_strs
|
presense_flags = site.presense_strs
|
||||||
is_presense_detected = False
|
is_presense_detected = False
|
||||||
|
|
||||||
if html_text:
|
if html_text:
|
||||||
if not presense_flags:
|
if not presense_flags:
|
||||||
is_presense_detected = True
|
is_presense_detected = True
|
||||||
@@ -200,7 +213,7 @@ def process_site_result(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if check_error:
|
if check_error:
|
||||||
logger.debug(check_error)
|
logger.warning(check_error)
|
||||||
result = QueryResult(
|
result = QueryResult(
|
||||||
username,
|
username,
|
||||||
site_name,
|
site_name,
|
||||||
@@ -220,9 +233,9 @@ def process_site_result(
|
|||||||
result = build_result(QueryStatus.CLAIMED)
|
result = build_result(QueryStatus.CLAIMED)
|
||||||
else:
|
else:
|
||||||
result = build_result(QueryStatus.AVAILABLE)
|
result = build_result(QueryStatus.AVAILABLE)
|
||||||
elif check_type == "status_code":
|
elif check_type in "status_code":
|
||||||
# Checks if the status code of the response is 2XX
|
# Checks if the status code of the response is 2XX
|
||||||
if is_presense_detected and (not status_code >= 300 or status_code < 200):
|
if 200 <= status_code < 300:
|
||||||
result = build_result(QueryStatus.CLAIMED)
|
result = build_result(QueryStatus.CLAIMED)
|
||||||
else:
|
else:
|
||||||
result = build_result(QueryStatus.AVAILABLE)
|
result = build_result(QueryStatus.AVAILABLE)
|
||||||
@@ -255,16 +268,16 @@ def process_site_result(
|
|||||||
for k, v in extracted_ids_data.items():
|
for k, v in extracted_ids_data.items():
|
||||||
if "username" in k:
|
if "username" in k:
|
||||||
new_usernames[v] = "username"
|
new_usernames[v] = "username"
|
||||||
if k in supported_recursive_search_ids:
|
if k in SUPPORTED_IDS:
|
||||||
new_usernames[v] = k
|
new_usernames[v] = k
|
||||||
|
|
||||||
results_info["ids_usernames"] = new_usernames
|
results_info["ids_usernames"] = new_usernames
|
||||||
results_info["ids_links"] = eval(extracted_ids_data.get("links", "[]"))
|
links = ascii_data_display(extracted_ids_data.get("links", "[]"))
|
||||||
|
if "website" in extracted_ids_data:
|
||||||
|
links.append(extracted_ids_data["website"])
|
||||||
|
results_info["ids_links"] = links
|
||||||
result.ids_data = extracted_ids_data
|
result.ids_data = extracted_ids_data
|
||||||
|
|
||||||
# Notify caller about results of query.
|
|
||||||
query_notify.update(result, site.similar_search)
|
|
||||||
|
|
||||||
# Save status of request
|
# Save status of request
|
||||||
results_info["status"] = result
|
results_info["status"] = result
|
||||||
|
|
||||||
@@ -303,7 +316,7 @@ def make_site_result(
|
|||||||
|
|
||||||
# URL of user on site (if it exists)
|
# URL of user on site (if it exists)
|
||||||
url = site.url.format(
|
url = site.url.format(
|
||||||
urlMain=site.url_main, urlSubpath=site.url_subpath, username=username
|
urlMain=site.url_main, urlSubpath=site.url_subpath, username=quote(username)
|
||||||
)
|
)
|
||||||
|
|
||||||
# workaround to prevent slash errors
|
# workaround to prevent slash errors
|
||||||
@@ -412,6 +425,8 @@ async def check_site_for_username(
|
|||||||
response, query_notify, logger, default_result, site
|
response, query_notify, logger, default_result, site
|
||||||
)
|
)
|
||||||
|
|
||||||
|
query_notify.update(response_result['status'], site.similar_search)
|
||||||
|
|
||||||
return site.name, response_result
|
return site.name, response_result
|
||||||
|
|
||||||
|
|
||||||
@@ -441,7 +456,7 @@ async def maigret(
|
|||||||
logger,
|
logger,
|
||||||
query_notify=None,
|
query_notify=None,
|
||||||
proxy=None,
|
proxy=None,
|
||||||
timeout=None,
|
timeout=3,
|
||||||
is_parsing_enabled=False,
|
is_parsing_enabled=False,
|
||||||
id_type="username",
|
id_type="username",
|
||||||
debug=False,
|
debug=False,
|
||||||
@@ -463,7 +478,7 @@ async def maigret(
|
|||||||
query results.
|
query results.
|
||||||
logger -- Standard Python logger object.
|
logger -- Standard Python logger object.
|
||||||
timeout -- Time in seconds to wait before timing out request.
|
timeout -- Time in seconds to wait before timing out request.
|
||||||
Default is no timeout.
|
Default is 3 seconds.
|
||||||
is_parsing_enabled -- Extract additional info from account pages.
|
is_parsing_enabled -- Extract additional info from account pages.
|
||||||
id_type -- Type of username to search.
|
id_type -- Type of username to search.
|
||||||
Default is 'username', see all supported here:
|
Default is 'username', see all supported here:
|
||||||
@@ -616,15 +631,10 @@ async def site_self_check(
|
|||||||
"disabled": False,
|
"disabled": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
|
||||||
check_data = [
|
check_data = [
|
||||||
(site.username_claimed, QueryStatus.CLAIMED),
|
(site.username_claimed, QueryStatus.CLAIMED),
|
||||||
(site.username_unclaimed, QueryStatus.AVAILABLE),
|
(site.username_unclaimed, QueryStatus.AVAILABLE),
|
||||||
]
|
]
|
||||||
except Exception as e:
|
|
||||||
logger.error(e)
|
|
||||||
logger.error(site.__dict__)
|
|
||||||
check_data = []
|
|
||||||
|
|
||||||
logger.info(f"Checking {site.name}...")
|
logger.info(f"Checking {site.name}...")
|
||||||
|
|
||||||
|
|||||||
+17
-2
@@ -1,6 +1,7 @@
|
|||||||
from typing import Dict, List, Any
|
from typing import Dict, List, Any
|
||||||
|
|
||||||
from .result import QueryResult
|
from .result import QueryResult
|
||||||
|
from .types import QueryResultWrapper
|
||||||
|
|
||||||
|
|
||||||
# error got as a result of completed search query
|
# error got as a result of completed search query
|
||||||
@@ -34,6 +35,12 @@ COMMON_ERRORS = {
|
|||||||
'Please stand by, while we are checking your browser': CheckError(
|
'Please stand by, while we are checking your browser': CheckError(
|
||||||
'Bot protection', 'Cloudflare'
|
'Bot protection', 'Cloudflare'
|
||||||
),
|
),
|
||||||
|
'<span data-translate="checking_browser">Checking your browser before accessing</span>': CheckError(
|
||||||
|
'Bot protection', 'Cloudflare'
|
||||||
|
),
|
||||||
|
'This website is using a security service to protect itself from online attacks.': CheckError(
|
||||||
|
'Access denied', 'Cloudflare'
|
||||||
|
),
|
||||||
'<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
|
'<title>Доступ ограничен</title>': CheckError('Censorship', 'Rostelecom'),
|
||||||
'document.getElementById(\'validate_form_submit\').disabled=true': CheckError(
|
'document.getElementById(\'validate_form_submit\').disabled=true': CheckError(
|
||||||
'Captcha', 'Mail.ru'
|
'Captcha', 'Mail.ru'
|
||||||
@@ -48,6 +55,9 @@ COMMON_ERRORS = {
|
|||||||
'Censorship', 'MGTS'
|
'Censorship', 'MGTS'
|
||||||
),
|
),
|
||||||
'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
|
'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'),
|
||||||
|
'Сайт заблокирован хостинг-провайдером': CheckError(
|
||||||
|
'Site-specific', 'Site is disabled (Beget)'
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
ERRORS_TYPES = {
|
ERRORS_TYPES = {
|
||||||
@@ -57,6 +67,11 @@ ERRORS_TYPES = {
|
|||||||
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# TODO: checking for reason
|
||||||
|
ERRORS_REASONS = {
|
||||||
|
'Login required': 'Add authorization cookies through `--cookies-jar-file` (see cookies.txt)',
|
||||||
|
}
|
||||||
|
|
||||||
TEMPORARY_ERRORS_TYPES = [
|
TEMPORARY_ERRORS_TYPES = [
|
||||||
'Request timeout',
|
'Request timeout',
|
||||||
'Unknown',
|
'Unknown',
|
||||||
@@ -90,9 +105,9 @@ def solution_of(err_type) -> str:
|
|||||||
return ERRORS_TYPES.get(err_type, '')
|
return ERRORS_TYPES.get(err_type, '')
|
||||||
|
|
||||||
|
|
||||||
def extract_and_group(search_res: dict) -> List[Dict[str, Any]]:
|
def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]:
|
||||||
errors_counts: Dict[str, int] = {}
|
errors_counts: Dict[str, int] = {}
|
||||||
for r in search_res:
|
for r in search_res.values():
|
||||||
if r and isinstance(r, dict) and r.get('status'):
|
if r and isinstance(r, dict) and r.get('status'):
|
||||||
if not isinstance(r['status'], QueryResult):
|
if not isinstance(r['status'], QueryResult):
|
||||||
continue
|
continue
|
||||||
|
|||||||
+265
-219
@@ -8,15 +8,17 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import platform
|
import platform
|
||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from socid_extractor import extract, parse, __version__ as socid_version
|
from socid_extractor import extract, parse, __version__ as socid_version
|
||||||
|
|
||||||
|
from .__version__ import __version__
|
||||||
from .checking import (
|
from .checking import (
|
||||||
timeout_check,
|
timeout_check,
|
||||||
supported_recursive_search_ids,
|
SUPPORTED_IDS,
|
||||||
self_check,
|
self_check,
|
||||||
unsupported_characters,
|
BAD_CHARS,
|
||||||
maigret,
|
maigret,
|
||||||
)
|
)
|
||||||
from . import errors
|
from . import errors
|
||||||
@@ -29,18 +31,17 @@ from .report import (
|
|||||||
generate_report_context,
|
generate_report_context,
|
||||||
save_txt_report,
|
save_txt_report,
|
||||||
SUPPORTED_JSON_REPORT_FORMATS,
|
SUPPORTED_JSON_REPORT_FORMATS,
|
||||||
check_supported_json_format,
|
|
||||||
save_json_report,
|
save_json_report,
|
||||||
|
get_plaintext_report,
|
||||||
)
|
)
|
||||||
from .sites import MaigretDatabase
|
from .sites import MaigretDatabase
|
||||||
from .submit import submit_dialog
|
from .submit import submit_dialog
|
||||||
|
from .types import QueryResultWrapper
|
||||||
from .utils import get_dict_ascii_tree
|
from .utils import get_dict_ascii_tree
|
||||||
|
|
||||||
__version__ = '0.2.0'
|
|
||||||
|
|
||||||
|
def notify_about_errors(search_results: QueryResultWrapper, query_notify):
|
||||||
def notify_about_errors(search_results, query_notify):
|
errs = errors.extract_and_group(search_results)
|
||||||
errs = errors.extract_and_group(search_results.values())
|
|
||||||
was_errs_displayed = False
|
was_errs_displayed = False
|
||||||
for e in errs:
|
for e in errs:
|
||||||
if not errors.is_important(e):
|
if not errors.is_important(e):
|
||||||
@@ -48,7 +49,7 @@ def notify_about_errors(search_results, query_notify):
|
|||||||
text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
|
text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
|
||||||
solution = errors.solution_of(e['err'])
|
solution = errors.solution_of(e['err'])
|
||||||
if solution:
|
if solution:
|
||||||
text = '. '.join([text, solution])
|
text = '. '.join([text, solution.capitalize()])
|
||||||
|
|
||||||
query_notify.warning(text, '!')
|
query_notify.warning(text, '!')
|
||||||
was_errs_displayed = True
|
was_errs_displayed = True
|
||||||
@@ -59,6 +60,67 @@ def notify_about_errors(search_results, query_notify):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_ids_from_url(url: str, db: MaigretDatabase) -> dict:
    """
    Collect the identifiers that sites of the database recognise in a URL.

    Every site in ``db.sites`` is asked to extract an identifier from
    ``url``; falsy answers are skipped.

    Returns a dict mapping each extracted identifier to its type. When
    several sites return the same identifier, the type reported by the
    last of them is kept.
    """
    found = {}
    for site in db.sites:
        extracted = site.extract_id_from_url(url)
        if extracted:
            identifier, id_type = extracted
            found[identifier] = id_type
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def extract_ids_from_page(url, logger, timeout=5) -> dict:
    """
    Fetch a page and extract usernames / other identifiers from it.

    The original URL is always scanned; URL mutations produced by
    ``socid_extractor.mutate_url`` are scanned too when that helper is
    available.

    :param url: address of the page to scan.
    :param logger: logger used for the mutation warning and page debug dump.
    :param timeout: timeout passed to ``parse`` for every request.
    :return: dict mapping each extracted value to its identifier type.
    """
    results = {}
    # each request is a (url, headers) pair
    reqs: List[Tuple[str, set]] = [(url, set())]
    try:
        # temporary workaround for URL mutations MVP
        from socid_extractor import mutate_url

        reqs += list(mutate_url(url))
    except Exception as e:
        # best effort: without mutations only the original URL is scanned
        logger.warning(e)

    # a dedicated loop variable keeps the ``url`` argument intact
    for page_url, headers in reqs:
        print(f'Scanning webpage by URL {page_url}...')
        page, _ = parse(page_url, cookies_str='', headers=headers, timeout=timeout)
        logger.debug(page)
        info = extract(page)
        if not info:
            # nothing to collect; also avoids touching a non-dict empty result
            print('Nothing extracted')
            continue

        print(get_dict_ascii_tree(info.items(), new_line=False), ' ')
        for k, v in info.items():
            if 'username' in k:
                results[v] = 'username'
            # checked second, so a supported id type overrides 'username'
            if k in SUPPORTED_IDS:
                results[v] = k

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def extract_ids_from_results(results: QueryResultWrapper, db: MaigretDatabase) -> dict:
    """
    Gather identifiers found during a search for use in further searches.

    For every non-empty site result, the ``ids_usernames`` mapping is
    merged as is, and every URL listed in ``ids_links`` is passed to
    ``extract_ids_from_url`` to recognise identifiers in it.

    Returns a dict mapping each identifier to its type.
    """
    collected = {}
    for site_result in results.values():
        # TODO: fix no site data issue
        if not site_result:
            continue

        usernames = site_result.get('ids_usernames')
        if usernames:
            collected.update(usernames)

        for link in site_result.get('ids_links', []):
            collected.update(extract_ids_from_url(link, db))

    return collected
|
||||||
|
|
||||||
|
|
||||||
def setup_arguments_parser():
|
def setup_arguments_parser():
|
||||||
version_string = '\n'.join(
|
version_string = '\n'.join(
|
||||||
[
|
[
|
||||||
@@ -74,68 +136,18 @@ def setup_arguments_parser():
|
|||||||
formatter_class=RawDescriptionHelpFormatter,
|
formatter_class=RawDescriptionHelpFormatter,
|
||||||
description=f"Maigret v{__version__}",
|
description=f"Maigret v{__version__}",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"username",
|
||||||
|
nargs='*',
|
||||||
|
metavar="USERNAMES",
|
||||||
|
help="One or more usernames to search by.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--version",
|
"--version",
|
||||||
action="version",
|
action="version",
|
||||||
version=version_string,
|
version=version_string,
|
||||||
help="Display version information and dependencies.",
|
help="Display version information and dependencies.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--info",
|
|
||||||
"-vv",
|
|
||||||
action="store_true",
|
|
||||||
dest="info",
|
|
||||||
default=False,
|
|
||||||
help="Display service information.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--verbose",
|
|
||||||
"-v",
|
|
||||||
action="store_true",
|
|
||||||
dest="verbose",
|
|
||||||
default=False,
|
|
||||||
help="Display extra information and metrics.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-d",
|
|
||||||
"--debug",
|
|
||||||
"-vvv",
|
|
||||||
action="store_true",
|
|
||||||
dest="debug",
|
|
||||||
default=False,
|
|
||||||
help="Saving debugging information and sites responses in debug.txt.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--site",
|
|
||||||
action="append",
|
|
||||||
metavar='SITE_NAME',
|
|
||||||
dest="site_list",
|
|
||||||
default=[],
|
|
||||||
help="Limit analysis to just the listed sites (use several times to specify more than one)",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--proxy",
|
|
||||||
"-p",
|
|
||||||
metavar='PROXY_URL',
|
|
||||||
action="store",
|
|
||||||
dest="proxy",
|
|
||||||
default=None,
|
|
||||||
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--db",
|
|
||||||
metavar="DB_FILE",
|
|
||||||
dest="db_file",
|
|
||||||
default=None,
|
|
||||||
help="Load Maigret database from a JSON file or an online, valid, JSON file.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--cookies-jar-file",
|
|
||||||
metavar="COOKIE_FILE",
|
|
||||||
dest="cookie_file",
|
|
||||||
default=None,
|
|
||||||
help="File with cookies.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--timeout",
|
"--timeout",
|
||||||
action="store",
|
action="store",
|
||||||
@@ -143,7 +155,7 @@ def setup_arguments_parser():
|
|||||||
dest="timeout",
|
dest="timeout",
|
||||||
type=timeout_check,
|
type=timeout_check,
|
||||||
default=30,
|
default=30,
|
||||||
help="Time (in seconds) to wait for response to requests. "
|
help="Time in seconds to wait for response to requests. "
|
||||||
"Default timeout of 30.0s. "
|
"Default timeout of 30.0s. "
|
||||||
"A longer timeout will be more likely to get results from slow sites. "
|
"A longer timeout will be more likely to get results from slow sites. "
|
||||||
"On the other hand, this may cause a long delay to gather all results. ",
|
"On the other hand, this may cause a long delay to gather all results. ",
|
||||||
@@ -154,7 +166,7 @@ def setup_arguments_parser():
|
|||||||
type=int,
|
type=int,
|
||||||
metavar='RETRIES',
|
metavar='RETRIES',
|
||||||
default=1,
|
default=1,
|
||||||
help="Attempts to restart temporary failed requests.",
|
help="Attempts to restart temporarily failed requests.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-n",
|
"-n",
|
||||||
@@ -165,65 +177,6 @@ def setup_arguments_parser():
|
|||||||
default=100,
|
default=100,
|
||||||
help="Allowed number of concurrent connections.",
|
help="Allowed number of concurrent connections.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"-a",
|
|
||||||
"--all-sites",
|
|
||||||
action="store_true",
|
|
||||||
dest="all_sites",
|
|
||||||
default=False,
|
|
||||||
help="Use all sites for scan.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--top-sites",
|
|
||||||
action="store",
|
|
||||||
default=500,
|
|
||||||
type=int,
|
|
||||||
help="Count of sites for scan ranked by Alexa Top (default: 500).",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--print-not-found",
|
|
||||||
action="store_true",
|
|
||||||
dest="print_not_found",
|
|
||||||
default=False,
|
|
||||||
help="Print sites where the username was not found.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--print-errors",
|
|
||||||
action="store_true",
|
|
||||||
dest="print_check_errors",
|
|
||||||
default=False,
|
|
||||||
help="Print errors messages: connection, captcha, site country ban, etc.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--submit",
|
|
||||||
metavar='EXISTING_USER_URL',
|
|
||||||
type=str,
|
|
||||||
dest="new_site_to_submit",
|
|
||||||
default=False,
|
|
||||||
help="URL of existing profile in new site to submit.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--no-color",
|
|
||||||
action="store_true",
|
|
||||||
dest="no_color",
|
|
||||||
default=False,
|
|
||||||
help="Don't color terminal output",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--no-progressbar",
|
|
||||||
action="store_true",
|
|
||||||
dest="no_progressbar",
|
|
||||||
default=False,
|
|
||||||
help="Don't show progressbar.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--browse",
|
|
||||||
"-b",
|
|
||||||
action="store_true",
|
|
||||||
dest="browse",
|
|
||||||
default=False,
|
|
||||||
help="Browse to all results on default bowser.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-recursion",
|
"--no-recursion",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -238,33 +191,27 @@ def setup_arguments_parser():
|
|||||||
default=False,
|
default=False,
|
||||||
help="Disable parsing pages for additional data and other usernames.",
|
help="Disable parsing pages for additional data and other usernames.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--self-check",
|
|
||||||
action="store_true",
|
|
||||||
default=False,
|
|
||||||
help="Do self check for sites and database and disable non-working ones.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--stats", action="store_true", default=False, help="Show database statistics."
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--use-disabled-sites",
|
|
||||||
action="store_true",
|
|
||||||
default=False,
|
|
||||||
help="Use disabled sites to search (may cause many false positives).",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--parse",
|
|
||||||
dest="parse_url",
|
|
||||||
default='',
|
|
||||||
help="Parse page by URL and extract username and IDs to use for search.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--id-type",
|
"--id-type",
|
||||||
dest="id_type",
|
dest="id_type",
|
||||||
default='username',
|
default='username',
|
||||||
|
choices=SUPPORTED_IDS,
|
||||||
help="Specify identifier(s) type (default: username).",
|
help="Specify identifier(s) type (default: username).",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--db",
|
||||||
|
metavar="DB_FILE",
|
||||||
|
dest="db_file",
|
||||||
|
default=None,
|
||||||
|
help="Load Maigret database from a JSON file or an online, valid, JSON file.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--cookies-jar-file",
|
||||||
|
metavar="COOKIE_FILE",
|
||||||
|
dest="cookie_file",
|
||||||
|
default=None,
|
||||||
|
help="File with cookies.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--ignore-ids",
|
"--ignore-ids",
|
||||||
action="append",
|
action="append",
|
||||||
@@ -273,25 +220,156 @@ def setup_arguments_parser():
|
|||||||
default=[],
|
default=[],
|
||||||
help="Do not make search by the specified username or other ids.",
|
help="Do not make search by the specified username or other ids.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"username",
|
|
||||||
nargs='+',
|
|
||||||
metavar='USERNAMES',
|
|
||||||
action="store",
|
|
||||||
help="One or more usernames to check with social networks.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--tags", dest="tags", default='', help="Specify tags of sites."
|
|
||||||
)
|
|
||||||
# reports options
|
# reports options
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--folderoutput",
|
"--folderoutput",
|
||||||
"-fo",
|
"-fo",
|
||||||
dest="folderoutput",
|
dest="folderoutput",
|
||||||
default="reports",
|
default="reports",
|
||||||
|
metavar="PATH",
|
||||||
help="If using multiple usernames, the output of the results will be saved to this folder.",
|
help="If using multiple usernames, the output of the results will be saved to this folder.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
"--proxy",
|
||||||
|
"-p",
|
||||||
|
metavar='PROXY_URL',
|
||||||
|
action="store",
|
||||||
|
dest="proxy",
|
||||||
|
default=None,
|
||||||
|
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
||||||
|
)
|
||||||
|
|
||||||
|
filter_group = parser.add_argument_group(
|
||||||
|
'Site filtering', 'Options to set site search scope'
|
||||||
|
)
|
||||||
|
filter_group.add_argument(
|
||||||
|
"-a",
|
||||||
|
"--all-sites",
|
||||||
|
action="store_true",
|
||||||
|
dest="all_sites",
|
||||||
|
default=False,
|
||||||
|
help="Use all sites for scan.",
|
||||||
|
)
|
||||||
|
filter_group.add_argument(
|
||||||
|
"--top-sites",
|
||||||
|
action="store",
|
||||||
|
default=500,
|
||||||
|
metavar="N",
|
||||||
|
type=int,
|
||||||
|
help="Count of sites for scan ranked by Alexa Top (default: 500).",
|
||||||
|
)
|
||||||
|
filter_group.add_argument(
|
||||||
|
"--tags", dest="tags", default='', help="Specify tags of sites (see `--stats`)."
|
||||||
|
)
|
||||||
|
filter_group.add_argument(
|
||||||
|
"--site",
|
||||||
|
action="append",
|
||||||
|
metavar='SITE_NAME',
|
||||||
|
dest="site_list",
|
||||||
|
default=[],
|
||||||
|
help="Limit analysis to just the specified sites (multiple option).",
|
||||||
|
)
|
||||||
|
filter_group.add_argument(
|
||||||
|
"--use-disabled-sites",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Use disabled sites to search (may cause many false positives).",
|
||||||
|
)
|
||||||
|
|
||||||
|
modes_group = parser.add_argument_group(
|
||||||
|
'Operating modes',
|
||||||
|
'Various functions except the default search by a username. '
|
||||||
|
'Modes are executed sequentially in the order of declaration.',
|
||||||
|
)
|
||||||
|
modes_group.add_argument(
|
||||||
|
"--parse",
|
||||||
|
dest="parse_url",
|
||||||
|
default='',
|
||||||
|
metavar='URL',
|
||||||
|
help="Parse page by URL and extract username and IDs to use for search.",
|
||||||
|
)
|
||||||
|
modes_group.add_argument(
|
||||||
|
"--submit",
|
||||||
|
metavar='URL',
|
||||||
|
type=str,
|
||||||
|
dest="new_site_to_submit",
|
||||||
|
default=False,
|
||||||
|
help="URL of existing profile in new site to submit.",
|
||||||
|
)
|
||||||
|
modes_group.add_argument(
|
||||||
|
"--self-check",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Do self check for sites and database and disable non-working ones.",
|
||||||
|
)
|
||||||
|
modes_group.add_argument(
|
||||||
|
"--stats",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Show database statistics (most frequent sites engines and tags).",
|
||||||
|
)
|
||||||
|
|
||||||
|
output_group = parser.add_argument_group(
|
||||||
|
'Output options', 'Options to change verbosity and view of the console output'
|
||||||
|
)
|
||||||
|
output_group.add_argument(
|
||||||
|
"--print-not-found",
|
||||||
|
action="store_true",
|
||||||
|
dest="print_not_found",
|
||||||
|
default=False,
|
||||||
|
help="Print sites where the username was not found.",
|
||||||
|
)
|
||||||
|
output_group.add_argument(
|
||||||
|
"--print-errors",
|
||||||
|
action="store_true",
|
||||||
|
dest="print_check_errors",
|
||||||
|
default=False,
|
||||||
|
help="Print errors messages: connection, captcha, site country ban, etc.",
|
||||||
|
)
|
||||||
|
output_group.add_argument(
|
||||||
|
"--verbose",
|
||||||
|
"-v",
|
||||||
|
action="store_true",
|
||||||
|
dest="verbose",
|
||||||
|
default=False,
|
||||||
|
help="Display extra information and metrics.",
|
||||||
|
)
|
||||||
|
output_group.add_argument(
|
||||||
|
"--info",
|
||||||
|
"-vv",
|
||||||
|
action="store_true",
|
||||||
|
dest="info",
|
||||||
|
default=False,
|
||||||
|
help="Display extra/service information and metrics.",
|
||||||
|
)
|
||||||
|
output_group.add_argument(
|
||||||
|
"--debug",
|
||||||
|
"-vvv",
|
||||||
|
"-d",
|
||||||
|
action="store_true",
|
||||||
|
dest="debug",
|
||||||
|
default=False,
|
||||||
|
help="Display extra/service/debug information and metrics, save responses in debug.log.",
|
||||||
|
)
|
||||||
|
output_group.add_argument(
|
||||||
|
"--no-color",
|
||||||
|
action="store_true",
|
||||||
|
dest="no_color",
|
||||||
|
default=False,
|
||||||
|
help="Don't color terminal output",
|
||||||
|
)
|
||||||
|
output_group.add_argument(
|
||||||
|
"--no-progressbar",
|
||||||
|
action="store_true",
|
||||||
|
dest="no_progressbar",
|
||||||
|
default=False,
|
||||||
|
help="Don't show progressbar.",
|
||||||
|
)
|
||||||
|
|
||||||
|
report_group = parser.add_argument_group(
|
||||||
|
'Report formats', 'Supported formats of report files'
|
||||||
|
)
|
||||||
|
report_group.add_argument(
|
||||||
"-T",
|
"-T",
|
||||||
"--txt",
|
"--txt",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -299,7 +377,7 @@ def setup_arguments_parser():
|
|||||||
default=False,
|
default=False,
|
||||||
help="Create a TXT report (one report per username).",
|
help="Create a TXT report (one report per username).",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
report_group.add_argument(
|
||||||
"-C",
|
"-C",
|
||||||
"--csv",
|
"--csv",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -307,7 +385,7 @@ def setup_arguments_parser():
|
|||||||
default=False,
|
default=False,
|
||||||
help="Create a CSV report (one report per username).",
|
help="Create a CSV report (one report per username).",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
report_group.add_argument(
|
||||||
"-H",
|
"-H",
|
||||||
"--html",
|
"--html",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -315,7 +393,7 @@ def setup_arguments_parser():
|
|||||||
default=False,
|
default=False,
|
||||||
help="Create an HTML report file (general report on all usernames).",
|
help="Create an HTML report file (general report on all usernames).",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
report_group.add_argument(
|
||||||
"-X",
|
"-X",
|
||||||
"--xmind",
|
"--xmind",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -323,7 +401,7 @@ def setup_arguments_parser():
|
|||||||
default=False,
|
default=False,
|
||||||
help="Generate an XMind 8 mindmap report (one report per username).",
|
help="Generate an XMind 8 mindmap report (one report per username).",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
report_group.add_argument(
|
||||||
"-P",
|
"-P",
|
||||||
"--pdf",
|
"--pdf",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -331,14 +409,14 @@ def setup_arguments_parser():
|
|||||||
default=False,
|
default=False,
|
||||||
help="Generate a PDF report (general report on all usernames).",
|
help="Generate a PDF report (general report on all usernames).",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
report_group.add_argument(
|
||||||
"-J",
|
"-J",
|
||||||
"--json",
|
"--json",
|
||||||
action="store",
|
action="store",
|
||||||
metavar='REPORT_TYPE',
|
metavar='TYPE',
|
||||||
dest="json",
|
dest="json",
|
||||||
default='',
|
default='',
|
||||||
type=check_supported_json_format,
|
choices=SUPPORTED_JSON_REPORT_FORMATS,
|
||||||
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
|
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
|
||||||
" (one report per username).",
|
" (one report per username).",
|
||||||
)
|
)
|
||||||
@@ -371,7 +449,7 @@ async def main():
|
|||||||
usernames = {
|
usernames = {
|
||||||
u: args.id_type
|
u: args.id_type
|
||||||
for u in args.username
|
for u in args.username
|
||||||
if u not in ['-'] and u not in args.ignore_ids_list
|
if u and u not in ['-'] and u not in args.ignore_ids_list
|
||||||
}
|
}
|
||||||
|
|
||||||
parsing_enabled = not args.disable_extracting
|
parsing_enabled = not args.disable_extracting
|
||||||
@@ -382,31 +460,10 @@ async def main():
|
|||||||
print("Using the proxy: " + args.proxy)
|
print("Using the proxy: " + args.proxy)
|
||||||
|
|
||||||
if args.parse_url:
|
if args.parse_url:
|
||||||
# url, headers
|
extracted_ids = extract_ids_from_page(
|
||||||
reqs = [(args.parse_url, set())]
|
args.parse_url, logger, timeout=args.timeout
|
||||||
try:
|
)
|
||||||
# temporary workaround for URL mutations MVP
|
usernames.update(extracted_ids)
|
||||||
from socid_extractor import mutate_url
|
|
||||||
|
|
||||||
reqs += list(mutate_url(args.parse_url))
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(e)
|
|
||||||
pass
|
|
||||||
|
|
||||||
for req in reqs:
|
|
||||||
url, headers = req
|
|
||||||
print(f'Scanning webpage by URL {url}...')
|
|
||||||
page, _ = parse(url, cookies_str='', headers=headers)
|
|
||||||
info = extract(page)
|
|
||||||
if not info:
|
|
||||||
print('Nothing extracted')
|
|
||||||
else:
|
|
||||||
print(get_dict_ascii_tree(info.items(), new_line=False), ' ')
|
|
||||||
for k, v in info.items():
|
|
||||||
if 'username' in k:
|
|
||||||
usernames[v] = 'username'
|
|
||||||
if k in supported_recursive_search_ids:
|
|
||||||
usernames[v] = k
|
|
||||||
|
|
||||||
if args.tags:
|
if args.tags:
|
||||||
args.tags = list(set(str(args.tags).split(',')))
|
args.tags = list(set(str(args.tags).split(',')))
|
||||||
@@ -434,7 +491,7 @@ async def main():
|
|||||||
top=args.top_sites,
|
top=args.top_sites,
|
||||||
tags=args.tags,
|
tags=args.tags,
|
||||||
names=args.site_list,
|
names=args.site_list,
|
||||||
disabled=False,
|
disabled=args.use_disabled_sites,
|
||||||
id_type=x,
|
id_type=x,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -454,13 +511,17 @@ async def main():
|
|||||||
db, site_data, logger, max_connections=args.connections
|
db, site_data, logger, max_connections=args.connections
|
||||||
)
|
)
|
||||||
if is_need_update:
|
if is_need_update:
|
||||||
if input('Do you want to save changes permanently? [Yn]\n').lower() == 'y':
|
if input('Do you want to save changes permanently? [Yn]\n').lower() in (
|
||||||
|
'y',
|
||||||
|
'',
|
||||||
|
):
|
||||||
db.save_to_file(args.db_file)
|
db.save_to_file(args.db_file)
|
||||||
print('Database was successfully updated.')
|
print('Database was successfully updated.')
|
||||||
else:
|
else:
|
||||||
print('Updates will be applied only for current search session.')
|
print('Updates will be applied only for current search session.')
|
||||||
print(db.get_scan_stats(site_data))
|
print('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
|
||||||
|
|
||||||
|
# Database statistics
|
||||||
if args.stats:
|
if args.stats:
|
||||||
print(db.get_db_stats(db.sites_dict))
|
print(db.get_db_stats(db.sites_dict))
|
||||||
|
|
||||||
@@ -470,11 +531,6 @@ async def main():
|
|||||||
# Define one report filename template
|
# Define one report filename template
|
||||||
report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
|
report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
|
||||||
|
|
||||||
# Database stats
|
|
||||||
# TODO: verbose info about filtered sites
|
|
||||||
# enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values())))
|
|
||||||
# print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')
|
|
||||||
|
|
||||||
if usernames == {}:
|
if usernames == {}:
|
||||||
# magic params to exit after init
|
# magic params to exit after init
|
||||||
query_notify.warning('No usernames to check, exiting.')
|
query_notify.warning('No usernames to check, exiting.')
|
||||||
@@ -483,7 +539,7 @@ async def main():
|
|||||||
if not site_data:
|
if not site_data:
|
||||||
query_notify.warning('No sites to check, exiting!')
|
query_notify.warning('No sites to check, exiting!')
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
else:
|
|
||||||
query_notify.warning(
|
query_notify.warning(
|
||||||
f'Starting a search on top {len(site_data)} sites from the Maigret database...'
|
f'Starting a search on top {len(site_data)} sites from the Maigret database...'
|
||||||
)
|
)
|
||||||
@@ -501,7 +557,7 @@ async def main():
|
|||||||
|
|
||||||
if username.lower() in already_checked:
|
if username.lower() in already_checked:
|
||||||
continue
|
continue
|
||||||
else:
|
|
||||||
already_checked.add(username.lower())
|
already_checked.add(username.lower())
|
||||||
|
|
||||||
if username in args.ignore_ids_list:
|
if username in args.ignore_ids_list:
|
||||||
@@ -511,10 +567,7 @@ async def main():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# check for characters do not supported by sites generally
|
# check for characters do not supported by sites generally
|
||||||
found_unsupported_chars = set(unsupported_characters).intersection(
|
found_unsupported_chars = set(BAD_CHARS).intersection(set(username))
|
||||||
set(username)
|
|
||||||
)
|
|
||||||
|
|
||||||
if found_unsupported_chars:
|
if found_unsupported_chars:
|
||||||
pretty_chars_str = ','.join(
|
pretty_chars_str = ','.join(
|
||||||
map(lambda s: f'"{s}"', found_unsupported_chars)
|
map(lambda s: f'"{s}"', found_unsupported_chars)
|
||||||
@@ -548,22 +601,9 @@ async def main():
|
|||||||
general_results.append((username, id_type, results))
|
general_results.append((username, id_type, results))
|
||||||
|
|
||||||
# TODO: tests
|
# TODO: tests
|
||||||
for website_name in results:
|
if recursive_search_enabled:
|
||||||
dictionary = results[website_name]
|
extracted_ids = extract_ids_from_results(results, db)
|
||||||
# TODO: fix no site data issue
|
usernames.update(extracted_ids)
|
||||||
if not dictionary or not recursive_search_enabled:
|
|
||||||
continue
|
|
||||||
|
|
||||||
new_usernames = dictionary.get('ids_usernames')
|
|
||||||
if new_usernames:
|
|
||||||
for u, utype in new_usernames.items():
|
|
||||||
usernames[u] = utype
|
|
||||||
|
|
||||||
for url in dictionary.get('ids_links', []):
|
|
||||||
for s in db.sites:
|
|
||||||
u = s.detect_username(url)
|
|
||||||
if u:
|
|
||||||
usernames[u] = 'username'
|
|
||||||
|
|
||||||
# reporting for a one username
|
# reporting for a one username
|
||||||
if args.xmind:
|
if args.xmind:
|
||||||
@@ -607,6 +647,12 @@ async def main():
|
|||||||
filename = report_filepath_tpl.format(username=username, postfix='.pdf')
|
filename = report_filepath_tpl.format(username=username, postfix='.pdf')
|
||||||
save_pdf_report(filename, report_context)
|
save_pdf_report(filename, report_context)
|
||||||
query_notify.warning(f'PDF report on all usernames saved in {filename}')
|
query_notify.warning(f'PDF report on all usernames saved in {filename}')
|
||||||
|
|
||||||
|
text_report = get_plaintext_report(report_context)
|
||||||
|
if text_report:
|
||||||
|
query_notify.info('Short text report:')
|
||||||
|
print(text_report)
|
||||||
|
|
||||||
# update database
|
# update database
|
||||||
db.save_to_file(args.db_file)
|
db.save_to_file(args.db_file)
|
||||||
|
|
||||||
|
|||||||
+38
-35
@@ -152,6 +152,27 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def make_colored_terminal_notify(
    self, status, text, status_color, text_color, appendix
):
    """
    Build a colored notification line.

    The status marker is wrapped in white brackets and painted with
    ``status_color``, the text is painted with ``text_color`` and followed
    by a colon, then styles are reset before ``appendix`` is appended.
    """
    badge = f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]"
    label = f"{text_color} {text}: {Style.RESET_ALL}"
    return f"{badge}{label}{appendix}"
|
||||||
|
|
||||||
|
def make_simple_terminal_notify(
    self, status, text, status_color, text_color, appendix
):
    """
    Build a notification line without any color codes.

    The color arguments are accepted only to keep the signature parallel
    with the colored variant; they are not used.
    """
    return "[{}] {}: {}".format(status, text, appendix)
|
||||||
|
|
||||||
|
def make_terminal_notify(self, *args):
    """
    Build a notification line, colored or plain depending on ``self.color``.

    All positional arguments are forwarded unchanged to the chosen builder.
    """
    builder = (
        self.make_colored_terminal_notify
        if self.color
        else self.make_simple_terminal_notify
    )
    return builder(*args)
|
||||||
|
|
||||||
def start(self, message, id_type):
|
def start(self, message, id_type):
|
||||||
"""Notify Start.
|
"""Notify Start.
|
||||||
|
|
||||||
@@ -184,13 +205,20 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
else:
|
else:
|
||||||
print(f"[*] {title} {message} on:")
|
print(f"[*] {title} {message} on:")
|
||||||
|
|
||||||
def warning(self, message, symbol="-"):
|
def _colored_print(self, fore_color, msg):
|
||||||
msg = f"[{symbol}] {message}"
|
|
||||||
if self.color:
|
if self.color:
|
||||||
print(Style.BRIGHT + Fore.YELLOW + msg)
|
print(Style.BRIGHT + fore_color + msg)
|
||||||
else:
|
else:
|
||||||
print(msg)
|
print(msg)
|
||||||
|
|
||||||
|
def warning(self, message, symbol="-"):
    """Print ``[symbol] message`` through ``_colored_print`` using yellow."""
    self._colored_print(Fore.YELLOW, f"[{symbol}] {message}")
|
||||||
|
|
||||||
|
def info(self, message, symbol="*"):
    """Print ``[symbol] message`` through ``_colored_print`` using blue."""
    self._colored_print(Fore.BLUE, f"[{symbol}] {message}")
|
||||||
|
|
||||||
def update(self, result, is_similar=False):
|
def update(self, result, is_similar=False):
|
||||||
"""Notify Update.
|
"""Notify Update.
|
||||||
|
|
||||||
@@ -204,40 +232,18 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
Return Value:
|
Return Value:
|
||||||
Nothing.
|
Nothing.
|
||||||
"""
|
"""
|
||||||
|
notify = None
|
||||||
self.result = result
|
self.result = result
|
||||||
|
|
||||||
if not self.result.ids_data:
|
|
||||||
ids_data_text = ""
|
ids_data_text = ""
|
||||||
else:
|
if self.result.ids_data:
|
||||||
ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ")
|
ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ")
|
||||||
|
|
||||||
def make_colored_terminal_notify(
|
|
||||||
status, text, status_color, text_color, appendix
|
|
||||||
):
|
|
||||||
text = [
|
|
||||||
f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]"
|
|
||||||
+ f"{text_color} {text}: {Style.RESET_ALL}"
|
|
||||||
+ f"{appendix}"
|
|
||||||
]
|
|
||||||
return "".join(text)
|
|
||||||
|
|
||||||
def make_simple_terminal_notify(status, text, appendix):
|
|
||||||
return f"[{status}] {text}: {appendix}"
|
|
||||||
|
|
||||||
def make_terminal_notify(is_colored=True, *args):
|
|
||||||
if is_colored:
|
|
||||||
return make_colored_terminal_notify(*args)
|
|
||||||
else:
|
|
||||||
return make_simple_terminal_notify(*args)
|
|
||||||
|
|
||||||
notify = None
|
|
||||||
|
|
||||||
# Output to the terminal is desired.
|
# Output to the terminal is desired.
|
||||||
if result.status == QueryStatus.CLAIMED:
|
if result.status == QueryStatus.CLAIMED:
|
||||||
color = Fore.BLUE if is_similar else Fore.GREEN
|
color = Fore.BLUE if is_similar else Fore.GREEN
|
||||||
status = "?" if is_similar else "+"
|
status = "?" if is_similar else "+"
|
||||||
notify = make_terminal_notify(
|
notify = self.make_terminal_notify(
|
||||||
self.color,
|
|
||||||
status,
|
status,
|
||||||
result.site_name,
|
result.site_name,
|
||||||
color,
|
color,
|
||||||
@@ -246,8 +252,7 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
)
|
)
|
||||||
elif result.status == QueryStatus.AVAILABLE:
|
elif result.status == QueryStatus.AVAILABLE:
|
||||||
if not self.print_found_only:
|
if not self.print_found_only:
|
||||||
notify = make_terminal_notify(
|
notify = self.make_terminal_notify(
|
||||||
self.color,
|
|
||||||
"-",
|
"-",
|
||||||
result.site_name,
|
result.site_name,
|
||||||
Fore.RED,
|
Fore.RED,
|
||||||
@@ -256,8 +261,7 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
)
|
)
|
||||||
elif result.status == QueryStatus.UNKNOWN:
|
elif result.status == QueryStatus.UNKNOWN:
|
||||||
if not self.skip_check_errors:
|
if not self.skip_check_errors:
|
||||||
notify = make_terminal_notify(
|
notify = self.make_terminal_notify(
|
||||||
self.color,
|
|
||||||
"?",
|
"?",
|
||||||
result.site_name,
|
result.site_name,
|
||||||
Fore.RED,
|
Fore.RED,
|
||||||
@@ -267,8 +271,7 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
elif result.status == QueryStatus.ILLEGAL:
|
elif result.status == QueryStatus.ILLEGAL:
|
||||||
if not self.print_found_only:
|
if not self.print_found_only:
|
||||||
text = "Illegal Username Format For This Site!"
|
text = "Illegal Username Format For This Site!"
|
||||||
notify = make_terminal_notify(
|
notify = self.make_terminal_notify(
|
||||||
self.color,
|
|
||||||
"-",
|
"-",
|
||||||
result.site_name,
|
result.site_name,
|
||||||
Fore.RED,
|
Fore.RED,
|
||||||
@@ -286,7 +289,7 @@ class QueryNotifyPrint(QueryNotify):
|
|||||||
sys.stdout.write("\x1b[1K\r")
|
sys.stdout.write("\x1b[1K\r")
|
||||||
print(notify)
|
print(notify)
|
||||||
|
|
||||||
return
|
return notify
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
"""Convert Object To String.
|
"""Convert Object To String.
|
||||||
|
|||||||
+51
-50
@@ -3,7 +3,6 @@ import io
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from argparse import ArgumentTypeError
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
@@ -71,6 +70,17 @@ def save_json_report(filename: str, username: str, results: dict, report_type: s
|
|||||||
generate_json_report(username, results, f, report_type=report_type)
|
generate_json_report(username, results, f, report_type=report_type)
|
||||||
|
|
||||||
|
|
||||||
|
def get_plaintext_report(context: dict) -> str:
|
||||||
|
output = (context['brief'] + " ").replace('. ', '.\n')
|
||||||
|
interests = list(map(lambda x: x[0], context.get('interests_tuple_list', [])))
|
||||||
|
countries = list(map(lambda x: x[0], context.get('countries_tuple_list', [])))
|
||||||
|
if countries:
|
||||||
|
output += f'Countries: {", ".join(countries)}\n'
|
||||||
|
if interests:
|
||||||
|
output += f'Interests (tags): {", ".join(interests)}\n'
|
||||||
|
return output.strip()
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
REPORTS GENERATING
|
REPORTS GENERATING
|
||||||
"""
|
"""
|
||||||
@@ -216,6 +226,7 @@ def generate_report_context(username_results: list):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"username": first_username,
|
"username": first_username,
|
||||||
|
# TODO: return brief list
|
||||||
"brief": brief,
|
"brief": brief,
|
||||||
"results": username_results,
|
"results": username_results,
|
||||||
"first_seen": first_seen,
|
"first_seen": first_seen,
|
||||||
@@ -269,6 +280,9 @@ def generate_json_report(username: str, results: dict, file, report_type):
|
|||||||
|
|
||||||
data = dict(site_result)
|
data = dict(site_result)
|
||||||
data["status"] = data["status"].json()
|
data["status"] = data["status"].json()
|
||||||
|
data["site"] = data["site"].json
|
||||||
|
if "future" in data:
|
||||||
|
del data["future"]
|
||||||
|
|
||||||
if is_report_per_line:
|
if is_report_per_line:
|
||||||
data["sitename"] = sitename
|
data["sitename"] = sitename
|
||||||
@@ -290,11 +304,20 @@ def save_xmind_report(filename, username, results):
|
|||||||
os.remove(filename)
|
os.remove(filename)
|
||||||
workbook = xmind.load(filename)
|
workbook = xmind.load(filename)
|
||||||
sheet = workbook.getPrimarySheet()
|
sheet = workbook.getPrimarySheet()
|
||||||
design_sheet(sheet, username, results)
|
design_xmind_sheet(sheet, username, results)
|
||||||
xmind.save(workbook, path=filename)
|
xmind.save(workbook, path=filename)
|
||||||
|
|
||||||
|
|
||||||
def design_sheet(sheet, username, results):
|
def add_xmind_subtopic(userlink, k, v, supposed_data):
|
||||||
|
currentsublabel = userlink.addSubTopic()
|
||||||
|
field = "fullname" if k == "name" else k
|
||||||
|
if field not in supposed_data:
|
||||||
|
supposed_data[field] = []
|
||||||
|
supposed_data[field].append(v)
|
||||||
|
currentsublabel.setTitle("%s: %s" % (k, v))
|
||||||
|
|
||||||
|
|
||||||
|
def design_xmind_sheet(sheet, username, results):
|
||||||
alltags = {}
|
alltags = {}
|
||||||
supposed_data = {}
|
supposed_data = {}
|
||||||
|
|
||||||
@@ -308,64 +331,42 @@ def design_sheet(sheet, username, results):
|
|||||||
|
|
||||||
for website_name in results:
|
for website_name in results:
|
||||||
dictionary = results[website_name]
|
dictionary = results[website_name]
|
||||||
|
result_status = dictionary.get("status")
|
||||||
if dictionary.get("status").status == QueryStatus.CLAIMED:
|
if result_status.status != QueryStatus.CLAIMED:
|
||||||
# firsttime I found that entry
|
continue
|
||||||
for tag in dictionary.get("status").tags:
|
|
||||||
if tag.strip() == "":
|
stripped_tags = list(map(lambda x: x.strip(), result_status.tags))
|
||||||
|
normalized_tags = list(
|
||||||
|
filter(lambda x: x and not is_country_tag(x), stripped_tags)
|
||||||
|
)
|
||||||
|
|
||||||
|
category = None
|
||||||
|
for tag in normalized_tags:
|
||||||
|
if tag in alltags.keys():
|
||||||
continue
|
continue
|
||||||
if tag not in alltags.keys():
|
|
||||||
if not is_country_tag(tag):
|
|
||||||
tagsection = root_topic1.addSubTopic()
|
tagsection = root_topic1.addSubTopic()
|
||||||
tagsection.setTitle(tag)
|
tagsection.setTitle(tag)
|
||||||
alltags[tag] = tagsection
|
alltags[tag] = tagsection
|
||||||
|
|
||||||
category = None
|
|
||||||
for tag in dictionary.get("status").tags:
|
|
||||||
if tag.strip() == "":
|
|
||||||
continue
|
|
||||||
if not is_country_tag(tag):
|
|
||||||
category = tag
|
category = tag
|
||||||
|
|
||||||
if category is None:
|
section = alltags[category] if category else undefinedsection
|
||||||
userlink = undefinedsection.addSubTopic()
|
userlink = section.addSubTopic()
|
||||||
userlink.addLabel(dictionary.get("status").site_url_user)
|
userlink.addLabel(result_status.site_url_user)
|
||||||
else:
|
|
||||||
userlink = alltags[category].addSubTopic()
|
|
||||||
userlink.addLabel(dictionary.get("status").site_url_user)
|
|
||||||
|
|
||||||
if dictionary.get("status").ids_data:
|
ids_data = result_status.ids_data or {}
|
||||||
for k, v in dictionary.get("status").ids_data.items():
|
for k, v in ids_data.items():
|
||||||
# suppose target data
|
# suppose target data
|
||||||
if not isinstance(v, list):
|
if isinstance(v, list):
|
||||||
currentsublabel = userlink.addSubTopic()
|
|
||||||
field = "fullname" if k == "name" else k
|
|
||||||
if field not in supposed_data:
|
|
||||||
supposed_data[field] = []
|
|
||||||
supposed_data[field].append(v)
|
|
||||||
currentsublabel.setTitle("%s: %s" % (k, v))
|
|
||||||
else:
|
|
||||||
for currentval in v:
|
for currentval in v:
|
||||||
currentsublabel = userlink.addSubTopic()
|
add_xmind_subtopic(userlink, k, currentval, supposed_data)
|
||||||
field = "fullname" if k == "name" else k
|
else:
|
||||||
if field not in supposed_data:
|
add_xmind_subtopic(userlink, k, v, supposed_data)
|
||||||
supposed_data[field] = []
|
|
||||||
supposed_data[field].append(currentval)
|
|
||||||
currentsublabel.setTitle("%s: %s" % (k, currentval))
|
|
||||||
# add supposed data
|
# add supposed data
|
||||||
filterede_supposed_data = filter_supposed_data(supposed_data)
|
filtered_supposed_data = filter_supposed_data(supposed_data)
|
||||||
if len(filterede_supposed_data) > 0:
|
if len(filtered_supposed_data) > 0:
|
||||||
undefinedsection = root_topic1.addSubTopic()
|
undefinedsection = root_topic1.addSubTopic()
|
||||||
undefinedsection.setTitle("SUPPOSED DATA")
|
undefinedsection.setTitle("SUPPOSED DATA")
|
||||||
for k, v in filterede_supposed_data.items():
|
for k, v in filtered_supposed_data.items():
|
||||||
currentsublabel = undefinedsection.addSubTopic()
|
currentsublabel = undefinedsection.addSubTopic()
|
||||||
currentsublabel.setTitle("%s: %s" % (k, v))
|
currentsublabel.setTitle("%s: %s" % (k, v))
|
||||||
|
|
||||||
|
|
||||||
def check_supported_json_format(value):
|
|
||||||
if value and value not in SUPPORTED_JSON_REPORT_FORMATS:
|
|
||||||
raise ArgumentTypeError(
|
|
||||||
"JSON report type must be one of the following types: "
|
|
||||||
+ ", ".join(SUPPORTED_JSON_REPORT_FORMATS)
|
|
||||||
)
|
|
||||||
return value
|
|
||||||
|
|||||||
+5200
-2389
File diff suppressed because it is too large
Load Diff
+54
-20
@@ -3,7 +3,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List, Dict, Any, Tuple
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -53,6 +53,18 @@ SUPPORTED_TAGS = [
|
|||||||
"medicine",
|
"medicine",
|
||||||
"reading",
|
"reading",
|
||||||
"stock",
|
"stock",
|
||||||
|
"messaging",
|
||||||
|
"trading",
|
||||||
|
"links",
|
||||||
|
"fashion",
|
||||||
|
"tasks",
|
||||||
|
"military",
|
||||||
|
"auto",
|
||||||
|
"gambling",
|
||||||
|
"cybercriminal",
|
||||||
|
"review",
|
||||||
|
"bookmarks",
|
||||||
|
"design",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -146,6 +158,19 @@ class MaigretSite:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def extract_id_from_url(self, url: str) -> Optional[Tuple[str, str]]:
|
||||||
|
if not self.url_regexp:
|
||||||
|
return None
|
||||||
|
|
||||||
|
match_groups = self.url_regexp.match(url)
|
||||||
|
if not match_groups:
|
||||||
|
return None
|
||||||
|
|
||||||
|
_id = match_groups.groups()[-1].rstrip("/")
|
||||||
|
_type = self.type
|
||||||
|
|
||||||
|
return _id, _type
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def pretty_name(self):
|
def pretty_name(self):
|
||||||
if self.source:
|
if self.source:
|
||||||
@@ -167,6 +192,25 @@ class MaigretSite:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@property
|
||||||
|
def errors_dict(self) -> dict:
|
||||||
|
errors: Dict[str, str] = {}
|
||||||
|
if self.engine_obj:
|
||||||
|
errors.update(self.engine_obj.site.get('errors', {}))
|
||||||
|
errors.update(self.errors)
|
||||||
|
return errors
|
||||||
|
|
||||||
|
def get_url_type(self) -> str:
|
||||||
|
url = URLMatcher.extract_main_part(self.url)
|
||||||
|
if url.startswith("{username}"):
|
||||||
|
url = "SUBDOMAIN"
|
||||||
|
elif url == "":
|
||||||
|
url = f"{self.url} ({self.engine})"
|
||||||
|
else:
|
||||||
|
parts = url.split("/")
|
||||||
|
url = "/" + "/".join(parts[1:])
|
||||||
|
return url
|
||||||
|
|
||||||
def update(self, updates: "dict") -> "MaigretSite":
|
def update(self, updates: "dict") -> "MaigretSite":
|
||||||
self.__dict__.update(updates)
|
self.__dict__.update(updates)
|
||||||
self.update_detectors()
|
self.update_detectors()
|
||||||
@@ -405,44 +449,34 @@ class MaigretDatabase:
|
|||||||
if not sites_dict:
|
if not sites_dict:
|
||||||
sites_dict = self.sites_dict()
|
sites_dict = self.sites_dict()
|
||||||
|
|
||||||
|
urls = {}
|
||||||
|
tags = {}
|
||||||
output = ""
|
output = ""
|
||||||
disabled_count = 0
|
disabled_count = 0
|
||||||
total_count = len(sites_dict)
|
total_count = len(sites_dict)
|
||||||
urls = {}
|
|
||||||
tags = {}
|
|
||||||
|
|
||||||
for _, site in sites_dict.items():
|
for _, site in sites_dict.items():
|
||||||
if site.disabled:
|
if site.disabled:
|
||||||
disabled_count += 1
|
disabled_count += 1
|
||||||
|
|
||||||
url = URLMatcher.extract_main_part(site.url)
|
url_type = site.get_url_type()
|
||||||
if url.startswith("{username}"):
|
urls[url_type] = urls.get(url_type, 0) + 1
|
||||||
url = "SUBDOMAIN"
|
|
||||||
elif url == "":
|
|
||||||
url = f"{site.url} ({site.engine})"
|
|
||||||
else:
|
|
||||||
parts = url.split("/")
|
|
||||||
url = "/" + "/".join(parts[1:])
|
|
||||||
|
|
||||||
urls[url] = urls.get(url, 0) + 1
|
|
||||||
|
|
||||||
if not site.tags:
|
if not site.tags:
|
||||||
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
||||||
|
|
||||||
for tag in site.tags:
|
for tag in filter(lambda x: not is_country_tag(x), site.tags):
|
||||||
if is_country_tag(tag):
|
|
||||||
# currenty do not display country tags
|
|
||||||
continue
|
|
||||||
tags[tag] = tags.get(tag, 0) + 1
|
tags[tag] = tags.get(tag, 0) + 1
|
||||||
|
|
||||||
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
|
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
|
||||||
output += "Top sites' profile URLs:\n"
|
output += "Top profile URLs:\n"
|
||||||
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
|
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
|
||||||
if count == 1:
|
if count == 1:
|
||||||
break
|
break
|
||||||
output += f"{count}\t{url}\n"
|
output += f"{count}\t{url}\n"
|
||||||
output += "Top sites' tags:\n"
|
|
||||||
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True):
|
output += "Top tags:\n"
|
||||||
|
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
|
||||||
mark = ""
|
mark = ""
|
||||||
if tag not in SUPPORTED_TAGS:
|
if tag not in SUPPORTED_TAGS:
|
||||||
mark = " (non-standard)"
|
mark = " (non-standard)"
|
||||||
|
|||||||
+39
-7
@@ -2,7 +2,7 @@ import asyncio
|
|||||||
import difflib
|
import difflib
|
||||||
import re
|
import re
|
||||||
from typing import List
|
from typing import List
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from .activation import import_aiohttp_cookies
|
from .activation import import_aiohttp_cookies
|
||||||
@@ -46,6 +46,20 @@ def get_match_ratio(x):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_alexa_rank(site_url_main):
|
||||||
|
url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
|
||||||
|
xml_data = requests.get(url).text
|
||||||
|
root = ET.fromstring(xml_data)
|
||||||
|
alexa_rank = 0
|
||||||
|
|
||||||
|
try:
|
||||||
|
alexa_rank = int(root.find('.//REACH').attrib['RANK'])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return alexa_rank
|
||||||
|
|
||||||
|
|
||||||
def extract_mainpage_url(url):
|
def extract_mainpage_url(url):
|
||||||
return "/".join(url.split("/", 3)[:3])
|
return "/".join(url.split("/", 3)[:3])
|
||||||
|
|
||||||
@@ -133,6 +147,7 @@ async def detect_known_engine(
|
|||||||
) -> List[MaigretSite]:
|
) -> List[MaigretSite]:
|
||||||
try:
|
try:
|
||||||
r = requests.get(url_mainpage)
|
r = requests.get(url_mainpage)
|
||||||
|
logger.debug(r.text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(e)
|
logger.warning(e)
|
||||||
print("Some error while checking main page")
|
print("Some error while checking main page")
|
||||||
@@ -199,6 +214,7 @@ async def check_features_manually(
|
|||||||
# cookies
|
# cookies
|
||||||
cookie_dict = None
|
cookie_dict = None
|
||||||
if cookie_file:
|
if cookie_file:
|
||||||
|
logger.info(f'Use {cookie_file} for cookies')
|
||||||
cookie_jar = await import_aiohttp_cookies(cookie_file)
|
cookie_jar = await import_aiohttp_cookies(cookie_file)
|
||||||
cookie_dict = {c.key: c.value for c in cookie_jar}
|
cookie_dict = {c.key: c.value for c in cookie_jar}
|
||||||
|
|
||||||
@@ -239,7 +255,7 @@ async def check_features_manually(
|
|||||||
features = input("If features was not detected correctly, write it manually: ")
|
features = input("If features was not detected correctly, write it manually: ")
|
||||||
|
|
||||||
if features:
|
if features:
|
||||||
presence_list = features.split(",")
|
presence_list = list(map(str.strip, features.split(",")))
|
||||||
|
|
||||||
absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
|
absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
|
||||||
:top_features_count
|
:top_features_count
|
||||||
@@ -248,7 +264,7 @@ async def check_features_manually(
|
|||||||
features = input("If features was not detected correctly, write it manually: ")
|
features = input("If features was not detected correctly, write it manually: ")
|
||||||
|
|
||||||
if features:
|
if features:
|
||||||
absence_list = features.split(",")
|
absence_list = list(map(str.strip, features.split(",")))
|
||||||
|
|
||||||
site_data = {
|
site_data = {
|
||||||
"absenceStrs": absence_list,
|
"absenceStrs": absence_list,
|
||||||
@@ -291,7 +307,13 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
|
|||||||
|
|
||||||
url_mainpage = extract_mainpage_url(url_exists)
|
url_mainpage = extract_mainpage_url(url_exists)
|
||||||
|
|
||||||
|
print('Detecting site engine, please wait...')
|
||||||
|
sites = []
|
||||||
|
try:
|
||||||
sites = await detect_known_engine(db, url_exists, url_mainpage, logger)
|
sites = await detect_known_engine(db, url_exists, url_mainpage, logger)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
print('Engine detect process is interrupted.')
|
||||||
|
|
||||||
if not sites:
|
if not sites:
|
||||||
print("Unable to detect site engine, lets generate checking features")
|
print("Unable to detect site engine, lets generate checking features")
|
||||||
sites = [
|
sites = [
|
||||||
@@ -304,6 +326,7 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
|
|||||||
|
|
||||||
sem = asyncio.Semaphore(1)
|
sem = asyncio.Semaphore(1)
|
||||||
|
|
||||||
|
print("Checking, please wait...")
|
||||||
found = False
|
found = False
|
||||||
chosen_site = None
|
chosen_site = None
|
||||||
for s in sites:
|
for s in sites:
|
||||||
@@ -320,17 +343,26 @@ async def submit_dialog(db, url_exists, cookie_file, logger):
|
|||||||
print(
|
print(
|
||||||
"Try to run this mode again and increase features count or choose others."
|
"Try to run this mode again and increase features count or choose others."
|
||||||
)
|
)
|
||||||
|
return False
|
||||||
else:
|
else:
|
||||||
if (
|
if (
|
||||||
input(
|
input(
|
||||||
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
|
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
|
||||||
).lower()
|
)
|
||||||
in "y"
|
.lower()
|
||||||
|
.strip("y")
|
||||||
):
|
):
|
||||||
|
return False
|
||||||
|
|
||||||
|
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
||||||
|
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
|
||||||
|
rank = get_alexa_rank(chosen_site.url_main)
|
||||||
|
if rank:
|
||||||
|
print(f'New alexa rank: {rank}')
|
||||||
|
chosen_site.alexa_rank = rank
|
||||||
|
|
||||||
logger.debug(chosen_site.json)
|
logger.debug(chosen_site.json)
|
||||||
site_data = chosen_site.strip_engine_data()
|
site_data = chosen_site.strip_engine_data()
|
||||||
logger.debug(site_data.json)
|
logger.debug(site_data.json)
|
||||||
db.update_site(site_data)
|
db.update_site(site_data)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
|
||||||
|
|||||||
+12
-3
@@ -1,5 +1,7 @@
|
|||||||
|
import ast
|
||||||
import re
|
import re
|
||||||
import random
|
import random
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_USER_AGENTS = [
|
DEFAULT_USER_AGENTS = [
|
||||||
@@ -55,14 +57,20 @@ class URLMatcher:
|
|||||||
url_main_part = self.extract_main_part(url)
|
url_main_part = self.extract_main_part(url)
|
||||||
for c in self.UNSAFE_SYMBOLS:
|
for c in self.UNSAFE_SYMBOLS:
|
||||||
url_main_part = url_main_part.replace(c, f"\\{c}")
|
url_main_part = url_main_part.replace(c, f"\\{c}")
|
||||||
username_regexp = username_regexp or ".+?"
|
prepared_username_regexp = (username_regexp or ".+?").lstrip('^').rstrip('$')
|
||||||
|
|
||||||
url_regexp = url_main_part.replace("{username}", f"({username_regexp})")
|
url_regexp = url_main_part.replace(
|
||||||
|
"{username}", f"({prepared_username_regexp})"
|
||||||
|
)
|
||||||
regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
|
regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
|
||||||
|
|
||||||
return re.compile(regexp_str)
|
return re.compile(regexp_str)
|
||||||
|
|
||||||
|
|
||||||
|
def ascii_data_display(data: str) -> Any:
|
||||||
|
return ast.literal_eval(data)
|
||||||
|
|
||||||
|
|
||||||
def get_dict_ascii_tree(items, prepend="", new_line=True):
|
def get_dict_ascii_tree(items, prepend="", new_line=True):
|
||||||
text = ""
|
text = ""
|
||||||
for num, item in enumerate(items):
|
for num, item in enumerate(items):
|
||||||
@@ -73,7 +81,8 @@ def get_dict_ascii_tree(items, prepend="", new_line=True):
|
|||||||
if field_value.startswith("['"):
|
if field_value.startswith("['"):
|
||||||
is_last_item = num == len(items) - 1
|
is_last_item = num == len(items) - 1
|
||||||
prepend_symbols = " " * 3 if is_last_item else " ┃ "
|
prepend_symbols = " " * 3 if is_last_item else " ┃ "
|
||||||
field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols)
|
data = ascii_data_display(field_value)
|
||||||
|
field_value = get_dict_ascii_tree(data, prepend_symbols)
|
||||||
text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
|
text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
|
||||||
else:
|
else:
|
||||||
text += f"\n{prepend}{box_symbol} {item}"
|
text += f"\n{prepend}{box_symbol} {item}"
|
||||||
|
|||||||
+1
-1
@@ -26,7 +26,7 @@ python-socks==1.1.2
|
|||||||
requests>=2.24.0
|
requests>=2.24.0
|
||||||
requests-futures==1.0.0
|
requests-futures==1.0.0
|
||||||
six==1.15.0
|
six==1.15.0
|
||||||
socid-extractor>=0.0.16
|
socid-extractor>=0.0.20
|
||||||
soupsieve==2.1
|
soupsieve==2.1
|
||||||
stem==1.8.0
|
stem==1.8.0
|
||||||
torrequest==0.1.0
|
torrequest==0.1.0
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
|
|||||||
requires = rf.read().splitlines()
|
requires = rf.read().splitlines()
|
||||||
|
|
||||||
setup(name='maigret',
|
setup(name='maigret',
|
||||||
version='0.2.0',
|
version='0.2.4',
|
||||||
description='Collect a dossier on a person by username from a huge number of sites',
|
description='Collect a dossier on a person by username from a huge number of sites',
|
||||||
long_description=long_description,
|
long_description=long_description,
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
flake8==3.8.4
|
||||||
|
pytest==6.2.4
|
||||||
|
pytest-asyncio==0.14.0
|
||||||
|
pytest-cov==2.10.1
|
||||||
|
pytest-httpserver==1.0.0
|
||||||
|
pytest-rerunfailures==9.1.1
|
||||||
@@ -1,2 +1,4 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
pytest tests
|
coverage run --source=./maigret -m pytest tests
|
||||||
|
coverage report -m
|
||||||
|
coverage html
|
||||||
|
|||||||
+25
-3
@@ -6,10 +6,14 @@ import pytest
|
|||||||
from _pytest.mark import Mark
|
from _pytest.mark import Mark
|
||||||
|
|
||||||
from maigret.sites import MaigretDatabase
|
from maigret.sites import MaigretDatabase
|
||||||
|
from maigret.maigret import setup_arguments_parser
|
||||||
|
|
||||||
|
|
||||||
CUR_PATH = os.path.dirname(os.path.realpath(__file__))
|
CUR_PATH = os.path.dirname(os.path.realpath(__file__))
|
||||||
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
|
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
|
||||||
empty_mark = Mark('', [], {})
|
TEST_JSON_FILE = os.path.join(CUR_PATH, 'db.json')
|
||||||
|
LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
|
||||||
|
empty_mark = Mark('', (), {})
|
||||||
|
|
||||||
|
|
||||||
def by_slow_marker(item):
|
def by_slow_marker(item):
|
||||||
@@ -33,9 +37,17 @@ def remove_test_reports():
|
|||||||
|
|
||||||
@pytest.fixture(scope='session')
|
@pytest.fixture(scope='session')
|
||||||
def default_db():
|
def default_db():
|
||||||
db = MaigretDatabase().load_from_file(JSON_FILE)
|
return MaigretDatabase().load_from_file(JSON_FILE)
|
||||||
|
|
||||||
return db
|
|
||||||
|
@pytest.fixture(scope='function')
|
||||||
|
def test_db():
|
||||||
|
return MaigretDatabase().load_from_file(TEST_JSON_FILE)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope='function')
|
||||||
|
def local_test_db():
|
||||||
|
return MaigretDatabase().load_from_file(LOCAL_TEST_JSON_FILE)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
@@ -43,3 +55,13 @@ def reports_autoclean():
|
|||||||
remove_test_reports()
|
remove_test_reports()
|
||||||
yield
|
yield
|
||||||
remove_test_reports()
|
remove_test_reports()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope='session')
|
||||||
|
def argparser():
|
||||||
|
return setup_arguments_parser()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def httpserver_listen_address():
|
||||||
|
return ("localhost", 8989)
|
||||||
|
|||||||
@@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"engines": {},
|
||||||
|
"sites": {
|
||||||
|
"GooglePlayStore": {
|
||||||
|
"tags": ["global", "us"],
|
||||||
|
"disabled": false,
|
||||||
|
"checkType": "status_code",
|
||||||
|
"alexaRank": 1,
|
||||||
|
"url": "https://play.google.com/store/apps/developer?id={username}",
|
||||||
|
"urlMain": "https://play.google.com/store",
|
||||||
|
"usernameClaimed": "Facebook_nosuchname",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"Reddit": {
|
||||||
|
"tags": ["news", "social", "us"],
|
||||||
|
"checkType": "status_code",
|
||||||
|
"presenseStrs": ["totalKarma"],
|
||||||
|
"disabled": true,
|
||||||
|
"alexaRank": 17,
|
||||||
|
"url": "https://www.reddit.com/user/{username}",
|
||||||
|
"urlMain": "https://www.reddit.com/",
|
||||||
|
"usernameClaimed": "blue",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"engines": {},
|
||||||
|
"sites": {
|
||||||
|
"StatusCode": {
|
||||||
|
"checkType": "status_code",
|
||||||
|
"url": "http://localhost:8989/url?id={username}",
|
||||||
|
"urlMain": "http://localhost:8989/",
|
||||||
|
"usernameClaimed": "claimed",
|
||||||
|
"usernameUnclaimed": "unclaimed"
|
||||||
|
},
|
||||||
|
"Message": {
|
||||||
|
"checkType": "message",
|
||||||
|
"url": "http://localhost:8989/url?id={username}",
|
||||||
|
"urlMain": "http://localhost:8989/",
|
||||||
|
"presenseStrs": ["user", "profile"],
|
||||||
|
"absenseStrs": ["not found", "404"],
|
||||||
|
"usernameClaimed": "claimed",
|
||||||
|
"usernameUnclaimed": "unclaimed"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -22,6 +22,7 @@ httpbin.org FALSE / FALSE 0 a b
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="periodically fails")
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_twitter_activation(default_db):
|
def test_twitter_activation(default_db):
|
||||||
twitter_site = default_db.sites_dict['Twitter']
|
twitter_site = default_db.sites_dict['Twitter']
|
||||||
|
|||||||
@@ -0,0 +1,65 @@
|
|||||||
|
from mock import Mock
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from maigret import search
|
||||||
|
|
||||||
|
|
||||||
|
def site_result_except(server, username, **kwargs):
|
||||||
|
query = f'id={username}'
|
||||||
|
server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_checking_by_status_code(httpserver, local_test_db):
|
||||||
|
sites_dict = local_test_db.sites_dict
|
||||||
|
|
||||||
|
site_result_except(httpserver, 'claimed', status=200)
|
||||||
|
site_result_except(httpserver, 'unclaimed', status=404)
|
||||||
|
|
||||||
|
result = await search('claimed', site_dict=sites_dict, logger=Mock())
|
||||||
|
assert result['StatusCode']['status'].is_found() is True
|
||||||
|
|
||||||
|
result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
|
||||||
|
assert result['StatusCode']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_checking_by_message_positive_full(httpserver, local_test_db):
|
||||||
|
sites_dict = local_test_db.sites_dict
|
||||||
|
|
||||||
|
site_result_except(httpserver, 'claimed', response_data="user profile")
|
||||||
|
site_result_except(httpserver, 'unclaimed', response_data="404 not found")
|
||||||
|
|
||||||
|
result = await search('claimed', site_dict=sites_dict, logger=Mock())
|
||||||
|
assert result['Message']['status'].is_found() is True
|
||||||
|
|
||||||
|
result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
|
||||||
|
assert result['Message']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_checking_by_message_positive_part(httpserver, local_test_db):
|
||||||
|
sites_dict = local_test_db.sites_dict
|
||||||
|
|
||||||
|
site_result_except(httpserver, 'claimed', response_data="profile")
|
||||||
|
site_result_except(httpserver, 'unclaimed', response_data="404")
|
||||||
|
|
||||||
|
result = await search('claimed', site_dict=sites_dict, logger=Mock())
|
||||||
|
assert result['Message']['status'].is_found() is True
|
||||||
|
|
||||||
|
result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
|
||||||
|
assert result['Message']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_checking_by_message_negative(httpserver, local_test_db):
|
||||||
|
sites_dict = local_test_db.sites_dict
|
||||||
|
|
||||||
|
site_result_except(httpserver, 'claimed', response_data="")
|
||||||
|
site_result_except(httpserver, 'unclaimed', response_data="user 404")
|
||||||
|
|
||||||
|
result = await search('claimed', site_dict=sites_dict, logger=Mock())
|
||||||
|
assert result['Message']['status'].is_found() is False
|
||||||
|
|
||||||
|
result = await search('unclaimed', site_dict=sites_dict, logger=Mock())
|
||||||
|
assert result['Message']['status'].is_found() is True
|
||||||
@@ -0,0 +1,93 @@
|
|||||||
|
"""Maigret command-line arguments parsing tests"""
|
||||||
|
from argparse import Namespace
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
DEFAULT_ARGS: Dict[str, Any] = {
|
||||||
|
'all_sites': False,
|
||||||
|
'connections': 100,
|
||||||
|
'cookie_file': None,
|
||||||
|
'csv': False,
|
||||||
|
'db_file': None,
|
||||||
|
'debug': False,
|
||||||
|
'disable_extracting': False,
|
||||||
|
'disable_recursive_search': False,
|
||||||
|
'folderoutput': 'reports',
|
||||||
|
'html': False,
|
||||||
|
'id_type': 'username',
|
||||||
|
'ignore_ids_list': [],
|
||||||
|
'info': False,
|
||||||
|
'json': '',
|
||||||
|
'new_site_to_submit': False,
|
||||||
|
'no_color': False,
|
||||||
|
'no_progressbar': False,
|
||||||
|
'parse_url': '',
|
||||||
|
'pdf': False,
|
||||||
|
'print_check_errors': False,
|
||||||
|
'print_not_found': False,
|
||||||
|
'proxy': None,
|
||||||
|
'retries': 1,
|
||||||
|
'self_check': False,
|
||||||
|
'site_list': [],
|
||||||
|
'stats': False,
|
||||||
|
'tags': '',
|
||||||
|
'timeout': 30,
|
||||||
|
'top_sites': 500,
|
||||||
|
'txt': False,
|
||||||
|
'use_disabled_sites': False,
|
||||||
|
'username': [],
|
||||||
|
'verbose': False,
|
||||||
|
'xmind': False,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_args_search_mode(argparser):
|
||||||
|
args = argparser.parse_args('username'.split())
|
||||||
|
|
||||||
|
assert args.username == ['username']
|
||||||
|
|
||||||
|
want_args = dict(DEFAULT_ARGS)
|
||||||
|
want_args.update({'username': ['username']})
|
||||||
|
|
||||||
|
assert args == Namespace(**want_args)
|
||||||
|
|
||||||
|
|
||||||
|
def test_args_search_mode_several_usernames(argparser):
|
||||||
|
args = argparser.parse_args('username1 username2'.split())
|
||||||
|
|
||||||
|
assert args.username == ['username1', 'username2']
|
||||||
|
|
||||||
|
want_args = dict(DEFAULT_ARGS)
|
||||||
|
want_args.update({'username': ['username1', 'username2']})
|
||||||
|
|
||||||
|
assert args == Namespace(**want_args)
|
||||||
|
|
||||||
|
|
||||||
|
def test_args_self_check_mode(argparser):
|
||||||
|
args = argparser.parse_args('--self-check --site GitHub'.split())
|
||||||
|
|
||||||
|
want_args = dict(DEFAULT_ARGS)
|
||||||
|
want_args.update(
|
||||||
|
{
|
||||||
|
'self_check': True,
|
||||||
|
'site_list': ['GitHub'],
|
||||||
|
'username': [],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
assert args == Namespace(**want_args)
|
||||||
|
|
||||||
|
|
||||||
|
def test_args_multiple_sites(argparser):
|
||||||
|
args = argparser.parse_args(
|
||||||
|
'--site GitHub VK --site PornHub --site Taringa,Steam'.split()
|
||||||
|
)
|
||||||
|
|
||||||
|
want_args = dict(DEFAULT_ARGS)
|
||||||
|
want_args.update(
|
||||||
|
{
|
||||||
|
'site_list': ['GitHub', 'PornHub', 'Taringa,Steam'],
|
||||||
|
'username': ['VK'],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
assert args == Namespace(**want_args)
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
"""Maigret data test functions"""
|
||||||
|
|
||||||
|
from maigret.utils import is_country_tag
|
||||||
|
from maigret.sites import SUPPORTED_TAGS
|
||||||
|
|
||||||
|
|
||||||
|
def test_tags_validity(default_db):
|
||||||
|
unknown_tags = set()
|
||||||
|
|
||||||
|
for site in default_db.sites:
|
||||||
|
for tag in filter(lambda x: not is_country_tag(x), site.tags):
|
||||||
|
if tag not in SUPPORTED_TAGS:
|
||||||
|
unknown_tags.add(tag)
|
||||||
|
|
||||||
|
assert unknown_tags == set()
|
||||||
+136
-55
@@ -1,96 +1,177 @@
|
|||||||
"""Maigret main module test functions"""
|
"""Maigret main module test functions"""
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import copy
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from mock import Mock
|
from mock import Mock
|
||||||
|
|
||||||
from maigret.maigret import self_check
|
from maigret.maigret import self_check, maigret
|
||||||
from maigret.sites import MaigretDatabase
|
from maigret.maigret import (
|
||||||
|
extract_ids_from_page,
|
||||||
|
extract_ids_from_results,
|
||||||
|
extract_ids_from_url,
|
||||||
|
)
|
||||||
|
from maigret.sites import MaigretSite
|
||||||
|
from maigret.result import QueryResult, QueryStatus
|
||||||
|
|
||||||
EXAMPLE_DB = {
|
|
||||||
'engines': {},
|
RESULTS_EXAMPLE = {
|
||||||
'sites': {
|
'Reddit': {
|
||||||
"GooglePlayStore": {
|
'cookies': None,
|
||||||
"tags": ["global", "us"],
|
'parsing_enabled': False,
|
||||||
"disabled": False,
|
'url_main': 'https://www.reddit.com/',
|
||||||
"checkType": "status_code",
|
'username': 'Facebook',
|
||||||
"alexaRank": 1,
|
|
||||||
"url": "https://play.google.com/store/apps/developer?id={username}",
|
|
||||||
"urlMain": "https://play.google.com/store",
|
|
||||||
"usernameClaimed": "Facebook_nosuchname",
|
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
|
||||||
},
|
|
||||||
"Reddit": {
|
|
||||||
"tags": ["news", "social", "us"],
|
|
||||||
"checkType": "status_code",
|
|
||||||
"presenseStrs": ["totalKarma"],
|
|
||||||
"disabled": True,
|
|
||||||
"alexaRank": 17,
|
|
||||||
"url": "https://www.reddit.com/user/{username}",
|
|
||||||
"urlMain": "https://www.reddit.com/",
|
|
||||||
"usernameClaimed": "blue",
|
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
|
||||||
},
|
},
|
||||||
|
'GooglePlayStore': {
|
||||||
|
'cookies': None,
|
||||||
|
'http_status': 200,
|
||||||
|
'is_similar': False,
|
||||||
|
'parsing_enabled': False,
|
||||||
|
'rank': 1,
|
||||||
|
'url_main': 'https://play.google.com/store',
|
||||||
|
'url_user': 'https://play.google.com/store/apps/developer?id=Facebook',
|
||||||
|
'username': 'Facebook',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_self_check_db_positive_disable():
|
def test_self_check_db_positive_disable(test_db):
|
||||||
logger = Mock()
|
logger = Mock()
|
||||||
db = MaigretDatabase()
|
assert test_db.sites[0].disabled is False
|
||||||
db.load_from_json(EXAMPLE_DB)
|
|
||||||
|
|
||||||
assert db.sites[0].disabled == False
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
loop.run_until_complete(self_check(db, db.sites_dict, logger, silent=True))
|
loop.run_until_complete(
|
||||||
|
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
||||||
|
)
|
||||||
|
|
||||||
assert db.sites[0].disabled == True
|
assert test_db.sites[0].disabled is True
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_self_check_db_positive_enable():
|
def test_self_check_db_positive_enable(test_db):
|
||||||
logger = Mock()
|
logger = Mock()
|
||||||
db = MaigretDatabase()
|
|
||||||
db.load_from_json(EXAMPLE_DB)
|
|
||||||
|
|
||||||
db.sites[0].disabled = True
|
test_db.sites[0].disabled = True
|
||||||
db.sites[0].username_claimed = 'Facebook'
|
test_db.sites[0].username_claimed = 'Facebook'
|
||||||
assert db.sites[0].disabled == True
|
assert test_db.sites[0].disabled is True
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
loop.run_until_complete(self_check(db, db.sites_dict, logger, silent=True))
|
loop.run_until_complete(
|
||||||
|
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
||||||
|
)
|
||||||
|
|
||||||
assert db.sites[0].disabled == False
|
assert test_db.sites[0].disabled is False
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_self_check_db_negative_disabled():
|
def test_self_check_db_negative_disabled(test_db):
|
||||||
logger = Mock()
|
logger = Mock()
|
||||||
db = MaigretDatabase()
|
|
||||||
db.load_from_json(EXAMPLE_DB)
|
|
||||||
|
|
||||||
db.sites[0].disabled = True
|
test_db.sites[0].disabled = True
|
||||||
assert db.sites[0].disabled == True
|
assert test_db.sites[0].disabled is True
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
loop.run_until_complete(self_check(db, db.sites_dict, logger, silent=True))
|
loop.run_until_complete(
|
||||||
|
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
||||||
|
)
|
||||||
|
|
||||||
assert db.sites[0].disabled == True
|
assert test_db.sites[0].disabled is True
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_self_check_db_negative_enabled():
|
def test_self_check_db_negative_enabled(test_db):
|
||||||
logger = Mock()
|
logger = Mock()
|
||||||
db = MaigretDatabase()
|
|
||||||
db.load_from_json(EXAMPLE_DB)
|
|
||||||
|
|
||||||
db.sites[0].disabled = False
|
test_db.sites[0].disabled = False
|
||||||
db.sites[0].username_claimed = 'Facebook'
|
test_db.sites[0].username_claimed = 'Facebook'
|
||||||
assert db.sites[0].disabled == False
|
assert test_db.sites[0].disabled is False
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
loop.run_until_complete(self_check(db, db.sites_dict, logger, silent=True))
|
loop.run_until_complete(
|
||||||
|
self_check(test_db, test_db.sites_dict, logger, silent=True)
|
||||||
|
)
|
||||||
|
|
||||||
assert db.sites[0].disabled == False
|
assert test_db.sites[0].disabled is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
|
def test_maigret_results(test_db):
|
||||||
|
logger = Mock()
|
||||||
|
|
||||||
|
username = 'Facebook'
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
results = loop.run_until_complete(
|
||||||
|
maigret(username, site_dict=test_db.sites_dict, logger=logger, timeout=30)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert isinstance(results, dict)
|
||||||
|
|
||||||
|
reddit_site = results['Reddit']['site']
|
||||||
|
assert isinstance(reddit_site, MaigretSite)
|
||||||
|
|
||||||
|
assert reddit_site.json == {
|
||||||
|
'tags': ['news', 'social', 'us'],
|
||||||
|
'checkType': 'status_code',
|
||||||
|
'presenseStrs': ['totalKarma'],
|
||||||
|
'disabled': True,
|
||||||
|
'alexaRank': 17,
|
||||||
|
'url': 'https://www.reddit.com/user/{username}',
|
||||||
|
'urlMain': 'https://www.reddit.com/',
|
||||||
|
'usernameClaimed': 'blue',
|
||||||
|
'usernameUnclaimed': 'noonewouldeverusethis7',
|
||||||
|
}
|
||||||
|
|
||||||
|
del results['Reddit']['site']
|
||||||
|
del results['GooglePlayStore']['site']
|
||||||
|
|
||||||
|
reddit_status = results['Reddit']['status']
|
||||||
|
assert isinstance(reddit_status, QueryResult)
|
||||||
|
assert reddit_status.status == QueryStatus.ILLEGAL
|
||||||
|
|
||||||
|
playstore_status = results['GooglePlayStore']['status']
|
||||||
|
assert isinstance(playstore_status, QueryResult)
|
||||||
|
assert playstore_status.status == QueryStatus.CLAIMED
|
||||||
|
|
||||||
|
del results['Reddit']['status']
|
||||||
|
del results['GooglePlayStore']['status']
|
||||||
|
|
||||||
|
assert results['Reddit'].get('future') is None
|
||||||
|
del results['GooglePlayStore']['future']
|
||||||
|
|
||||||
|
assert results == RESULTS_EXAMPLE
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_ids_from_url(default_db):
|
||||||
|
assert extract_ids_from_url('https://www.reddit.com/user/test', default_db) == {
|
||||||
|
'test': 'username'
|
||||||
|
}
|
||||||
|
assert extract_ids_from_url('https://vk.com/id123', default_db) == {'123': 'vk_id'}
|
||||||
|
assert extract_ids_from_url('https://vk.com/ida123', default_db) == {
|
||||||
|
'ida123': 'username'
|
||||||
|
}
|
||||||
|
assert extract_ids_from_url(
|
||||||
|
'https://my.mail.ru/yandex.ru/dipres8904/', default_db
|
||||||
|
) == {'dipres8904': 'username'}
|
||||||
|
assert extract_ids_from_url(
|
||||||
|
'https://reviews.yandex.ru/user/adbced123', default_db
|
||||||
|
) == {'adbced123': 'yandex_public_id'}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
|
def test_extract_ids_from_page(test_db):
|
||||||
|
logger = Mock()
|
||||||
|
extract_ids_from_page('https://www.reddit.com/user/test', logger) == {
|
||||||
|
'test': 'username'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_ids_from_results(test_db):
|
||||||
|
TEST_EXAMPLE = copy.deepcopy(RESULTS_EXAMPLE)
|
||||||
|
TEST_EXAMPLE['Reddit']['ids_usernames'] = {'test1': 'yandex_public_id'}
|
||||||
|
TEST_EXAMPLE['Reddit']['ids_links'] = ['https://www.reddit.com/user/test2']
|
||||||
|
|
||||||
|
extract_ids_from_results(TEST_EXAMPLE, test_db) == {
|
||||||
|
'test1': 'yandex_public_id',
|
||||||
|
'test2': 'username',
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,64 @@
|
|||||||
|
from maigret.errors import CheckError
|
||||||
|
from maigret.notify import QueryNotifyPrint
|
||||||
|
from maigret.result import QueryStatus, QueryResult
|
||||||
|
|
||||||
|
|
||||||
|
def test_notify_illegal():
|
||||||
|
n = QueryNotifyPrint(color=False)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
n.update(
|
||||||
|
QueryResult(
|
||||||
|
username="test",
|
||||||
|
status=QueryStatus.ILLEGAL,
|
||||||
|
site_name="TEST_SITE",
|
||||||
|
site_url_user="http://example.com/test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
== "[-] TEST_SITE: Illegal Username Format For This Site!"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_notify_claimed():
|
||||||
|
n = QueryNotifyPrint(color=False)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
n.update(
|
||||||
|
QueryResult(
|
||||||
|
username="test",
|
||||||
|
status=QueryStatus.CLAIMED,
|
||||||
|
site_name="TEST_SITE",
|
||||||
|
site_url_user="http://example.com/test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
== "[+] TEST_SITE: http://example.com/test"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_notify_available():
|
||||||
|
n = QueryNotifyPrint(color=False)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
n.update(
|
||||||
|
QueryResult(
|
||||||
|
username="test",
|
||||||
|
status=QueryStatus.AVAILABLE,
|
||||||
|
site_name="TEST_SITE",
|
||||||
|
site_url_user="http://example.com/test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
== "[-] TEST_SITE: Not found!"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_notify_unknown():
|
||||||
|
n = QueryNotifyPrint(color=False)
|
||||||
|
result = QueryResult(
|
||||||
|
username="test",
|
||||||
|
status=QueryStatus.UNKNOWN,
|
||||||
|
site_name="TEST_SITE",
|
||||||
|
site_url_user="http://example.com/test",
|
||||||
|
)
|
||||||
|
result.error = CheckError('Type', 'Reason')
|
||||||
|
|
||||||
|
assert n.update(result) == "[?] TEST_SITE: Type error: Reason"
|
||||||
+17
-3
@@ -16,8 +16,14 @@ from maigret.report import (
|
|||||||
generate_report_template,
|
generate_report_template,
|
||||||
generate_report_context,
|
generate_report_context,
|
||||||
generate_json_report,
|
generate_json_report,
|
||||||
|
get_plaintext_report,
|
||||||
)
|
)
|
||||||
from maigret.result import QueryResult, QueryStatus
|
from maigret.result import QueryResult, QueryStatus
|
||||||
|
from maigret.sites import MaigretSite
|
||||||
|
|
||||||
|
|
||||||
|
GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
|
||||||
|
BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
|
||||||
|
|
||||||
EXAMPLE_RESULTS = {
|
EXAMPLE_RESULTS = {
|
||||||
'GitHub': {
|
'GitHub': {
|
||||||
@@ -35,12 +41,10 @@ EXAMPLE_RESULTS = {
|
|||||||
'http_status': 200,
|
'http_status': 200,
|
||||||
'is_similar': False,
|
'is_similar': False,
|
||||||
'rank': 78,
|
'rank': 78,
|
||||||
|
'site': MaigretSite('test', {}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
|
|
||||||
BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
|
|
||||||
|
|
||||||
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
|
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
|
||||||
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
|
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
|
||||||
GOOD_500PX_RESULT.ids_data = {
|
GOOD_500PX_RESULT.ids_data = {
|
||||||
@@ -343,3 +347,13 @@ def test_pdf_report():
|
|||||||
save_pdf_report(report_name, context)
|
save_pdf_report(report_name, context)
|
||||||
|
|
||||||
assert os.path.exists(report_name)
|
assert os.path.exists(report_name)
|
||||||
|
|
||||||
|
|
||||||
|
def test_text_report():
|
||||||
|
context = generate_report_context(TEST)
|
||||||
|
report_text = get_plaintext_report(context)
|
||||||
|
|
||||||
|
for brief_part in SUPPOSED_BRIEF.split():
|
||||||
|
assert brief_part in report_text
|
||||||
|
assert 'us' in report_text
|
||||||
|
assert 'photo' in report_text
|
||||||
|
|||||||
@@ -103,6 +103,7 @@ def test_saving_site_error():
|
|||||||
|
|
||||||
amperka = db.sites[0]
|
amperka = db.sites[0]
|
||||||
assert len(amperka.errors) == 2
|
assert len(amperka.errors) == 2
|
||||||
|
assert len(amperka.errors_dict) == 2
|
||||||
|
|
||||||
assert amperka.strip_engine_data().errors == {'error1': 'text1'}
|
assert amperka.strip_engine_data().errors == {'error1': 'text1'}
|
||||||
assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}
|
assert amperka.strip_engine_data().json['errors'] == {'error1': 'text1'}
|
||||||
|
|||||||
+18
-6
@@ -40,13 +40,13 @@ def test_case_convert_camel_with_digits_to_snake():
|
|||||||
|
|
||||||
|
|
||||||
def test_is_country_tag():
|
def test_is_country_tag():
|
||||||
assert is_country_tag('ru') == True
|
assert is_country_tag('ru') is True
|
||||||
assert is_country_tag('FR') == True
|
assert is_country_tag('FR') is True
|
||||||
|
|
||||||
assert is_country_tag('a1') == False
|
assert is_country_tag('a1') is False
|
||||||
assert is_country_tag('dating') == False
|
assert is_country_tag('dating') is False
|
||||||
|
|
||||||
assert is_country_tag('global') == True
|
assert is_country_tag('global') is True
|
||||||
|
|
||||||
|
|
||||||
def test_enrich_link_str():
|
def test_enrich_link_str():
|
||||||
@@ -57,6 +57,11 @@ def test_enrich_link_str():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_url_extract_main_part_negative():
|
||||||
|
url_main_part = 'None'
|
||||||
|
assert URLMatcher.extract_main_part(url_main_part) == ''
|
||||||
|
|
||||||
|
|
||||||
def test_url_extract_main_part():
|
def test_url_extract_main_part():
|
||||||
url_main_part = 'flickr.com/photos/alexaimephotography'
|
url_main_part = 'flickr.com/photos/alexaimephotography'
|
||||||
|
|
||||||
@@ -68,8 +73,10 @@ def test_url_extract_main_part():
|
|||||||
]
|
]
|
||||||
|
|
||||||
url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
|
url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
|
||||||
|
# combine parts variations
|
||||||
for url_parts in itertools.product(*parts):
|
for url_parts in itertools.product(*parts):
|
||||||
url = ''.join(url_parts)
|
url = ''.join(url_parts)
|
||||||
|
# ensure all combinations give valid main part
|
||||||
assert URLMatcher.extract_main_part(url) == url_main_part
|
assert URLMatcher.extract_main_part(url) == url_main_part
|
||||||
assert not url_regexp.match(url) is None
|
assert not url_regexp.match(url) is None
|
||||||
|
|
||||||
@@ -84,8 +91,10 @@ def test_url_make_profile_url_regexp():
|
|||||||
['/', ''],
|
['/', ''],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# combine parts variations
|
||||||
for url_parts in itertools.product(*parts):
|
for url_parts in itertools.product(*parts):
|
||||||
url = ''.join(url_parts)
|
url = ''.join(url_parts)
|
||||||
|
# ensure all combinations match pattern
|
||||||
assert (
|
assert (
|
||||||
URLMatcher.make_profile_url_regexp(url).pattern
|
URLMatcher.make_profile_url_regexp(url).pattern
|
||||||
== r'^https?://(www.)?flickr\.com/photos/(.+?)$'
|
== r'^https?://(www.)?flickr\.com/photos/(.+?)$'
|
||||||
@@ -98,6 +107,7 @@ def test_get_dict_ascii_tree():
|
|||||||
'legacy_id': '26403415',
|
'legacy_id': '26403415',
|
||||||
'username': 'alexaimephotographycars',
|
'username': 'alexaimephotographycars',
|
||||||
'name': 'Alex Aimé',
|
'name': 'Alex Aimé',
|
||||||
|
'links': "['www.instagram.com/street.reality.photography/']",
|
||||||
'created_at': '2018-05-04T10:17:01.000+0000',
|
'created_at': '2018-05-04T10:17:01.000+0000',
|
||||||
'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b',
|
'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b',
|
||||||
'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201',
|
'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201',
|
||||||
@@ -107,7 +117,7 @@ def test_get_dict_ascii_tree():
|
|||||||
'twitter_username': 'Alexaimephotogr',
|
'twitter_username': 'Alexaimephotogr',
|
||||||
}
|
}
|
||||||
|
|
||||||
ascii_tree = get_dict_ascii_tree(data.items())
|
ascii_tree = get_dict_ascii_tree(data.items(), prepend=" ")
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
ascii_tree
|
ascii_tree
|
||||||
@@ -116,6 +126,8 @@ def test_get_dict_ascii_tree():
|
|||||||
┣╸legacy_id: 26403415
|
┣╸legacy_id: 26403415
|
||||||
┣╸username: alexaimephotographycars
|
┣╸username: alexaimephotographycars
|
||||||
┣╸name: Alex Aimé
|
┣╸name: Alex Aimé
|
||||||
|
┣╸links:
|
||||||
|
┃ ┗╸ www.instagram.com/street.reality.photography/
|
||||||
┣╸created_at: 2018-05-04T10:17:01.000+0000
|
┣╸created_at: 2018-05-04T10:17:01.000+0000
|
||||||
┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
|
┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
|
||||||
┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
|
┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
|
||||||
|
|||||||
Executable
+57
@@ -0,0 +1,57 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import random
|
||||||
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
|
|
||||||
|
from maigret.maigret import MaigretDatabase
|
||||||
|
from maigret.submit import get_alexa_rank
|
||||||
|
|
||||||
|
|
||||||
|
def update_tags(site):
|
||||||
|
tags = []
|
||||||
|
if not site.tags:
|
||||||
|
print(f'Site {site.name} doesn\'t have tags')
|
||||||
|
else:
|
||||||
|
tags = site.tags
|
||||||
|
print(f'Site {site.name} tags: ' + ', '.join(tags))
|
||||||
|
|
||||||
|
print(f'URL: {site.url_main}')
|
||||||
|
|
||||||
|
new_tags = set(input('Enter new tags: ').split(', '))
|
||||||
|
if "disabled" in new_tags:
|
||||||
|
new_tags.remove("disabled")
|
||||||
|
site.disabled = True
|
||||||
|
|
||||||
|
print(f'Old alexa rank: {site.alexa_rank}')
|
||||||
|
rank = get_alexa_rank(site.url_main)
|
||||||
|
if rank:
|
||||||
|
print(f'New alexa rank: {rank}')
|
||||||
|
site.alexa_rank = rank
|
||||||
|
|
||||||
|
site.tags = [x for x in list(new_tags) if x]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
|
||||||
|
)
|
||||||
|
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
||||||
|
dest="base_file", default="maigret/resources/data.json",
|
||||||
|
help="JSON file with sites data to update.")
|
||||||
|
|
||||||
|
pool = list()
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
db = MaigretDatabase()
|
||||||
|
db.load_from_file(args.base_file).sites
|
||||||
|
|
||||||
|
while True:
|
||||||
|
site = random.choice(db.sites)
|
||||||
|
if site.engine == 'uCoz':
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not 'in' in site.tags:
|
||||||
|
continue
|
||||||
|
|
||||||
|
update_tags(site)
|
||||||
|
|
||||||
|
db.save_to_file(args.base_file)
|
||||||
+16
-11
@@ -37,15 +37,15 @@ def get_rank(domain_to_query, site, print_errors=True):
|
|||||||
try:
|
try:
|
||||||
#Get ranking for this site.
|
#Get ranking for this site.
|
||||||
site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
|
site.alexa_rank = int(root.find('.//REACH').attrib['RANK'])
|
||||||
country = root.find('.//COUNTRY')
|
# country = root.find('.//COUNTRY')
|
||||||
if not country is None and country.attrib:
|
# if not country is None and country.attrib:
|
||||||
country_code = country.attrib['CODE']
|
# country_code = country.attrib['CODE']
|
||||||
tags = set(site.tags)
|
# tags = set(site.tags)
|
||||||
if country_code:
|
# if country_code:
|
||||||
tags.add(country_code.lower())
|
# tags.add(country_code.lower())
|
||||||
site.tags = sorted(list(tags))
|
# site.tags = sorted(list(tags))
|
||||||
if site.type != 'username':
|
# if site.type != 'username':
|
||||||
site.disabled = False
|
# site.disabled = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if print_errors:
|
if print_errors:
|
||||||
logging.error(e)
|
logging.error(e)
|
||||||
@@ -74,6 +74,7 @@ if __name__ == '__main__':
|
|||||||
dest="base_file", default="maigret/resources/data.json",
|
dest="base_file", default="maigret/resources/data.json",
|
||||||
help="JSON file with sites data to update.")
|
help="JSON file with sites data to update.")
|
||||||
|
|
||||||
|
parser.add_argument('--with-rank', help='update with use of local data only', action='store_true')
|
||||||
parser.add_argument('--empty-only', help='update only sites without rating', action='store_true')
|
parser.add_argument('--empty-only', help='update only sites without rating', action='store_true')
|
||||||
parser.add_argument('--exclude-engine', help='do not update score with certain engine',
|
parser.add_argument('--exclude-engine', help='do not update score with certain engine',
|
||||||
action="append", dest="exclude_engine_list", default=[])
|
action="append", dest="exclude_engine_list", default=[])
|
||||||
@@ -87,22 +88,25 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
with open("sites.md", "w") as site_file:
|
with open("sites.md", "w") as site_file:
|
||||||
site_file.write(f"""
|
site_file.write(f"""
|
||||||
## List of supported sites: total {len(sites_subset)}\n
|
## List of supported sites (search methods): total {len(sites_subset)}\n
|
||||||
Rank data fetched from Alexa by domains.
|
Rank data fetched from Alexa by domains.
|
||||||
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
for site in sites_subset:
|
for site in sites_subset:
|
||||||
|
if not args.with_rank:
|
||||||
|
break
|
||||||
url_main = site.url_main
|
url_main = site.url_main
|
||||||
if site.alexa_rank < sys.maxsize and args.empty_only:
|
if site.alexa_rank < sys.maxsize and args.empty_only:
|
||||||
continue
|
continue
|
||||||
if args.exclude_engine_list and site.engine in args.exclude_engine_list:
|
if args.exclude_engine_list and site.engine in args.exclude_engine_list:
|
||||||
continue
|
continue
|
||||||
site.alexa_rank = 0
|
site.alexa_rank = 0
|
||||||
th = threading.Thread(target=get_rank, args=(url_main, site))
|
th = threading.Thread(target=get_rank, args=(url_main, site,))
|
||||||
pool.append((site.name, url_main, th))
|
pool.append((site.name, url_main, th))
|
||||||
th.start()
|
th.start()
|
||||||
|
|
||||||
|
if args.with_rank:
|
||||||
index = 1
|
index = 1
|
||||||
for site_name, url_main, th in pool:
|
for site_name, url_main, th in pool:
|
||||||
th.join()
|
th.join()
|
||||||
@@ -123,6 +127,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
url_main = site.url_main
|
url_main = site.url_main
|
||||||
valid_rank = get_step_rank(rank)
|
valid_rank = get_step_rank(rank)
|
||||||
all_tags = site.tags
|
all_tags = site.tags
|
||||||
|
all_tags.sort()
|
||||||
tags = ', ' + ', '.join(all_tags) if all_tags else ''
|
tags = ', ' + ', '.join(all_tags) if all_tags else ''
|
||||||
note = ''
|
note = ''
|
||||||
if site.disabled:
|
if site.disabled:
|
||||||
|
|||||||
Reference in New Issue
Block a user