mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-15 02:45:36 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4791a6fc96 |
@@ -1,2 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
python3 ./utils/update_site_data.py
|
|
||||||
@@ -15,14 +15,10 @@ assignees: soxoj
|
|||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
||||||
Info about Maigret version you are running and environment (`--version`, operation system, ISP provider):
|
Info about Maigret version you are running and environment (`--version`, operation system, ISP provuder):
|
||||||
<INSERT VERSION INFO HERE>
|
<INSERT VERSION INFO HERE>
|
||||||
|
|
||||||
How to reproduce this bug (commandline options / conditions):
|
How to reproduce this bug (commandline options / conditions):
|
||||||
<INSERT EXAMPLE OF CLI COMMAND HERE>
|
<INSERT EXAMPLE OF CLI COMMAND HERE>
|
||||||
|
|
||||||
<DESCRIPTION>
|
<DESCRIPTION>
|
||||||
|
|
||||||
<PASTE SCREENSHOT>
|
|
||||||
|
|
||||||
<ATTACH LOG FILE>
|
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
|
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
|
||||||
platforms: linux/amd64,linux/arm64
|
|
||||||
-
|
-
|
||||||
name: Image digest
|
name: Image digest
|
||||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ name: Package exe with PyInstaller - Windows
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ main ]
|
branches: [ main ]
|
||||||
|
pull_request:
|
||||||
|
branches: [ main ]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ name: Linting and testing
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ main ]
|
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ main ]
|
branches: [ main ]
|
||||||
types: [opened, synchronize, reopened]
|
types: [opened, synchronize, reopened]
|
||||||
@@ -13,7 +12,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: [3.7, 3.8, 3.9]
|
python-version: [3.6.9, 3.7, 3.8, 3.9]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ name: Update sites rating and statistics
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ dev ]
|
branches: [ main ]
|
||||||
types: [opened, synchronize]
|
types: [opened, synchronize]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|||||||
@@ -15,10 +15,6 @@ src/
|
|||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
*.ipynb
|
*.ipynb
|
||||||
|
|
||||||
# Logs and backups
|
|
||||||
*.log
|
|
||||||
*.bak
|
|
||||||
|
|
||||||
# Output files, except requirements.txt
|
# Output files, except requirements.txt
|
||||||
*.txt
|
*.txt
|
||||||
!requirements.txt
|
!requirements.txt
|
||||||
|
|||||||
@@ -2,103 +2,6 @@
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
## [0.4.4] - 2022-09-03
|
|
||||||
* Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433
|
|
||||||
* Drop Python 3.6 support by @soxoj in https://github.com/soxoj/maigret/pull/434
|
|
||||||
* Bump xhtml2pdf from 0.2.5 to 0.2.7 by @dependabot in https://github.com/soxoj/maigret/pull/409
|
|
||||||
* Bump reportlab from 3.6.6 to 3.6.9 by @dependabot in https://github.com/soxoj/maigret/pull/403
|
|
||||||
* Bump markupsafe from 2.0.1 to 2.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/389
|
|
||||||
* Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/384
|
|
||||||
* Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/438
|
|
||||||
* Update GH actions by @soxoj in https://github.com/soxoj/maigret/pull/439
|
|
||||||
* Bump tqdm from 4.63.0 to 4.64.0 by @dependabot in https://github.com/soxoj/maigret/pull/440
|
|
||||||
* Bump jinja2 from 3.0.3 to 3.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/441
|
|
||||||
* Bump soupsieve from 2.3.1 to 2.3.2 by @dependabot in https://github.com/soxoj/maigret/pull/436
|
|
||||||
* Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/442
|
|
||||||
* Bump pyvis from 0.1.9 to 0.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/443
|
|
||||||
* Bump pypdf2 from 1.27.4 to 1.27.6 by @dependabot in https://github.com/soxoj/maigret/pull/448
|
|
||||||
* Bump typing-extensions from 4.1.1 to 4.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/447
|
|
||||||
* Bump soupsieve from 2.3.2 to 2.3.2.post1 by @dependabot in https://github.com/soxoj/maigret/pull/444
|
|
||||||
* Bump pypdf2 from 1.27.6 to 1.27.7 by @dependabot in https://github.com/soxoj/maigret/pull/449
|
|
||||||
* Bump pypdf2 from 1.27.7 to 1.27.8 by @dependabot in https://github.com/soxoj/maigret/pull/450
|
|
||||||
* XMind 8 report warning and some docs update by @soxoj in https://github.com/soxoj/maigret/pull/452
|
|
||||||
* False positive fixes 24.04.22 by @soxoj in https://github.com/soxoj/maigret/pull/455
|
|
||||||
* Bump pypdf2 from 1.27.8 to 1.27.9 by @dependabot in https://github.com/soxoj/maigret/pull/456
|
|
||||||
* Bump pytest from 7.0.1 to 7.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/457
|
|
||||||
* Bump jinja2 from 3.1.1 to 3.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/460
|
|
||||||
* Ubisoft forums addition by @fen0s in https://github.com/soxoj/maigret/pull/461
|
|
||||||
* Add BYOND, Figma, BeatStars by @fen0s in https://github.com/soxoj/maigret/pull/462
|
|
||||||
* fix Figma username definition, add a bunch of sites by @fen0s in https://github.com/soxoj/maigret/pull/464
|
|
||||||
* Bump pypdf2 from 1.27.9 to 1.27.10 by @dependabot in https://github.com/soxoj/maigret/pull/465
|
|
||||||
* Bump pypdf2 from 1.27.10 to 1.27.12 by @dependabot in https://github.com/soxoj/maigret/pull/466
|
|
||||||
* Sites fixes 05 05 22 by @soxoj in https://github.com/soxoj/maigret/pull/469
|
|
||||||
* Bump pyvis from 0.2.0 to 0.2.1 by @dependabot in https://github.com/soxoj/maigret/pull/472
|
|
||||||
* Social analyzer websites, also fixing presense strs by @fen0s in https://github.com/soxoj/maigret/pull/471
|
|
||||||
* Updated logic of false positive risk estimating by @soxoj in https://github.com/soxoj/maigret/pull/475
|
|
||||||
* Improved usability of external progressbar func by @soxoj in https://github.com/soxoj/maigret/pull/476
|
|
||||||
* New sites added, some tags/rank update by @soxoj in https://github.com/soxoj/maigret/pull/477
|
|
||||||
* Added new sites by @soxoj in https://github.com/soxoj/maigret/pull/480
|
|
||||||
* Added new forums, updated ranks, some utils improvements by @soxoj in https://github.com/soxoj/maigret/pull/481
|
|
||||||
* Disabled sites with false positives results by @soxoj in https://github.com/soxoj/maigret/pull/482
|
|
||||||
* Bump certifi from 2021.10.8 to 2022.5.18.1 by @dependabot in https://github.com/soxoj/maigret/pull/488
|
|
||||||
* Bump psutil from 5.9.0 to 5.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/490
|
|
||||||
* Bump pypdf2 from 1.27.12 to 1.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/491
|
|
||||||
* Bump pypdf2 from 1.28.1 to 1.28.2 by @dependabot in https://github.com/soxoj/maigret/pull/493
|
|
||||||
* added and fixed some websites in data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/494
|
|
||||||
* Bump pypdf2 from 1.28.2 to 2.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/504
|
|
||||||
* Bump pefile from 2021.9.3 to 2022.5.30 by @dependabot in https://github.com/soxoj/maigret/pull/499
|
|
||||||
* Updated sites list, added disabled Anilist by @soxoj in https://github.com/soxoj/maigret/pull/502
|
|
||||||
* Bump lxml from 4.8.0 to 4.9.0 by @dependabot in https://github.com/soxoj/maigret/pull/503
|
|
||||||
* Compatibility with Python 10 by @soxoj in https://github.com/soxoj/maigret/pull/509
|
|
||||||
* feat: add .log & .bak files to gitignore in https://github.com/soxoj/maigret/pull/511
|
|
||||||
* fix some sites and delete abandoned by @fen0s in https://github.com/soxoj/maigret/pull/526
|
|
||||||
* Fixesjulyfirst by @fen0s in https://github.com/soxoj/maigret/pull/533
|
|
||||||
* yazbel, aboutcar, zhihu by @fen0s in https://github.com/soxoj/maigret/pull/531
|
|
||||||
* Fixes july third by @fen0s in https://github.com/soxoj/maigret/pull/535
|
|
||||||
* Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/539
|
|
||||||
* Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/540
|
|
||||||
* Bump reportlab from 3.6.9 to 3.6.11 by @dependabot in https://github.com/soxoj/maigret/pull/543
|
|
||||||
* Bump requests from 2.27.1 to 2.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/530
|
|
||||||
* Bump pypdf2 from 2.0.0 to 2.5.0 by @dependabot in https://github.com/soxoj/maigret/pull/542
|
|
||||||
* Bump xhtml2pdf from 0.2.7 to 0.2.8 by @dependabot in https://github.com/soxoj/maigret/pull/522
|
|
||||||
* Bump lxml from 4.9.0 to 4.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/538
|
|
||||||
* disable yandex music + set utf8 encoding by @fen0s in https://github.com/soxoj/maigret/pull/562
|
|
||||||
* fix false positives by @fen0s in https://github.com/soxoj/maigret/pull/577
|
|
||||||
* disable Instagram, fix two false positives by @fen0s in https://github.com/soxoj/maigret/pull/578
|
|
||||||
* Bump certifi from 2022.5.18.1 to 2022.6.15 by @dependabot in https://github.com/soxoj/maigret/pull/551
|
|
||||||
* August15 by @fen0s in https://github.com/soxoj/maigret/pull/591
|
|
||||||
* Bump pytest-httpserver from 1.0.4 to 1.0.5 by @dependabot in https://github.com/soxoj/maigret/pull/583
|
|
||||||
* Bump typing-extensions from 4.2.0 to 4.3.0 by @dependabot in https://github.com/soxoj/maigret/pull/549
|
|
||||||
* Bump colorama from 0.4.4 to 0.4.5 by @dependabot in https://github.com/soxoj/maigret/pull/548
|
|
||||||
* Bump chardet from 4.0.0 to 5.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/550
|
|
||||||
* Bump cloudscraper from 1.2.60 to 1.2.63 by @dependabot in https://github.com/soxoj/maigret/pull/600
|
|
||||||
* Bump flake8 from 4.0.1 to 5.0.4 by @dependabot in https://github.com/soxoj/maigret/pull/598
|
|
||||||
* Bump attrs from 21.4.0 to 22.1.0 by @dependabot in https://github.com/soxoj/maigret/pull/597
|
|
||||||
* Bump pytest-asyncio from 0.18.2 to 0.19.0 by @dependabot in https://github.com/soxoj/maigret/pull/601
|
|
||||||
* Bump pypdf2 from 2.5.0 to 2.10.4 by @dependabot in https://github.com/soxoj/maigret/pull/606
|
|
||||||
* Bump pytest from 7.1.2 to 7.1.3 by @dependabot in https://github.com/soxoj/maigret/pull/613
|
|
||||||
* Update sites.md -Gitmemory.com suppression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/610
|
|
||||||
* Bump cloudscraper from 1.2.63 to 1.2.64 by @dependabot in https://github.com/soxoj/maigret/pull/614
|
|
||||||
* Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/607
|
|
||||||
* add ProtonMail, disable 3 broken sites by @fen0s in https://github.com/soxoj/maigret/pull/619
|
|
||||||
* Bump tqdm from 4.64.0 to 4.64.1 by @dependabot in https://github.com/soxoj/maigret/pull/618
|
|
||||||
|
|
||||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.3...v0.4.4
|
|
||||||
|
|
||||||
## [0.4.3] - 2022-04-13
|
|
||||||
* Added Sites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/386
|
|
||||||
* added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/390
|
|
||||||
* Skipped broken tests by @soxoj in https://github.com/soxoj/maigret/pull/397
|
|
||||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/401
|
|
||||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/404
|
|
||||||
* Updated statistics by @soxoj in https://github.com/soxoj/maigret/pull/406
|
|
||||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/413
|
|
||||||
* Disabled houzz.com, updated sites statistics by @soxoj in https://github.com/soxoj/maigret/pull/422
|
|
||||||
* Fixed last false positives by @soxoj in https://github.com/soxoj/maigret/pull/424
|
|
||||||
* Fixed actual false positives by @soxoj in https://github.com/soxoj/maigret/pull/431
|
|
||||||
|
|
||||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.2...v0.4.3
|
|
||||||
|
|
||||||
## [0.4.2] - 2022-03-07
|
## [0.4.2] - 2022-03-07
|
||||||
* [ImgBot] Optimize images by @imgbot in https://github.com/soxoj/maigret/pull/319
|
* [ImgBot] Optimize images by @imgbot in https://github.com/soxoj/maigret/pull/319
|
||||||
* Bump pytest-asyncio from 0.17.0 to 0.17.1 by @dependabot in https://github.com/soxoj/maigret/pull/321
|
* Bump pytest-asyncio from 0.17.0 to 0.17.1 by @dependabot in https://github.com/soxoj/maigret/pull/321
|
||||||
|
|||||||
+9
-9
@@ -1,16 +1,16 @@
|
|||||||
FROM python:3.9-slim
|
FROM python:3.9-slim
|
||||||
LABEL maintainer="Soxoj <soxoj@protonmail.com>"
|
MAINTAINER Soxoj <soxoj@protonmail.com>
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
RUN pip install --no-cache-dir --upgrade pip
|
RUN pip install --upgrade pip
|
||||||
RUN apt-get update && \
|
RUN apt update && \
|
||||||
apt-get install --no-install-recommends -y \
|
apt install -y \
|
||||||
gcc \
|
gcc \
|
||||||
musl-dev \
|
musl-dev \
|
||||||
libxml2 \
|
libxml2 \
|
||||||
libxml2-dev \
|
libxml2-dev \
|
||||||
libxslt-dev \
|
libxslt-dev
|
||||||
&& \
|
RUN apt clean \
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /tmp/*
|
||||||
COPY . .
|
ADD . .
|
||||||
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install --no-cache-dir .
|
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install .
|
||||||
ENTRYPOINT ["maigret"]
|
ENTRYPOINT ["maigret"]
|
||||||
|
|||||||
@@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
## About
|
## About
|
||||||
|
|
||||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
**Maigret** collect a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||||
|
|
||||||
Currently supported more than 2500 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
|
Currently supported more than 2500 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
|
||||||
|
|
||||||
@@ -44,19 +44,23 @@ Standalone EXE-binaries for Windows are located in [Releases section](https://gi
|
|||||||
Also you can run Maigret using cloud shells and Jupyter notebooks (see buttons below).
|
Also you can run Maigret using cloud shells and Jupyter notebooks (see buttons below).
|
||||||
|
|
||||||
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
|
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
|
||||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
<a href="https://repl.it/github/soxoj/maigret"><img src="https://user-images.githubusercontent.com/27065646/92304596-bf719b00-ef7f-11ea-987f-2c1f3c323088.png" alt="Run on Repl.it" height="50"></a>
|
||||||
|
|
||||||
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
||||||
<a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
|
<a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
|
||||||
|
|
||||||
### Package installing
|
### Package installing
|
||||||
|
|
||||||
**NOTE**: Python 3.7 or higher and pip is required, **Python 3.8 is recommended.**
|
**NOTE**: Python 3.6 or higher and pip is required, **Python 3.8 is recommended.**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# install from pypi
|
# install from pypi
|
||||||
pip3 install maigret
|
pip3 install maigret
|
||||||
|
|
||||||
|
# or clone and install manually
|
||||||
|
git clone https://github.com/soxoj/maigret && cd maigret
|
||||||
|
pip3 install .
|
||||||
|
|
||||||
# usage
|
# usage
|
||||||
maigret username
|
maigret username
|
||||||
```
|
```
|
||||||
@@ -64,7 +68,6 @@ maigret username
|
|||||||
### Cloning a repository
|
### Cloning a repository
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# or clone and install manually
|
|
||||||
git clone https://github.com/soxoj/maigret && cd maigret
|
git clone https://github.com/soxoj/maigret && cd maigret
|
||||||
pip3 install -r requirements.txt
|
pip3 install -r requirements.txt
|
||||||
|
|
||||||
@@ -79,7 +82,7 @@ pip3 install -r requirements.txt
|
|||||||
docker pull soxoj/maigret
|
docker pull soxoj/maigret
|
||||||
|
|
||||||
# usage
|
# usage
|
||||||
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
|
docker run soxoj/maigret:latest username
|
||||||
|
|
||||||
# manual build
|
# manual build
|
||||||
docker build -t maigret .
|
docker build -t maigret .
|
||||||
@@ -100,11 +103,6 @@ maigret user1 user2 user3 -a
|
|||||||
|
|
||||||
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
||||||
|
|
||||||
## Contributing
|
|
||||||
|
|
||||||
Maigret has open-source code, so you may contribute your own sites by adding them to the `data.json` file, or bring changes to its code!
|
|
||||||
If you want to contribute, don't forget to activate the statistics update hook. The command for it looks like this: `git config --local core.hooksPath .githooks/`
|
|
||||||
You should make your git commits from your maigret git repo folder, or else the hook wouldn't find the statistics update script.
|
|
||||||
|
|
||||||
## Demo with page parsing and recursive username search
|
## Demo with page parsing and recursive username search
|
||||||
|
|
||||||
@@ -114,7 +112,7 @@ You should make your git commits from your maigret git repo folder, or else the
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
|
||||||
[Full console output](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.md)
|
[Full console output](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.md)
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ Options are also configurable through settings files, see
|
|||||||
:doc:`settings section <settings>`.
|
:doc:`settings section <settings>`.
|
||||||
|
|
||||||
``--tags`` - Filter sites for searching by tags: sites categories and
|
``--tags`` - Filter sites for searching by tags: sites categories and
|
||||||
two-letter country codes (**not a language!**). E.g. photo, dating, sport; jp, us, global.
|
two-letter country codes. E.g. photo, dating, sport; jp, us, global.
|
||||||
Multiple tags can be associated with one site. **Warning: tags markup is
|
Multiple tags can be associated with one site. **Warning: tags markup is
|
||||||
not stable now.**
|
not stable now.**
|
||||||
|
|
||||||
|
|||||||
+2
-2
@@ -6,8 +6,8 @@ project = 'Maigret'
|
|||||||
copyright = '2021, soxoj'
|
copyright = '2021, soxoj'
|
||||||
author = 'soxoj'
|
author = 'soxoj'
|
||||||
|
|
||||||
release = '0.4.4'
|
release = '0.4.2'
|
||||||
version = '0.4.4'
|
version = '0.4.2'
|
||||||
|
|
||||||
# -- General configuration
|
# -- General configuration
|
||||||
|
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ PyPi package.
|
|||||||
4. Get auto-generate release notes:
|
4. Get auto-generate release notes:
|
||||||
|
|
||||||
- Open https://github.com/soxoj/maigret/releases/new
|
- Open https://github.com/soxoj/maigret/releases/new
|
||||||
- Click `Choose a tag`, enter `v0.4.0` (your version)
|
- Click `Choose a tag`, enter `test`
|
||||||
- Click `Create new tag`
|
- Click `Create new tag`
|
||||||
- Press `+ Auto-generate release notes`
|
- Press `+ Auto-generate release notes`
|
||||||
- Copy all the text from description text field below
|
- Copy all the text from description text field below
|
||||||
@@ -81,8 +81,8 @@ PyPi package.
|
|||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
git add -p
|
git add ...
|
||||||
git commit -m 'Bump to YOUR VERSION'
|
git commit -m 'Bump to 0.4.0'
|
||||||
git push origin head
|
git push origin head
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ Enabled by default, can be disabled with ``--no-recursion``.
|
|||||||
Reports
|
Reports
|
||||||
-------
|
-------
|
||||||
|
|
||||||
Maigret currently supports HTML, PDF, TXT, XMind 8 mindmap, and JSON reports.
|
Maigret currently supports HTML, PDF, TXT, XMind mindmap, and JSON reports.
|
||||||
|
|
||||||
HTML/PDF reports contain:
|
HTML/PDF reports contain:
|
||||||
|
|
||||||
@@ -34,8 +34,6 @@ HTML/PDF reports contain:
|
|||||||
|
|
||||||
Also, there is a short text report in the CLI output after the end of a searching phase.
|
Also, there is a short text report in the CLI output after the end of a searching phase.
|
||||||
|
|
||||||
**Warning**: XMind 8 mindmaps are incompatible with XMind 2022!
|
|
||||||
|
|
||||||
Tags
|
Tags
|
||||||
----
|
----
|
||||||
|
|
||||||
|
|||||||
@@ -3,15 +3,4 @@
|
|||||||
Philosophy
|
Philosophy
|
||||||
==========
|
==========
|
||||||
|
|
||||||
TL;DR: Username => Dossier
|
Username => Dossier
|
||||||
|
|
||||||
Maigret is designed to gather all the available information about a person by their username.
|
|
||||||
|
|
||||||
What kind of information is this? First, links to person accounts. Secondly, all the machine-extractable
|
|
||||||
pieces of info, such as: other usernames, full name, URLs to people's images, birthday, location (country,
|
|
||||||
city, etc.), gender.
|
|
||||||
|
|
||||||
All this information forms a dossier, but it is also useful for other tools and analytical purposes.
|
|
||||||
Each collected piece of data has a label of a certain format (for example, ``follower_count`` for the number
|
|
||||||
of subscribers or ``created_at`` for account creation time) so that it can be parsed and analyzed by various
|
|
||||||
systems and stored in databases.
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ There are several types of tags:
|
|||||||
|
|
||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
``--tags us,jp`` -- search on US and Japanese sites (actually marked as such in the Maigret database)
|
``--tags en,jp`` -- search on US and Japanese sites (actually marked as such in the Maigret database)
|
||||||
|
|
||||||
``--tags coding`` -- search on sites related to software development.
|
``--tags coding`` -- search on sites related to software development.
|
||||||
|
|
||||||
|
|||||||
@@ -37,17 +37,17 @@ Start a search for accounts with username ``machine42`` only on US and Japanese
|
|||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret machine42 --tags us,jp
|
maigret michael --tags en,jp
|
||||||
|
|
||||||
Start a search for accounts with username ``machine42`` only on sites related to software development.
|
Start a search for accounts with username ``machine42`` only on sites related to software development.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret machine42 --tags coding
|
maigret michael --tags coding
|
||||||
|
|
||||||
Start a search for accounts with username ``machine42`` on uCoz sites only (mostly CIS countries).
|
Start a search for accounts with username ``machine42`` on uCoz sites only (mostly CIS countries).
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
maigret machine42 --tags ucoz
|
maigret michael --tags ucoz
|
||||||
|
|
||||||
|
|||||||
+2
-5
@@ -7,11 +7,8 @@ from maigret.maigret import main
|
|||||||
|
|
||||||
def run():
|
def run():
|
||||||
try:
|
try:
|
||||||
if sys.version_info.minor >= 10:
|
loop = asyncio.get_event_loop()
|
||||||
asyncio.run(main())
|
loop.run_until_complete(main())
|
||||||
else:
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.run_until_complete(main())
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print('Maigret is interrupted.')
|
print('Maigret is interrupted.')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
"""Maigret version file"""
|
"""Maigret version file"""
|
||||||
|
|
||||||
__version__ = '0.4.4'
|
__version__ = '0.4.2'
|
||||||
|
|||||||
+5
-13
@@ -10,7 +10,6 @@ import re
|
|||||||
import ssl
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
import tqdm
|
import tqdm
|
||||||
import random
|
|
||||||
from typing import Tuple, Optional, Dict, List
|
from typing import Tuple, Optional, Dict, List
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
@@ -398,7 +397,7 @@ def process_site_result(
|
|||||||
|
|
||||||
|
|
||||||
def make_site_result(
|
def make_site_result(
|
||||||
site: MaigretSite, username: str, options: QueryOptions, logger, *args, **kwargs
|
site: MaigretSite, username: str, options: QueryOptions, logger
|
||||||
) -> QueryResultWrapper:
|
) -> QueryResultWrapper:
|
||||||
results_site: QueryResultWrapper = {}
|
results_site: QueryResultWrapper = {}
|
||||||
|
|
||||||
@@ -422,10 +421,6 @@ def make_site_result(
|
|||||||
if "url" not in site.__dict__:
|
if "url" not in site.__dict__:
|
||||||
logger.error("No URL for site %s", site.name)
|
logger.error("No URL for site %s", site.name)
|
||||||
|
|
||||||
if kwargs.get('retry') and hasattr(site, "mirrors"):
|
|
||||||
site.url_main = random.choice(site.mirrors)
|
|
||||||
logger.info(f"Use {site.url_main} as a main url of site {site}")
|
|
||||||
|
|
||||||
# URL of user on site (if it exists)
|
# URL of user on site (if it exists)
|
||||||
url = site.url.format(
|
url = site.url.format(
|
||||||
urlMain=site.url_main, urlSubpath=site.url_subpath, username=quote(username)
|
urlMain=site.url_main, urlSubpath=site.url_subpath, username=quote(username)
|
||||||
@@ -529,7 +524,7 @@ def make_site_result(
|
|||||||
async def check_site_for_username(
|
async def check_site_for_username(
|
||||||
site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
|
site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
|
||||||
) -> Tuple[str, QueryResultWrapper]:
|
) -> Tuple[str, QueryResultWrapper]:
|
||||||
default_result = make_site_result(site, username, options, logger, retry=kwargs.get('retry'))
|
default_result = make_site_result(site, username, options, logger)
|
||||||
future = default_result.get("future")
|
future = default_result.get("future")
|
||||||
if not future:
|
if not future:
|
||||||
return site.name, default_result
|
return site.name, default_result
|
||||||
@@ -585,8 +580,6 @@ async def maigret(
|
|||||||
cookies=None,
|
cookies=None,
|
||||||
retries=0,
|
retries=0,
|
||||||
check_domains=False,
|
check_domains=False,
|
||||||
*args,
|
|
||||||
**kwargs,
|
|
||||||
) -> QueryResultWrapper:
|
) -> QueryResultWrapper:
|
||||||
"""Main search func
|
"""Main search func
|
||||||
|
|
||||||
@@ -604,7 +597,7 @@ async def maigret(
|
|||||||
is_parsing_enabled -- Extract additional info from account pages.
|
is_parsing_enabled -- Extract additional info from account pages.
|
||||||
id_type -- Type of username to search.
|
id_type -- Type of username to search.
|
||||||
Default is 'username', see all supported here:
|
Default is 'username', see all supported here:
|
||||||
https://maigret.readthedocs.io/en/latest/supported-identifier-types.html
|
https://github.com/soxoj/maigret/wiki/Supported-identifier-types
|
||||||
max_connections -- Maximum number of concurrent connections allowed.
|
max_connections -- Maximum number of concurrent connections allowed.
|
||||||
Default is 100.
|
Default is 100.
|
||||||
no_progressbar -- Displaying of ASCII progressbar during scanner.
|
no_progressbar -- Displaying of ASCII progressbar during scanner.
|
||||||
@@ -667,8 +660,7 @@ async def maigret(
|
|||||||
executor = AsyncioSimpleExecutor(logger=logger)
|
executor = AsyncioSimpleExecutor(logger=logger)
|
||||||
else:
|
else:
|
||||||
executor = AsyncioProgressbarQueueExecutor(
|
executor = AsyncioProgressbarQueueExecutor(
|
||||||
logger=logger, in_parallel=max_connections, timeout=timeout + 0.5,
|
logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
|
||||||
*args, **kwargs
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# make options objects for all the requests
|
# make options objects for all the requests
|
||||||
@@ -710,7 +702,7 @@ async def maigret(
|
|||||||
tasks_dict[sitename] = (
|
tasks_dict[sitename] = (
|
||||||
check_site_for_username,
|
check_site_for_username,
|
||||||
[site, username, options, logger, query_notify],
|
[site, username, options, logger, query_notify],
|
||||||
{'default': (sitename, default_result), 'retry': retries-attempts+1},
|
{'default': (sitename, default_result)},
|
||||||
)
|
)
|
||||||
|
|
||||||
cur_results = await executor.run(tasks_dict.values())
|
cur_results = await executor.run(tasks_dict.values())
|
||||||
|
|||||||
+1
-2
@@ -63,9 +63,8 @@ COMMON_ERRORS = {
|
|||||||
ERRORS_TYPES = {
|
ERRORS_TYPES = {
|
||||||
'Captcha': 'Try to switch to another IP address or to use service cookies',
|
'Captcha': 'Try to switch to another IP address or to use service cookies',
|
||||||
'Bot protection': 'Try to switch to another IP address',
|
'Bot protection': 'Try to switch to another IP address',
|
||||||
'Censorship': 'Switch to another internet service provider',
|
'Censorship': 'switch to another internet service provider',
|
||||||
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
||||||
'Connecting failure': 'Try to decrease number of parallel connections (e.g. -n 10)',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: checking for reason
|
# TODO: checking for reason
|
||||||
|
|||||||
+2
-22
@@ -81,22 +81,6 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
|||||||
self.queue = asyncio.Queue(self.workers_count)
|
self.queue = asyncio.Queue(self.workers_count)
|
||||||
self.timeout = kwargs.get('timeout')
|
self.timeout = kwargs.get('timeout')
|
||||||
|
|
||||||
async def increment_progress(self, count):
|
|
||||||
update_func = self.progress.update
|
|
||||||
if asyncio.iscoroutinefunction(update_func):
|
|
||||||
await update_func(count)
|
|
||||||
else:
|
|
||||||
update_func(count)
|
|
||||||
await asyncio.sleep(0)
|
|
||||||
|
|
||||||
async def stop_progress(self):
|
|
||||||
stop_func = self.progress.close
|
|
||||||
if asyncio.iscoroutinefunction(stop_func):
|
|
||||||
await stop_func()
|
|
||||||
else:
|
|
||||||
stop_func()
|
|
||||||
await asyncio.sleep(0)
|
|
||||||
|
|
||||||
async def worker(self):
|
async def worker(self):
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
@@ -112,7 +96,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
|||||||
result = kwargs.get('default')
|
result = kwargs.get('default')
|
||||||
|
|
||||||
self.results.append(result)
|
self.results.append(result)
|
||||||
await self.increment_progress(1)
|
self.progress.update(1)
|
||||||
self.queue.task_done()
|
self.queue.task_done()
|
||||||
|
|
||||||
async def _run(self, queries: Iterable[QueryDraft]):
|
async def _run(self, queries: Iterable[QueryDraft]):
|
||||||
@@ -125,14 +109,10 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
|||||||
workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
|
workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
|
||||||
|
|
||||||
self.progress = self.progress_func(total=len(queries_list))
|
self.progress = self.progress_func(total=len(queries_list))
|
||||||
|
|
||||||
for t in queries_list:
|
for t in queries_list:
|
||||||
await self.queue.put(t)
|
await self.queue.put(t)
|
||||||
|
|
||||||
await self.queue.join()
|
await self.queue.join()
|
||||||
|
|
||||||
for w in workers:
|
for w in workers:
|
||||||
w.cancel()
|
w.cancel()
|
||||||
|
self.progress.close()
|
||||||
await self.stop_progress()
|
|
||||||
return self.results
|
return self.results
|
||||||
|
|||||||
+3
-6
@@ -172,7 +172,7 @@ def setup_arguments_parser(settings: Settings):
|
|||||||
type=int,
|
type=int,
|
||||||
dest="connections",
|
dest="connections",
|
||||||
default=settings.max_connections,
|
default=settings.max_connections,
|
||||||
help=f"Allowed number of concurrent connections (default {settings.max_connections}).",
|
help="Allowed number of concurrent connections.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-recursion",
|
"--no-recursion",
|
||||||
@@ -719,11 +719,8 @@ async def main():
|
|||||||
|
|
||||||
def run():
|
def run():
|
||||||
try:
|
try:
|
||||||
if sys.version_info.minor >= 10:
|
loop = asyncio.get_event_loop()
|
||||||
asyncio.run(main())
|
loop.run_until_complete(main())
|
||||||
else:
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
loop.run_until_complete(main())
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print('Maigret is interrupted.')
|
print('Maigret is interrupted.')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
+1
-1
@@ -67,7 +67,7 @@ def save_txt_report(filename: str, username: str, results: dict):
|
|||||||
def save_html_report(filename: str, context: dict):
|
def save_html_report(filename: str, context: dict):
|
||||||
template, _ = generate_report_template(is_pdf=False)
|
template, _ = generate_report_template(is_pdf=False)
|
||||||
filled_template = template.render(**context)
|
filled_template = template.render(**context)
|
||||||
with open(filename, "w", encoding="utf-8") as f:
|
with open(filename, "w") as f:
|
||||||
f.write(filled_template)
|
f.write(filled_template)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+624
-4044
File diff suppressed because it is too large
Load Diff
+4
-15
@@ -431,8 +431,6 @@ class MaigretDatabase:
|
|||||||
message_checks = 0
|
message_checks = 0
|
||||||
message_checks_one_factor = 0
|
message_checks_one_factor = 0
|
||||||
|
|
||||||
status_checks = 0
|
|
||||||
|
|
||||||
for _, site in sites_dict.items():
|
for _, site in sites_dict.items():
|
||||||
if site.disabled:
|
if site.disabled:
|
||||||
disabled_count += 1
|
disabled_count += 1
|
||||||
@@ -446,26 +444,17 @@ class MaigretDatabase:
|
|||||||
continue
|
continue
|
||||||
message_checks_one_factor += 1
|
message_checks_one_factor += 1
|
||||||
|
|
||||||
if site.check_type == 'status_code':
|
|
||||||
status_checks += 1
|
|
||||||
|
|
||||||
if not site.tags:
|
if not site.tags:
|
||||||
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
||||||
|
|
||||||
for tag in filter(lambda x: not is_country_tag(x), site.tags):
|
for tag in filter(lambda x: not is_country_tag(x), site.tags):
|
||||||
tags[tag] = tags.get(tag, 0) + 1
|
tags[tag] = tags.get(tag, 0) + 1
|
||||||
|
|
||||||
enabled_count = total_count-disabled_count
|
enabled_perc = round(100*(total_count-disabled_count)/total_count, 2)
|
||||||
enabled_perc = round(100*enabled_count/total_count, 2)
|
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count} = {enabled_perc}%\n\n"
|
||||||
output += f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n"
|
|
||||||
|
|
||||||
checks_perc = round(100*message_checks_one_factor/enabled_count, 2)
|
checks_perc = round(100*message_checks_one_factor/message_checks, 2)
|
||||||
output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n"
|
output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} = {checks_perc}% (false positive risks)\n\n"
|
||||||
|
|
||||||
status_checks_perc = round(100*status_checks/enabled_count, 2)
|
|
||||||
output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n"
|
|
||||||
|
|
||||||
output += f"False positive risk (total): {checks_perc+status_checks_perc:.2f}%\n\n"
|
|
||||||
|
|
||||||
top_urls_count = 20
|
top_urls_count = 20
|
||||||
output += f"Top {top_urls_count} profile URLs:\n"
|
output += f"Top {top_urls_count} profile URLs:\n"
|
||||||
|
|||||||
+7
-36
@@ -1,11 +1,10 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import List, Tuple
|
from typing import List
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from aiohttp import TCPConnector, ClientSession
|
from aiohttp import TCPConnector, ClientSession
|
||||||
import requests
|
import requests
|
||||||
import cloudscraper
|
|
||||||
|
|
||||||
from .activation import import_aiohttp_cookies
|
from .activation import import_aiohttp_cookies
|
||||||
from .checking import maigret
|
from .checking import maigret
|
||||||
@@ -15,27 +14,6 @@ from .sites import MaigretDatabase, MaigretSite, MaigretEngine
|
|||||||
from .utils import get_random_user_agent, get_match_ratio
|
from .utils import get_random_user_agent, get_match_ratio
|
||||||
|
|
||||||
|
|
||||||
class CloudflareSession:
|
|
||||||
def __init__(self):
|
|
||||||
self.scraper = cloudscraper.create_scraper()
|
|
||||||
|
|
||||||
async def get(self, *args, **kwargs):
|
|
||||||
await asyncio.sleep(0)
|
|
||||||
res = self.scraper.get(*args, **kwargs)
|
|
||||||
self.last_text = res.text
|
|
||||||
self.status = res.status_code
|
|
||||||
return self
|
|
||||||
|
|
||||||
def status_code(self):
|
|
||||||
return self.status
|
|
||||||
|
|
||||||
async def text(self):
|
|
||||||
await asyncio.sleep(0)
|
|
||||||
return self.last_text
|
|
||||||
|
|
||||||
async def close(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class Submitter:
|
class Submitter:
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
"User-Agent": get_random_user_agent(),
|
"User-Agent": get_random_user_agent(),
|
||||||
@@ -163,18 +141,16 @@ class Submitter:
|
|||||||
fields['urlSubpath'] = f'/{subpath}'
|
fields['urlSubpath'] = f'/{subpath}'
|
||||||
return fields
|
return fields
|
||||||
|
|
||||||
async def detect_known_engine(self, url_exists, url_mainpage) -> [List[MaigretSite], str]:
|
async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
|
||||||
resp_text = ''
|
resp_text = ''
|
||||||
try:
|
try:
|
||||||
r = await self.session.get(url_mainpage)
|
r = await self.session.get(url_mainpage)
|
||||||
content = await r.content.read()
|
resp_text = await r.text()
|
||||||
charset = r.charset or "utf-8"
|
|
||||||
resp_text = content.decode(charset, "ignore")
|
|
||||||
self.logger.debug(resp_text)
|
self.logger.debug(resp_text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(e)
|
self.logger.warning(e)
|
||||||
print("Some error while checking main page")
|
print("Some error while checking main page")
|
||||||
return [], resp_text
|
return []
|
||||||
|
|
||||||
for engine in self.db.engines:
|
for engine in self.db.engines:
|
||||||
strs_to_check = engine.__dict__.get("presenseStrs")
|
strs_to_check = engine.__dict__.get("presenseStrs")
|
||||||
@@ -217,9 +193,9 @@ class Submitter:
|
|||||||
)
|
)
|
||||||
sites.append(maigret_site)
|
sites.append(maigret_site)
|
||||||
|
|
||||||
return sites, resp_text
|
return sites
|
||||||
|
|
||||||
return [], resp_text
|
return []
|
||||||
|
|
||||||
def extract_username_dialog(self, url):
|
def extract_username_dialog(self, url):
|
||||||
url_parts = url.rstrip("/").split("/")
|
url_parts = url.rstrip("/").split("/")
|
||||||
@@ -362,15 +338,10 @@ class Submitter:
|
|||||||
print('Detecting site engine, please wait...')
|
print('Detecting site engine, please wait...')
|
||||||
sites = []
|
sites = []
|
||||||
try:
|
try:
|
||||||
sites, text = await self.detect_known_engine(url_exists, url_exists)
|
sites = await self.detect_known_engine(url_exists, url_mainpage)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print('Engine detect process is interrupted.')
|
print('Engine detect process is interrupted.')
|
||||||
|
|
||||||
|
|
||||||
if 'cloudflare' in text.lower():
|
|
||||||
print('Cloudflare protection detected. I will use cloudscraper for futher work')
|
|
||||||
# self.session = CloudflareSession()
|
|
||||||
|
|
||||||
if not sites:
|
if not sites:
|
||||||
print("Unable to detect site engine, lets generate checking features")
|
print("Unable to detect site engine, lets generate checking features")
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
|
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
|
||||||
pefile==2022.5.30
|
pefile==2021.9.3
|
||||||
psutil==5.9.2
|
psutil==5.9.0
|
||||||
pyinstaller @ https://github.com/pyinstaller/pyinstaller/archive/develop.zip
|
pyinstaller @ https://github.com/pyinstaller/pyinstaller/archive/develop.zip
|
||||||
pywin32-ctypes==0.2.0
|
pywin32-ctypes==0.2.0
|
||||||
+23
-23
@@ -1,39 +1,39 @@
|
|||||||
aiodns==3.0.0
|
aiodns==3.0.0
|
||||||
aiohttp==3.8.3
|
aiohttp==3.8.1
|
||||||
aiohttp-socks==0.7.1
|
aiohttp-socks==0.7.1
|
||||||
arabic-reshaper==2.1.4
|
arabic-reshaper==2.1.3
|
||||||
async-timeout==4.0.2
|
async-timeout==4.0.2
|
||||||
attrs==22.1.0
|
attrs==21.4.0
|
||||||
certifi==2022.9.24
|
certifi==2021.10.8
|
||||||
chardet==5.0.0
|
chardet==4.0.0
|
||||||
colorama==0.4.6
|
colorama==0.4.4
|
||||||
future==0.18.2
|
future==0.18.2
|
||||||
future-annotations==1.0.0
|
future-annotations==1.0.0
|
||||||
html5lib==1.1
|
html5lib==1.1
|
||||||
idna==3.4
|
idna==3.3
|
||||||
Jinja2==3.1.2
|
Jinja2==3.0.3
|
||||||
lxml==4.9.1
|
lxml==4.8.0
|
||||||
MarkupSafe==2.1.1
|
MarkupSafe==2.0.1
|
||||||
mock==4.0.3
|
mock==4.0.3
|
||||||
multidict==6.0.2
|
multidict==5.2.0;python_version<"3.7"
|
||||||
pycountry==22.3.5
|
multidict==6.0.2;python_version>="3.7"
|
||||||
PyPDF2==2.10.8
|
pycountry==22.1.10
|
||||||
|
PyPDF2==1.26.0
|
||||||
PySocks==1.7.1
|
PySocks==1.7.1
|
||||||
python-bidi==0.4.2
|
python-bidi==0.4.2
|
||||||
requests==2.28.1
|
requests==2.27.1
|
||||||
requests-futures==1.0.0
|
requests-futures==1.0.0
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
socid-extractor>=0.0.21
|
socid-extractor>=0.0.21
|
||||||
soupsieve==2.3.2.post1
|
soupsieve==2.3.1
|
||||||
stem==1.8.1
|
stem==1.8.0
|
||||||
torrequest==0.1.0
|
torrequest==0.1.0
|
||||||
tqdm==4.64.1
|
tqdm==4.63.0
|
||||||
typing-extensions==4.4.0
|
typing-extensions==4.1.1
|
||||||
webencodings==0.5.1
|
webencodings==0.5.1
|
||||||
xhtml2pdf==0.2.8
|
xhtml2pdf==0.2.5
|
||||||
XMind==1.2.0
|
XMind==1.2.0
|
||||||
yarl==1.8.1
|
yarl==1.7.2
|
||||||
networkx==2.5.1
|
networkx==2.5.1
|
||||||
pyvis==0.2.1
|
pyvis==0.1.9
|
||||||
reportlab==3.6.11
|
reportlab==3.6.6
|
||||||
cloudscraper==1.2.66
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ with open('requirements.txt') as rf:
|
|||||||
requires = rf.read().splitlines()
|
requires = rf.read().splitlines()
|
||||||
|
|
||||||
setup(name='maigret',
|
setup(name='maigret',
|
||||||
version='0.4.4',
|
version='0.4.2',
|
||||||
description='Collect a dossier on a person by username from a huge number of sites',
|
description='Collect a dossier on a person by username from a huge number of sites',
|
||||||
long_description=long_description,
|
long_description=long_description,
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
|||||||
+18
-31
@@ -1,43 +1,30 @@
|
|||||||
name: maigret2
|
name: maigret2
|
||||||
adopt-info: maigret2
|
version: git
|
||||||
summary: SOCMINT / Instagram
|
summary: SOCMINT / Instagram
|
||||||
description: |
|
description: |
|
||||||
Test Test Test
|
Test Test Test
|
||||||
|
base: core18
|
||||||
license: MIT
|
|
||||||
|
|
||||||
base: core20
|
|
||||||
grade: stable
|
|
||||||
confinement: strict
|
confinement: strict
|
||||||
compression: lzo
|
|
||||||
|
|
||||||
architectures:
|
|
||||||
- build-on: amd64
|
parts:
|
||||||
|
maigret2:
|
||||||
|
plugin: python
|
||||||
|
python-version: python3
|
||||||
|
source: .
|
||||||
|
stage-packages:
|
||||||
|
- python-six
|
||||||
|
|
||||||
|
|
||||||
apps:
|
apps:
|
||||||
maigret2:
|
maigret2:
|
||||||
command: bin/maigret
|
command: bin/maigret
|
||||||
environment:
|
|
||||||
LC_ALL: C.UTF-8
|
|
||||||
plugs:
|
|
||||||
- home
|
|
||||||
- network
|
|
||||||
|
|
||||||
parts:
|
|
||||||
maigret2:
|
|
||||||
plugin: python
|
|
||||||
source: https://github.com/soxoj/maigret
|
|
||||||
source-type: git
|
|
||||||
|
|
||||||
build-packages:
|
|
||||||
- python3-pip
|
architectures:
|
||||||
- python3-six
|
- build-on: amd64
|
||||||
- python3
|
- build-on: i386
|
||||||
|
|
||||||
stage-packages:
|
|
||||||
- python3
|
|
||||||
- python3-six
|
|
||||||
|
|
||||||
override-pull: |
|
|
||||||
snapcraftctl pull
|
|
||||||
snapcraftctl set-version "$(git describe --tags | sed 's/^v//' | cut -d "-" -f1)"
|
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
reportlab==3.6.11
|
reportlab==3.6.6
|
||||||
flake8==5.0.4
|
flake8==4.0.1
|
||||||
pytest==7.2.0
|
pytest==7.0.1
|
||||||
pytest-asyncio==0.16.0;python_version<"3.7"
|
pytest-asyncio==0.16.0;python_version<"3.7"
|
||||||
pytest-asyncio==0.20.1;python_version>="3.7"
|
pytest-asyncio==0.18.2;python_version>="3.7"
|
||||||
pytest-cov==4.0.0
|
pytest-cov==3.0.0
|
||||||
pytest-httpserver==1.0.6
|
pytest-httpserver==1.0.4
|
||||||
pytest-rerunfailures==10.2
|
pytest-rerunfailures==10.2
|
||||||
|
|||||||
+5
-11
@@ -3,7 +3,7 @@ import random
|
|||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
|
|
||||||
from maigret.maigret import MaigretDatabase
|
from maigret.maigret import MaigretDatabase
|
||||||
from maigret.submit import Submitter
|
from maigret.submit import get_alexa_rank
|
||||||
|
|
||||||
|
|
||||||
def update_tags(site):
|
def update_tags(site):
|
||||||
@@ -22,7 +22,7 @@ def update_tags(site):
|
|||||||
site.disabled = True
|
site.disabled = True
|
||||||
|
|
||||||
print(f'Old alexa rank: {site.alexa_rank}')
|
print(f'Old alexa rank: {site.alexa_rank}')
|
||||||
rank = Submitter.get_alexa_rank(site.url_main)
|
rank = get_alexa_rank(site.url_main)
|
||||||
if rank:
|
if rank:
|
||||||
print(f'New alexa rank: {rank}')
|
print(f'New alexa rank: {rank}')
|
||||||
site.alexa_rank = rank
|
site.alexa_rank = rank
|
||||||
@@ -36,7 +36,6 @@ if __name__ == '__main__':
|
|||||||
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
||||||
dest="base_file", default="maigret/resources/data.json",
|
dest="base_file", default="maigret/resources/data.json",
|
||||||
help="JSON file with sites data to update.")
|
help="JSON file with sites data to update.")
|
||||||
parser.add_argument("--name", help="Name of site to check")
|
|
||||||
|
|
||||||
pool = list()
|
pool = list()
|
||||||
|
|
||||||
@@ -46,17 +45,12 @@ if __name__ == '__main__':
|
|||||||
db.load_from_file(args.base_file).sites
|
db.load_from_file(args.base_file).sites
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if args.name:
|
site = random.choice(db.sites)
|
||||||
sites = list(db.ranked_sites_dict(names=[args.name]).values())
|
|
||||||
site = random.choice(sites)
|
|
||||||
else:
|
|
||||||
site = random.choice(db.sites)
|
|
||||||
|
|
||||||
if site.engine == 'uCoz':
|
if site.engine == 'uCoz':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# if not 'in' in site.tags:
|
if not 'in' in site.tags:
|
||||||
# continue
|
continue
|
||||||
|
|
||||||
update_tags(site)
|
update_tags(site)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user