mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-15 10:55:43 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4791a6fc96 |
@@ -1,2 +0,0 @@
|
||||
#!/bin/sh
|
||||
python3 ./utils/update_site_data.py
|
||||
@@ -15,14 +15,10 @@ assignees: soxoj
|
||||
|
||||
## Description
|
||||
|
||||
Info about Maigret version you are running and environment (`--version`, operation system, ISP provider):
|
||||
Info about Maigret version you are running and environment (`--version`, operation system, ISP provuder):
|
||||
<INSERT VERSION INFO HERE>
|
||||
|
||||
How to reproduce this bug (commandline options / conditions):
|
||||
<INSERT EXAMPLE OF CLI COMMAND HERE>
|
||||
|
||||
<DESCRIPTION>
|
||||
|
||||
<PASTE SCREENSHOT>
|
||||
|
||||
<ATTACH LOG FILE>
|
||||
|
||||
@@ -27,7 +27,6 @@ jobs:
|
||||
with:
|
||||
push: true
|
||||
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
|
||||
platforms: linux/amd64,linux/arm64
|
||||
-
|
||||
name: Image digest
|
||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||
|
||||
@@ -3,6 +3,8 @@ name: Package exe with PyInstaller - Windows
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
@@ -2,7 +2,6 @@ name: Linting and testing
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
types: [opened, synchronize, reopened]
|
||||
@@ -13,7 +12,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9]
|
||||
python-version: [3.6.9, 3.7, 3.8, 3.9]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
@@ -2,7 +2,7 @@ name: Update sites rating and statistics
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ dev ]
|
||||
branches: [ main ]
|
||||
types: [opened, synchronize]
|
||||
|
||||
jobs:
|
||||
|
||||
@@ -15,10 +15,6 @@ src/
|
||||
.ipynb_checkpoints
|
||||
*.ipynb
|
||||
|
||||
# Logs and backups
|
||||
*.log
|
||||
*.bak
|
||||
|
||||
# Output files, except requirements.txt
|
||||
*.txt
|
||||
!requirements.txt
|
||||
|
||||
@@ -2,103 +2,6 @@
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.4.4] - 2022-09-03
|
||||
* Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433
|
||||
* Drop Python 3.6 support by @soxoj in https://github.com/soxoj/maigret/pull/434
|
||||
* Bump xhtml2pdf from 0.2.5 to 0.2.7 by @dependabot in https://github.com/soxoj/maigret/pull/409
|
||||
* Bump reportlab from 3.6.6 to 3.6.9 by @dependabot in https://github.com/soxoj/maigret/pull/403
|
||||
* Bump markupsafe from 2.0.1 to 2.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/389
|
||||
* Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/384
|
||||
* Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/438
|
||||
* Update GH actions by @soxoj in https://github.com/soxoj/maigret/pull/439
|
||||
* Bump tqdm from 4.63.0 to 4.64.0 by @dependabot in https://github.com/soxoj/maigret/pull/440
|
||||
* Bump jinja2 from 3.0.3 to 3.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/441
|
||||
* Bump soupsieve from 2.3.1 to 2.3.2 by @dependabot in https://github.com/soxoj/maigret/pull/436
|
||||
* Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/442
|
||||
* Bump pyvis from 0.1.9 to 0.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/443
|
||||
* Bump pypdf2 from 1.27.4 to 1.27.6 by @dependabot in https://github.com/soxoj/maigret/pull/448
|
||||
* Bump typing-extensions from 4.1.1 to 4.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/447
|
||||
* Bump soupsieve from 2.3.2 to 2.3.2.post1 by @dependabot in https://github.com/soxoj/maigret/pull/444
|
||||
* Bump pypdf2 from 1.27.6 to 1.27.7 by @dependabot in https://github.com/soxoj/maigret/pull/449
|
||||
* Bump pypdf2 from 1.27.7 to 1.27.8 by @dependabot in https://github.com/soxoj/maigret/pull/450
|
||||
* XMind 8 report warning and some docs update by @soxoj in https://github.com/soxoj/maigret/pull/452
|
||||
* False positive fixes 24.04.22 by @soxoj in https://github.com/soxoj/maigret/pull/455
|
||||
* Bump pypdf2 from 1.27.8 to 1.27.9 by @dependabot in https://github.com/soxoj/maigret/pull/456
|
||||
* Bump pytest from 7.0.1 to 7.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/457
|
||||
* Bump jinja2 from 3.1.1 to 3.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/460
|
||||
* Ubisoft forums addition by @fen0s in https://github.com/soxoj/maigret/pull/461
|
||||
* Add BYOND, Figma, BeatStars by @fen0s in https://github.com/soxoj/maigret/pull/462
|
||||
* fix Figma username definition, add a bunch of sites by @fen0s in https://github.com/soxoj/maigret/pull/464
|
||||
* Bump pypdf2 from 1.27.9 to 1.27.10 by @dependabot in https://github.com/soxoj/maigret/pull/465
|
||||
* Bump pypdf2 from 1.27.10 to 1.27.12 by @dependabot in https://github.com/soxoj/maigret/pull/466
|
||||
* Sites fixes 05 05 22 by @soxoj in https://github.com/soxoj/maigret/pull/469
|
||||
* Bump pyvis from 0.2.0 to 0.2.1 by @dependabot in https://github.com/soxoj/maigret/pull/472
|
||||
* Social analyzer websites, also fixing presense strs by @fen0s in https://github.com/soxoj/maigret/pull/471
|
||||
* Updated logic of false positive risk estimating by @soxoj in https://github.com/soxoj/maigret/pull/475
|
||||
* Improved usability of external progressbar func by @soxoj in https://github.com/soxoj/maigret/pull/476
|
||||
* New sites added, some tags/rank update by @soxoj in https://github.com/soxoj/maigret/pull/477
|
||||
* Added new sites by @soxoj in https://github.com/soxoj/maigret/pull/480
|
||||
* Added new forums, updated ranks, some utils improvements by @soxoj in https://github.com/soxoj/maigret/pull/481
|
||||
* Disabled sites with false positives results by @soxoj in https://github.com/soxoj/maigret/pull/482
|
||||
* Bump certifi from 2021.10.8 to 2022.5.18.1 by @dependabot in https://github.com/soxoj/maigret/pull/488
|
||||
* Bump psutil from 5.9.0 to 5.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/490
|
||||
* Bump pypdf2 from 1.27.12 to 1.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/491
|
||||
* Bump pypdf2 from 1.28.1 to 1.28.2 by @dependabot in https://github.com/soxoj/maigret/pull/493
|
||||
* added and fixed some websites in data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/494
|
||||
* Bump pypdf2 from 1.28.2 to 2.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/504
|
||||
* Bump pefile from 2021.9.3 to 2022.5.30 by @dependabot in https://github.com/soxoj/maigret/pull/499
|
||||
* Updated sites list, added disabled Anilist by @soxoj in https://github.com/soxoj/maigret/pull/502
|
||||
* Bump lxml from 4.8.0 to 4.9.0 by @dependabot in https://github.com/soxoj/maigret/pull/503
|
||||
* Compatibility with Python 10 by @soxoj in https://github.com/soxoj/maigret/pull/509
|
||||
* feat: add .log & .bak files to gitignore in https://github.com/soxoj/maigret/pull/511
|
||||
* fix some sites and delete abandoned by @fen0s in https://github.com/soxoj/maigret/pull/526
|
||||
* Fixesjulyfirst by @fen0s in https://github.com/soxoj/maigret/pull/533
|
||||
* yazbel, aboutcar, zhihu by @fen0s in https://github.com/soxoj/maigret/pull/531
|
||||
* Fixes july third by @fen0s in https://github.com/soxoj/maigret/pull/535
|
||||
* Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/539
|
||||
* Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/540
|
||||
* Bump reportlab from 3.6.9 to 3.6.11 by @dependabot in https://github.com/soxoj/maigret/pull/543
|
||||
* Bump requests from 2.27.1 to 2.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/530
|
||||
* Bump pypdf2 from 2.0.0 to 2.5.0 by @dependabot in https://github.com/soxoj/maigret/pull/542
|
||||
* Bump xhtml2pdf from 0.2.7 to 0.2.8 by @dependabot in https://github.com/soxoj/maigret/pull/522
|
||||
* Bump lxml from 4.9.0 to 4.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/538
|
||||
* disable yandex music + set utf8 encoding by @fen0s in https://github.com/soxoj/maigret/pull/562
|
||||
* fix false positives by @fen0s in https://github.com/soxoj/maigret/pull/577
|
||||
* disable Instagram, fix two false positives by @fen0s in https://github.com/soxoj/maigret/pull/578
|
||||
* Bump certifi from 2022.5.18.1 to 2022.6.15 by @dependabot in https://github.com/soxoj/maigret/pull/551
|
||||
* August15 by @fen0s in https://github.com/soxoj/maigret/pull/591
|
||||
* Bump pytest-httpserver from 1.0.4 to 1.0.5 by @dependabot in https://github.com/soxoj/maigret/pull/583
|
||||
* Bump typing-extensions from 4.2.0 to 4.3.0 by @dependabot in https://github.com/soxoj/maigret/pull/549
|
||||
* Bump colorama from 0.4.4 to 0.4.5 by @dependabot in https://github.com/soxoj/maigret/pull/548
|
||||
* Bump chardet from 4.0.0 to 5.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/550
|
||||
* Bump cloudscraper from 1.2.60 to 1.2.63 by @dependabot in https://github.com/soxoj/maigret/pull/600
|
||||
* Bump flake8 from 4.0.1 to 5.0.4 by @dependabot in https://github.com/soxoj/maigret/pull/598
|
||||
* Bump attrs from 21.4.0 to 22.1.0 by @dependabot in https://github.com/soxoj/maigret/pull/597
|
||||
* Bump pytest-asyncio from 0.18.2 to 0.19.0 by @dependabot in https://github.com/soxoj/maigret/pull/601
|
||||
* Bump pypdf2 from 2.5.0 to 2.10.4 by @dependabot in https://github.com/soxoj/maigret/pull/606
|
||||
* Bump pytest from 7.1.2 to 7.1.3 by @dependabot in https://github.com/soxoj/maigret/pull/613
|
||||
* Update sites.md -Gitmemory.com suppression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/610
|
||||
* Bump cloudscraper from 1.2.63 to 1.2.64 by @dependabot in https://github.com/soxoj/maigret/pull/614
|
||||
* Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/607
|
||||
* add ProtonMail, disable 3 broken sites by @fen0s in https://github.com/soxoj/maigret/pull/619
|
||||
* Bump tqdm from 4.64.0 to 4.64.1 by @dependabot in https://github.com/soxoj/maigret/pull/618
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.3...v0.4.4
|
||||
|
||||
## [0.4.3] - 2022-04-13
|
||||
* Added Sites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/386
|
||||
* added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/390
|
||||
* Skipped broken tests by @soxoj in https://github.com/soxoj/maigret/pull/397
|
||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/401
|
||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/404
|
||||
* Updated statistics by @soxoj in https://github.com/soxoj/maigret/pull/406
|
||||
* Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/413
|
||||
* Disabled houzz.com, updated sites statistics by @soxoj in https://github.com/soxoj/maigret/pull/422
|
||||
* Fixed last false positives by @soxoj in https://github.com/soxoj/maigret/pull/424
|
||||
* Fixed actual false positives by @soxoj in https://github.com/soxoj/maigret/pull/431
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.2...v0.4.3
|
||||
|
||||
## [0.4.2] - 2022-03-07
|
||||
* [ImgBot] Optimize images by @imgbot in https://github.com/soxoj/maigret/pull/319
|
||||
* Bump pytest-asyncio from 0.17.0 to 0.17.1 by @dependabot in https://github.com/soxoj/maigret/pull/321
|
||||
|
||||
+9
-9
@@ -1,16 +1,16 @@
|
||||
FROM python:3.9-slim
|
||||
LABEL maintainer="Soxoj <soxoj@protonmail.com>"
|
||||
MAINTAINER Soxoj <soxoj@protonmail.com>
|
||||
WORKDIR /app
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y \
|
||||
RUN pip install --upgrade pip
|
||||
RUN apt update && \
|
||||
apt install -y \
|
||||
gcc \
|
||||
musl-dev \
|
||||
libxml2 \
|
||||
libxml2-dev \
|
||||
libxslt-dev \
|
||||
&& \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/*
|
||||
COPY . .
|
||||
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install --no-cache-dir .
|
||||
libxslt-dev
|
||||
RUN apt clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/*
|
||||
ADD . .
|
||||
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install .
|
||||
ENTRYPOINT ["maigret"]
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
|
||||
## About
|
||||
|
||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||
**Maigret** collect a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||
|
||||
Currently more than 2500 sites are supported ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)); by default, the search is launched against the 500 most popular sites in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolving) is also supported.
|
||||
|
||||
@@ -44,19 +44,23 @@ Standalone EXE-binaries for Windows are located in [Releases section](https://gi
|
||||
Also you can run Maigret using cloud shells and Jupyter notebooks (see buttons below).
|
||||
|
||||
[](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
|
||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
|
||||
<a href="https://repl.it/github/soxoj/maigret"><img src="https://user-images.githubusercontent.com/27065646/92304596-bf719b00-ef7f-11ea-987f-2c1f3c323088.png" alt="Run on Repl.it" height="50"></a>
|
||||
|
||||
<a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
|
||||
<a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
|
||||
|
||||
### Package installing
|
||||
|
||||
**NOTE**: Python 3.7 or higher and pip is required, **Python 3.8 is recommended.**
|
||||
**NOTE**: Python 3.6 or higher and pip is required, **Python 3.8 is recommended.**
|
||||
|
||||
```bash
|
||||
# install from pypi
|
||||
pip3 install maigret
|
||||
|
||||
# or clone and install manually
|
||||
git clone https://github.com/soxoj/maigret && cd maigret
|
||||
pip3 install .
|
||||
|
||||
# usage
|
||||
maigret username
|
||||
```
|
||||
@@ -64,7 +68,6 @@ maigret username
|
||||
### Cloning a repository
|
||||
|
||||
```bash
|
||||
# or clone and install manually
|
||||
git clone https://github.com/soxoj/maigret && cd maigret
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
@@ -79,7 +82,7 @@ pip3 install -r requirements.txt
|
||||
docker pull soxoj/maigret
|
||||
|
||||
# usage
|
||||
docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
|
||||
docker run soxoj/maigret:latest username
|
||||
|
||||
# manual build
|
||||
docker build -t maigret .
|
||||
@@ -100,11 +103,6 @@ maigret user1 user2 user3 -a
|
||||
|
||||
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
||||
|
||||
## Contributing
|
||||
|
||||
Maigret has open-source code, so you may contribute your own sites by adding them to the `data.json` file, or bring changes to its code!
|
||||
If you want to contribute, don't forget to activate the statistics update hook with the following command: `git config --local core.hooksPath .githooks/`
|
||||
You should make your Git commits from inside your Maigret repository folder, or else the hook won't find the statistics update script.
|
||||
|
||||
## Demo with page parsing and recursive username search
|
||||
|
||||
@@ -114,7 +112,7 @@ You should make your git commits from your maigret git repo folder, or else the
|
||||
|
||||

|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
[Full console output](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.md)
|
||||
|
||||
@@ -27,7 +27,7 @@ Options are also configurable through settings files, see
|
||||
:doc:`settings section <settings>`.
|
||||
|
||||
``--tags`` - Filter sites for searching by tags: sites categories and
|
||||
two-letter country codes (**not a language!**). E.g. photo, dating, sport; jp, us, global.
|
||||
two-letter country codes. E.g. photo, dating, sport; jp, us, global.
|
||||
Multiple tags can be associated with one site. **Warning: tags markup is
|
||||
not stable now.**
|
||||
|
||||
|
||||
+2
-2
@@ -6,8 +6,8 @@ project = 'Maigret'
|
||||
copyright = '2021, soxoj'
|
||||
author = 'soxoj'
|
||||
|
||||
release = '0.4.4'
|
||||
version = '0.4.4'
|
||||
release = '0.4.2'
|
||||
version = '0.4.2'
|
||||
|
||||
# -- General configuration
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ PyPi package.
|
||||
4. Get auto-generated release notes:
|
||||
|
||||
- Open https://github.com/soxoj/maigret/releases/new
|
||||
- Click `Choose a tag`, enter `v0.4.0` (your version)
|
||||
- Click `Choose a tag`, enter `test`
|
||||
- Click `Create new tag`
|
||||
- Press `+ Auto-generate release notes`
|
||||
- Copy all the text from description text field below
|
||||
@@ -81,8 +81,8 @@ PyPi package.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
git add -p
|
||||
git commit -m 'Bump to YOUR VERSION'
|
||||
git add ...
|
||||
git commit -m 'Bump to 0.4.0'
|
||||
git push origin head
|
||||
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ Enabled by default, can be disabled with ``--no-recursion``.
|
||||
Reports
|
||||
-------
|
||||
|
||||
Maigret currently supports HTML, PDF, TXT, XMind 8 mindmap, and JSON reports.
|
||||
Maigret currently supports HTML, PDF, TXT, XMind mindmap, and JSON reports.
|
||||
|
||||
HTML/PDF reports contain:
|
||||
|
||||
@@ -34,8 +34,6 @@ HTML/PDF reports contain:
|
||||
|
||||
Also, there is a short text report in the CLI output after the end of a searching phase.
|
||||
|
||||
**Warning**: XMind 8 mindmaps are incompatible with XMind 2022!
|
||||
|
||||
Tags
|
||||
----
|
||||
|
||||
|
||||
@@ -3,15 +3,4 @@
|
||||
Philosophy
|
||||
==========
|
||||
|
||||
TL;DR: Username => Dossier
|
||||
|
||||
Maigret is designed to gather all the available information about a person by their username.
|
||||
|
||||
What kind of information is this? First, links to person accounts. Secondly, all the machine-extractable
|
||||
pieces of info, such as: other usernames, full name, URLs to people's images, birthday, location (country,
|
||||
city, etc.), gender.
|
||||
|
||||
All this information forms a dossier, but it is also useful for other tools and analytical purposes.
|
||||
Each collected piece of data has a label of a certain format (for example, ``follower_count`` for the number
|
||||
of subscribers or ``created_at`` for account creation time) so that it can be parsed and analyzed by various
|
||||
systems and stored in databases.
|
||||
Username => Dossier
|
||||
|
||||
@@ -17,7 +17,7 @@ There are several types of tags:
|
||||
|
||||
Usage
|
||||
-----
|
||||
``--tags us,jp`` -- search on US and Japanese sites (actually marked as such in the Maigret database)
|
||||
``--tags en,jp`` -- search on US and Japanese sites (actually marked as such in the Maigret database)
|
||||
|
||||
``--tags coding`` -- search on sites related to software development.
|
||||
|
||||
|
||||
@@ -37,17 +37,17 @@ Start a search for accounts with username ``machine42`` only on US and Japanese
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 --tags en,jp
|
||||
maigret michael --tags en,jp
|
||||
|
||||
Start a search for accounts with username ``machine42`` only on sites related to software development.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 --tags coding
|
||||
maigret michael --tags coding
|
||||
|
||||
Start a search for accounts with username ``machine42`` on uCoz sites only (mostly CIS countries).
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret machine42 --tags ucoz
|
||||
maigret michael --tags ucoz
|
||||
|
||||
|
||||
+2
-5
@@ -7,11 +7,8 @@ from maigret.maigret import main
|
||||
|
||||
def run():
|
||||
try:
|
||||
if sys.version_info.minor >= 10:
|
||||
asyncio.run(main())
|
||||
else:
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
except KeyboardInterrupt:
|
||||
print('Maigret is interrupted.')
|
||||
sys.exit(1)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""Maigret version file"""
|
||||
|
||||
__version__ = '0.4.4'
|
||||
__version__ = '0.4.2'
|
||||
|
||||
+5
-13
@@ -10,7 +10,6 @@ import re
|
||||
import ssl
|
||||
import sys
|
||||
import tqdm
|
||||
import random
|
||||
from typing import Tuple, Optional, Dict, List
|
||||
from urllib.parse import quote
|
||||
|
||||
@@ -398,7 +397,7 @@ def process_site_result(
|
||||
|
||||
|
||||
def make_site_result(
|
||||
site: MaigretSite, username: str, options: QueryOptions, logger, *args, **kwargs
|
||||
site: MaigretSite, username: str, options: QueryOptions, logger
|
||||
) -> QueryResultWrapper:
|
||||
results_site: QueryResultWrapper = {}
|
||||
|
||||
@@ -422,10 +421,6 @@ def make_site_result(
|
||||
if "url" not in site.__dict__:
|
||||
logger.error("No URL for site %s", site.name)
|
||||
|
||||
if kwargs.get('retry') and hasattr(site, "mirrors"):
|
||||
site.url_main = random.choice(site.mirrors)
|
||||
logger.info(f"Use {site.url_main} as a main url of site {site}")
|
||||
|
||||
# URL of user on site (if it exists)
|
||||
url = site.url.format(
|
||||
urlMain=site.url_main, urlSubpath=site.url_subpath, username=quote(username)
|
||||
@@ -529,7 +524,7 @@ def make_site_result(
|
||||
async def check_site_for_username(
|
||||
site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
|
||||
) -> Tuple[str, QueryResultWrapper]:
|
||||
default_result = make_site_result(site, username, options, logger, retry=kwargs.get('retry'))
|
||||
default_result = make_site_result(site, username, options, logger)
|
||||
future = default_result.get("future")
|
||||
if not future:
|
||||
return site.name, default_result
|
||||
@@ -585,8 +580,6 @@ async def maigret(
|
||||
cookies=None,
|
||||
retries=0,
|
||||
check_domains=False,
|
||||
*args,
|
||||
**kwargs,
|
||||
) -> QueryResultWrapper:
|
||||
"""Main search func
|
||||
|
||||
@@ -604,7 +597,7 @@ async def maigret(
|
||||
is_parsing_enabled -- Extract additional info from account pages.
|
||||
id_type -- Type of username to search.
|
||||
Default is 'username', see all supported here:
|
||||
https://maigret.readthedocs.io/en/latest/supported-identifier-types.html
|
||||
https://github.com/soxoj/maigret/wiki/Supported-identifier-types
|
||||
max_connections -- Maximum number of concurrent connections allowed.
|
||||
Default is 100.
|
||||
no_progressbar -- Displaying of ASCII progressbar during scanner.
|
||||
@@ -667,8 +660,7 @@ async def maigret(
|
||||
executor = AsyncioSimpleExecutor(logger=logger)
|
||||
else:
|
||||
executor = AsyncioProgressbarQueueExecutor(
|
||||
logger=logger, in_parallel=max_connections, timeout=timeout + 0.5,
|
||||
*args, **kwargs
|
||||
logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
|
||||
)
|
||||
|
||||
# make options objects for all the requests
|
||||
@@ -710,7 +702,7 @@ async def maigret(
|
||||
tasks_dict[sitename] = (
|
||||
check_site_for_username,
|
||||
[site, username, options, logger, query_notify],
|
||||
{'default': (sitename, default_result), 'retry': retries-attempts+1},
|
||||
{'default': (sitename, default_result)},
|
||||
)
|
||||
|
||||
cur_results = await executor.run(tasks_dict.values())
|
||||
|
||||
+1
-2
@@ -63,9 +63,8 @@ COMMON_ERRORS = {
|
||||
ERRORS_TYPES = {
|
||||
'Captcha': 'Try to switch to another IP address or to use service cookies',
|
||||
'Bot protection': 'Try to switch to another IP address',
|
||||
'Censorship': 'Switch to another internet service provider',
|
||||
'Censorship': 'switch to another internet service provider',
|
||||
'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
|
||||
'Connecting failure': 'Try to decrease number of parallel connections (e.g. -n 10)',
|
||||
}
|
||||
|
||||
# TODO: checking for reason
|
||||
|
||||
+2
-22
@@ -81,22 +81,6 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
self.queue = asyncio.Queue(self.workers_count)
|
||||
self.timeout = kwargs.get('timeout')
|
||||
|
||||
async def increment_progress(self, count):
|
||||
update_func = self.progress.update
|
||||
if asyncio.iscoroutinefunction(update_func):
|
||||
await update_func(count)
|
||||
else:
|
||||
update_func(count)
|
||||
await asyncio.sleep(0)
|
||||
|
||||
async def stop_progress(self):
|
||||
stop_func = self.progress.close
|
||||
if asyncio.iscoroutinefunction(stop_func):
|
||||
await stop_func()
|
||||
else:
|
||||
stop_func()
|
||||
await asyncio.sleep(0)
|
||||
|
||||
async def worker(self):
|
||||
while True:
|
||||
try:
|
||||
@@ -112,7 +96,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
result = kwargs.get('default')
|
||||
|
||||
self.results.append(result)
|
||||
await self.increment_progress(1)
|
||||
self.progress.update(1)
|
||||
self.queue.task_done()
|
||||
|
||||
async def _run(self, queries: Iterable[QueryDraft]):
|
||||
@@ -125,14 +109,10 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
|
||||
|
||||
self.progress = self.progress_func(total=len(queries_list))
|
||||
|
||||
for t in queries_list:
|
||||
await self.queue.put(t)
|
||||
|
||||
await self.queue.join()
|
||||
|
||||
for w in workers:
|
||||
w.cancel()
|
||||
|
||||
await self.stop_progress()
|
||||
self.progress.close()
|
||||
return self.results
|
||||
|
||||
+3
-6
@@ -172,7 +172,7 @@ def setup_arguments_parser(settings: Settings):
|
||||
type=int,
|
||||
dest="connections",
|
||||
default=settings.max_connections,
|
||||
help=f"Allowed number of concurrent connections (default {settings.max_connections}).",
|
||||
help="Allowed number of concurrent connections.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-recursion",
|
||||
@@ -719,11 +719,8 @@ async def main():
|
||||
|
||||
def run():
|
||||
try:
|
||||
if sys.version_info.minor >= 10:
|
||||
asyncio.run(main())
|
||||
else:
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
except KeyboardInterrupt:
|
||||
print('Maigret is interrupted.')
|
||||
sys.exit(1)
|
||||
|
||||
+1
-1
@@ -67,7 +67,7 @@ def save_txt_report(filename: str, username: str, results: dict):
|
||||
def save_html_report(filename: str, context: dict):
|
||||
template, _ = generate_report_template(is_pdf=False)
|
||||
filled_template = template.render(**context)
|
||||
with open(filename, "w", encoding="utf-8") as f:
|
||||
with open(filename, "w") as f:
|
||||
f.write(filled_template)
|
||||
|
||||
|
||||
|
||||
+623
-4043
File diff suppressed because it is too large
Load Diff
+4
-15
@@ -431,8 +431,6 @@ class MaigretDatabase:
|
||||
message_checks = 0
|
||||
message_checks_one_factor = 0
|
||||
|
||||
status_checks = 0
|
||||
|
||||
for _, site in sites_dict.items():
|
||||
if site.disabled:
|
||||
disabled_count += 1
|
||||
@@ -446,26 +444,17 @@ class MaigretDatabase:
|
||||
continue
|
||||
message_checks_one_factor += 1
|
||||
|
||||
if site.check_type == 'status_code':
|
||||
status_checks += 1
|
||||
|
||||
if not site.tags:
|
||||
tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
|
||||
|
||||
for tag in filter(lambda x: not is_country_tag(x), site.tags):
|
||||
tags[tag] = tags.get(tag, 0) + 1
|
||||
|
||||
enabled_count = total_count-disabled_count
|
||||
enabled_perc = round(100*enabled_count/total_count, 2)
|
||||
output += f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n"
|
||||
enabled_perc = round(100*(total_count-disabled_count)/total_count, 2)
|
||||
output += f"Enabled/total sites: {total_count - disabled_count}/{total_count} = {enabled_perc}%\n\n"
|
||||
|
||||
checks_perc = round(100*message_checks_one_factor/enabled_count, 2)
|
||||
output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n"
|
||||
|
||||
status_checks_perc = round(100*status_checks/enabled_count, 2)
|
||||
output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n"
|
||||
|
||||
output += f"False positive risk (total): {checks_perc+status_checks_perc:.2f}%\n\n"
|
||||
checks_perc = round(100*message_checks_one_factor/message_checks, 2)
|
||||
output += f"Incomplete checks: {message_checks_one_factor}/{message_checks} = {checks_perc}% (false positive risks)\n\n"
|
||||
|
||||
top_urls_count = 20
|
||||
output += f"Top {top_urls_count} profile URLs:\n"
|
||||
|
||||
+7
-36
@@ -1,11 +1,10 @@
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
from typing import List
|
||||
import xml.etree.ElementTree as ET
|
||||
from aiohttp import TCPConnector, ClientSession
|
||||
import requests
|
||||
import cloudscraper
|
||||
|
||||
from .activation import import_aiohttp_cookies
|
||||
from .checking import maigret
|
||||
@@ -15,27 +14,6 @@ from .sites import MaigretDatabase, MaigretSite, MaigretEngine
|
||||
from .utils import get_random_user_agent, get_match_ratio
|
||||
|
||||
|
||||
class CloudflareSession:
|
||||
def __init__(self):
|
||||
self.scraper = cloudscraper.create_scraper()
|
||||
|
||||
async def get(self, *args, **kwargs):
|
||||
await asyncio.sleep(0)
|
||||
res = self.scraper.get(*args, **kwargs)
|
||||
self.last_text = res.text
|
||||
self.status = res.status_code
|
||||
return self
|
||||
|
||||
def status_code(self):
|
||||
return self.status
|
||||
|
||||
async def text(self):
|
||||
await asyncio.sleep(0)
|
||||
return self.last_text
|
||||
|
||||
async def close(self):
|
||||
pass
|
||||
|
||||
class Submitter:
|
||||
HEADERS = {
|
||||
"User-Agent": get_random_user_agent(),
|
||||
@@ -163,18 +141,16 @@ class Submitter:
|
||||
fields['urlSubpath'] = f'/{subpath}'
|
||||
return fields
|
||||
|
||||
async def detect_known_engine(self, url_exists, url_mainpage) -> [List[MaigretSite], str]:
|
||||
async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
|
||||
resp_text = ''
|
||||
try:
|
||||
r = await self.session.get(url_mainpage)
|
||||
content = await r.content.read()
|
||||
charset = r.charset or "utf-8"
|
||||
resp_text = content.decode(charset, "ignore")
|
||||
resp_text = await r.text()
|
||||
self.logger.debug(resp_text)
|
||||
except Exception as e:
|
||||
self.logger.warning(e)
|
||||
print("Some error while checking main page")
|
||||
return [], resp_text
|
||||
return []
|
||||
|
||||
for engine in self.db.engines:
|
||||
strs_to_check = engine.__dict__.get("presenseStrs")
|
||||
@@ -217,9 +193,9 @@ class Submitter:
|
||||
)
|
||||
sites.append(maigret_site)
|
||||
|
||||
return sites, resp_text
|
||||
return sites
|
||||
|
||||
return [], resp_text
|
||||
return []
|
||||
|
||||
def extract_username_dialog(self, url):
|
||||
url_parts = url.rstrip("/").split("/")
|
||||
@@ -362,15 +338,10 @@ class Submitter:
|
||||
print('Detecting site engine, please wait...')
|
||||
sites = []
|
||||
try:
|
||||
sites, text = await self.detect_known_engine(url_exists, url_exists)
|
||||
sites = await self.detect_known_engine(url_exists, url_mainpage)
|
||||
except KeyboardInterrupt:
|
||||
print('Engine detect process is interrupted.')
|
||||
|
||||
|
||||
if 'cloudflare' in text.lower():
|
||||
print('Cloudflare protection detected. I will use cloudscraper for futher work')
|
||||
# self.session = CloudflareSession()
|
||||
|
||||
if not sites:
|
||||
print("Unable to detect site engine, lets generate checking features")
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
|
||||
pefile==2022.5.30
|
||||
psutil==5.9.2
|
||||
pefile==2021.9.3
|
||||
psutil==5.9.0
|
||||
pyinstaller @ https://github.com/pyinstaller/pyinstaller/archive/develop.zip
|
||||
pywin32-ctypes==0.2.0
|
||||
+23
-23
@@ -1,39 +1,39 @@
|
||||
aiodns==3.0.0
|
||||
aiohttp==3.8.3
|
||||
aiohttp==3.8.1
|
||||
aiohttp-socks==0.7.1
|
||||
arabic-reshaper==2.1.4
|
||||
arabic-reshaper==2.1.3
|
||||
async-timeout==4.0.2
|
||||
attrs==22.1.0
|
||||
certifi==2022.9.24
|
||||
chardet==5.0.0
|
||||
colorama==0.4.6
|
||||
attrs==21.4.0
|
||||
certifi==2021.10.8
|
||||
chardet==4.0.0
|
||||
colorama==0.4.4
|
||||
future==0.18.2
|
||||
future-annotations==1.0.0
|
||||
html5lib==1.1
|
||||
idna==3.4
|
||||
Jinja2==3.1.2
|
||||
lxml==4.9.1
|
||||
MarkupSafe==2.1.1
|
||||
idna==3.3
|
||||
Jinja2==3.0.3
|
||||
lxml==4.8.0
|
||||
MarkupSafe==2.0.1
|
||||
mock==4.0.3
|
||||
multidict==6.0.2
|
||||
pycountry==22.3.5
|
||||
PyPDF2==2.10.8
|
||||
multidict==5.2.0;python_version<"3.7"
|
||||
multidict==6.0.2;python_version>="3.7"
|
||||
pycountry==22.1.10
|
||||
PyPDF2==1.26.0
|
||||
PySocks==1.7.1
|
||||
python-bidi==0.4.2
|
||||
requests==2.28.1
|
||||
requests==2.27.1
|
||||
requests-futures==1.0.0
|
||||
six==1.16.0
|
||||
socid-extractor>=0.0.21
|
||||
soupsieve==2.3.2.post1
|
||||
stem==1.8.1
|
||||
soupsieve==2.3.1
|
||||
stem==1.8.0
|
||||
torrequest==0.1.0
|
||||
tqdm==4.64.1
|
||||
typing-extensions==4.4.0
|
||||
tqdm==4.63.0
|
||||
typing-extensions==4.1.1
|
||||
webencodings==0.5.1
|
||||
xhtml2pdf==0.2.8
|
||||
xhtml2pdf==0.2.5
|
||||
XMind==1.2.0
|
||||
yarl==1.8.1
|
||||
yarl==1.7.2
|
||||
networkx==2.5.1
|
||||
pyvis==0.2.1
|
||||
reportlab==3.6.11
|
||||
cloudscraper==1.2.66
|
||||
pyvis==0.1.9
|
||||
reportlab==3.6.6
|
||||
|
||||
@@ -11,7 +11,7 @@ with open('requirements.txt') as rf:
|
||||
requires = rf.read().splitlines()
|
||||
|
||||
setup(name='maigret',
|
||||
version='0.4.4',
|
||||
version='0.4.2',
|
||||
description='Collect a dossier on a person by username from a huge number of sites',
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
|
||||
+18
-31
@@ -1,43 +1,30 @@
|
||||
name: maigret2
|
||||
adopt-info: maigret2
|
||||
version: git
|
||||
summary: SOCMINT / Instagram
|
||||
description: |
|
||||
Test Test Test
|
||||
|
||||
license: MIT
|
||||
|
||||
base: core20
|
||||
grade: stable
|
||||
base: core18
|
||||
confinement: strict
|
||||
compression: lzo
|
||||
|
||||
architectures:
|
||||
- build-on: amd64
|
||||
|
||||
apps:
|
||||
maigret2:
|
||||
command: bin/maigret
|
||||
environment:
|
||||
LC_ALL: C.UTF-8
|
||||
plugs:
|
||||
- home
|
||||
- network
|
||||
|
||||
parts:
|
||||
maigret2:
|
||||
plugin: python
|
||||
source: https://github.com/soxoj/maigret
|
||||
source-type: git
|
||||
|
||||
build-packages:
|
||||
- python3-pip
|
||||
- python3-six
|
||||
- python3
|
||||
|
||||
python-version: python3
|
||||
source: .
|
||||
stage-packages:
|
||||
- python3
|
||||
- python3-six
|
||||
- python-six
|
||||
|
||||
|
||||
apps:
|
||||
maigret2:
|
||||
command: bin/maigret
|
||||
|
||||
|
||||
architectures:
|
||||
- build-on: amd64
|
||||
- build-on: i386
|
||||
|
||||
|
||||
|
||||
|
||||
override-pull: |
|
||||
snapcraftctl pull
|
||||
snapcraftctl set-version "$(git describe --tags | sed 's/^v//' | cut -d "-" -f1)"
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
reportlab==3.6.11
|
||||
flake8==5.0.4
|
||||
pytest==7.2.0
|
||||
reportlab==3.6.6
|
||||
flake8==4.0.1
|
||||
pytest==7.0.1
|
||||
pytest-asyncio==0.16.0;python_version<"3.7"
|
||||
pytest-asyncio==0.20.1;python_version>="3.7"
|
||||
pytest-cov==4.0.0
|
||||
pytest-httpserver==1.0.6
|
||||
pytest-asyncio==0.18.2;python_version>="3.7"
|
||||
pytest-cov==3.0.0
|
||||
pytest-httpserver==1.0.4
|
||||
pytest-rerunfailures==10.2
|
||||
|
||||
+5
-11
@@ -3,7 +3,7 @@ import random
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
|
||||
from maigret.maigret import MaigretDatabase
|
||||
from maigret.submit import Submitter
|
||||
from maigret.submit import get_alexa_rank
|
||||
|
||||
|
||||
def update_tags(site):
|
||||
@@ -22,7 +22,7 @@ def update_tags(site):
|
||||
site.disabled = True
|
||||
|
||||
print(f'Old alexa rank: {site.alexa_rank}')
|
||||
rank = Submitter.get_alexa_rank(site.url_main)
|
||||
rank = get_alexa_rank(site.url_main)
|
||||
if rank:
|
||||
print(f'New alexa rank: {rank}')
|
||||
site.alexa_rank = rank
|
||||
@@ -36,7 +36,6 @@ if __name__ == '__main__':
|
||||
parser.add_argument("--base","-b", metavar="BASE_FILE",
|
||||
dest="base_file", default="maigret/resources/data.json",
|
||||
help="JSON file with sites data to update.")
|
||||
parser.add_argument("--name", help="Name of site to check")
|
||||
|
||||
pool = list()
|
||||
|
||||
@@ -46,17 +45,12 @@ if __name__ == '__main__':
|
||||
db.load_from_file(args.base_file).sites
|
||||
|
||||
while True:
|
||||
if args.name:
|
||||
sites = list(db.ranked_sites_dict(names=[args.name]).values())
|
||||
site = random.choice(sites)
|
||||
else:
|
||||
site = random.choice(db.sites)
|
||||
|
||||
site = random.choice(db.sites)
|
||||
if site.engine == 'uCoz':
|
||||
continue
|
||||
|
||||
# if not 'in' in site.tags:
|
||||
# continue
|
||||
if not 'in' in site.tags:
|
||||
continue
|
||||
|
||||
update_tags(site)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user