Compare commits
142 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0f215e9c9b | |||
| 911ae698be | |||
| a17e0c7a13 | |||
| e84e394e6f | |||
| b8ada1c818 | |||
| 959b2be136 | |||
| 97cc4b46d9 | |||
| f3b741d283 | |||
| 33620853a1 | |||
| 19ed03a94d | |||
| 35372446e0 | |||
| 519bb46db6 | |||
| 227a25bfa1 | |||
| 5da4e78092 | |||
| e4d6b064df | |||
| f99091f5f7 | |||
| f26976f1dd | |||
| 83ae9c0133 | |||
| 93c4fdeba9 | |||
| 6ec3c47769 | |||
| 3dc3fe9371 | |||
| ebf8227bf1 | |||
| 5b7b28e683 | |||
| 0e95e2e3cc | |||
| 4cd1fccaa3 | |||
| 83a9dafe55 | |||
| b4147d2cd3 | |||
| aa591da913 | |||
| 2d4d3ba0cc | |||
| ec21bbe974 | |||
| 1a4190ee03 | |||
| fe60783a68 | |||
| 8aa0fab314 | |||
| 941a5171ae | |||
| 9a1bd8ffdb | |||
| 68f586fcca | |||
| e39476c4c7 | |||
| 6a7f778c80 | |||
| 7679f98e58 | |||
| c6dbc09ba5 | |||
| b8352c3406 | |||
| 8a02ad5ed7 | |||
| 8fda5776c6 | |||
| 2347bd2f7d | |||
| 229472f323 | |||
| 6acc22dd69 | |||
| 8af07b3889 | |||
| e9df40bdce | |||
| d5bef9e3ac | |||
| 25121754bd | |||
| 198c11b8d4 | |||
| bf9bc5a518 | |||
| 41e246f6a6 | |||
| 9f58fb27ad | |||
| b344a5d98a | |||
| d8b26181f1 | |||
| a60d96c7f2 | |||
| a3159b213b | |||
| 123ead4c03 | |||
| cd7571ef57 | |||
| d922f9be25 | |||
| 3b20b36609 | |||
| ba86981cf4 | |||
| 561ced647f | |||
| 7be3ee8240 | |||
| 48ca13dc4d | |||
| 7f94e86259 | |||
| c2ed1af4b4 | |||
| 648ba6e64c | |||
| 56815d8368 | |||
| b178e97d90 | |||
| a764198c2c | |||
| 2c4684e4a9 | |||
| 8713e1a63e | |||
| 55adc70d10 | |||
| 53fc83dbce | |||
| e8bd00f013 | |||
| a0ba853e64 | |||
| 54b4c7d2ab | |||
| 8791bca866 | |||
| fb26ccd1f6 | |||
| c22abdb834 | |||
| 0689470506 | |||
| 410d7568b7 | |||
| 7280033198 | |||
| 3c6af42916 | |||
| cdb896ba32 | |||
| 6bd047fda3 | |||
| e30cf353a6 | |||
| bd9e48de7c | |||
| aec4fef8db | |||
| 1da49bd208 | |||
| 6da39cf3d5 | |||
| f869eb49ca | |||
| bebadb0362 | |||
| 495eef6ad5 | |||
| e1c72bfb94 | |||
| deb13c9638 | |||
| 1e8e1acd58 | |||
| 5e88fd9ba8 | |||
| 6bc836d6c4 | |||
| 080611c8b9 | |||
| c3cf589aed | |||
| e01d5caae1 | |||
| d90d8a8ac9 | |||
| c3ce8a200b | |||
| 65ea5ceeb1 | |||
| bca1d4bfd8 | |||
| c9e38632ca | |||
| 5f8ce2da98 | |||
| bc6f7f831d | |||
| f95c71d009 | |||
| 974c93f327 | |||
| ed7b65e5ed | |||
| f76ea5d738 | |||
| 960b28d454 | |||
| 09eef6701a | |||
| 329ef27eff | |||
| ccb3b3bbd1 | |||
| b21ac36b27 | |||
| 0f7aa2c456 | |||
| c0e60e25b8 | |||
| 4195a3ca21 | |||
| 5b3b81b482 | |||
| 29d2c07a76 | |||
| 7ff2424de1 | |||
| fc1dd9380e | |||
| e423d72576 | |||
| 9bc6c3370c | |||
| b90cdb1981 | |||
| 21b35e3798 | |||
| b8cf91cc8b | |||
| 8d5e557720 | |||
| 97e5f600d0 | |||
| 36ce285572 | |||
| c2e3e96cb7 | |||
| 900ed840b3 | |||
| c3dfe9cb4d | |||
| 4894a267d7 | |||
| 984584f87d | |||
| a96d574000 | |||
| 88d68490f3 |
@@ -2,54 +2,69 @@ name: Package exe with PyInstaller - Windows
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, dev ]
|
||||
branches: [main, dev]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: PyInstaller Windows Build
|
||||
uses: JackMcKew/pyinstaller-action-windows@main
|
||||
with:
|
||||
path: pyinstaller
|
||||
# Wine Python (not Linux) runs PyInstaller; altgraph needs pkg_resources — reinstall setuptools after all deps.
|
||||
- name: Prepare requirements for Wine (setuptools last)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
cp pyinstaller/requirements.txt pyinstaller/requirements-wine.txt
|
||||
{
|
||||
echo ""
|
||||
echo "# CI: setuptools last so pkg_resources exists for PyInstaller/altgraph in Wine"
|
||||
echo "setuptools==70.0.0"
|
||||
} >> pyinstaller/requirements-wine.txt
|
||||
|
||||
- name: Upload PyInstaller Binary to Workflow as Artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: maigret_standalone_win32
|
||||
path: pyinstaller/dist/windows
|
||||
- name: PyInstaller Windows Build
|
||||
uses: JackMcKew/pyinstaller-action-windows@main
|
||||
with:
|
||||
path: pyinstaller
|
||||
requirements: requirements-wine.txt
|
||||
|
||||
- name: Download PyInstaller Binary
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: maigret_standalone_win32
|
||||
- name: Upload PyInstaller Binary to Workflow as Artifact
|
||||
if: success()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: maigret_standalone_win32
|
||||
path: pyinstaller/dist/windows
|
||||
|
||||
- name: Create New Release and Upload PyInstaller Binary to Release
|
||||
uses: ncipollo/release-action@v1.14.0
|
||||
id: create_release
|
||||
with:
|
||||
allowUpdates: true
|
||||
draft: false
|
||||
prerelease: false
|
||||
artifactErrorsFailBuild: true
|
||||
makeLatest: true
|
||||
replacesArtifacts: true
|
||||
artifacts: maigret_standalone.exe
|
||||
name: Development Windows Release [${{ github.ref_name }}]
|
||||
tag: ${{ github.ref_name }}
|
||||
body: |
|
||||
This is a development release built from the **${{ github.ref_name }}** branch.
|
||||
- name: Download PyInstaller Binary
|
||||
if: success()
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: maigret_standalone_win32
|
||||
|
||||
Take into account that `dev` releases may be unstable.
|
||||
Please, use [the development release](https://github.com/soxoj/maigret/releases/tag/main) build from the **main** branch.
|
||||
- name: Create New Release and Upload PyInstaller Binary to Release
|
||||
if: success()
|
||||
uses: ncipollo/release-action@v1.14.0
|
||||
id: create_release
|
||||
with:
|
||||
allowUpdates: true
|
||||
draft: false
|
||||
prerelease: false
|
||||
artifactErrorsFailBuild: true
|
||||
makeLatest: true
|
||||
replacesArtifacts: true
|
||||
artifacts: maigret_standalone.exe
|
||||
name: Development Windows Release [${{ github.ref_name }}]
|
||||
tag: ${{ github.ref_name }}
|
||||
body: |
|
||||
This is a development release built from the **${{ github.ref_name }}** branch.
|
||||
|
||||
Instructions:
|
||||
- Download the attached file `maigret_standalone.exe` to get the Windows executable.
|
||||
- Video guide on how to run it: https://youtu.be/qIgwTZOmMmM
|
||||
- For detailed documentation, visit: https://maigret.readthedocs.io/en/latest/
|
||||
Take into account that `dev` releases may be unstable.
|
||||
Please, use [the development release](https://github.com/soxoj/maigret/releases/tag/main) build from the **main** branch.
|
||||
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
Instructions:
|
||||
- Download the attached file `maigret_standalone.exe` to get the Windows executable.
|
||||
- Video guide on how to run it: https://youtu.be/qIgwTZOmMmM
|
||||
- For detailed documentation, visit: https://maigret.readthedocs.io/en/latest/
|
||||
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
|
||||
@@ -13,7 +13,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12"]
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -22,6 +22,9 @@ jobs:
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update && sudo apt-get install -y libcairo2-dev
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
@@ -33,7 +36,7 @@ jobs:
|
||||
poetry run coverage report --fail-under=60
|
||||
poetry run coverage html
|
||||
- name: Upload coverage report
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: htmlcov
|
||||
name: htmlcov-${{ strategy.job-index }}
|
||||
path: htmlcov
|
||||
@@ -1,30 +1,21 @@
|
||||
name: Upload Python Package to PyPI when a Release is Created
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [created]
|
||||
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
jobs:
|
||||
pypi-publish:
|
||||
name: Publish release to PyPI
|
||||
build-and-publish:
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/maigret
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
- uses: astral-sh/setup-uv@v3
|
||||
- run: uv build
|
||||
- name: Publish to PyPI (Trusted Publishing)
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
python-version: "3.x"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install setuptools wheel
|
||||
- name: Build package
|
||||
run: |
|
||||
python setup.py sdist bdist_wheel # Could also be python -m build
|
||||
- name: Publish package distributions to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
packages-dir: dist
|
||||
@@ -1,9 +1,8 @@
|
||||
name: Update sites rating and statistics
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ dev ]
|
||||
types: [opened, synchronize]
|
||||
push:
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -12,23 +11,34 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2.3.2
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository.
|
||||
|
||||
- name: build application
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update && sudo apt-get install -y libcairo2-dev
|
||||
|
||||
- name: Build application
|
||||
run: |
|
||||
pip3 install .
|
||||
python3 ./utils/update_site_data.py --empty-only
|
||||
|
||||
- name: Commit and push changes
|
||||
- name: Check for meaningful changes
|
||||
id: check
|
||||
run: |
|
||||
git config --global user.name "Maigret autoupdate"
|
||||
git config --global user.email "soxoj@protonmail.com"
|
||||
echo `git name-rev ${{ github.event.pull_request.head.sha }} --name-only`
|
||||
export BRANCH=`git name-rev ${{ github.event.pull_request.head.sha }} --name-only | sed 's/remotes\/origin\///'`
|
||||
echo $BRANCH
|
||||
git remote -v
|
||||
git checkout $BRANCH
|
||||
git add sites.md
|
||||
git commit -m "Updated site list and statistics"
|
||||
git push origin $BRANCH
|
||||
REAL_CHANGES=$(git diff --unified=0 sites.md | grep '^[+-][^+-]' | grep -v 'The list was updated at' | wc -l)
|
||||
if [ "$REAL_CHANGES" -gt 0 ]; then
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Create Pull Request
|
||||
if: steps.check.outputs.has_changes == 'true'
|
||||
uses: peter-evans/create-pull-request@v5
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
commit-message: "Updated site list and statistics"
|
||||
title: "Automated Sites List Update"
|
||||
body: "Automated changes to sites.md based on new Alexa rankings/statistics."
|
||||
branch: "auto/update-sites-list"
|
||||
delete-branch: true
|
||||
@@ -42,4 +42,4 @@ settings.json
|
||||
|
||||
# other
|
||||
*.egg-info
|
||||
build
|
||||
build
|
||||
@@ -1,6 +1,254 @@
|
||||
# Changelog
|
||||
|
||||
## [Unreleased]
|
||||
## [0.5.0] - 2025-08-10
|
||||
* Site Suppression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/627
|
||||
* Bump yarl from 1.7.2 to 1.8.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/626
|
||||
* Streaming sites by @soxoj in https://github.com/soxoj/maigret/pull/628
|
||||
* Mirrors by @fen0s in https://github.com/soxoj/maigret/pull/630
|
||||
* Added Instagram scrapers by @soxoj in https://github.com/soxoj/maigret/pull/633
|
||||
* Bump psutil from 5.9.1 to 5.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/624
|
||||
* Bump pypdf2 from 2.10.4 to 2.10.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/625
|
||||
* Invalid results fixes by @soxoj in https://github.com/soxoj/maigret/pull/634
|
||||
* Bump pytest-httpserver from 1.0.5 to 1.0.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/638
|
||||
* Bump pypdf2 from 2.10.5 to 2.10.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/641
|
||||
* Bump certifi from 2022.6.15 to 2022.9.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/644
|
||||
* Bump idna from 3.3 to 3.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/640
|
||||
* fix false positives from bot by @fen0s in https://github.com/soxoj/maigret/pull/663
|
||||
* Add pre commit hook by @fen0s in https://github.com/soxoj/maigret/pull/664
|
||||
* site deletion by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/648
|
||||
* Changed docker run to interactive and remove on exit by @dr-BEat in https://github.com/soxoj/maigret/pull/675
|
||||
* Corrected grammar in README.md by @Trkzi-Omar in https://github.com/soxoj/maigret/pull/674
|
||||
* fix sites from issues by @fen0s in https://github.com/soxoj/maigret/pull/680
|
||||
* correct username in usage examples by @LeonGr in https://github.com/soxoj/maigret/pull/673
|
||||
* Update README.md by @johanburati in https://github.com/soxoj/maigret/pull/669
|
||||
* Fix typos by @LorenzoSapora in https://github.com/soxoj/maigret/pull/681
|
||||
* Build docker images for arm64 and amd64 by @krydos in https://github.com/soxoj/maigret/pull/687
|
||||
* Bump certifi from 2022.9.14 to 2022.9.24 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/652
|
||||
* Bump aiohttp from 3.8.1 to 3.8.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/651
|
||||
* Bump arabic-reshaper from 2.1.3 to 2.1.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/650
|
||||
* Update README.md, Repl.it -> Replit with new badge by @PeterDaveHello in https://github.com/soxoj/maigret/pull/692
|
||||
* Refactor Dockerfile with best practices by @PeterDaveHello in https://github.com/soxoj/maigret/pull/691
|
||||
* Improve README.md Installation section by @PeterDaveHello in https://github.com/soxoj/maigret/pull/690
|
||||
* Bump pytest-cov from 3.0.0 to 4.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/688
|
||||
* Bump stem from 1.8.0 to 1.8.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/689
|
||||
* Bump typing-extensions from 4.3.0 to 4.4.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/698
|
||||
* Typo fixes in error.py by @Ben-Chapman in https://github.com/soxoj/maigret/pull/711
|
||||
* Fixed docs about tags by @soxoj in https://github.com/soxoj/maigret/pull/715
|
||||
* Fixed lightstalking.com by @soxoj in https://github.com/soxoj/maigret/pull/716
|
||||
* Fixed YouTube by @soxoj in https://github.com/soxoj/maigret/pull/717
|
||||
* Bump pytest-asyncio from 0.19.0 to 0.20.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/732
|
||||
* Updated snapcraft yaml by @kz6fittycent in https://github.com/soxoj/maigret/pull/720
|
||||
* Bump colorama from 0.4.5 to 0.4.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/733
|
||||
* Bump pytest from 7.1.3 to 7.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/734
|
||||
* disable not working sites by @fen0s in https://github.com/soxoj/maigret/pull/739
|
||||
* disable broken sites by @fen0s in https://github.com/soxoj/maigret/pull/756
|
||||
* Bump cloudscraper from 1.2.64 to 1.2.66 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/769
|
||||
* fix opensea and shutterstock, disable a few dead sites by @fen0s in https://github.com/soxoj/maigret/pull/798
|
||||
* Fixed documentation URL by @soxoj in https://github.com/soxoj/maigret/pull/799
|
||||
* Small readme fix by @soxoj in https://github.com/soxoj/maigret/pull/857
|
||||
* docs spelling error by @Nadeem-05 in https://github.com/soxoj/maigret/pull/866
|
||||
* Fix Pinterest false positive by @therealchiendat in https://github.com/soxoj/maigret/pull/862
|
||||
* Added new Websites by @codyMar30 in https://github.com/soxoj/maigret/pull/838
|
||||
* Update "future" package to v0.18.3 by @PeterDaveHello in https://github.com/soxoj/maigret/pull/834
|
||||
* Bump certifi from 2022.9.24 to 2022.12.7 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/793
|
||||
* Update dependency - networkx from v2.5.1 to v2.6 by @PeterDaveHello in https://github.com/soxoj/maigret/pull/738
|
||||
* Bump reportlab from 3.6.11 to 3.6.12 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/735
|
||||
* Bump typing-extensions from 4.4.0 to 4.5.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/888
|
||||
* Bump psutil from 5.9.2 to 5.9.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/741
|
||||
* Bump attrs from 22.1.0 to 22.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/892
|
||||
* Bump multidict from 6.0.2 to 6.0.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/891
|
||||
* Fixed false positives, updated networkx dep, some lint fixes by @soxoj in https://github.com/soxoj/maigret/pull/894
|
||||
* Bump lxml from 4.9.1 to 4.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/900
|
||||
* Bump yarl from 1.8.1 to 1.8.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/899
|
||||
* Fixed false positives on Mastodon sites by @soxoj in https://github.com/soxoj/maigret/pull/901
|
||||
* Added valid regex for Mastodon instances (#848) by @soxoj in https://github.com/soxoj/maigret/pull/906
|
||||
* Fix missing Mastodon Regex on #906 by @therealchiendat in https://github.com/soxoj/maigret/pull/908
|
||||
* Bump tqdm from 4.64.1 to 4.65.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/905
|
||||
* Bump requests from 2.28.1 to 2.28.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/904
|
||||
* Bump psutil from 5.9.4 to 5.9.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/910
|
||||
* fix deployment of tests by @noraj in https://github.com/soxoj/maigret/pull/933
|
||||
* Added 26 ENS and similar domains with tag `crypto` by @soxoj in https://github.com/soxoj/maigret/pull/942
|
||||
* Bump requests from 2.28.2 to 2.31.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/957
|
||||
* Update wizard.py by @engNoori in https://github.com/soxoj/maigret/pull/1016
|
||||
* Improved search through UnstoppableDomains by @soxoj in https://github.com/soxoj/maigret/pull/1040
|
||||
* Added memory.lol (Twitter usernames archive) by @soxoj in https://github.com/soxoj/maigret/pull/1067
|
||||
* Disabled and fixed several sites by @soxoj in https://github.com/soxoj/maigret/pull/1132
|
||||
* Fixed some sites (again) by @soxoj in https://github.com/soxoj/maigret/pull/1133
|
||||
* fix(sec): upgrade reportlab to 3.6.13 by @realize096 in https://github.com/soxoj/maigret/pull/1051
|
||||
* Add compatibility with pytest >= 7.3.0 by @tjni in https://github.com/soxoj/maigret/pull/1117
|
||||
* Additionally fixed sites, win32 build fix by @soxoj in https://github.com/soxoj/maigret/pull/1148
|
||||
* Sites fixes 250823 by @soxoj in https://github.com/soxoj/maigret/pull/1149
|
||||
* Bump reportlab from 3.6.12 to 4.0.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1160
|
||||
* Bump certifi from 2022.12.7 to 2023.7.22 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1070
|
||||
* fix(sec): upgrade certifi to 2022.12.07 by @realize096 in https://github.com/soxoj/maigret/pull/1173
|
||||
* Bump cloudscraper from 1.2.66 to 1.2.71 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/914
|
||||
* Some sites fixed & cloudflare detection by @soxoj in https://github.com/soxoj/maigret/pull/1178
|
||||
* EasyInstaller because everyone likes saving time :) by @CatchySmile in https://github.com/soxoj/maigret/pull/1212
|
||||
* Tests fixes + last updates by @soxoj in https://github.com/soxoj/maigret/pull/1228
|
||||
* Bump pypdf2 from 2.10.8 to 3.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/815
|
||||
* Bump pyvis from 0.2.1 to 0.3.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/861
|
||||
* Bump xhtml2pdf from 0.2.8 to 0.2.11 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/935
|
||||
* Bump flake8 from 5.0.4 to 6.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1091
|
||||
* Bump aiohttp from 3.8.3 to 3.8.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1222
|
||||
* Specified pyinstaller version by @soxoj in https://github.com/soxoj/maigret/pull/1230
|
||||
* Pyinstaller fix by @soxoj in https://github.com/soxoj/maigret/pull/1231
|
||||
* Test pyinstaller on dev branch by @soxoj in https://github.com/soxoj/maigret/pull/1233
|
||||
* Update main from dev again by @soxoj in https://github.com/soxoj/maigret/pull/1234
|
||||
* Bump typing-extensions from 4.5.0 to 4.8.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1239
|
||||
* Bump pytest-rerunfailures from 10.2 to 12.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1237
|
||||
* Bump async-timeout from 4.0.2 to 4.0.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1238
|
||||
* Changed pyinstaller dir by @soxoj in https://github.com/soxoj/maigret/pull/1245
|
||||
* Bump tqdm from 4.65.0 to 4.66.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1235
|
||||
* Updating site checkers, disabling suspended sites by @MeowyPouncer in https://github.com/soxoj/maigret/pull/1266
|
||||
* Updated site statistics by @soxoj in https://github.com/soxoj/maigret/pull/1273
|
||||
* Compat RegataOS (Opensuse) by @Jeiel0rbit in https://github.com/soxoj/maigret/pull/1308
|
||||
* fix reddit by @hhhtylerw in https://github.com/soxoj/maigret/pull/1296
|
||||
* Added Telegram bot link by @soxoj in https://github.com/soxoj/maigret/pull/1321
|
||||
* Added SOWEL classification by @soxoj in https://github.com/soxoj/maigret/pull/1453
|
||||
* Bump jinja2 from 3.1.2 to 3.1.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1358
|
||||
* Fixed/Disabled sites. Update requirements.txt by @rly0nheart in https://github.com/soxoj/maigret/pull/1517
|
||||
* Fixed 4 sites, added 6 sites, disabled 27 sites by @rly0nheart in https://github.com/soxoj/maigret/pull/1536
|
||||
* Fixed 3 sites, disabled 3, added by @rly0nheart in https://github.com/soxoj/maigret/pull/1539
|
||||
* Bump socid-extractor from 0.0.24 to 0.0.26 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1546
|
||||
* Added code conventions to CONTRIBUTING.md by @Lord-Topa in https://github.com/soxoj/maigret/pull/1589
|
||||
* Readme by @Lord-Topa in https://github.com/soxoj/maigret/pull/1588
|
||||
* Update data.json by @ranlo in https://github.com/soxoj/maigret/pull/1559
|
||||
* Adding permutator feature for usernames by @balestek in https://github.com/soxoj/maigret/pull/1575
|
||||
* Alik.cz indirectly requests removal by @ppfeister in https://github.com/soxoj/maigret/pull/1671
|
||||
* Fixed 1 site, PyInstaller workflow, Google Colab example by @Ixve in https://github.com/soxoj/maigret/pull/1558
|
||||
* Bump soupsieve from 2.5 to 2.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1708
|
||||
* Added dev documentation, fixed some sites, removed GitHub issue links… by @soxoj in https://github.com/soxoj/maigret/pull/1869
|
||||
* Bump cryptography from 42.0.7 to 43.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1870
|
||||
* Bump requests-futures from 1.0.1 to 1.0.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1868
|
||||
* Bump werkzeug from 3.0.3 to 3.0.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1846
|
||||
* Added .readthedocs.yaml, fixed Pyinstaller and Docker workflows by @soxoj in https://github.com/soxoj/maigret/pull/1874
|
||||
* Added GitHub and BuyMeACoffee sponsorships by @soxoj in https://github.com/soxoj/maigret/pull/1875
|
||||
* Bump psutil from 5.9.5 to 6.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1839
|
||||
* Bump flake8 from 6.1.0 to 7.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1692
|
||||
* Bump future from 0.18.3 to 1.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1545
|
||||
* Bump urllib3 from 2.2.1 to 2.2.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1600
|
||||
* Bump certifi from 2023.11.17 to 2024.8.30 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1840
|
||||
* Fixed test for aiohttp 3.10 by @soxoj in https://github.com/soxoj/maigret/pull/1876
|
||||
* Bump aiohttp from 3.9.5 to 3.10.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1721
|
||||
* Added new badges to README by @soxoj in https://github.com/soxoj/maigret/pull/1877
|
||||
* Show detailed error statistics for `-v` by @soxoj in https://github.com/soxoj/maigret/pull/1879
|
||||
* Disabled unavailable sites by @soxoj in https://github.com/soxoj/maigret/pull/1880
|
||||
* Added 7 sites, implemented integration with Marple, docs update by @soxoj in https://github.com/soxoj/maigret/pull/1881
|
||||
* Bump pefile from 2022.5.30 to 2024.8.26 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1883
|
||||
* Bump lxml from 4.9.4 to 5.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1884
|
||||
* New sites added by @soxoj in https://github.com/soxoj/maigret/pull/1888
|
||||
* Improved self-check mode, added 15 sites by @soxoj in https://github.com/soxoj/maigret/pull/1887
|
||||
* Bump pyinstaller from 6.1 to 6.11.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1882
|
||||
* Bump pytest-asyncio from 0.23.7 to 0.23.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1885
|
||||
* Pyinstaller bump & pefile fix by @soxoj in https://github.com/soxoj/maigret/pull/1890
|
||||
* Bump python-bidi from 0.4.2 to 0.6.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1886
|
||||
* Sites checks fixes by @soxoj in https://github.com/soxoj/maigret/pull/1896
|
||||
* Parallel execution optimization by @soxoj in https://github.com/soxoj/maigret/pull/1897
|
||||
* Maigret bot support (custom progress function fixed) by @soxoj in https://github.com/soxoj/maigret/pull/1898
|
||||
* Bump markupsafe from 2.1.5 to 3.0.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1895
|
||||
* Retries set to 0 by default, refactored code of executor with progress by @soxoj in https://github.com/soxoj/maigret/pull/1899
|
||||
* Bump aiohttp-socks from 0.7.1 to 0.9.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1900
|
||||
* Bump pycountry from 23.12.11 to 24.6.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1903
|
||||
* Bump pytest-cov from 4.1.0 to 6.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1902
|
||||
* Bump pyvis from 0.2.1 to 0.3.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1893
|
||||
* Close http connections (#1595) by @soxoj in https://github.com/soxoj/maigret/pull/1905
|
||||
* New logo by @soxoj in https://github.com/soxoj/maigret/pull/1906
|
||||
* Fixed dateutil parsing error for CDT timezone by @soxoj in https://github.com/soxoj/maigret/pull/1907
|
||||
* Bump alive-progress from 2.4.1 to 3.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1910
|
||||
* Permutator output and documentation updates by @soxoj in https://github.com/soxoj/maigret/pull/1914
|
||||
* Bump aiohttp from 3.11.7 to 3.11.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1912
|
||||
* Bump async-timeout from 4.0.3 to 5.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1909
|
||||
* A recursive search animation in README has been updated by @soxoj in https://github.com/soxoj/maigret/pull/1915
|
||||
* Bump pytest-rerunfailures from 12.0 to 15.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1911
|
||||
* Bump attrs from 22.2.0 to 24.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1913
|
||||
* Sites fixes by @soxoj in https://github.com/soxoj/maigret/pull/1917
|
||||
* Update README.md by @soxoj in https://github.com/soxoj/maigret/pull/1919
|
||||
* Refactored sites module, updated documentation by @soxoj in https://github.com/soxoj/maigret/pull/1918
|
||||
* Bump aiohttp from 3.11.8 to 3.11.9 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1920
|
||||
* Bump pytest from 7.4.4 to 8.3.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1923
|
||||
* Bump yarl from 1.18.0 to 1.18.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1922
|
||||
* Bump pytest-asyncio from 0.23.8 to 0.24.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1925
|
||||
* Documentation update by @soxoj in https://github.com/soxoj/maigret/pull/1926
|
||||
* Bump mock from 4.0.3 to 5.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1921
|
||||
* Bump pywin32-ctypes from 0.2.1 to 0.2.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1924
|
||||
* Installation docs update by @soxoj in https://github.com/soxoj/maigret/pull/1927
|
||||
* Disabled Figma check by @soxoj in https://github.com/soxoj/maigret/pull/1928
|
||||
* Put Windows executable in Releases for each dev and main commit by @soxoj in https://github.com/soxoj/maigret/pull/1929
|
||||
* Updated PyInstaller workflow by @soxoj in https://github.com/soxoj/maigret/pull/1930
|
||||
* Documentation update by @soxoj in https://github.com/soxoj/maigret/pull/1931
|
||||
* Fixed Figma check and some bugs by @soxoj in https://github.com/soxoj/maigret/pull/1932
|
||||
* Bump six from 1.16.0 to 1.17.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1933
|
||||
* Activation mechanism documentation added by @soxoj in https://github.com/soxoj/maigret/pull/1935
|
||||
* Readme/docs update based on GH discussions by @soxoj in https://github.com/soxoj/maigret/pull/1936
|
||||
* Bump aiohttp from 3.11.9 to 3.11.10 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1937
|
||||
* Weibo site check fix, activation mechanism added by @soxoj in https://github.com/soxoj/maigret/pull/1938
|
||||
* Fixed Ebay and BongaCams checks by @soxoj in https://github.com/soxoj/maigret/pull/1939
|
||||
* Sites fixes by @soxoj in https://github.com/soxoj/maigret/pull/1940
|
||||
* Fixed Linktr and discourse.mozilla.org by @soxoj in https://github.com/soxoj/maigret/pull/1941
|
||||
* Refactored self-check method, code formatting, small lint fixes by @soxoj in https://github.com/soxoj/maigret/pull/1942
|
||||
* Refactoring, test coverage increased to 60% by @soxoj in https://github.com/soxoj/maigret/pull/1943
|
||||
* Added a test for submitter by @soxoj in https://github.com/soxoj/maigret/pull/1944
|
||||
* Update README.md by @soxoj in https://github.com/soxoj/maigret/pull/1949
|
||||
* Updated OP.GG checks by @soxoj in https://github.com/soxoj/maigret/pull/1950
|
||||
* Fixed ProductHunt check by @soxoj in https://github.com/soxoj/maigret/pull/1951
|
||||
* Improved check feature extraction function, added tests by @soxoj in https://github.com/soxoj/maigret/pull/1952
|
||||
* Submit improvements and site check fixes by @soxoj in https://github.com/soxoj/maigret/pull/1956
|
||||
* chore: update submit.py by @eltociear in https://github.com/soxoj/maigret/pull/1957
|
||||
* Fixed Gravatar parsing (socid_extractor) by @soxoj in https://github.com/soxoj/maigret/pull/1958
|
||||
* Site check fixes by @soxoj in https://github.com/soxoj/maigret/pull/1962
|
||||
* fix bad linux filename generation by @overcuriousity in https://github.com/soxoj/maigret/pull/1961
|
||||
* Bump pytest-asyncio from 0.24.0 to 0.25.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1963
|
||||
* Fixed flaky tests to check cookies by @soxoj in https://github.com/soxoj/maigret/pull/1965
|
||||
* Preparation of 0.5.0 alpha version by @soxoj in https://github.com/soxoj/maigret/pull/1966
|
||||
* Created web frontend launched via --web flag by @overcuriousity in https://github.com/soxoj/maigret/pull/1967
|
||||
* Bump certifi from 2024.8.30 to 2024.12.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1969
|
||||
* Bump attrs from 24.2.0 to 24.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1970
|
||||
* Added web interface docs by @soxoj in https://github.com/soxoj/maigret/pull/1972
|
||||
* Small docs and parameters fixes for web interface mode by @soxoj in https://github.com/soxoj/maigret/pull/1973
|
||||
* [ImgBot] Optimize images by @imgbot[bot] in https://github.com/soxoj/maigret/pull/1974
|
||||
* Improving the web interface by @overcuriousity in https://github.com/soxoj/maigret/pull/1975
|
||||
* make graph more meaningful by @overcuriousity in https://github.com/soxoj/maigret/pull/1977
|
||||
* Async generator-executor for site checks by @soxoj in https://github.com/soxoj/maigret/pull/1978
|
||||
* Bump aiohttp from 3.11.10 to 3.11.11 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1979
|
||||
* Bump psutil from 6.1.0 to 6.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1980
|
||||
* Bump aiohttp-socks from 0.9.1 to 0.10.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1985
|
||||
* Bump mypy from 1.13.0 to 1.14.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1983
|
||||
* Bump aiohttp-socks from 0.10.0 to 0.10.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1987
|
||||
* Bump jinja2 from 3.1.4 to 3.1.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1982
|
||||
* Bump coverage from 7.6.9 to 7.6.10 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1986
|
||||
* Bump pytest-asyncio from 0.25.0 to 0.25.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1989
|
||||
* Bump mypy from 1.14.0 to 1.14.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1988
|
||||
* Bump pytest-asyncio from 0.25.1 to 0.25.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1990
|
||||
* docs: update usage-examples.rst by @eltociear in https://github.com/soxoj/maigret/pull/1996
|
||||
* upload-artifact action in python test workflow updated to v4 by @soxoj in https://github.com/soxoj/maigret/pull/2024
|
||||
* Pass db_file configuration to web interface by @pykereaper in https://github.com/soxoj/maigret/pull/2019
|
||||
* Fix usage of data.json files from web by @pykereaper in https://github.com/soxoj/maigret/pull/2020
|
||||
* Bump black from 24.10.0 to 25.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2001
|
||||
* Important Update Installer.bat by @CatchySmile in https://github.com/soxoj/maigret/pull/1994
|
||||
* Bump cryptography from 44.0.0 to 44.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2005
|
||||
* Bump jinja2 from 3.1.5 to 3.1.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2011
|
||||
* [#2010] Add 6 more websites to manage by @pylapp in https://github.com/soxoj/maigret/pull/2009
|
||||
* Bump flask from 3.1.0 to 3.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2028
|
||||
* Bump requests from 2.32.3 to 2.32.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2026
|
||||
* Bump pycares from 4.5.0 to 4.9.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2025
|
||||
* Bump pytest-asyncio from 0.25.2 to 0.26.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2016
|
||||
* Bump urllib3 from 2.2.3 to 2.5.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2027
|
||||
* Disable ICQ site by @Echo-Darlyson in https://github.com/soxoj/maigret/pull/1993
|
||||
* Bump attrs from 24.3.0 to 25.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2014
|
||||
* Bump certifi from 2024.12.14 to 2025.1.31 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2004
|
||||
* Bump typing-extensions from 4.12.2 to 4.14.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2038
|
||||
* Disable AskFM by @MR-VL in https://github.com/soxoj/maigret/pull/2037
|
||||
* Bump platformdirs from 4.3.6 to 4.3.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2033
|
||||
* Bump coverage from 7.6.10 to 7.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2039
|
||||
* Bump aiohttp from 3.11.11 to 3.12.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2041
|
||||
* Bump yarl from 1.18.3 to 1.20.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2032
|
||||
* Fixed test dialog_adds_site_negative by @soxoj in https://github.com/soxoj/maigret/pull/2107
|
||||
* Bump reportlab from 4.2.5 to 4.4.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2063
|
||||
* Bump asgiref from 3.8.1 to 3.9.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2040
|
||||
* Bump multidict from 6.1.0 to 6.6.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2034
|
||||
* Bump pytest-rerunfailures from 15.0 to 15.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2030
|
||||
|
||||
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.4...v0.5.0
|
||||
|
||||
## [0.4.4] - 2022-09-03
|
||||
* Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
FROM python:3.10-slim
|
||||
FROM python:3.11-slim
|
||||
LABEL maintainer="Soxoj <soxoj@protonmail.com>"
|
||||
WORKDIR /app
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y \
|
||||
gcc \
|
||||
musl-dev \
|
||||
libxml2 \
|
||||
build-essential \
|
||||
python3-dev \
|
||||
pkg-config \
|
||||
libcairo2-dev \
|
||||
libxml2-dev \
|
||||
libxslt-dev \
|
||||
&& \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/*
|
||||
libxslt1-dev \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/*
|
||||
COPY . .
|
||||
RUN YARL_NO_EXTENSIONS=1 python3 -m pip install --no-cache-dir .
|
||||
# For production use, set FLASK_HOST to a specific IP address for security
|
||||
ENV FLASK_HOST=0.0.0.0
|
||||
ENTRYPOINT ["maigret"]
|
||||
|
||||
@@ -1,85 +1,61 @@
|
||||
@echo off
|
||||
|
||||
REM check if running as admin
|
||||
|
||||
goto check_Permissions
|
||||
|
||||
:check_Permissions
|
||||
echo Administrative permissions required. Detecting permissions...
|
||||
|
||||
net session >nul 2>&1
|
||||
if %errorLevel% == 0 (
|
||||
goto 1
|
||||
echo Success: Elevated permissions granted.
|
||||
) else (
|
||||
cls
|
||||
echo Failure: You MUST run this as administator, otherwise commands will fail.
|
||||
echo Failure: Requires elevated permissions.
|
||||
pause >nul
|
||||
)
|
||||
|
||||
pause >nul
|
||||
|
||||
|
||||
|
||||
REM Step 2: Check if Python and pip3 are installed
|
||||
python --version >nul 2>&1
|
||||
if %errorlevel% neq 0 (
|
||||
echo Python is not installed. Please install Python 3.8 or higher.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
|
||||
pip3 --version >nul 2>&1
|
||||
if %errorlevel% neq 0 (
|
||||
echo pip3 is not installed. Please install pip3.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
|
||||
REM Step 3: Check Python version
|
||||
python -c "import sys; exit(0) if sys.version_info >= (3,8) else exit(1)"
|
||||
if %errorlevel% neq 0 (
|
||||
echo Python version 3.8 or higher is required.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
|
||||
|
||||
:1
|
||||
cls
|
||||
:::===============================================================
|
||||
::: ______ __ __ _ _
|
||||
::: | ____| | \/ | (_) | |
|
||||
::: | |__ __ _ ___ _ _ | \ / | __ _ _ __ _ _ __ ___| |_
|
||||
::: | __| / _` / __| | | | | |\/| |/ _` | |/ _` | '__/ _ \ __|
|
||||
::: | |___| (_| \__ \ |_| | | | | | (_| | | (_| | | | __/ |_
|
||||
::: |______\__,_|___/\__, | |_| |_|\__,_|_|\__, |_| \___|\__|
|
||||
::: __/ | __/ |
|
||||
::: |___/ |___/
|
||||
:::
|
||||
:::===============================================================
|
||||
echo.
|
||||
for /f "delims=: tokens=*" %%A in ('findstr /b ::: "%~f0"') do @echo(%%A
|
||||
echo.
|
||||
echo ----------------------------------------------------------------
|
||||
echo Python 3.8 or higher and pip3 required.
|
||||
echo ----------------------------------------------------------------
|
||||
echo Press [I] to begin installation.
|
||||
echo Press [R] If already installed.
|
||||
echo ----------------------------------------------------------------
|
||||
echo --------------------------------------------------------
|
||||
echo Python 3.8 or higher and pip3 required.
|
||||
echo --------------------------------------------------------
|
||||
echo Press [I] to begin installation.
|
||||
echo Press [R] If already installed.
|
||||
echo --------------------------------------------------------
|
||||
choice /c IR
|
||||
if %errorlevel%==1 goto install1
|
||||
if %errorlevel%==1 goto check_python
|
||||
if %errorlevel%==2 goto after
|
||||
|
||||
:check_python
|
||||
cls
|
||||
for /f "tokens=2 delims= " %%i in ('python --version 2^>nul') do (
|
||||
for /f "tokens=1,2 delims=." %%j in ("%%i") do (
|
||||
if %%j GEQ 3 (
|
||||
if %%k GEQ 8 (
|
||||
goto check_pip
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
echo Python 3.8 or higher is required. Please install it first.
|
||||
pause
|
||||
exit /b
|
||||
|
||||
:check_pip
|
||||
pip --version 2>nul | findstr /r /c:"pip" >nul
|
||||
if %errorlevel% neq 0 (
|
||||
echo pip is required. Please install it first.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
goto install1
|
||||
|
||||
:install1
|
||||
cls
|
||||
echo ========================================================
|
||||
echo Maigret Installation Script
|
||||
echo Maigret Installation
|
||||
echo ========================================================
|
||||
echo.
|
||||
echo --------------------------------------------------------
|
||||
echo If your pip installation is outdated, it could cause
|
||||
echo cryptography to fail on installation.
|
||||
echo --------------------------------------------------------
|
||||
echo check for and install pip updates now?
|
||||
echo Check for and install pip 23.3.2 now?
|
||||
echo --------------------------------------------------------
|
||||
choice /c YN
|
||||
if %errorlevel%==1 goto install2
|
||||
@@ -87,42 +63,56 @@ if %errorlevel%==2 goto install3
|
||||
|
||||
:install2
|
||||
cls
|
||||
python -m pip install --upgrade pip
|
||||
goto:install3
|
||||
python -m pip install --upgrade pip==23.3.2
|
||||
if %errorlevel% neq 0 (
|
||||
echo Failed to update pip to version 23.3.2. Please check your installation.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
goto install3
|
||||
|
||||
:install3
|
||||
cls
|
||||
echo ========================================================
|
||||
echo Maigret Installation Script
|
||||
echo Maigret Installation
|
||||
echo ========================================================
|
||||
echo.
|
||||
echo --------------------------------------------------------
|
||||
echo Install requirements and maigret?
|
||||
echo --------------------------------------------------------
|
||||
choice /c YN
|
||||
if %errorlevel%==1 goto install4
|
||||
if %errorlevel%==2 goto 1
|
||||
|
||||
:install4
|
||||
cls
|
||||
pip install .
|
||||
pip install maigret
|
||||
goto:after
|
||||
echo Installing Maigret...
|
||||
python -m pip install maigret
|
||||
if %errorlevel% neq 0 (
|
||||
echo Failed to install Maigret. Please check your installation.
|
||||
pause
|
||||
exit /b
|
||||
)
|
||||
echo.
|
||||
echo +------------------------------------------------------+
|
||||
echo Maigret installed successfully.
|
||||
echo +------------------------------------------------------+
|
||||
pause
|
||||
goto after
|
||||
|
||||
:after
|
||||
cls
|
||||
echo ========================================================
|
||||
echo Maigret Background Search
|
||||
echo Maigret Usage
|
||||
echo ========================================================
|
||||
echo.
|
||||
echo --------------------------------------------------------
|
||||
echo Please Enter Username / Email
|
||||
echo --------------------------------------------------------
|
||||
set /p input=
|
||||
maigret %input%
|
||||
echo +--------------------------------------------------------+
|
||||
echo To use Maigret, you can run the following command:
|
||||
echo.
|
||||
echo maigret [options] [username]
|
||||
echo.
|
||||
echo For example, to search for a username:
|
||||
echo.
|
||||
echo maigret example_username
|
||||
echo.
|
||||
echo For more options and usage details, refer to the Maigret documentation.
|
||||
echo.
|
||||
echo https://github.com/soxoj/maigret/blob/5b3b81b4822f6deb2e9c31eb95039907f25beb5e/README.md
|
||||
echo +--------------------------------------------------------+
|
||||
echo.
|
||||
cmd
|
||||
pause
|
||||
goto:after
|
||||
exit /b
|
||||
exit /b
|
||||
|
||||
@@ -0,0 +1,451 @@
|
||||
# Site checks — guide (Maigret)
|
||||
|
||||
Working document for future changes: workflow, findings from reviews, and practical steps. See also [`site-checks-playbook.md`](site-checks-playbook.md) (short checklist), [`socid_extractor_improvements.log`](socid_extractor_improvements.log) (proposals for upstream identity extraction), and the code in [`maigret/checking.py`](../maigret/checking.py).
|
||||
|
||||
**Documentation maintenance:** whenever you improve Maigret, add search tooling, or change check logic, update **this file** and [`site-checks-playbook.md`](site-checks-playbook.md) in sync (see the section at the end). If you change rules about the JSON API check or the `socid_extractor` log format, update **[`socid_extractor_improvements.log`](socid_extractor_improvements.log)** (template / header) together with this guide.
|
||||
|
||||
---
|
||||
|
||||
## 1. How checks work
|
||||
|
||||
Logic lives in `process_site_result` ([`maigret/checking.py`](../maigret/checking.py)):
|
||||
|
||||
| `checkType` | Meaning |
|
||||
|-------------|---------|
|
||||
| `message` | Profile is “found” if the HTML contains **none** of the `absenceStrs` substrings **and** at least one `presenseStrs` marker matches. If `presenseStrs` is **empty**, presence is treated as true for **any** page (risky configuration). |
|
||||
| `status_code` | HTTP **2xx** is enough — only safe if the server does **not** return 200 for “user not found”. |
|
||||
| `response_url` | Custom flow with **redirects disabled** so the status/URL of the *first* response can be used. |
|
||||
|
||||
For other `checkType` values, [`make_site_result`](../maigret/checking.py) sets **`allow_redirects=True`**: the client follows redirects and `process_site_result` sees the **final** response body and status (not the pre-redirect hop). You do **not** need to “turn on” follow-redirect separately for most sites.
|
||||
|
||||
Sites with an `engine` field (e.g. XenForo) are merged with a template from the `engines` section in [`maigret/resources/data.json`](../maigret/resources/data.json) ([`MaigretSite.update_from_engine`](../maigret/sites.py)).
|
||||
|
||||
### `urlProbe`: probe URL vs reported profile URL
|
||||
|
||||
- **`url`** — pattern for the **public profile page** users should open (what appears in reports as `url_user`). Supports `{username}`, `{urlMain}`, `{urlSubpath}`; the username segment is URL-encoded when the string is built ([`make_site_result`](../maigret/checking.py)).
|
||||
- **`urlProbe`** (optional) — if set, Maigret sends the HTTP **GET** (or HEAD where applicable) to **this** URL for the check, instead of to `url`. Same placeholders. Use it when the reliable signal is a **JSON/API** endpoint but the human-facing link must stay on the main site (e.g. `https://picsart.com/u/{username}` + probe `https://api.picsart.com/users/show/{username}.json`, or GitHub’s `https://github.com/{username}` + `https://api.github.com/users/{username}`).
|
||||
|
||||
If `urlProbe` is omitted, the probe URL defaults to `url`.
|
||||
|
||||
### Redirects and final URL as a signal
|
||||
|
||||
If the **HTML shell** looks the same for “user exists” and “user does not exist” (typical SPA), it is still worth checking whether the **server** behaves differently:
|
||||
|
||||
- **Final URL** after redirects (e.g. profile canonical URL vs `/404` path).
|
||||
- **Redirect chain** length or target host (e.g. lander vs profile).
|
||||
|
||||
If that differs reliably, you may be able to use **`checkType`: `response_url`** in [`data.json`](../maigret/resources/data.json) (no auto-follow) or extend logic — but only when the difference is stable.
|
||||
|
||||
**Server-side HTTP vs client-side navigation.** Maigret follows **HTTP** redirects only; it does **not** run JavaScript. If the browser shows a navigation to `/u/name/posts` or `/not-found` **after** the SPA bundle loads, that may never appear as an extra hop in `curl`/aiohttp — only a **trailing-slash** `301` might show up. Always confirm with `curl -sIL` / a small script whether the **Location** chain differs for real vs fake users before relying on URL-based rules.
|
||||
|
||||
**Empirical check (claimed vs non-existent usernames, `GET` with follow redirects, no JS):**
|
||||
|
||||
| Site | Result |
|
||||
|------|--------|
|
||||
| **Kaskus** | No HTTP redirects beyond the request path; same generic `<title>` and near-identical body length — **no** discriminating signal from redirects alone. |
|
||||
| **Bibsonomy** | Both requests redirect to **`/pow-challenge/?return=/user/...`** (proof-of-work). Only the `return` path changes with the username; **both** existing and fake hit the same challenge flow — not a profile-vs-missing distinction. |
|
||||
| **Picsart (web UI `https://picsart.com/u/{username}`)** | Only a **trailing-slash** `301`; the first HTML is the same empty app shell (~3 KiB) for real and fake users. Browser-only routes such as `…/posts` vs `…/not-found` are **not** visible as additional HTTP redirects in this pipeline. |
|
||||
|
||||
**Picsart — workable check via public API.** The site exposes **`https://api.picsart.com/users/show/{username}.json`**: JSON with `"status":"success"` and a user object when the account exists, and `"reason":"user_not_found"` when it does not. Put that URL in **`urlProbe`**, set **`url`** to the web profile pattern **`https://picsart.com/u/{username}`**, and use **`checkType`: `message`** with narrow `presenseStrs` / `absenceStrs` so reports show the human link while the request hits the API (see **`urlProbe`** above).
|
||||
|
||||
For **Kaskus** and **Bibsonomy**, HTTP-level comparison still does **not** unlock a safe check without PoW / richer signals; keep **`disabled: true`** until something stable appears (API, SSR markers, etc.).
|
||||
|
||||
---
|
||||
|
||||
## 2. Standard checks: public JSON API and `socid_extractor` log
|
||||
|
||||
### 2.1 Public JSON API (always)
|
||||
|
||||
When diagnosing a site—especially **SPAs**, **soft 404s**, or **near-identical HTML** for real vs fake users—**routinely look for a public JSON (or JSON-like) API** used for profile or user lookup. Typical leads: paths containing `/api/`, `/v1/`, `graphql`, `users/show`, `.json` suffixes, or the same endpoints mobile apps use. Verify with `curl` (or the Maigret request path) that **claimed** and **unclaimed** usernames produce **reliably different** bodies or status codes. If such an endpoint is more stable than HTML, put it in **`urlProbe`** and keep **`url`** as the canonical profile page on the main site (see **`urlProbe`** in section 1). If there is no separate public URL for humans, you may still point **`url`** at the API only (reports will show that URL).
|
||||
|
||||
This is a **standard** part of site-check work, not an optional extra.
|
||||
|
||||
### 2.2 Mandatory: [`LLM/socid_extractor_improvements.log`](socid_extractor_improvements.log)
|
||||
|
||||
If you discover **either**:
|
||||
|
||||
1. **JSON embedded in HTML** with user/profile fields (inline scripts, `__NEXT_DATA__`, `application/ld+json`, hydration blobs, etc.), or
|
||||
2. A **standalone JSON HTTP response** (public API) with user/profile data for that service,
|
||||
|
||||
you **must append** a proposal block to **[`LLM/socid_extractor_improvements.log`](socid_extractor_improvements.log)**.
|
||||
|
||||
**Why:** Maigret calls [`socid_extractor.extract`](https://pypi.org/project/socid-extractor/) on the response body ([`extract_ids_data` in `checking.py`](../maigret/checking.py)) to fill `ids_data`. New payloads usually need a **new scheme** upstream (`flags`, `regex`, optional `extract_json`, `fields`, optional `url_mutations` / `transforms`), matching patterns such as **`GitHub API`** or **`Gitlab API`** in `socid_extractor`’s `schemes.py`.
|
||||
|
||||
**Each log entry must include:**
|
||||
|
||||
- **Date** — ISO `YYYY-MM-DD` (day you add the entry).
|
||||
- **Example username** — Prefer the site’s `usernameClaimed` from `data.json`, or any account that reproduces the payload.
|
||||
- **Proposal** — Use the **block template** in the log file: detection idea, optional URL mutation, and field mappings in the same style as existing schemes.
|
||||
|
||||
If the service is **already covered** by an existing `socid_extractor` scheme, add a **short** entry anyway (date, example username, scheme name, “already implemented”) so there is an audit trail.
|
||||
|
||||
Do **not** paste secrets, cookies, or full private JSON; short key names and structure hints are enough.
|
||||
|
||||
---
|
||||
|
||||
## 3. Improvement workflow
|
||||
|
||||
### Phase A — Reproduce
|
||||
|
||||
1. Targeted run:
|
||||
```bash
|
||||
maigret --db /path/to/maigret/resources/data.json \
|
||||
TEST_USERNAME \
|
||||
--site "SiteName" \
|
||||
--print-not-found --print-errors \
|
||||
--no-progressbar -vv
|
||||
```
|
||||
2. Run separately with a **real** existing username and a **definitely non-existent** one (as `usernameClaimed` / `usernameUnclaimed` in JSON).
|
||||
3. If needed: `-vvv` and `debug.log` (raw response).
|
||||
4. Automated pair check:
|
||||
```bash
|
||||
maigret --db ... --self-check --site "SiteName" --no-progressbar
|
||||
```
|
||||
|
||||
### Phase B — Classify the cause
|
||||
|
||||
| Symptom | Likely cause |
|
||||
|---------|----------------|
|
||||
| False “found” with `status_code` | Soft 404 (200 on a “not found” page). |
|
||||
| False “found” with `message` | Overly broad `presenseStrs` (`name`, `email`, JSON keys) or stale `absenceStrs`. |
|
||||
| Same HTML for different users | SPA / skeleton shell before hydration — also compare **final URL / redirect chain** (see above); if still identical, often `disabled`. |
|
||||
| Login page instead of profile | XenForo etc.: guest, `ignore403`, “must be logged in” strings. |
|
||||
| reCAPTCHA / “Checking your browser” / “not a bot” | Bot protection; Maigret’s default User-Agent may worsen the response. |
|
||||
| Redirect to another domain / lander | Stale URL template. |
|
||||
|
||||
### Phase C — Edits in [`data.json`](../maigret/resources/data.json)
|
||||
|
||||
1. Update `url` / `urlMain` if needed (HTTPS, new profile path).
|
||||
2. Replace inappropriate `status_code` with `message` (or `response_url`), choosing:
|
||||
- **`absenceStrs`** — only what reliably appears on the “user does not exist” page;
|
||||
- **`presenseStrs`** — narrow markers of a real profile (avoid generic words).
|
||||
3. For XenForo: override only fields that differ in the site entry; do not break the global `engines` template.
|
||||
4. Refresh `usernameClaimed` / `usernameUnclaimed` if reference accounts disappeared.
|
||||
5. Set **`headers`** (e.g. another `User-Agent`) if the site serves a captcha only to “suspicious” clients.
|
||||
6. Use **`errors`**: HTML substring → meaningful check error (UNKNOWN), so it is not confused with “available”.
|
||||
|
||||
### Phase D — Decision criteria
|
||||
|
||||
| Outcome | When to use |
|
||||
|---------|-------------|
|
||||
| **Check fixed** | The `claimed` / `unclaimed` pair behaves predictably, `--self-check` passes, no regression on a similar site with the same engine. |
|
||||
| **Check disabled** (`disabled: true`) | Cloudflare / anti-bot / login required / indistinguishable SPA without stable markers. |
|
||||
| **Entry removed** | **Only** if the domain/service is gone (NXDOMAIN, clearly dead project), not “because it is hard to fix”. |
|
||||
|
||||
### Phase E — Before commit
|
||||
|
||||
- `maigret --self-check` for affected sites.
|
||||
- `make test`.
|
||||
|
||||
---
|
||||
|
||||
## 4. Findings from reviews (concrete site batch)
|
||||
|
||||
Summary from an earlier false-positive review for: OpenSea, Mercado Livre, Redtube, Tom’s Guide, Kaggle, Kaskus, Livemaster, TechPowerUp, authorSTREAM, Bibsonomy, Bulbagarden, iXBT, Serebii, Picsart, Hashnode, hi5.
|
||||
|
||||
### What most often broke checks
|
||||
|
||||
1. **`status_code` where content checks are needed** — soft 404 with status 200.
|
||||
2. **Broad `presenseStrs`** — matches on error pages or generic SPA shells.
|
||||
3. **XenForo + guest** — HTML includes strings like “You must be logged in” that overlap the engine template.
|
||||
4. **User-Agent** — on some sites (e.g. Kaggle) the default UA triggered a reCAPTCHA page instead of profile HTML; a deliberate `User-Agent` in site `headers` helped.
|
||||
5. **SPAs and redirects** — identical first HTML, redirect to lander / another product (hi5 → Tagged), URL format changes by region (Mercado Livre).
|
||||
|
||||
### What worked as a fix
|
||||
|
||||
- Switching to **`message`** with narrow strings from **`<title>`** or unique markup where stable (**Kaggle**, **Mercado Livre**, **Hashnode**).
|
||||
- For **Kaggle**, additionally: **`headers`**, **`errors`** for browser-check text.
|
||||
- **Redtube** stayed valid on **`status_code`** with a stable **404** for non-existent users.
|
||||
- **Picsart**: the web profile URL is a thin SPA shell; put the **JSON API** (`api.picsart.com/users/show/{username}.json`) in **`urlProbe`** (keeping **`url`** as the web profile pattern `https://picsart.com/u/{username}`) and use **`message`**-style markers (`"status":"success"` vs `user_not_found`), not the browser-only `/posts` vs `/not-found` navigation.
|
||||
|
||||
### What required disabling checks
|
||||
|
||||
Where you **cannot** reliably tell “profile exists” from “no profile” without bypassing protection, login, or full JS:
|
||||
|
||||
- Anti-bot / captcha / “not a bot” page;
|
||||
- The needed page is not available to guests (login required);
|
||||
- SPA with indistinguishable first response;
|
||||
- Forums returning **403** and a login page instead of a member profile for the member-search URL;
|
||||
- Stale URLs that redirect to a stub.
|
||||
|
||||
In those cases **`disabled: true`** is better than false “found”; remove the DB entry only on **actual** domain death.
|
||||
|
||||
### Code notes
|
||||
|
||||
- For the `status_code` branch in `process_site_result`, use **strict** comparison `check_type == "status_code"`, not a substring match inside `"status_code"`.
|
||||
- Treat empty `presenseStrs` with `message` as risky: when debugging, watch the DEBUG-level logs, if such a diagnostic exists in the code.
|
||||
|
||||
---
|
||||
|
||||
## 5. Future ideas (Maigret improvements)
|
||||
|
||||
- A mode or script: one site, two usernames, print statuses and first N bytes of the response (wrapper around `maigret()`).
|
||||
- Document in CLI help that **`--use-disabled-sites`** is needed to analyze disabled entries.
|
||||
|
||||
---
|
||||
|
||||
## 6. Development utilities
|
||||
|
||||
### 6.1 `utils/site_check.py` — Single site diagnostics
|
||||
|
||||
A comprehensive utility for testing individual sites with multiple modes:
|
||||
|
||||
```bash
|
||||
# Basic comparison of claimed vs unclaimed (aiohttp)
|
||||
python utils/site_check.py --site "VK" --check-claimed
|
||||
|
||||
# Test via Maigret's checker directly
|
||||
python utils/site_check.py --site "VK" --maigret
|
||||
|
||||
# Compare aiohttp vs Maigret results (find discrepancies)
|
||||
python utils/site_check.py --site "VK" --compare-methods
|
||||
|
||||
# Full diagnosis with recommendations
|
||||
python utils/site_check.py --site "VK" --diagnose
|
||||
|
||||
# Test with custom URL
|
||||
python utils/site_check.py --url "https://example.com/{username}" --compare user1 user2
|
||||
|
||||
# Find a valid username for a site
|
||||
python utils/site_check.py --site "VK" --find-user
|
||||
```
|
||||
|
||||
**Key features:**
|
||||
- `--maigret` — Uses Maigret's actual checking code, not raw aiohttp
|
||||
- `--compare-methods` — Shows if aiohttp and Maigret see different results (useful for debugging)
|
||||
- `--diagnose` — Validates checkType against actual responses, suggests fixes
|
||||
- Color output with markers detection (captcha, cloudflare, login, etc.)
|
||||
- `--json` flag for machine-readable output
|
||||
|
||||
**When to use each mode:**
|
||||
|
||||
| Mode | Use case |
|
||||
|------|----------|
|
||||
| `--check-claimed` | Quick sanity check: do claimed/unclaimed still differ? |
|
||||
| `--maigret` | Verify Maigret's actual behavior matches expectations |
|
||||
| `--compare-methods` | Debug "works in curl but fails in Maigret" issues |
|
||||
| `--diagnose` | Full analysis when a site is broken, get fix recommendations |
|
||||
|
||||
### 6.2 `utils/check_top_n.py` — Mass site checking
|
||||
|
||||
Batch-check top N sites by Alexa rank with categorized reporting:
|
||||
|
||||
```bash
|
||||
# Check top 100 sites
|
||||
python utils/check_top_n.py --top 100
|
||||
|
||||
# Faster with more parallelism
|
||||
python utils/check_top_n.py --top 100 --parallel 10
|
||||
|
||||
# Output JSON report
|
||||
python utils/check_top_n.py --top 100 --output report.json
|
||||
|
||||
# Only show broken sites
|
||||
python utils/check_top_n.py --top 100 --only-broken
|
||||
```
|
||||
|
||||
**Output categories:**
|
||||
- `working` — Site check passes
|
||||
- `broken` — Check fails (wrong status, missing markers)
|
||||
- `timeout` — Request timed out
|
||||
- `anti_bot` — 403/429 or captcha detected
|
||||
- `error` — Connection or other errors
|
||||
- `disabled` — Already disabled in data.json
|
||||
|
||||
**Report includes:**
|
||||
- Summary counts by category
|
||||
- List of broken sites with issues
|
||||
- Recommendations for fixes (e.g., "Switch to checkType: status_code")
|
||||
|
||||
### 6.3 Self-check behavior (`--self-check`)
|
||||
|
||||
The self-check command has been improved to be less aggressive:
|
||||
|
||||
```bash
|
||||
# Check sites WITHOUT auto-disabling (default)
|
||||
maigret --self-check --site "VK"
|
||||
|
||||
# Auto-disable failing sites (old behavior)
|
||||
maigret --self-check --site "VK" --auto-disable
|
||||
|
||||
# Show detailed diagnosis for each failure
|
||||
maigret --self-check --site "VK" --diagnose
|
||||
```
|
||||
|
||||
**Behavior changes:**
|
||||
|
||||
| Flag | Effect |
|
||||
|------|--------|
|
||||
| `--self-check` alone | Reports issues but does NOT disable sites |
|
||||
| `--auto-disable` | Automatically disables sites that fail (opt-in) |
|
||||
| `--diagnose` | Prints detailed diagnosis with recommendations |
|
||||
|
||||
**Why this matters:**
|
||||
- Old behavior was too aggressive — sites got disabled without explanation
|
||||
- New behavior reports issues and suggests fixes
|
||||
- Explicit `--auto-disable` required to modify database
|
||||
|
||||
---
|
||||
|
||||
## 7. Lessons learned (practical observations)
|
||||
|
||||
Collected from hands-on work fixing top-ranked sites (Reddit, Wikipedia, Microsoft Learn, Baidu, etc.).
|
||||
|
||||
### 7.1 JSON API is the first thing to look for
|
||||
|
||||
Both Reddit and Microsoft Learn had working public APIs that solved the problem entirely. The web pages were SPAs or blocked by anti-bot measures, but the APIs worked reliably:
|
||||
|
||||
- **Reddit**: `https://api.reddit.com/user/{username}/about` — returns JSON with user data or `{"message": "Not Found", "error": 404}`.
|
||||
- **Microsoft Learn**: `https://learn.microsoft.com/api/profiles/{username}` — returns JSON with `userName` field or HTTP 404.
|
||||
|
||||
This confirms the playbook recommendation: always check for `/api/`, `.json`, GraphQL endpoints before giving up on a site.
|
||||
|
||||
### 7.2 `urlProbe` is a powerful tool
|
||||
|
||||
It separates "what we check" (API) from "what we show the user" (human-readable profile URL). Reddit is a perfect example:
|
||||
|
||||
```json
|
||||
{
|
||||
"url": "https://www.reddit.com/user/{username}",
|
||||
"urlProbe": "https://api.reddit.com/user/{username}/about",
|
||||
"checkType": "message",
|
||||
"presenseStrs": ["\"name\":"],
|
||||
"absenceStrs": ["Not Found"]
|
||||
}
|
||||
```
|
||||
|
||||
The check hits the API, but reports display `www.reddit.com/user/blue`.
|
||||
|
||||
### 7.3 aiohttp ≠ curl ≠ requests
|
||||
|
||||
Wikipedia returned HTTP 200 for `curl` and Python `requests`, but HTTP 403 for `aiohttp`. This is **TLS fingerprinting** — the server identifies the HTTP library by cryptographic characteristics of the TLS handshake, not by headers.
|
||||
|
||||
**Key insight:** Changing `User-Agent` does **not** help against TLS fingerprinting. Always test with aiohttp directly (or via Maigret with `-vvv` and `debug.log`), not just `curl`.
|
||||
|
||||
```python
|
||||
# This returns 403 for Wikipedia even with browser UA:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url, headers={"User-Agent": "Mozilla/5.0 ..."}) as resp:
|
||||
print(resp.status) # 403
|
||||
```
|
||||
|
||||
### 7.4 HTTP 403 in Maigret can mean different things
|
||||
|
||||
Initially it seemed Wikipedia was returning 403, but `curl` showed 200. Only `debug.log` revealed the real picture — aiohttp was getting blocked at TLS level.
|
||||
|
||||
**Lesson:** Use `-vvv` flag and inspect `debug.log` for raw response status and body. The warning message alone may be misleading.
|
||||
|
||||
### 7.5 Dead services migrate rather than disappear
|
||||
|
||||
MSDN Social and TechNet profiles redirected to Microsoft Learn. Instead of deleting old entries:
|
||||
|
||||
1. Keep old entries with `disabled: true` as historical record.
|
||||
2. Create a new entry for the current service with a working API.
|
||||
|
||||
This preserves the audit trail and avoids breaking existing workflows.
|
||||
|
||||
### 7.6 `status_code` is more reliable than `message` for APIs
|
||||
|
||||
Microsoft Learn API returns HTTP 404 for non-existent users — a clean signal without HTML parsing. For JSON APIs that return proper HTTP status codes, `status_code` is often the best choice:
|
||||
|
||||
```json
|
||||
{
|
||||
"checkType": "status_code",
|
||||
"urlProbe": "https://learn.microsoft.com/api/profiles/{username}"
|
||||
}
|
||||
```
|
||||
|
||||
No need for fragile string matching when the API speaks HTTP correctly.
|
||||
|
||||
### 7.8 Engine templates can silently break across many sites
|
||||
|
||||
The **vBulletin** engine template has `absenceStrs` in five languages ("This user has not registered…", "Пользователь не зарегистрирован…", etc.). In a batch review of ~12 vBulletin forums (oneclickchicks, mirf, Pesiq, VKMOnline, forum.zone-game.info, etc.), **none** of the absence strings matched — the forums returned identical pages for both claimed and unclaimed usernames. Root cause: many of these forums require login to view member profiles, so they serve a generic page (no "user not registered" message at all) instead of an informative error.
|
||||
|
||||
**Lesson:** When a whole engine class shows false positives, do not patch sites one by one — check whether the **engine template** itself still matches the actual error pages. A template written for one version/language pack may silently stop working after a forum upgrade or config change.
|
||||
|
||||
### 7.9 Search-by-author URLs are architecturally unreliable
|
||||
|
||||
Several sites (OnanistovNet, Shoppingzone, Pogovorim, Astrogalaxy, Sexwin) used a phpBB-style `search.php?keywords=&terms=all&author={username}` URL as the check endpoint. This searches for **posts** by that author, not for the user account itself. Even if the markers worked, a user who exists but has zero posts would be indistinguishable from a non-existent user. And in practice, the sites changed their response format — some now return HTTP 404, others dropped the expected Russian absence text altogether.
|
||||
|
||||
**Lesson:** Avoid author-search URLs as the check endpoint; they test "has posts" rather than "account exists" and are doubly fragile (both logic mismatch and format drift).
|
||||
|
||||
### 7.10 Some sites generate a page for any path — permanent false positives
|
||||
|
||||
Two distinct patterns:
|
||||
|
||||
- **Pbase** creates a stub page titled "pbase Artist {username}" for **every** URL, real or fake. Both return HTTP 200 with nearly identical content (~3.3 KB). No markers can distinguish them.
|
||||
- **ffm.bio** is even trickier: for the non-existent username `a.slomkoowski` it generated a page titled "mr.a" with description "a is a", apparently fuzzy-matching the path to the closest real entry. Both return HTTP 200 with large, content-rich pages.
|
||||
|
||||
**Lesson:** Before writing markers for a site, verify that the "unclaimed" URL actually produces an **error-like** response (different status, different title, unique error text). If the site always returns a plausible-looking page, no combination of `presenseStrs` / `absenceStrs` will help — `disabled: true` is the only safe option.
|
||||
|
||||
### 7.11 TLS fingerprinting can degrade over time (Kaggle)
|
||||
|
||||
Kaggle was previously fixed with a custom `User-Agent` header and `errors` for the "Checking your browser" captcha page. In the latest batch review, aiohttp receives HTTP 404 with identical content for **both** claimed and unclaimed usernames — the site now blocks the entire request before it reaches the profile page. This matches the TLS fingerprinting pattern seen earlier with Wikipedia (section 7.3), but here the degradation happened **after** a working fix was already in place.
|
||||
|
||||
**Lesson:** Sites that rely on bot-detection can tighten their rules at any time. A working `User-Agent` override today may fail tomorrow. When a previously fixed site starts returning identical responses for both usernames, suspect TLS fingerprinting first, and accept `disabled: true` if no public API is available.
|
||||
|
||||
### 7.12 API endpoints may bypass Cloudflare even when the main site is blocked
|
||||
|
||||
All four Fandom wikis returned HTTP 403 with a Cloudflare "Just a moment..." challenge when aiohttp accessed the user profile page (`/wiki/User:{username}`). However, the **MediaWiki API** on the same domain (`/api.php?action=query&list=users&ususers={username}&format=json`) returned clean JSON without any challenge. Similarly, **Substack** served a captcha-laden SPA for `/@{username}`, but its `public_profile` API (`/api/v1/user/{username}/public_profile`) responded with proper JSON and correct HTTP 404 for missing users.
|
||||
|
||||
This is likely because API routes are excluded from the Cloudflare WAF rules or use a different pipeline than the HTML-serving paths.
|
||||
|
||||
**Lesson:** When a site's main pages are blocked by Cloudflare or similar WAF, still check API endpoints on the **same domain** — they may not go through the same protection layer. This is especially true for:
|
||||
- MediaWiki's `api.php` on wiki farms (Fandom, Wikia, self-hosted MediaWiki)
|
||||
- REST API paths (`/api/v1/`, `/api/v2/`) on SPA-heavy sites
|
||||
- Internal data endpoints that the SPA itself calls
|
||||
|
||||
### 7.13 GraphQL APIs often support GET, not just POST
|
||||
|
||||
**hashnode** exposes a GraphQL endpoint at `https://gql.hashnode.com`. While GraphQL is typically associated with POST requests, many implementations also support **GET** with the query passed as a URL parameter. This is critical for Maigret, which only supports GET/HEAD for `urlProbe`.
|
||||
|
||||
```
|
||||
GET https://gql.hashnode.com?query=%7Buser(username%3A%20%22melwinalm%22)%20%7B%20name%20username%20%7D%7D
|
||||
→ {"data":{"user":{"name":"Melwin D'Almeida","username":"melwinalm"}}}
|
||||
|
||||
GET https://gql.hashnode.com?query=%7Buser(username%3A%20%22a.slomkoowski%22)%20%7B%20name%20username%20%7D%7D
|
||||
→ {"data":{"user":null}}
|
||||
```
|
||||
|
||||
**Lesson:** Before giving up on a GraphQL-only site, try the same query via GET with `?query=...` (URL-encoded). Many GraphQL servers accept both methods.
|
||||
|
||||
### 7.14 URL-encoding resolves template placeholder conflicts
|
||||
|
||||
The hashnode GraphQL query `{user(username: "{username}") { name }}` contains curly braces that conflict with Maigret's `{username}` placeholder — Python's `str.format()` would raise a `KeyError` on `{user(username...}`.
|
||||
|
||||
The fix: URL-encode the GraphQL braces (`{` → `%7B`, `}` → `%7D`) but leave `{username}` as-is. Python's `.format()` only interprets literal `{…}` as placeholders, not `%7B…%7D`, and the GraphQL server decodes the percent-encoding on its end:
|
||||
|
||||
```
|
||||
urlProbe: https://gql.hashnode.com?query=%7Buser(username%3A%20%22{username}%22)%20%7B%20name%20username%20%7D%7D
|
||||
```
|
||||
|
||||
After `.format(username="melwinalm")`:
|
||||
```
|
||||
https://gql.hashnode.com?query=%7Buser(username%3A%20%22melwinalm%22)%20%7B%20name%20username%20%7D%7D
|
||||
```
|
||||
|
||||
**Lesson:** When a `urlProbe` needs literal curly braces (GraphQL, JSON in URL, etc.), percent-encode them. This is a general technique for any `data.json` URL field processed by `.format()`.
|
||||
|
||||
### 7.7 The playbook classification works
|
||||
|
||||
The decision tree from the documentation accurately describes real-world cases:
|
||||
|
||||
| Situation | Playbook says | Actual result |
|
||||
|-----------|---------------|---------------|
|
||||
| Captcha (Baidu) | `disabled: true` | Correct |
|
||||
| TLS fingerprinting (Wikipedia) | `disabled: true` (anti-bot) | Correct |
|
||||
| Working API available (Reddit, MS Learn) | Use `urlProbe` | Correct |
|
||||
| Service migrated (MSDN → MS Learn) | Update URL or create new entry | Correct |
|
||||
|
||||
---
|
||||
|
||||
## Documentation maintenance
|
||||
|
||||
For any of the changes below, **always** keep these artifacts in sync — this file ([`site-checks-guide.md`](site-checks-guide.md)), [`site-checks-playbook.md`](site-checks-playbook.md), and (when rules or templates change) the header/template in [`socid_extractor_improvements.log`](socid_extractor_improvements.log):
|
||||
|
||||
- Maigret code changes (including [`maigret/checking.py`](../maigret/checking.py), request executors, CLI);
|
||||
- New or changed search tools / helper utilities for site checks;
|
||||
- Changes to rules or semantics of `checkType`, `data.json` fields, self-check, etc.;
|
||||
- Changes to the **public JSON API** diagnostic step or **mandatory** `socid_extractor` logging rules.
|
||||
|
||||
Prefer updating the guide, playbook, and log template in one commit or in the same task so instructions do not diverge. **Append-only:** new proposals go at the bottom of `socid_extractor_improvements.log`; do not delete historical entries when editing the template.
|
||||
@@ -0,0 +1,84 @@
|
||||
# Site checks — playbook (Maigret)
|
||||
|
||||
Short checklist for edits to [`maigret/resources/data.json`](../maigret/resources/data.json) and, when needed, [`maigret/checking.py`](../maigret/checking.py). Full guide: [`site-checks-guide.md`](site-checks-guide.md). Upstream extraction proposals: [`socid_extractor_improvements.log`](socid_extractor_improvements.log).
|
||||
|
||||
**Documentation maintenance:** whenever you improve Maigret, add search tooling, or change check logic, update **both** this file and [`site-checks-guide.md`](site-checks-guide.md) (see the “Documentation maintenance” section at the end of that file). When JSON API / `socid_extractor` logging rules change, update the **template header** in [`socid_extractor_improvements.log`](socid_extractor_improvements.log) in the same change.
|
||||
|
||||
## 0. Standard checks (do alongside reproduce / classify)
|
||||
|
||||
- **Public JSON API:** always look for a stable JSON (or GraphQL JSON) profile endpoint (`/api/`, `.json`, mobile-style URLs). When the API is more reliable than HTML, set **`urlProbe`** to that endpoint and keep **`url`** as the human-readable profile link (e.g. `https://picsart.com/u/{username}`). If there is no separate profile URL, use the API as `url` only. Details: **`urlProbe`** and section **2.1** in [`site-checks-guide.md`](site-checks-guide.md).
|
||||
- **`socid_extractor` log (mandatory):** if you find **embedded user JSON in HTML** or a **standalone JSON profile API**, append a dated entry (with **example username**) to [`socid_extractor_improvements.log`](socid_extractor_improvements.log). Details: section **2.2** in [`site-checks-guide.md`](site-checks-guide.md).
|
||||
|
||||
## 1. Reproduce
|
||||
|
||||
- Run a targeted check:
|
||||
`maigret USER --db /path/to/maigret/resources/data.json --site "SiteName" --print-not-found --print-errors --no-progressbar -vv`
|
||||
- Compare an **existing** and a **non-existent** username (as `usernameClaimed` / `usernameUnclaimed` in JSON).
|
||||
- With `-vvv`, inspect `debug.log` (raw response in the log).
|
||||
|
||||
## 2. Classify the cause
|
||||
|
||||
| Symptom | Typical cause | Action |
|
||||
|--------|-----------------|--------|
|
||||
| HTTP 200 for “user does not exist” | Soft 404 | Move from `status_code` to `message` or `response_url`; add `absenceStrs` / narrow `presenseStrs` |
|
||||
| Generic words match (`name`, `email`) | `presenseStrs` too broad | Remove generic markers; add profile-specific ones |
|
||||
| Same HTML without JS | SPA / skeleton shell | Compare **final URL and HTTP redirects** (Maigret already follows redirects by default). If the browser shows extra routes (`/posts`, `/not-found`) only **after JS**, they will **not** appear to Maigret — try a **public JSON/API** endpoint for the same site if one exists. See **Redirects and final URL** and **Picsart** in [`site-checks-guide.md`](site-checks-guide.md). |
|
||||
| 403 / “Log in” / guest-only | Auth or anti-bot required | `disabled: true` |
|
||||
| reCAPTCHA / “Checking your browser” | Bot protection | Try a reasonable `User-Agent` in `headers`; else `errors` + UNKNOWN or `disabled` |
|
||||
| Domain does not resolve / persistent timeout | Dead service | Remove entry **only** after confirming the domain is dead |
|
||||
|
||||
## 3. Data edits
|
||||
|
||||
1. Update `url` / `urlMain` if needed (HTTPS redirects). Use optional **`urlProbe`** when the HTTP check should hit a different URL than the profile link shown in reports (API vs web UI).
|
||||
2. For `message`: **always** tune string pairs so `absenceStrs` fire on “no user” pages and `presenseStrs` fire on real profiles without false absence hits.
|
||||
3. Engine (`engine`, e.g. XenForo): override only differing fields in the site entry so other sites are not broken.
|
||||
4. Keep `status_code` only if the response **reliably** differs by status code without soft 404.
|
||||
|
||||
## 4. Verify
|
||||
|
||||
- `maigret --self-check --site "SiteName" --db ...` for touched entries.
|
||||
- `make test` before commit.
|
||||
|
||||
## 5. Code notes
|
||||
|
||||
- `process_site_result` uses strict comparison to `"status_code"` for `checkType` (not a substring trick).
|
||||
- Empty `presenseStrs` with `message` means “presence always true”; a debug line is logged only at DEBUG level.
|
||||
|
||||
## 6. Development utilities
|
||||
|
||||
Quick reference for site check utilities. Full details: section **6** in [`site-checks-guide.md`](site-checks-guide.md).
|
||||
|
||||
| Command | Purpose |
|
||||
|---------|---------|
|
||||
| `python utils/site_check.py --site "X" --check-claimed` | Quick aiohttp comparison |
|
||||
| `python utils/site_check.py --site "X" --maigret` | Test via Maigret checker |
|
||||
| `python utils/site_check.py --site "X" --compare-methods` | Find aiohttp vs Maigret discrepancies |
|
||||
| `python utils/site_check.py --site "X" --diagnose` | Full diagnosis with fix recommendations |
|
||||
| `python utils/check_top_n.py --top 100` | Mass-check top 100 sites |
|
||||
| `maigret --self-check --site "X"` | Self-check (reports only, no auto-disable) |
|
||||
| `maigret --self-check --site "X" --auto-disable` | Self-check with auto-disable |
|
||||
| `maigret --self-check --site "X" --diagnose` | Self-check with detailed diagnosis |
|
||||
|
||||
## 7. Quick tips (lessons learned)
|
||||
|
||||
Practical observations from fixing top-ranked sites. Full details: section **7** in [`site-checks-guide.md`](site-checks-guide.md).
|
||||
|
||||
| Tip | Why it matters |
|
||||
|-----|----------------|
|
||||
| **API first** | Reddit, Microsoft Learn — APIs worked when web pages were blocked. Always check `/api/`, `.json` endpoints. |
|
||||
| **`urlProbe` separates check from display** | Check via API, show human URL in reports. Example: Reddit API → `www.reddit.com/user/` link. |
|
||||
| **aiohttp ≠ curl** | Wikipedia returned 200 for curl, 403 for aiohttp (TLS fingerprinting). Always test with Maigret directly. |
|
||||
| **Use `debug.log`** | Run with `-vvv` to see raw response. Warning messages alone can be misleading. |
|
||||
| **`status_code` for clean APIs** | If API returns proper 404 for missing users, prefer `status_code` over `message`. |
|
||||
| **Migrate, don't delete** | MSDN → Microsoft Learn: keep old entry disabled, create new one for current service. |
|
||||
| **Engine templates break silently** | vBulletin `absenceStrs` failed on ~12 forums at once — many require login, showing a generic page with no error text. Check the engine template first. |
|
||||
| **Search-by-author is unreliable** | phpBB `search.php?author=` checks for posts, not accounts. A user with zero posts looks identical to a non-existent user. Avoid these URLs. |
|
||||
| **Some sites always generate a page** | Pbase stubs "pbase Artist {name}" for any path; ffm.bio fuzzy-matches to the nearest real entry. No markers can help — `disabled: true`. |
|
||||
| **TLS fingerprinting degrades over time** | Kaggle's custom `User-Agent` fix stopped working — aiohttp now gets 404 for both usernames. Accept `disabled: true` when no API exists. |
|
||||
| **API endpoints bypass Cloudflare** | Fandom `api.php` and Substack `/api/v1/` returned clean JSON while main pages were blocked by Cloudflare. Always try API paths on the same domain. |
|
||||
| **GraphQL supports GET too** | hashnode GraphQL works via `GET ?query=...` (URL-encoded). Don't assume POST-only — Maigret can use GET `urlProbe` for GraphQL. |
|
||||
| **URL-encode braces for template safety** | GraphQL `{...}` conflicts with Maigret's `{username}`. Use `%7B`/`%7D` for literal braces in `urlProbe` — `.format()` ignores percent-encoded chars. |
|
||||
|
||||
## 8. Documentation maintenance
|
||||
|
||||
When you change Maigret, add search tools, or change check logic, keep **this playbook**, [`site-checks-guide.md`](site-checks-guide.md), and (when applicable) the template in [`socid_extractor_improvements.log`](socid_extractor_improvements.log) aligned. New log **entries** are append-only at the bottom of that file.
|
||||
@@ -1,7 +1,7 @@
|
||||
LINT_FILES=maigret wizard.py tests
|
||||
|
||||
test:
|
||||
coverage run --source=./maigret -m pytest tests
|
||||
coverage run --source=./maigret,./maigret/web -m pytest tests
|
||||
coverage report -m
|
||||
coverage html
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
|
||||
<i>The Commissioner Jules Maigret is a fictional French police detective, created by Georges Simenon. His investigation method is based on understanding the personality of different people and their interactions.</i>
|
||||
|
||||
<b>👉👉👉 [Online Telegram bot](https://t.me/osint_maigret_bot)</b>
|
||||
<b>👉👉👉 [Online Telegram bot](https://t.me/maigret_search_bot)</b>
|
||||
|
||||
## About
|
||||
|
||||
@@ -53,7 +53,7 @@ See the full description of Maigret features [in the documentation](https://maig
|
||||
|
||||
## Installation
|
||||
|
||||
‼️ Maigret is available online via [official Telegram bot](https://t.me/osint_maigret_bot). Consider using it if you don't want to install anything.
|
||||
‼️ Maigret is available online via [official Telegram bot](https://t.me/maigret_search_bot). Consider using it if you don't want to install anything.
|
||||
|
||||
### Windows
|
||||
|
||||
@@ -75,6 +75,7 @@ You can launch Maigret using cloud shells and Jupyter notebooks. Press one of th
|
||||
|
||||
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
|
||||
|
||||
|
||||
**NOTE**: Python 3.10 or higher and pip are required, **Python 3.11 is recommended.**
|
||||
|
||||
```bash
|
||||
@@ -131,6 +132,30 @@ maigret user1 user2 user3 -a
|
||||
|
||||
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
|
||||
|
||||
### Web interface
|
||||
|
||||
You can run Maigret with a web interface, where you can view the graph with results and download reports of all formats on a single page.
|
||||
|
||||
<details>
|
||||
<summary>Web Interface Screenshots</summary>
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
</details>
|
||||
|
||||
Instructions:
|
||||
|
||||
1. Run Maigret with the ``--web`` flag and specify the port number.
|
||||
|
||||
```console
|
||||
maigret --web 5000
|
||||
```
|
||||
2. Open http://127.0.0.1:5000 in your browser and enter one or more usernames to make a search.
|
||||
|
||||
3. Wait a bit for the search to complete and view the graph with results, the table with all accounts found, and download reports of all formats.
|
||||
|
||||
## Contributing
|
||||
|
||||
Maigret has open-source code, so you may contribute your own sites by adding them to `data.json` file, or bring changes to its code!
|
||||
|
||||
@@ -39,6 +39,18 @@ not stable now. Read more :doc:`in the separate section <tags>`.
|
||||
``--top-sites`` - Number of sites to scan, ranked by Alexa Top
|
||||
**(default: top 500)**.
|
||||
|
||||
**Mirrors:** After the top *N* sites by Alexa rank are chosen (respecting
|
||||
``--tags``, ``--use-disabled-sites``, etc.), Maigret may add extra sites
|
||||
whose database field ``source`` names a **parent platform** that itself falls
|
||||
in the Alexa top *N* when ranking **including disabled** sites. For example,
|
||||
if ``Twitter`` ranks in the first 500 by Alexa, a mirror such as ``memory.lol``
|
||||
(with ``source: Twitter``) is included even though it has no rank and would
|
||||
otherwise be cut off. The same applies to Instagram-related mirrors (e.g.
|
||||
Picuki) when ``Instagram`` is in that parent top *N* by rank—even if the
|
||||
official ``Instagram`` entry is disabled and not scanned by default, its
|
||||
mirrors can still be pulled in. The final list is the ranked top *N* plus
|
||||
these mirrors (no fixed upper bound on mirror count).
|
||||
|
||||
``--timeout`` - Time (in seconds) to wait for responses from sites
|
||||
**(default: 30)**. A longer timeout will be more likely to get results
|
||||
from slow sites. On the other hand, this may cause a long delay to
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
# -- Project information
|
||||
|
||||
project = 'Maigret'
|
||||
copyright = '2024, soxoj'
|
||||
copyright = '2025, soxoj'
|
||||
author = 'soxoj'
|
||||
|
||||
release = '0.5.0a1'
|
||||
release = '0.5.0'
|
||||
version = '0.5'
|
||||
|
||||
# -- General configuration
|
||||
|
||||
@@ -24,6 +24,8 @@ The supported methods (``checkType`` values in ``data.json``) are:
|
||||
|
||||
See the details of check mechanisms in the `checking.py <https://github.com/soxoj/maigret/blob/main/maigret/checking.py#L339>`_ file.
|
||||
|
||||
**Mirrors and ``--top-sites``:** When you limit scans with ``--top-sites N``, Maigret also includes *mirror* sites (entries whose ``source`` field points at a parent platform such as Twitter or Instagram) if that parent would appear in the Alexa top *N* when disabled sites are considered for ranking. See the **Mirrors** paragraph under ``--top-sites`` in :doc:`command-line-options`.
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
||||
@@ -113,6 +115,31 @@ There are few options for sites data.json helpful in various cases:
|
||||
- ``requestHeadOnly`` - set to ``true`` if it's enough to make a HEAD request to the site
|
||||
- ``regexCheck`` - a regex to check if the username is valid, in case of frequent false-positives
|
||||
|
||||
``urlProbe`` (optional profile probe URL)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
By default Maigret performs the HTTP request to the same URL as ``url`` (the public profile link pattern).
|
||||
|
||||
If you set ``urlProbe`` in ``data.json``, Maigret **fetches** that URL for the presence check (API, GraphQL, JSON endpoint, etc.), while **reports and ``url_user``** still use ``url`` — the human-readable profile page users should open.
|
||||
|
||||
Placeholders: ``{username}``, ``{urlMain}``, ``{urlSubpath}`` (same as for ``url``). Example: GitHub uses ``url`` ``https://github.com/{username}`` and ``urlProbe`` ``https://api.github.com/users/{username}``; Picsart uses the web profile ``https://picsart.com/u/{username}`` and probes ``https://api.picsart.com/users/show/{username}.json``.
|
||||
|
||||
Implementation: ``make_site_result`` in `checking.py <https://github.com/soxoj/maigret/blob/main/maigret/checking.py>`_.
|
||||
|
||||
Site check fixes using LLM
|
||||
--------------------------
|
||||
|
||||
.. note::
|
||||
The ``LLM/`` directory at the root of the repository contains detailed instructions for editing site checks (in Markdown format): checklist, full guide to ``checkType`` / ``data.json`` / ``urlProbe``, handling false positives, searching for public JSON APIs, and the proposal log for ``socid_extractor``.
|
||||
|
||||
Main files:
|
||||
|
||||
- `site-checks-playbook.md <https://github.com/soxoj/maigret/blob/main/LLM/site-checks-playbook.md>`_ — short checklist
|
||||
- `site-checks-guide.md <https://github.com/soxoj/maigret/blob/main/LLM/site-checks-guide.md>`_ — detailed guide
|
||||
- `socid_extractor_improvements.log <https://github.com/soxoj/maigret/blob/main/LLM/socid_extractor_improvements.log>`_ — template and entries for identity extractor improvements
|
||||
|
||||
These files should be kept up-to-date whenever changes are made to the check logic in the code or in ``data.json``.
|
||||
|
||||
.. _activation-mechanism:
|
||||
|
||||
Activation mechanism
|
||||
@@ -194,9 +221,10 @@ PyPi package.
|
||||
|
||||
2. Update Maigret version in the following files manually:
|
||||
|
||||
- setup.py
|
||||
- pyproject.toml
|
||||
- maigret/__version__.py
|
||||
- docs/source/conf.py
|
||||
- docs/source/conf.py
|
||||
- snapcraft.yaml
|
||||
|
||||
3. Create a new empty text section at the beginning of the file `CHANGELOG.md` with the current date:
|
||||
|
||||
|
||||
@@ -5,6 +5,34 @@ Features
|
||||
|
||||
This is the list of Maigret features.
|
||||
|
||||
.. _web-interface:
|
||||
|
||||
Web Interface
|
||||
-------------
|
||||
|
||||
You can run Maigret with a web interface, where you can view the graph with results and download reports of all formats on a single page.
|
||||
|
||||
|
||||
.. image:: https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot_start.png
|
||||
:alt: Web interface: how to start
|
||||
|
||||
|
||||
.. image:: https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot.png
|
||||
:alt: Web interface: results
|
||||
|
||||
|
||||
Instructions:
|
||||
|
||||
1. Run Maigret with the ``--web`` flag and specify the port number.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
maigret --web 5000
|
||||
|
||||
2. Open http://127.0.0.1:5000 in your browser and enter one or more usernames to make a search.
|
||||
|
||||
3. Wait a bit for the search to complete and view the graph with results, the table with all accounts found, and download reports of all formats.
|
||||
|
||||
Personal info gathering
|
||||
-----------------------
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 375 KiB After Width: | Height: | Size: 234 KiB |
@@ -3,6 +3,16 @@
|
||||
Usage examples
|
||||
==============
|
||||
|
||||
You can use Maigret as:
|
||||
|
||||
- a command line tool: the original and default mode
|
||||
- a `web interface <#web-interface>`_: view the graph with results and download all report formats on a single page
|
||||
- a library: integrate Maigret into your own project
|
||||
|
||||
Use Cases
|
||||
---------
|
||||
|
||||
|
||||
1. Search for accounts with username ``machine42`` on top 500 sites (by default, according to Alexa rank) from the Maigret DB.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""Maigret version file"""
|
||||
|
||||
__version__ = '0.5.0a1'
|
||||
__version__ = '0.5.0'
|
||||
|
||||
@@ -26,11 +26,7 @@ except ImportError:
|
||||
from . import errors
|
||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||
from .errors import CheckError
|
||||
from .executors import (
|
||||
AsyncExecutor,
|
||||
AsyncioSimpleExecutor,
|
||||
AsyncioProgressbarQueueExecutor,
|
||||
)
|
||||
from .executors import AsyncioQueueGeneratorExecutor
|
||||
from .result import MaigretCheckResult, MaigretCheckStatus
|
||||
from .sites import MaigretDatabase, MaigretSite
|
||||
from .types import QueryOptions, QueryResultWrapper
|
||||
@@ -311,6 +307,12 @@ def process_site_result(
|
||||
|
||||
if html_text:
|
||||
if not presense_flags:
|
||||
if check_type == "message" and logger.isEnabledFor(logging.DEBUG):
|
||||
logger.debug(
|
||||
"Site %s uses checkType message with empty presenseStrs; "
|
||||
"presence is treated as true for any page.",
|
||||
site.name,
|
||||
)
|
||||
is_presense_detected = True
|
||||
site.stats["presense_flag"] = None
|
||||
else:
|
||||
@@ -353,7 +355,7 @@ def process_site_result(
|
||||
result = build_result(MaigretCheckStatus.CLAIMED)
|
||||
else:
|
||||
result = build_result(MaigretCheckStatus.AVAILABLE)
|
||||
elif check_type in "status_code":
|
||||
elif check_type == "status_code":
|
||||
# Checks if the status code of the response is 2XX
|
||||
if 200 <= status_code < 300:
|
||||
result = build_result(MaigretCheckStatus.CLAIMED)
|
||||
@@ -545,6 +547,38 @@ async def check_site_for_username(
|
||||
return site.name, default_result
|
||||
|
||||
response = await checker.check()
|
||||
html_text = response[0] if response and response[0] else ""
|
||||
|
||||
# Retry once after token-style activation (e.g. Twitter guest token refresh).
|
||||
act = site.activation
|
||||
if act and html_text:
|
||||
marks = act.get("marks") or []
|
||||
if marks and any(m in html_text for m in marks):
|
||||
method = act["method"]
|
||||
try:
|
||||
activate_fun = getattr(ParsingActivator(), method)
|
||||
activate_fun(site, logger)
|
||||
except AttributeError as e:
|
||||
logger.warning(
|
||||
f"Activation method {method} for site {site.name} not found!",
|
||||
exc_info=True,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed activation {method} for site {site.name}: {str(e)}",
|
||||
exc_info=True,
|
||||
)
|
||||
else:
|
||||
merged = dict(checker.headers or {})
|
||||
merged.update(site.headers)
|
||||
checker.prepare(
|
||||
url=checker.url,
|
||||
headers=merged,
|
||||
allow_redirects=checker.allow_redirects,
|
||||
timeout=checker.timeout,
|
||||
method=checker.method,
|
||||
)
|
||||
response = await checker.check()
|
||||
|
||||
response_result = process_site_result(
|
||||
response, query_notify, logger, default_result, site
|
||||
@@ -670,18 +704,13 @@ async def maigret(
|
||||
await debug_ip_request(clearweb_checker, logger)
|
||||
|
||||
# setup parallel executor
|
||||
executor: Optional[AsyncExecutor] = None
|
||||
if no_progressbar:
|
||||
# TODO: switch to AsyncioProgressbarQueueExecutor with progress object mock
|
||||
executor = AsyncioSimpleExecutor(logger=logger)
|
||||
else:
|
||||
executor = AsyncioProgressbarQueueExecutor(
|
||||
logger=logger,
|
||||
in_parallel=max_connections,
|
||||
timeout=timeout + 0.5,
|
||||
*args,
|
||||
**kwargs,
|
||||
)
|
||||
executor = AsyncioQueueGeneratorExecutor(
|
||||
logger=logger,
|
||||
in_parallel=max_connections,
|
||||
timeout=timeout + 0.5,
|
||||
*args,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# make options objects for all the requests
|
||||
options: QueryOptions = {}
|
||||
@@ -728,13 +757,17 @@ async def maigret(
|
||||
},
|
||||
)
|
||||
|
||||
cur_results = await executor.run(tasks_dict.values())
|
||||
|
||||
# wait for executor timeout errors
|
||||
await asyncio.sleep(1)
|
||||
cur_results = []
|
||||
with alive_bar(
|
||||
len(tasks_dict), title="Searching", force_tty=True, disable=no_progressbar
|
||||
) as progress:
|
||||
async for result in executor.run(tasks_dict.values()):
|
||||
cur_results.append(result)
|
||||
progress()
|
||||
|
||||
all_results.update(cur_results)
|
||||
|
||||
# rerun for failed sites
|
||||
sites = get_failed_sites(dict(cur_results))
|
||||
attempts -= 1
|
||||
|
||||
@@ -793,9 +826,21 @@ async def site_self_check(
|
||||
i2p_proxy=None,
|
||||
skip_errors=False,
|
||||
cookies=None,
|
||||
auto_disable=False,
|
||||
diagnose=False,
|
||||
):
|
||||
"""
|
||||
Self-check a site configuration.
|
||||
|
||||
Args:
|
||||
auto_disable: If True, automatically disable sites that fail checks.
|
||||
If False (default), only report issues without disabling.
|
||||
diagnose: If True, print detailed diagnosis information.
|
||||
"""
|
||||
changes = {
|
||||
"disabled": False,
|
||||
"issues": [],
|
||||
"recommendations": [],
|
||||
}
|
||||
|
||||
check_data = [
|
||||
@@ -805,6 +850,8 @@ async def site_self_check(
|
||||
|
||||
logger.info(f"Checking {site.name}...")
|
||||
|
||||
results_cache = {}
|
||||
|
||||
for username, status in check_data:
|
||||
async with semaphore:
|
||||
results_dict = await maigret(
|
||||
@@ -826,15 +873,20 @@ async def site_self_check(
|
||||
# TODO: make normal checking
|
||||
if site.name not in results_dict:
|
||||
logger.info(results_dict)
|
||||
changes["disabled"] = True
|
||||
changes["issues"].append(f"Site {site.name} not in results (wrong id_type?)")
|
||||
if auto_disable:
|
||||
changes["disabled"] = True
|
||||
continue
|
||||
|
||||
logger.debug(results_dict)
|
||||
|
||||
result = results_dict[site.name]["status"]
|
||||
results_cache[username] = results_dict[site.name]
|
||||
|
||||
if result.error and 'Cannot connect to host' in result.error.desc:
|
||||
changes["disabled"] = True
|
||||
changes["issues"].append(f"Cannot connect to host")
|
||||
if auto_disable:
|
||||
changes["disabled"] = True
|
||||
|
||||
site_status = result.status
|
||||
|
||||
@@ -842,6 +894,8 @@ async def site_self_check(
|
||||
if site_status == MaigretCheckStatus.UNKNOWN:
|
||||
msgs = site.absence_strs
|
||||
etype = site.check_type
|
||||
error_msg = f"Error checking {username}: {result.context}"
|
||||
changes["issues"].append(error_msg)
|
||||
logger.warning(
|
||||
f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}"
|
||||
)
|
||||
@@ -851,28 +905,62 @@ async def site_self_check(
|
||||
if skip_errors:
|
||||
pass
|
||||
# don't disable in case of available username
|
||||
elif status == MaigretCheckStatus.CLAIMED:
|
||||
elif status == MaigretCheckStatus.CLAIMED and auto_disable:
|
||||
changes["disabled"] = True
|
||||
elif status == MaigretCheckStatus.CLAIMED:
|
||||
changes["issues"].append(f"Claimed user '{username}' not detected as claimed")
|
||||
logger.warning(
|
||||
f"Not found `{username}` in {site.name}, must be claimed"
|
||||
)
|
||||
logger.info(results_dict[site.name])
|
||||
changes["disabled"] = True
|
||||
if auto_disable:
|
||||
changes["disabled"] = True
|
||||
else:
|
||||
changes["issues"].append(f"Unclaimed user '{username}' detected as claimed")
|
||||
logger.warning(f"Found `{username}` in {site.name}, must be available")
|
||||
logger.info(results_dict[site.name])
|
||||
changes["disabled"] = True
|
||||
if auto_disable:
|
||||
changes["disabled"] = True
|
||||
|
||||
logger.info(f"Site {site.name} checking is finished")
|
||||
|
||||
if changes["disabled"] != site.disabled:
|
||||
# Generate recommendations based on issues
|
||||
if changes["issues"] and len(results_cache) == 2:
|
||||
claimed_result = results_cache.get(site.username_claimed, {})
|
||||
unclaimed_result = results_cache.get(site.username_unclaimed, {})
|
||||
|
||||
claimed_http = claimed_result.get("http_status")
|
||||
unclaimed_http = unclaimed_result.get("http_status")
|
||||
|
||||
if claimed_http and unclaimed_http:
|
||||
if claimed_http != unclaimed_http and site.check_type != "status_code":
|
||||
changes["recommendations"].append(
|
||||
f"Consider checkType: status_code (HTTP {claimed_http} vs {unclaimed_http})"
|
||||
)
|
||||
|
||||
# Print diagnosis if requested
|
||||
if diagnose and changes["issues"]:
|
||||
print(f"\n--- {site.name} DIAGNOSIS ---")
|
||||
print(f" Check type: {site.check_type}")
|
||||
print(f" Issues:")
|
||||
for issue in changes["issues"]:
|
||||
print(f" - {issue}")
|
||||
if changes["recommendations"]:
|
||||
print(f" Recommendations:")
|
||||
for rec in changes["recommendations"]:
|
||||
print(f" -> {rec}")
|
||||
|
||||
# Only modify site if auto_disable is enabled
|
||||
if auto_disable and changes["disabled"] != site.disabled:
|
||||
site.disabled = changes["disabled"]
|
||||
logger.info(f"Switching property 'disabled' for {site.name} to {site.disabled}")
|
||||
db.update_site(site)
|
||||
if not silent:
|
||||
action = "Disabled" if site.disabled else "Enabled"
|
||||
print(f"{action} site {site.name}...")
|
||||
elif changes["issues"] and not silent and not diagnose:
|
||||
# Report issues without disabling
|
||||
print(f"Issues found in {site.name}: {len(changes['issues'])} (not auto-disabled)")
|
||||
|
||||
# remove service tag "unchecked"
|
||||
if "unchecked" in site.tags:
|
||||
@@ -891,10 +979,24 @@ async def self_check(
|
||||
proxy=None,
|
||||
tor_proxy=None,
|
||||
i2p_proxy=None,
|
||||
) -> bool:
|
||||
auto_disable=False,
|
||||
diagnose=False,
|
||||
) -> dict:
|
||||
"""
|
||||
Run self-check on sites.
|
||||
|
||||
Args:
|
||||
auto_disable: If True, automatically disable sites that fail checks.
|
||||
If False (default), only report issues without disabling.
|
||||
diagnose: If True, print detailed diagnosis for each failing site.
|
||||
|
||||
Returns:
|
||||
dict with 'needs_update' bool and 'results' list of check results
|
||||
"""
|
||||
sem = asyncio.Semaphore(max_connections)
|
||||
tasks = []
|
||||
all_sites = site_data
|
||||
all_results = []
|
||||
|
||||
def disabled_count(lst):
|
||||
return len(list(filter(lambda x: x.disabled, lst)))
|
||||
@@ -906,15 +1008,18 @@ async def self_check(
|
||||
|
||||
for _, site in all_sites.items():
|
||||
check_coro = site_self_check(
|
||||
site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy, skip_errors=True
|
||||
site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy,
|
||||
skip_errors=True, auto_disable=auto_disable, diagnose=diagnose
|
||||
)
|
||||
future = asyncio.ensure_future(check_coro)
|
||||
tasks.append(future)
|
||||
tasks.append((site.name, future))
|
||||
|
||||
if tasks:
|
||||
with alive_bar(len(tasks), title='Self-checking', force_tty=True) as progress:
|
||||
for f in asyncio.as_completed(tasks):
|
||||
await f
|
||||
for site_name, f in tasks:
|
||||
result = await f
|
||||
result['site_name'] = site_name
|
||||
all_results.append(result)
|
||||
progress() # Update the progress bar
|
||||
|
||||
unchecked_new_count = len(
|
||||
@@ -923,7 +1028,10 @@ async def self_check(
|
||||
disabled_new_count = disabled_count(all_sites.values())
|
||||
total_disabled = disabled_new_count - disabled_old_count
|
||||
|
||||
if total_disabled:
|
||||
# Count issues
|
||||
total_issues = sum(1 for r in all_results if r.get('issues'))
|
||||
|
||||
if auto_disable and total_disabled:
|
||||
if total_disabled >= 0:
|
||||
message = "Disabled"
|
||||
else:
|
||||
@@ -935,11 +1043,25 @@ async def self_check(
|
||||
f"{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. "
|
||||
"Run with `--info` flag to get more information"
|
||||
)
|
||||
elif total_issues and not silent:
|
||||
print(f"\nFound issues in {total_issues} sites (auto-disable is OFF)")
|
||||
print("Use --auto-disable to automatically disable failing sites")
|
||||
print("Use --diagnose to see detailed diagnosis for each site")
|
||||
|
||||
if unchecked_new_count != unchecked_old_count:
|
||||
print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}")
|
||||
|
||||
return total_disabled != 0 or unchecked_new_count != unchecked_old_count
|
||||
needs_update = total_disabled != 0 or unchecked_new_count != unchecked_old_count
|
||||
|
||||
# For backwards compatibility, return bool if auto_disable is True
|
||||
if auto_disable:
|
||||
return needs_update
|
||||
|
||||
return {
|
||||
'needs_update': needs_update,
|
||||
'results': all_results,
|
||||
'total_issues': total_issues,
|
||||
}
|
||||
|
||||
|
||||
def extract_ids_data(html_text, logger, site) -> Dict:
|
||||
|
||||
@@ -32,6 +32,9 @@ COMMON_ERRORS = {
|
||||
'<title>Attention Required! | Cloudflare</title>': CheckError(
|
||||
'Captcha', 'Cloudflare'
|
||||
),
|
||||
'<title>Just a moment</title>': CheckError(
|
||||
'Bot protection', 'Cloudflare challenge page'
|
||||
),
|
||||
'Please stand by, while we are checking your browser': CheckError(
|
||||
'Bot protection', 'Cloudflare'
|
||||
),
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
from typing import Any, Iterable, List
|
||||
from typing import Any, Iterable, List, Callable
|
||||
|
||||
import alive_progress
|
||||
from alive_progress import alive_bar
|
||||
@@ -19,6 +19,7 @@ def create_task_func():
|
||||
|
||||
|
||||
class AsyncExecutor:
|
||||
# Deprecated: will be removed soon, don't use it
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.logger = kwargs['logger']
|
||||
|
||||
@@ -34,6 +35,7 @@ class AsyncExecutor:
|
||||
|
||||
|
||||
class AsyncioSimpleExecutor(AsyncExecutor):
|
||||
# Deprecated: will be removed soon, don't use it
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 100))
|
||||
@@ -48,6 +50,7 @@ class AsyncioSimpleExecutor(AsyncExecutor):
|
||||
|
||||
|
||||
class AsyncioProgressbarExecutor(AsyncExecutor):
|
||||
# Deprecated: will be removed soon, don't use it
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@@ -71,6 +74,7 @@ class AsyncioProgressbarExecutor(AsyncExecutor):
|
||||
|
||||
|
||||
class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor):
|
||||
# Deprecated: will be removed soon, don't use it
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1))
|
||||
@@ -174,3 +178,67 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
|
||||
w.cancel()
|
||||
|
||||
return self.results
|
||||
|
||||
|
||||
class AsyncioQueueGeneratorExecutor:
    """Run queued queries on a fixed pool of workers and stream the results.

    ``run`` is an async generator: every result is yielded as soon as a
    worker produces it, so the caller can consume results (and e.g. advance
    a progress bar) while the remaining queries are still in flight.
    """

    def __init__(self, *args, **kwargs):
        """Configure the executor.

        Keyword Args:
            logger: logger used for worker errors and timing (required).
            in_parallel: number of concurrent workers (default 10).
            timeout: per-query timeout in seconds handed to
                ``asyncio.wait_for``; ``None`` means no timeout.
        """
        self.workers_count = kwargs.get('in_parallel', 10)
        self.queue = asyncio.Queue()
        self.timeout = kwargs.get('timeout')
        self.logger = kwargs['logger']
        self._results = asyncio.Queue()
        # Unique marker with two roles: in `queue` it tells a worker to exit,
        # in `_results` it tells `run` that one worker has exited.
        self._stop_signal = object()

    async def worker(self):
        """Process tasks from the queue and put results into the results queue."""
        try:
            while True:
                task = await self.queue.get()
                if task is self._stop_signal:
                    self.queue.task_done()
                    break

                try:
                    f, args, kwargs = task
                    query_future = f(*args, **kwargs)
                    query_task = create_task_func()(query_future)

                    try:
                        result = await asyncio.wait_for(
                            query_task, timeout=self.timeout
                        )
                    except asyncio.TimeoutError:
                        # A timed-out query is reported with its default result.
                        result = kwargs.get('default')
                    await self._results.put(result)
                except Exception as e:
                    # Best effort: a failing query is logged and produces no result.
                    self.logger.error(f"Error in worker: {e}")
                finally:
                    self.queue.task_done()
        finally:
            # Always announce that this worker is gone (also on cancellation),
            # so `run` never waits for a worker that no longer exists.
            # The queue is unbounded, hence put_nowait cannot fail.
            self._results.put_nowait(self._stop_signal)

    async def run(self, queries: Iterable[Callable[..., Any]]):
        """Run workers to process queries in parallel.

        Args:
            queries: iterable of tasks; each one is unpacked by the worker as
                ``(f, args, kwargs)`` and executed as ``f(*args, **kwargs)``.

        Yields:
            Query results in completion order. A query that times out yields
            ``kwargs.get('default')``; a query that raises is logged and
            yields nothing.
        """
        start_time = time.time()

        # Add tasks to the queue
        for t in queries:
            await self.queue.put(t)

        # Create workers
        workers = [
            asyncio.create_task(self.worker()) for _ in range(self.workers_count)
        ]

        # Add stop signals: one per worker, queued after all real tasks
        for _ in range(self.workers_count):
            await self.queue.put(self._stop_signal)

        try:
            # Each worker posts its results first and its completion marker
            # last, so once every marker has been seen all results have been
            # yielded. This replaces polling the queue with a timeout, which
            # delayed the end of every run until the next poll expired.
            running_workers = len(workers)
            while running_workers:
                item = await self._results.get()
                if item is self._stop_signal:
                    running_workers -= 1
                else:
                    yield item
        finally:
            # Ensure all workers are awaited
            await asyncio.gather(*workers)
            # If the consumer stopped iterating early, drop what is left so a
            # later run() on this executor does not see stale items.
            while not self._results.empty():
                self._results.get_nowait()
            self.execution_time = time.time() - start_time
            self.logger.debug(f"Spent time: {self.execution_time}")
|
||||
|
||||
@@ -316,7 +316,19 @@ def setup_arguments_parser(settings: Settings):
|
||||
"--self-check",
|
||||
action="store_true",
|
||||
default=settings.self_check_enabled,
|
||||
help="Do self check for sites and database and disable non-working ones.",
|
||||
help="Do self check for sites and database. Use --auto-disable to disable failing sites.",
|
||||
)
|
||||
modes_group.add_argument(
|
||||
"--auto-disable",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="With --self-check: automatically disable sites that fail checks.",
|
||||
)
|
||||
modes_group.add_argument(
|
||||
"--diagnose",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="With --self-check: print detailed diagnosis for each failing site.",
|
||||
)
|
||||
modes_group.add_argument(
|
||||
"--stats",
|
||||
@@ -324,7 +336,15 @@ def setup_arguments_parser(settings: Settings):
|
||||
default=False,
|
||||
help="Show database statistics (most frequent sites engines and tags).",
|
||||
)
|
||||
|
||||
modes_group.add_argument(
|
||||
"--web",
|
||||
metavar='PORT',
|
||||
type=int,
|
||||
nargs='?', # Optional PORT value
|
||||
const=5000, # Default PORT if `--web` is provided without a value
|
||||
default=None, # Explicitly set default to None
|
||||
help="Launch the web interface on the specified port (default: 5000 if no PORT is provided).",
|
||||
)
|
||||
output_group = parser.add_argument_group(
|
||||
'Output options', 'Options to change verbosity and view of the console output'
|
||||
)
|
||||
@@ -512,7 +532,9 @@ async def main():
|
||||
if args.tags:
|
||||
args.tags = list(set(str(args.tags).split(',')))
|
||||
|
||||
db_file = path.join(path.dirname(path.realpath(__file__)), args.db_file)
|
||||
db_file = args.db_file \
|
||||
if (args.db_file.startswith("http://") or args.db_file.startswith("https://")) \
|
||||
else path.join(path.dirname(path.realpath(__file__)), args.db_file)
|
||||
|
||||
if args.top_sites == 0 or args.all_sites:
|
||||
args.top_sites = sys.maxsize
|
||||
@@ -556,7 +578,7 @@ async def main():
|
||||
query_notify.success(
|
||||
f'Maigret sites database self-check started for {len(site_data)} sites...'
|
||||
)
|
||||
is_need_update = await self_check(
|
||||
check_result = await self_check(
|
||||
db,
|
||||
site_data,
|
||||
logger,
|
||||
@@ -564,7 +586,16 @@ async def main():
|
||||
max_connections=args.connections,
|
||||
tor_proxy=args.tor_proxy,
|
||||
i2p_proxy=args.i2p_proxy,
|
||||
auto_disable=args.auto_disable,
|
||||
diagnose=args.diagnose,
|
||||
)
|
||||
|
||||
# Handle both old (bool) and new (dict) return types
|
||||
if isinstance(check_result, dict):
|
||||
is_need_update = check_result.get('needs_update', False)
|
||||
else:
|
||||
is_need_update = check_result
|
||||
|
||||
if is_need_update:
|
||||
if input('Do you want to save changes permanently? [Yn]\n').lower() in (
|
||||
'y',
|
||||
@@ -592,6 +623,21 @@ async def main():
|
||||
# Define one report filename template
|
||||
report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
|
||||
|
||||
# Web interface
|
||||
if args.web is not None:
|
||||
from maigret.web.app import app
|
||||
|
||||
app.config["MAIGRET_DB_FILE"] = db_file
|
||||
|
||||
port = (
|
||||
args.web if args.web else 5000
|
||||
) # args.web is either the specified port or 5000 by default
|
||||
|
||||
# Host configuration: secure by default, but allow override via environment
|
||||
host = os.getenv('FLASK_HOST', '127.0.0.1')
|
||||
app.run(host=host, port=port)
|
||||
return
|
||||
|
||||
if usernames == {}:
|
||||
# magic params to exit after init
|
||||
query_notify.warning('No usernames to check, exiting.')
|
||||
|
||||
@@ -98,21 +98,20 @@ class MaigretGraph:
|
||||
def __init__(self, graph):
|
||||
self.G = graph
|
||||
|
||||
def add_node(self, key, value):
|
||||
def add_node(self, key, value, color=None):
|
||||
node_name = f'{key}: {value}'
|
||||
|
||||
params = self.other_params
|
||||
params = dict(self.other_params)
|
||||
if key in SUPPORTED_IDS:
|
||||
params = self.username_params
|
||||
params = dict(self.username_params)
|
||||
elif value.startswith('http'):
|
||||
params = self.site_params
|
||||
params = dict(self.site_params)
|
||||
|
||||
self.G.add_node(node_name, title=node_name, **params)
|
||||
|
||||
if value != value.lower():
|
||||
normalized_node_name = self.add_node(key, value.lower())
|
||||
self.link(node_name, normalized_node_name)
|
||||
params['title'] = node_name
|
||||
if color:
|
||||
params['color'] = color
|
||||
|
||||
self.G.add_node(node_name, **params)
|
||||
return node_name
|
||||
|
||||
def link(self, node1_name, node2_name):
|
||||
@@ -120,94 +119,126 @@ class MaigretGraph:
|
||||
|
||||
|
||||
def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
|
||||
# moved here to speed up the launch of Maigret
|
||||
import networkx as nx
|
||||
|
||||
G = nx.Graph()
|
||||
graph = MaigretGraph(G)
|
||||
|
||||
base_site_nodes = {}
|
||||
site_account_nodes = {}
|
||||
processed_values = {} # Track processed values to avoid duplicates
|
||||
|
||||
for username, id_type, results in username_results:
|
||||
username_node_name = graph.add_node(id_type, username)
|
||||
# Add username node, using normalized version directly if different
|
||||
norm_username = username.lower()
|
||||
username_node_name = graph.add_node(id_type, norm_username)
|
||||
|
||||
for website_name in results:
|
||||
dictionary = results[website_name]
|
||||
# TODO: fix no site data issue
|
||||
if not dictionary:
|
||||
continue
|
||||
|
||||
if dictionary.get("is_similar"):
|
||||
for website_name, dictionary in results.items():
|
||||
if not dictionary or dictionary.get("is_similar"):
|
||||
continue
|
||||
|
||||
status = dictionary.get("status")
|
||||
if not status: # FIXME: currently in case of timeout
|
||||
if not status or status.status != MaigretCheckStatus.CLAIMED:
|
||||
continue
|
||||
|
||||
if dictionary["status"].status != MaigretCheckStatus.CLAIMED:
|
||||
continue
|
||||
# base site node
|
||||
site_base_url = website_name
|
||||
if site_base_url not in base_site_nodes:
|
||||
base_site_nodes[site_base_url] = graph.add_node(
|
||||
'site', site_base_url, color='#28a745'
|
||||
) # Green color
|
||||
|
||||
site_fallback_name = dictionary.get(
|
||||
'url_user', f'{website_name}: {username.lower()}'
|
||||
)
|
||||
# site_node_name = dictionary.get('url_user', f'{website_name}: {username.lower()}')
|
||||
site_node_name = graph.add_node('site', site_fallback_name)
|
||||
graph.link(username_node_name, site_node_name)
|
||||
site_base_node_name = base_site_nodes[site_base_url]
|
||||
|
||||
# account node
|
||||
account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}')
|
||||
account_node_id = f"{site_base_url}: {account_url}"
|
||||
if account_node_id not in site_account_nodes:
|
||||
site_account_nodes[account_node_id] = graph.add_node(
|
||||
'account', account_url
|
||||
)
|
||||
|
||||
account_node_name = site_account_nodes[account_node_id]
|
||||
|
||||
# link username → account → site
|
||||
graph.link(username_node_name, account_node_name)
|
||||
graph.link(account_node_name, site_base_node_name)
|
||||
|
||||
def process_ids(parent_node, ids):
|
||||
for k, v in ids.items():
|
||||
if k.endswith('_count') or k.startswith('is_') or k.endswith('_at'):
|
||||
continue
|
||||
if k in 'image':
|
||||
if (
|
||||
k.endswith('_count')
|
||||
or k.startswith('is_')
|
||||
or k.endswith('_at')
|
||||
or k in 'image'
|
||||
):
|
||||
continue
|
||||
|
||||
v_data = v
|
||||
if v.startswith('['):
|
||||
try:
|
||||
v_data = ast.literal_eval(v)
|
||||
except Exception as e:
|
||||
logging.error(e)
|
||||
# Normalize value if string
|
||||
norm_v = v.lower() if isinstance(v, str) else v
|
||||
value_key = f"{k}:{norm_v}"
|
||||
|
||||
# value is a list
|
||||
if isinstance(v_data, list):
|
||||
list_node_name = graph.add_node(k, site_fallback_name)
|
||||
for vv in v_data:
|
||||
data_node_name = graph.add_node(vv, site_fallback_name)
|
||||
graph.link(list_node_name, data_node_name)
|
||||
if value_key in processed_values:
|
||||
ids_data_name = processed_values[value_key]
|
||||
else:
|
||||
v_data = v
|
||||
if isinstance(v, str) and v.startswith('['):
|
||||
try:
|
||||
v_data = ast.literal_eval(v)
|
||||
except Exception as e:
|
||||
logging.error(e)
|
||||
continue
|
||||
|
||||
if isinstance(v_data, list):
|
||||
list_node_name = graph.add_node(k, site_base_url)
|
||||
processed_values[value_key] = list_node_name
|
||||
for vv in v_data:
|
||||
data_node_name = graph.add_node(vv, site_base_url)
|
||||
graph.link(list_node_name, data_node_name)
|
||||
|
||||
add_ids = {
|
||||
a: b for b, a in db.extract_ids_from_url(vv).items()
|
||||
}
|
||||
if add_ids:
|
||||
process_ids(data_node_name, add_ids)
|
||||
ids_data_name = list_node_name
|
||||
else:
|
||||
ids_data_name = graph.add_node(k, norm_v)
|
||||
processed_values[value_key] = ids_data_name
|
||||
|
||||
if 'username' in k or k in SUPPORTED_IDS:
|
||||
new_username_key = f"username:{norm_v}"
|
||||
if new_username_key not in processed_values:
|
||||
new_username_node_name = graph.add_node(
|
||||
'username', norm_v
|
||||
)
|
||||
processed_values[new_username_key] = (
|
||||
new_username_node_name
|
||||
)
|
||||
graph.link(ids_data_name, new_username_node_name)
|
||||
|
||||
add_ids = {
|
||||
a: b for b, a in db.extract_ids_from_url(vv).items()
|
||||
k: v for v, k in db.extract_ids_from_url(v).items()
|
||||
}
|
||||
if add_ids:
|
||||
process_ids(data_node_name, add_ids)
|
||||
else:
|
||||
# value is just a string
|
||||
# ids_data_name = f'{k}: {v}'
|
||||
# if ids_data_name == parent_node:
|
||||
# continue
|
||||
process_ids(ids_data_name, add_ids)
|
||||
|
||||
ids_data_name = graph.add_node(k, v)
|
||||
# G.add_node(ids_data_name, size=10, title=ids_data_name, group=3)
|
||||
graph.link(parent_node, ids_data_name)
|
||||
|
||||
# check for username
|
||||
if 'username' in k or k in SUPPORTED_IDS:
|
||||
new_username_node_name = graph.add_node('username', v)
|
||||
graph.link(ids_data_name, new_username_node_name)
|
||||
|
||||
add_ids = {k: v for v, k in db.extract_ids_from_url(v).items()}
|
||||
if add_ids:
|
||||
process_ids(ids_data_name, add_ids)
|
||||
graph.link(parent_node, ids_data_name)
|
||||
|
||||
if status.ids_data:
|
||||
process_ids(site_node_name, status.ids_data)
|
||||
process_ids(account_node_name, status.ids_data)
|
||||
|
||||
nodes_to_remove = []
|
||||
for node in G.nodes:
|
||||
if len(str(node)) > 100:
|
||||
nodes_to_remove.append(node)
|
||||
# Remove overly long nodes
|
||||
nodes_to_remove = [node for node in G.nodes if len(str(node)) > 100]
|
||||
G.remove_nodes_from(nodes_to_remove)
|
||||
|
||||
[G.remove_node(node) for node in nodes_to_remove]
|
||||
# Remove site nodes with only one connection
|
||||
single_degree_sites = [
|
||||
n for n, deg in G.degree() if n.startswith("site:") and deg <= 1
|
||||
]
|
||||
G.remove_nodes_from(single_degree_sites)
|
||||
|
||||
# moved here to speed up the launch of Maigret
|
||||
# Generate interactive visualization
|
||||
from pyvis.network import Network
|
||||
|
||||
nt = Network(notebook=True, height="750px", width="100%")
|
||||
|
||||
@@ -53,5 +53,6 @@
|
||||
"xmind_report": false,
|
||||
"graph_report": false,
|
||||
"pdf_report": false,
|
||||
"html_report": false
|
||||
"html_report": false,
|
||||
"web_interface_port": 5000
|
||||
}
|
||||
@@ -5,7 +5,7 @@ from typing import List
|
||||
|
||||
SETTINGS_FILES_PATHS = [
|
||||
path.join(path.dirname(path.realpath(__file__)), "resources/settings.json"),
|
||||
'~/.maigret/settings.json',
|
||||
path.expanduser('~/.maigret/settings.json'),
|
||||
path.join(os.getcwd(), 'settings.json'),
|
||||
]
|
||||
|
||||
@@ -42,6 +42,7 @@ class Settings:
|
||||
pdf_report: bool
|
||||
html_report: bool
|
||||
graph_report: bool
|
||||
web_interface_port: int
|
||||
|
||||
# submit mode settings
|
||||
presence_strings: list
|
||||
|
||||
@@ -325,6 +325,14 @@ class MaigretDatabase:
|
||||
"""
|
||||
Ranking and filtering of the sites list
|
||||
|
||||
When ``top`` is limited (not "all sites"), **mirrors** may be appended after
|
||||
the Alexa-ranked slice. A mirror is any filtered site with a non-empty
|
||||
``source`` field equal to the name of a site that appears in the first
|
||||
``top`` positions of a **parent ranking** that includes disabled sites.
|
||||
Thus mirrors such as third-party viewers (e.g. for Twitter or Instagram)
|
||||
are still scanned when their parent platform ranks highly, even if the
|
||||
official site is disabled and omitted from the main list.
|
||||
|
||||
Args:
|
||||
reverse (bool, optional): Reverse the sorting order. Defaults to False.
|
||||
top (int, optional): Maximum number of sites to return. Defaults to sys.maxsize.
|
||||
@@ -334,7 +342,8 @@ class MaigretDatabase:
|
||||
id_type (str, optional): Type of identifier to filter by. Defaults to "username".
|
||||
|
||||
Returns:
|
||||
dict: Dictionary of filtered and ranked sites, with site names as keys and MaigretSite objects as values
|
||||
dict: Dictionary of filtered and ranked sites (base top slice plus mirrors),
|
||||
with site names as keys and MaigretSite objects as values
|
||||
"""
|
||||
normalized_names = list(map(str.lower, names))
|
||||
normalized_tags = list(map(str.lower, tags))
|
||||
@@ -371,6 +380,32 @@ class MaigretDatabase:
|
||||
sorted_list = sorted(
|
||||
filtered_list, key=lambda x: x.alexa_rank, reverse=reverse
|
||||
)[:top]
|
||||
|
||||
# Mirrors: sites whose `source` matches a parent platform that ranks in the
|
||||
# top `top` by Alexa when disabled entries are included in the ranking pool
|
||||
# (so e.g. Instagram can be a parent for Picuki even if Instagram is disabled).
|
||||
if top < sys.maxsize and sorted_list:
|
||||
filter_fun_ranking_parents = (
|
||||
lambda x: filter_tags_engines_fun(x)
|
||||
and filter_names_fun(x)
|
||||
and is_id_type_ok(x)
|
||||
)
|
||||
ranking_pool = [s for s in self.sites if filter_fun_ranking_parents(s)]
|
||||
sorted_parents = sorted(
|
||||
ranking_pool, key=lambda x: x.alexa_rank, reverse=reverse
|
||||
)[:top]
|
||||
parent_names_lower = {s.name.lower() for s in sorted_parents}
|
||||
base_names = {s.name for s in sorted_list}
|
||||
|
||||
def is_mirror(s) -> bool:
|
||||
if not s.source or s.name in base_names:
|
||||
return False
|
||||
return s.source.lower() in parent_names_lower
|
||||
|
||||
mirrors = [s for s in filtered_list if is_mirror(s)]
|
||||
mirrors.sort(key=lambda x: (x.alexa_rank, x.name))
|
||||
sorted_list = list(sorted_list) + mirrors
|
||||
|
||||
return {site.name: site for site in sorted_list}
|
||||
|
||||
@property
|
||||
|
||||
@@ -188,6 +188,7 @@ class Submitter:
|
||||
)
|
||||
return entered_username if entered_username else supposed_username
|
||||
|
||||
# TODO: replace with checking.py/SimpleAiohttpChecker call
|
||||
@staticmethod
|
||||
async def get_html_response_to_compare(
|
||||
url: str, session: ClientSession = None, redirects=False, headers: Dict = None
|
||||
|
||||
@@ -0,0 +1,348 @@
|
||||
from flask import (
|
||||
Flask,
|
||||
render_template,
|
||||
request,
|
||||
send_file,
|
||||
Response,
|
||||
flash,
|
||||
redirect,
|
||||
url_for,
|
||||
)
|
||||
import logging
|
||||
import os
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from threading import Thread
|
||||
import maigret
|
||||
import maigret.settings
|
||||
from maigret.sites import MaigretDatabase
|
||||
from maigret.report import generate_report_context
|
||||
|
||||
app = Flask(__name__)
|
||||
# Use environment variable for secret key, generate random one if not set
|
||||
app.secret_key = os.getenv('FLASK_SECRET_KEY', os.urandom(24).hex())
|
||||
|
||||
# add background job tracking
|
||||
background_jobs = {}
|
||||
job_results = {}
|
||||
|
||||
# Configuration
|
||||
app.config["MAIGRET_DB_FILE"] = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'resources', 'data.json')
|
||||
app.config["COOKIES_FILE"] = "cookies.txt"
|
||||
app.config["UPLOAD_FOLDER"] = 'uploads'
|
||||
app.config["REPORTS_FOLDER"] = os.path.abspath('/tmp/maigret_reports')
|
||||
|
||||
|
||||
def setup_logger(log_level, name):
    """Return the logger registered under *name* with its level set to *log_level*."""
    named_logger = logging.getLogger(name)
    named_logger.setLevel(log_level)
    return named_logger
|
||||
|
||||
|
||||
async def maigret_search(username, options):
    """Search one username with the options supplied by the web interface.

    Args:
        username: the username to look up.
        options: mapping of search options. Keys read here: ``top_sites``,
            ``all_sites``, ``tags``, ``site_list``, ``timeout``,
            ``use_cookies``, ``disable_extracting``,
            ``disable_recursive_search``, ``with_domains``, ``proxy``,
            ``tor_proxy``, ``i2p_proxy``.

    Returns:
        Whatever ``maigret.search`` returns for the selected sites.

    Raises:
        Exception: any error raised while loading the database or searching
            is logged and re-raised unchanged.
    """
    # Local import: this module's import block does not bring in `sys`.
    import sys

    logger = setup_logger(logging.WARNING, 'maigret')
    try:
        db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])

        top_sites = int(options.get('top_sites') or 500)
        if options.get('all_sites'):
            # sys.maxsize is what ranked_sites_dict compares `top` against to
            # recognise "no limit" (and what the CLI passes for all sites).
            top_sites = sys.maxsize

        tags = options.get('tags', [])
        site_list = options.get('site_list', [])
        logger.info(f"Filtering sites by tags: {tags}")

        sites = db.ranked_sites_dict(
            top=top_sites,
            tags=tags,
            names=site_list,
            disabled=False,
            id_type='username',
        )

        logger.info(f"Found {len(sites)} sites matching the tag criteria")

        # A present-but-empty timeout (None or '') falls back to the default
        # instead of crashing in int(); an explicit 0 is passed through.
        raw_timeout = options.get('timeout')
        timeout = 30 if raw_timeout in (None, '') else int(raw_timeout)

        results = await maigret.search(
            username=username,
            site_dict=sites,
            timeout=timeout,
            logger=logger,
            id_type='username',
            cookies=app.config["COOKIES_FILE"] if options.get('use_cookies') else None,
            is_parsing_enabled=(not options.get('disable_extracting', False)),
            recursive_search_enabled=(
                not options.get('disable_recursive_search', False)
            ),
            check_domains=options.get('with_domains', False),
            proxy=options.get('proxy', None),
            tor_proxy=options.get('tor_proxy', None),
            i2p_proxy=options.get('i2p_proxy', None),
        )
        return results
    except Exception as e:
        logger.error(f"Error during search: {str(e)}")
        raise
|
||||
|
||||
|
||||
async def search_multiple_usernames(usernames, options):
    """Search every username in turn and collect the per-username results.

    Each successful search contributes a ``(username, 'username', results)``
    tuple, with the username stripped of surrounding whitespace. A username
    whose search raises is logged and left out of the returned list.
    """
    collected = []
    for raw_name in usernames:
        try:
            cleaned = raw_name.strip()
            found = await maigret_search(cleaned, options)
            collected.append((cleaned, 'username', found))
        except Exception as e:
            logging.error(f"Error searching username {raw_name}: {str(e)}")
    return collected
|
||||
|
||||
|
||||
def process_search_task(usernames, options, timestamp):
    """Run one search job (intended as a thread target) and record its outcome.

    Searches every username, writes the combined graph plus per-username
    CSV/JSON/PDF/HTML reports under ``REPORTS_FOLDER/search_<timestamp>`` and
    stores a summary in ``job_results[timestamp]``. Any exception is logged
    and stored as a ``'failed'`` result instead of propagating. The job is
    always flagged as completed in ``background_jobs[timestamp]``.

    Args:
        usernames: Usernames to search.
        options: Search options forwarded to ``search_multiple_usernames``.
        timestamp: Key identifying this job in ``background_jobs`` and
            ``job_results``; also used to name the session folder.
    """
    loop = None
    try:
        # A dedicated event loop is created for this job.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        general_results = loop.run_until_complete(
            search_multiple_usernames(usernames, options)
        )

        session_name = f"search_{timestamp}"
        os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
        session_folder = os.path.join(app.config["REPORTS_FOLDER"], session_name)
        os.makedirs(session_folder, exist_ok=True)

        graph_path = os.path.join(session_folder, "combined_graph.html")
        maigret.report.save_graph_report(
            graph_path,
            general_results,
            MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]),
        )

        individual_reports = []
        for username, _id_type, results in general_results:
            # Usernames come from the web form; neutralise path separators so
            # a report file can never be addressed outside the session folder.
            safe_name = username.replace('/', '_').replace('\\', '_')
            report_name = f"report_{safe_name}"
            report_base = os.path.join(session_folder, report_name)

            csv_path = f"{report_base}.csv"
            json_path = f"{report_base}.json"
            pdf_path = f"{report_base}.pdf"
            html_path = f"{report_base}.html"

            # NOTE(review): the context is built from the combined results on
            # every iteration; it looks loop-invariant -- confirm that
            # generate_report_context has no side effects before hoisting.
            context = generate_report_context(general_results)

            maigret.report.save_csv_report(csv_path, username, results)
            maigret.report.save_json_report(
                json_path, username, results, report_type='ndjson'
            )
            maigret.report.save_pdf_report(pdf_path, context)
            maigret.report.save_html_report(html_path, context)

            claimed_profiles = []
            for site_name, site_data in results.items():
                check_status = site_data.get('status')
                if (
                    check_status
                    and check_status.status
                    == maigret.result.MaigretCheckStatus.CLAIMED
                ):
                    claimed_profiles.append(
                        {
                            'site_name': site_name,
                            'url': site_data.get('url_user', ''),
                            'tags': check_status.tags,
                        }
                    )

            # Paths stored here are relative to REPORTS_FOLDER.
            individual_reports.append(
                {
                    'username': username,
                    'csv_file': os.path.join(session_name, f"{report_name}.csv"),
                    'json_file': os.path.join(session_name, f"{report_name}.json"),
                    'pdf_file': os.path.join(session_name, f"{report_name}.pdf"),
                    'html_file': os.path.join(session_name, f"{report_name}.html"),
                    'claimed_profiles': claimed_profiles,
                }
            )

        # save results and mark job as complete using timestamp as key
        job_results[timestamp] = {
            'status': 'completed',
            'session_folder': session_name,
            'graph_file': os.path.join(session_name, "combined_graph.html"),
            'usernames': usernames,
            'individual_reports': individual_reports,
        }

    except Exception as e:
        logging.error(f"Error in search task for timestamp {timestamp}: {str(e)}")
        job_results[timestamp] = {'status': 'failed', 'error': str(e)}
    finally:
        # Flag completion first so the status page never waits on cleanup.
        background_jobs[timestamp]['completed'] = True
        if loop is not None:
            # Release the loop's resources; previously the loop was never closed.
            loop.close()
|
||||
|
||||
|
||||
@app.route('/')
def index():
    """Render the search form with the site names/URLs used for autocomplete.

    Every site name from the Maigret database is offered, plus each site's
    main URL when it has one. The options are de-duplicated and sorted.
    """
    db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])

    # Collect into a set: membership checks on a growing list made this loop
    # quadratic, and the result was de-duplicated afterwards anyway.
    unique_options = set()
    for site in db.sites:
        unique_options.add(site.name)
        if site.url_main:
            unique_options.add(site.url_main)

    site_options = sorted(unique_options)

    return render_template('index.html', site_options=site_options)
|
||||
|
||||
|
||||
# Modified search route
|
||||
@app.route('/search', methods=['POST'])
def search():
    """Validate the search form, start a background search job and redirect.

    Usernames may be separated by whitespace and/or commas. When no username
    remains after parsing, an error is flashed and the user is sent back to
    the index page. Otherwise a worker thread running ``process_search_task``
    is started and the client is redirected to the job's status page.
    """
    usernames_input = request.form.get('usernames', '').strip()
    # split() without arguments never yields empty or padded items.
    usernames = usernames_input.replace(',', ' ').split()

    # Validate the parsed list rather than the raw text: input such as ","
    # is non-empty but contains no username.
    if not usernames:
        flash('At least one username is required', 'danger')
        return redirect(url_for('index'))

    # Create timestamp for this search session. Microseconds are included
    # because the timestamp is the job key: with one-second resolution two
    # submissions in the same second would overwrite each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")

    # Get selected tags - ensure it's a list
    selected_tags = request.form.getlist('tags')
    logging.info(f"Selected tags: {selected_tags}")

    options = {
        'top_sites': request.form.get('top_sites') or '500',
        'timeout': request.form.get('timeout') or '30',
        'use_cookies': 'use_cookies' in request.form,
        'all_sites': 'all_sites' in request.form,
        'disable_recursive_search': 'disable_recursive_search' in request.form,
        'disable_extracting': 'disable_extracting' in request.form,
        'with_domains': 'with_domains' in request.form,
        'proxy': request.form.get('proxy', None) or None,
        'tor_proxy': request.form.get('tor_proxy', None) or None,
        'i2p_proxy': request.form.get('i2p_proxy', None) or None,
        'permute': 'permute' in request.form,
        'tags': selected_tags,  # Pass selected tags as a list
        'site_list': [
            s.strip() for s in request.form.get('site', '').split(',') if s.strip()
        ],
    }

    logging.info(
        f"Starting search for usernames: {usernames} with tags: {selected_tags}"
    )

    # Start background job; register it before starting the thread so the
    # worker can always find its own entry.
    background_jobs[timestamp] = {
        'completed': False,
        'thread': Thread(
            target=process_search_task, args=(usernames, options, timestamp)
        ),
    }
    background_jobs[timestamp]['thread'].start()

    return redirect(url_for('status', timestamp=timestamp))
|
||||
|
||||
|
||||
@app.route('/status/<timestamp>')
def status(timestamp):
    """Report on a search job: waiting page, results redirect, or error.

    Unknown timestamps, completed jobs without a stored result and failed
    jobs all flash a message and redirect to the index page. A successfully
    completed job redirects to its results page. A job that is still running
    gets the status page.
    """
    logging.info(f"Status check for timestamp: {timestamp}")

    # Reject sessions that were never registered.
    if timestamp not in background_jobs:
        flash('Invalid search session.', 'danger')
        logging.error(f"Invalid search session: {timestamp}")
        return redirect(url_for('index'))

    # Still running: show the waiting page.
    if not background_jobs[timestamp]['completed']:
        return render_template('status.html', timestamp=timestamp)

    outcome = job_results.get(timestamp)
    if not outcome:
        flash('No results found for this search session.', 'warning')
        logging.error(f"No results found for completed session: {timestamp}")
        return redirect(url_for('index'))

    if outcome['status'] != 'completed':
        error_msg = outcome.get('error', 'Unknown error occurred.')
        flash(f'Search failed: {error_msg}', 'danger')
        logging.error(f"Search failed for session {timestamp}: {error_msg}")
        return redirect(url_for('index'))

    # The results route is addressed by the session folder name.
    return redirect(url_for('results', session_id=outcome['session_folder']))
|
||||
|
||||
|
||||
@app.route('/results/<session_id>')
def results(session_id):
    """Render the results page for a completed search session.

    ``session_id`` is the session folder name, ``search_<timestamp>``. When no
    completed job matches it, an error is flashed and the user is redirected
    to the index page.
    """
    # job_results is keyed by timestamp and each completed entry stores
    # 'session_folder' as "search_<timestamp>", so the entry is looked up
    # directly. Scanning job_results.values() could fail with "dictionary
    # changed size during iteration" while worker threads add results.
    prefix = 'search_'
    result_data = None
    timestamp = session_id
    if session_id.startswith(prefix):
        timestamp = session_id[len(prefix):]
        candidate = job_results.get(timestamp)
        if (
            candidate
            and candidate.get('status') == 'completed'
            and candidate.get('session_folder') == session_id
        ):
            result_data = candidate

    if not result_data:
        flash('No results found for this session ID.', 'danger')
        logging.error(f"Results for session {session_id} not found in job_results.")
        return redirect(url_for('index'))

    return render_template(
        'results.html',
        usernames=result_data['usernames'],
        graph_file=result_data['graph_file'],
        individual_reports=result_data['individual_reports'],
        timestamp=timestamp,
    )
|
||||
|
||||
|
||||
@app.route('/reports/<path:filename>')
def download_report(filename):
    """Serve a generated report file from the reports folder.

    Args:
        filename: Path of the requested file, relative to ``REPORTS_FOLDER``.

    Returns:
        The file response, or ``("File not found", 404)`` when the path
        escapes the reports folder or the file cannot be served.
    """
    try:
        os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
        # Resolve both paths fully (symlinks, "..", relative folder config)
        # before comparing them.
        reports_root = os.path.realpath(app.config["REPORTS_FOLDER"])
        file_path = os.path.realpath(os.path.join(reports_root, filename))
        # A plain startswith() prefix test would also accept sibling
        # directories such as "<reports>_other"; commonpath compares whole
        # path components.
        if os.path.commonpath([reports_root, file_path]) != reports_root:
            raise ValueError("Invalid file path")
        return send_file(file_path)
    except Exception as e:
        logging.error(f"Error serving file {filename}: {str(e)}")
        return "File not found", 404
|
||||
|
||||
|
||||
if __name__ == '__main__':
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # Debug mode stays off unless FLASK_DEBUG is one of the accepted
    # truthy spellings.
    debug_mode = os.getenv('FLASK_DEBUG', 'False').lower() in ['true', '1', 't']

    # Host configuration: secure by default.
    # Binds to 127.0.0.1 unless FLASK_HOST is explicitly set (e.g. 0.0.0.0).
    app.run(
        host=os.getenv('FLASK_HOST', '127.0.0.1'),
        port=int(os.getenv('FLASK_PORT', '5000')),
        debug=debug_mode,
    )
|
||||
|
After Width: | Height: | Size: 45 KiB |
@@ -0,0 +1,118 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en" data-bs-theme="dark">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Maigret Web Interface</title>
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
||||
<style>
|
||||
body {
|
||||
min-height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.main-container {
|
||||
flex: 1;
|
||||
padding-top: 2rem;
|
||||
}
|
||||
|
||||
.form-container {
    /* "auto" is not a valid max-width value; "none" states the intended
       "no upper limit" explicitly. */
    max-width: none;
    margin: auto;
    padding-bottom: 2rem;
}
|
||||
|
||||
[data-bs-theme="dark"] {
|
||||
--bs-body-bg: #212529;
|
||||
--bs-body-color: #dee2e6;
|
||||
}
|
||||
|
||||
.header {
|
||||
padding: 1rem 0;
|
||||
margin-bottom: 2rem;
|
||||
border-bottom: 1px solid var(--bs-border-color);
|
||||
}
|
||||
|
||||
.header-content {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.logo-container {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.logo {
|
||||
height: 40px;
|
||||
width: auto;
|
||||
}
|
||||
|
||||
.footer {
|
||||
margin-top: auto;
|
||||
padding: 1rem 0;
|
||||
text-align: center;
|
||||
border-top: 1px solid var(--bs-border-color);
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.footer a {
|
||||
color: inherit;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.footer a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="header">
|
||||
<div class="container">
|
||||
<div class="header-content">
|
||||
<div class="logo-container">
|
||||
<img src="{{ url_for('static', filename='maigret.png') }}" alt="Maigret Logo" class="logo">
|
||||
<h1 class="h4 mb-0">Maigret Web Interface</h1>
|
||||
</div>
|
||||
<button class="btn btn-outline-secondary" id="theme-toggle">
|
||||
Toggle Dark/Light Mode
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="main-container">
|
||||
<div class="container">
|
||||
{% block content %}{% endblock %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<footer class="footer">
|
||||
<div class="container">
|
||||
<p class="mb-0">
|
||||
Powered by <a href="https://github.com/soxoj/maigret" target="_blank">Maigret</a> |
|
||||
Licensed under <a href="https://github.com/soxoj/maigret/blob/main/LICENSE" target="_blank">MIT
|
||||
License</a>
|
||||
</p>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
|
||||
<script>
|
||||
// Flip the data-bs-theme attribute on <html> between dark and light
// whenever the toggle button is clicked.
document.getElementById('theme-toggle').addEventListener('click', () => {
    const root = document.documentElement;
    const nextTheme = root.getAttribute('data-bs-theme') === 'dark' ? 'light' : 'dark';
    root.setAttribute('data-bs-theme', nextTheme);
});
|
||||
</script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
@@ -0,0 +1,383 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block content %}
|
||||
<style>
|
||||
.tag-cloud {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 8px;
|
||||
padding: 15px;
|
||||
border-radius: 8px;
|
||||
background: rgba(0, 0, 0, 0.05);
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.tag {
|
||||
display: inline-block;
|
||||
padding: 5px 10px;
|
||||
border-radius: 15px;
|
||||
background-color: #dc3545;
|
||||
color: white;
|
||||
cursor: pointer;
|
||||
font-size: 14px;
|
||||
transition: all 0.3s ease;
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
.tag.selected {
|
||||
background-color: #28a745;
|
||||
}
|
||||
|
||||
.tag:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
|
||||
.hidden-select {
|
||||
display: none !important;
|
||||
}
|
||||
|
||||
.site-input-container {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.site-input {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.selected-sites {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 8px;
|
||||
padding: 10px 0;
|
||||
}
|
||||
|
||||
.selected-site {
|
||||
background-color: #214e7b;
|
||||
padding: 2px 8px;
|
||||
border-radius: 12px;
|
||||
font-size: 14px;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 5px;
|
||||
}
|
||||
|
||||
.remove-site {
|
||||
cursor: pointer;
|
||||
color: #dc3545;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.section-header {
|
||||
cursor: pointer;
|
||||
padding: 1rem;
|
||||
background: rgba(255, 255, 255, 0.05);
|
||||
border-radius: 4px;
|
||||
margin-bottom: 0.5rem;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.section-content {
|
||||
padding: 1rem;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.section-content.show {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.chevron::after {
|
||||
content: '▼';
|
||||
transition: transform 0.2s;
|
||||
}
|
||||
|
||||
.chevron.collapsed::after {
|
||||
transform: rotate(-90deg);
|
||||
}
|
||||
|
||||
.main-search-section {
|
||||
background: rgba(255, 255, 255, 0.03);
|
||||
padding: 2rem;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.search-button {
|
||||
width: 100%;
|
||||
padding: 1rem;
|
||||
font-size: 1.2rem;
|
||||
margin-top: 2rem;
|
||||
}
|
||||
</style>
|
||||
|
||||
<div class="form-container">
|
||||
{% if error %}
|
||||
<div class="alert alert-danger">{{ error }}</div>
|
||||
{% endif %}
|
||||
|
||||
<form method="POST" action="{{ url_for('search') }}" class="mb-4">
|
||||
<!-- Main Search Section -->
|
||||
<div class="main-search-section">
|
||||
<div class="mb-4">
|
||||
<label for="usernames" class="form-label h5">Usernames to Search</label>
|
||||
<textarea class="form-control" id="usernames" name="usernames" rows="3" required
|
||||
placeholder="Enter one or more usernames (separated by spaces or commas)..."></textarea>
|
||||
</div>
|
||||
|
||||
<div class="row align-items-center">
|
||||
<div class="col-md-6">
|
||||
<label for="top_sites" class="form-label">Number of Sites</label>
|
||||
<input type="number" class="form-control" id="top_sites" name="top_sites" min="1" max="10000"
|
||||
placeholder="Default: 500">
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<label for="timeout" class="form-label">Timeout (seconds)</label>
|
||||
<input type="number" class="form-control" id="timeout" name="timeout" min="1"
|
||||
placeholder="Default: 30">
|
||||
</div>
|
||||
<div class="col-12 mt-3">
|
||||
<div class="form-check">
|
||||
<input type="checkbox" class="form-check-input" id="all_sites" name="all_sites"
|
||||
onchange="document.getElementById('top_sites').disabled = this.checked;">
|
||||
<label class="form-check-label" for="all_sites">Search All Sites</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Filters Section -->
|
||||
<div class="mb-4">
|
||||
<div class="section-header" onclick="toggleSection('filters')">
|
||||
<h5 class="mb-0">Filters</h5>
|
||||
<span class="chevron"></span>
|
||||
</div>
|
||||
<div id="filters" class="section-content">
|
||||
<div class="mb-3 site-input-container">
|
||||
<label for="siteInput" class="form-label">Specify Sites (Optional)</label>
|
||||
<input type="text" class="form-control site-input" id="siteInput"
|
||||
placeholder="Type to search for sites..." list="siteOptions">
|
||||
<input type="hidden" id="site" name="site">
|
||||
<datalist id="siteOptions">
|
||||
{% for site in site_options %}
|
||||
<option value="{{ site }}">
|
||||
{% endfor %}
|
||||
</datalist>
|
||||
<div class="selected-sites" id="selectedSites"></div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label">Tags (click to select)</label>
|
||||
<div class="tag-cloud" id="tagCloud"></div>
|
||||
<select multiple class="hidden-select" id="tags" name="tags">
|
||||
<option value="gaming">Gaming</option>
|
||||
<option value="coding">Coding</option>
|
||||
<option value="photo">Photo</option>
|
||||
<option value="music">Music</option>
|
||||
<option value="blog">Blog</option>
|
||||
<option value="finance">Finance</option>
|
||||
<option value="freelance">Freelance</option>
|
||||
<option value="dating">Dating</option>
|
||||
<option value="tech">Tech</option>
|
||||
<option value="forum">Forum</option>
|
||||
<option value="porn">Porn</option>
|
||||
<option value="erotic">Erotic</option>
|
||||
<option value="webcam">Webcam</option>
|
||||
<option value="video">Video</option>
|
||||
<option value="movies">Movies</option>
|
||||
<option value="hacking">Hacking</option>
|
||||
<option value="art">Art</option>
|
||||
<option value="discussion">Discussion</option>
|
||||
<option value="sharing">Sharing</option>
|
||||
<option value="writing">Writing</option>
|
||||
<option value="wiki">Wiki</option>
|
||||
<option value="business">Business</option>
|
||||
<option value="shopping">Shopping</option>
|
||||
<option value="sport">Sport</option>
|
||||
<option value="books">Books</option>
|
||||
<option value="news">News</option>
|
||||
<option value="documents">Documents</option>
|
||||
<option value="travel">Travel</option>
|
||||
<option value="maps">Maps</option>
|
||||
<option value="hobby">Hobby</option>
|
||||
<option value="apps">Apps</option>
|
||||
<option value="classified">Classified</option>
|
||||
<option value="career">Career</option>
|
||||
<option value="geosocial">Geosocial</option>
|
||||
<option value="streaming">Streaming</option>
|
||||
<option value="education">Education</option>
|
||||
<option value="networking">Networking</option>
|
||||
<option value="torrent">Torrent</option>
|
||||
<option value="science">Science</option>
|
||||
<option value="medicine">Medicine</option>
|
||||
<option value="reading">Reading</option>
|
||||
<option value="stock">Stock</option>
|
||||
<option value="messaging">Messaging</option>
|
||||
<option value="trading">Trading</option>
|
||||
<option value="links">Links</option>
|
||||
<option value="fashion">Fashion</option>
|
||||
<option value="tasks">Tasks</option>
|
||||
<option value="military">Military</option>
|
||||
<option value="auto">Auto</option>
|
||||
<option value="gambling">Gambling</option>
|
||||
<option value="cybercriminal">Cybercriminal</option>
|
||||
<option value="review">Review</option>
|
||||
<option value="bookmarks">Bookmarks</option>
|
||||
<option value="design">Design</option>
|
||||
<option value="tor">Tor</option>
|
||||
<option value="i2p">I2P</option>
|
||||
<option value="q&a">Q&A</option>
|
||||
<option value="crypto">Crypto</option>
|
||||
<option value="ai">AI</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Advanced Options Section -->
|
||||
<div class="mb-4">
|
||||
<div class="section-header" onclick="toggleSection('advanced')">
|
||||
<h5 class="mb-0">Advanced Options</h5>
|
||||
<span class="chevron"></span>
|
||||
</div>
|
||||
<div id="advanced" class="section-content">
|
||||
<div class="mb-3 form-check">
|
||||
<input type="checkbox" class="form-check-input" id="permute" name="permute">
|
||||
<label class="form-check-label" for="permute">Enable Username Permutations</label>
|
||||
</div>
|
||||
<div class="mb-3 form-check">
|
||||
<input type="checkbox" class="form-check-input" id="disable_recursive_search"
|
||||
name="disable_recursive_search">
|
||||
<label class="form-check-label" for="disable_recursive_search">Disable Recursive Search</label>
|
||||
</div>
|
||||
<div class="mb-3 form-check">
|
||||
<input type="checkbox" class="form-check-input" id="disable_extracting" name="disable_extracting">
|
||||
<label class="form-check-label" for="disable_extracting">Disable Information Extraction</label>
|
||||
</div>
|
||||
<div class="mb-3 form-check">
|
||||
<input type="checkbox" class="form-check-input" id="with_domains" name="with_domains">
|
||||
<label class="form-check-label" for="with_domains">Check Domains</label>
|
||||
</div>
|
||||
<div class="mb-3">
|
||||
<label for="proxy" class="form-label">Proxy URL</label>
|
||||
<input type="text" class="form-control" id="proxy" name="proxy"
|
||||
placeholder="e.g., 127.0.0.1:1080">
|
||||
</div>
|
||||
<div class="mb-3">
|
||||
<label for="tor_proxy" class="form-label">TOR Proxy URL</label>
|
||||
<input type="text" class="form-control" id="tor_proxy" name="tor_proxy"
|
||||
placeholder="Default: 127.0.0.1:9050">
|
||||
</div>
|
||||
<div class="mb-3">
|
||||
<label for="i2p_proxy" class="form-label">I2P Proxy URL</label>
|
||||
<input type="text" class="form-control" id="i2p_proxy" name="i2p_proxy"
|
||||
placeholder="Default: 127.0.0.1:4444">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button type="submit" class="btn search-button" style="background-color: rgb(249, 207, 0); color: black;">
|
||||
Start Search
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Show or hide the collapsible section with the given id and flip the
// chevron in the header element that directly precedes it.
function toggleSection(sectionId) {
    const sectionBody = document.getElementById(sectionId);
    sectionBody.classList.toggle('show');

    const sectionHeader = sectionBody.previousElementSibling;
    const chevron = sectionHeader.querySelector('.chevron');
    chevron.classList.toggle('collapsed');
}
|
||||
|
||||
// Wires up the tag cloud and the site picker once the DOM is ready.
document.addEventListener('DOMContentLoaded', function () {
    // Tag cloud functionality: one clickable chip per <option> of the hidden
    // multi-select, which is what actually gets submitted.
    const tagCloud = document.getElementById('tagCloud');
    const hiddenSelect = document.getElementById('tags');
    const allTags = Array.from(hiddenSelect.options).map(opt => ({
        value: opt.value,
        label: opt.text
    }));

    allTags.forEach(tag => {
        const tagElement = document.createElement('span');
        tagElement.className = 'tag';
        tagElement.textContent = tag.label;
        tagElement.dataset.value = tag.value;

        tagElement.addEventListener('click', function () {
            // Mirror the chip's selected state onto the matching <option>.
            const isSelected = this.classList.toggle('selected');
            const option = Array.from(hiddenSelect.options).find(opt => opt.value === tag.value);
            if (option) {
                option.selected = isSelected;
            }
        });

        tagCloud.appendChild(tagElement);
    });

    // Site selection functionality
    const siteInput = document.getElementById('siteInput');
    const hiddenInput = document.getElementById('site');
    const selectedSitesContainer = document.getElementById('selectedSites');
    let selectedSites = new Set();

    // Serialise the chosen sites into the hidden field as a comma-separated list.
    function updateHiddenInput() {
        hiddenInput.value = Array.from(selectedSites).join(',');
    }

    // Add a site chip (with its remove button) unless it is already selected.
    function addSite(site) {
        if (site && !selectedSites.has(site)) {
            selectedSites.add(site);
            updateHiddenInput();

            // Build the chip from DOM nodes instead of innerHTML: the site
            // text is typed or pasted by the user, so it must never be
            // parsed as markup or break out of the data-site attribute.
            const siteElement = document.createElement('span');
            siteElement.className = 'selected-site';
            siteElement.appendChild(document.createTextNode(site));

            const removeButton = document.createElement('span');
            removeButton.className = 'remove-site';
            removeButton.dataset.site = site;
            removeButton.textContent = '×';
            siteElement.appendChild(removeButton);

            selectedSitesContainer.appendChild(siteElement);
        }
    }

    // Remove a site from the selection and delete its chip.
    function removeSite(site) {
        selectedSites.delete(site);
        updateHiddenInput();
        const siteElements = selectedSitesContainer.querySelectorAll('.selected-site');
        siteElements.forEach(el => {
            if (el.querySelector('.remove-site').dataset.site === site) {
                el.remove();
            }
        });
    }

    // Committing a value in the text box turns it into a chip.
    siteInput.addEventListener('change', function (e) {
        const value = this.value.trim();
        if (value) {
            addSite(value);
            this.value = '';
        }
    });

    // One delegated listener handles every chip's remove button.
    selectedSitesContainer.addEventListener('click', function (e) {
        if (e.target.classList.contains('remove-site')) {
            removeSite(e.target.dataset.site);
        }
    });

    // Pasting a comma-separated list adds each entry as its own chip.
    siteInput.addEventListener('paste', function (e) {
        e.preventDefault();
        const paste = (e.clipboardData || window.clipboardData).getData('text');
        const sites = paste.split(',').map(site => site.trim()).filter(site => site);
        sites.forEach(site => addSite(site));
    });

    // Re-sync the hidden form fields with the visible state right before submit.
    const form = document.querySelector('form');
    form.addEventListener('submit', function (e) {
        const selectedTags = Array.from(tagCloud.querySelectorAll('.tag.selected'));
        Array.from(hiddenSelect.options).forEach(opt => {
            opt.selected = selectedTags.some(tag => tag.dataset.value === opt.value);
        });
        updateHiddenInput();
    });
});
|
||||
</script>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,156 @@
|
||||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<style>
|
||||
.tag-badge {
|
||||
background-color: #214e7b;
|
||||
padding: 2px 8px;
|
||||
border-radius: 12px;
|
||||
font-size: 14px;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 5px;
|
||||
margin: 2px;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.profile-list {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.profile-item {
|
||||
margin-bottom: 10px;
|
||||
padding: 10px;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
|
||||
}
|
||||
|
||||
.profile-link {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.favicon {
|
||||
width: 16px;
|
||||
height: 16px;
|
||||
}
|
||||
|
||||
.tag-container {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 5px;
|
||||
justify-content: flex-end;
|
||||
}
|
||||
|
||||
.report-container {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.report-header {
|
||||
cursor: pointer;
|
||||
padding: 1rem;
|
||||
background: rgba(255, 255, 255, 0.05);
|
||||
border-radius: 4px;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.report-content {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.report-content.show {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.chevron::after {
|
||||
content: '▼';
|
||||
margin-left: 8px;
|
||||
transition: transform 0.2s;
|
||||
}
|
||||
|
||||
.chevron.collapsed::after {
|
||||
transform: rotate(-90deg);
|
||||
}
|
||||
</style>
|
||||
|
||||
<div class="form-container">
|
||||
<h1 class="mb-4">Search Results</h1>
|
||||
<!-- Flash messages -->
|
||||
{% with messages = get_flashed_messages() %}
|
||||
{% if messages %}
|
||||
{% for message in messages %}
|
||||
<div class="alert alert-info">{{ message }}</div>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
{% endwith %}
|
||||
|
||||
<p>The search has completed. <a href="{{ url_for('index')}}">Back to start.</a></p>
|
||||
|
||||
{% if graph_file %}
|
||||
<h3>Combined Graph</h3>
|
||||
<iframe src="{{ url_for('download_report', filename=graph_file) }}" style="width:100%; height:600px; border:none;"></iframe>
|
||||
{% endif %}
|
||||
|
||||
<hr>
|
||||
|
||||
{% if individual_reports %}
|
||||
<h3>Individual Reports</h3>
|
||||
<div class="reports-list">
|
||||
{% for report in individual_reports %}
|
||||
<div class="report-container">
|
||||
<div class="report-header" onclick="toggleReport(this)" data-target="report-{{ loop.index }}">
|
||||
<h5 class="mb-0 d-flex align-items-center">
|
||||
<span>{{ report.username }}</span>
|
||||
<span class="chevron"></span>
|
||||
</h5>
|
||||
</div>
|
||||
<div id="report-{{ loop.index }}" class="report-content">
|
||||
<p>
|
||||
<a href="{{ url_for('download_report', filename=report.csv_file) }}">CSV Report</a> |
|
||||
<a href="{{ url_for('download_report', filename=report.json_file) }}">JSON Report</a> |
|
||||
<a href="{{ url_for('download_report', filename=report.pdf_file) }}">PDF Report</a> |
|
||||
<a href="{{ url_for('download_report', filename=report.html_file) }}">HTML Report</a>
|
||||
</p>
|
||||
{% if report.claimed_profiles %}
|
||||
<strong>Claimed Profiles:</strong>
|
||||
<ul class="profile-list">
|
||||
{% for profile in report.claimed_profiles %}
|
||||
<li class="profile-item">
|
||||
<div class="profile-link">
|
||||
<img class="favicon" src="https://www.google.com/s2/favicons?domain={{ profile.url }}" onerror="this.style.display='none'" alt="">
|
||||
<a href="{{ profile.url }}" target="_blank">{{ profile.site_name }}</a>
|
||||
</div>
|
||||
{% if profile.tags %}
|
||||
<div class="tag-container">
|
||||
{% for tag in profile.tags %}
|
||||
<span class="tag-badge">{{ tag }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>No claimed profiles found.</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% else %}
|
||||
<p>No individual reports available.</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Expand or collapse the report body named by the header's data-target
// attribute and flip the header's chevron.
function toggleReport(header) {
    const targetId = header.getAttribute('data-target');
    const reportBody = document.getElementById(targetId);
    reportBody.classList.toggle('show');

    const chevron = header.querySelector('.chevron');
    chevron.classList.toggle('collapsed');
}
|
||||
</script>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,16 @@
|
||||
{% extends "base.html" %}
|
||||
{% block content %}
|
||||
<div class="container mt-4 text-center">
|
||||
<h2>Search in progress...</h2>
|
||||
<p>Your request is being processed in the background. This page will automatically redirect once the results are ready.</p>
|
||||
<div class="spinner-border text-primary" role="status">
|
||||
<span class="visually-hidden">Loading...</span>
|
||||
</div>
|
||||
<script>
|
||||
// Auto-refresh the page every 5 seconds to check completion
|
||||
setTimeout(function() {
|
||||
window.location.reload();
|
||||
}, 5000);
|
||||
</script>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -1,5 +1,5 @@
|
||||
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
|
||||
pefile==2023.2.7 # do not bump while pyinstaller is 6.11.1, there is a conflict
|
||||
psutil==6.1.0
|
||||
pyinstaller==6.11.1
|
||||
psutil==7.1.3
|
||||
pyinstaller==6.16.0
|
||||
pywin32-ctypes==0.2.3
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "maigret"
|
||||
version = "0.5.0a1"
|
||||
version = "0.5.0"
|
||||
description = "🕵️♂️ Collect a dossier on a person by username from thousands of sites."
|
||||
authors = ["Soxoj <soxoj@protonmail.com>"]
|
||||
readme = "README.md"
|
||||
@@ -32,28 +32,28 @@ classifiers = [
|
||||
# poetry install --with dev
|
||||
python = "^3.10"
|
||||
aiodns = "^3.0.0"
|
||||
aiohttp = "^3.11.10"
|
||||
aiohttp-socks = "^0.9.1"
|
||||
aiohttp = "^3.12.14"
|
||||
aiohttp-socks = "^0.10.1"
|
||||
arabic-reshaper = "^3.0.0"
|
||||
async-timeout = "^5.0.1"
|
||||
attrs = "^24.2.0"
|
||||
certifi = "^2024.8.30"
|
||||
attrs = "^25.3.0"
|
||||
certifi = "^2025.6.15"
|
||||
chardet = "^5.0.0"
|
||||
colorama = "^0.4.6"
|
||||
future = "^1.0.0"
|
||||
future-annotations= "^1.0.0"
|
||||
html5lib = "^1.1"
|
||||
idna = "^3.4"
|
||||
Jinja2 = "^3.1.3"
|
||||
lxml = "^5.3.0"
|
||||
Jinja2 = "^3.1.6"
|
||||
lxml = ">=5.3,<7.0"
|
||||
MarkupSafe = "^3.0.2"
|
||||
mock = "^5.1.0"
|
||||
multidict = "^6.0.4"
|
||||
multidict = "^6.6.3"
|
||||
pycountry = "^24.6.1"
|
||||
PyPDF2 = "^3.0.1"
|
||||
PySocks = "^1.7.1"
|
||||
python-bidi = "^0.6.3"
|
||||
requests = "^2.31.0"
|
||||
requests = "^2.32.4"
|
||||
requests-futures = "^1.0.2"
|
||||
six = "^1.17.0"
|
||||
socid-extractor = "^0.0.27"
|
||||
@@ -61,32 +61,34 @@ soupsieve = "^2.6"
|
||||
stem = "^1.8.1"
|
||||
torrequest = "^0.1.0"
|
||||
alive_progress = "^3.2.0"
|
||||
typing-extensions = "^4.8.0"
|
||||
typing-extensions = "^4.14.1"
|
||||
webencodings = "^0.5.1"
|
||||
xhtml2pdf = "^0.2.11"
|
||||
XMind = "^1.2.0"
|
||||
yarl = "^1.18.3"
|
||||
yarl = "^1.20.1"
|
||||
networkx = "^2.6.3"
|
||||
pyvis = "^0.3.2"
|
||||
reportlab = "^4.2.0"
|
||||
reportlab = "^4.4.3"
|
||||
cloudscraper = "^1.2.71"
|
||||
platformdirs = "^4.3.6"
|
||||
flask = {extras = ["async"], version = "^3.1.1"}
|
||||
asgiref = "^3.9.1"
|
||||
platformdirs = "^4.3.8"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
# How to add a new dev dependency: poetry add black --group dev
|
||||
# Install dev dependencies with: poetry install --with dev
|
||||
flake8 = "^7.1.1"
|
||||
pytest = "^8.3.4"
|
||||
pytest-asyncio = "^0.25.0"
|
||||
pytest-cov = "^6.0.0"
|
||||
pytest = ">=8.3.4,<10.0.0"
|
||||
pytest-asyncio = "^1.0.0"
|
||||
pytest-cov = ">=6,<8"
|
||||
pytest-httpserver = "^1.0.0"
|
||||
pytest-rerunfailures = "^15.0"
|
||||
reportlab = "^4.2.0"
|
||||
mypy = "^1.13.0"
|
||||
pytest-rerunfailures = ">=15.1,<17.0"
|
||||
reportlab = "^4.4.3"
|
||||
mypy = "^1.14.1"
|
||||
tuna = "^0.5.11"
|
||||
coverage = "^7.6.9"
|
||||
black = "^24.10.0"
|
||||
coverage = "^7.9.2"
|
||||
black = ">=25.1,<27.0"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
# Run with: poetry run maigret <username>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
## List of supported sites (search methods): total 3137
|
||||
## List of supported sites (search methods): total 3144
|
||||
|
||||
Rank data fetched from Alexa by domains.
|
||||
|
||||
@@ -8,13 +8,14 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [GooglePlayStore (https://play.google.com/store)](https://play.google.com/store)*: top 1, apps, us*
|
||||
1.  [YouTube (https://www.youtube.com/)](https://www.youtube.com/)*: top 2, video*
|
||||
1.  [YouTube User (https://www.youtube.com/)](https://www.youtube.com/)*: top 2, video*
|
||||
1.  [Baidu (https://tieba.baidu.com)](https://tieba.baidu.com)*: top 3, cn*
|
||||
1.  [Baidu (https://tieba.baidu.com)](https://tieba.baidu.com)*: top 3, cn*, search is disabled
|
||||
1.  [Facebook (https://www.facebook.com/)](https://www.facebook.com/)*: top 10, networking*
|
||||
1.  [Amazon (https://amazon.com)](https://amazon.com)*: top 50, us*
|
||||
1.  [Wikipedia (https://www.wikipedia.org/)](https://www.wikipedia.org/)*: top 50, wiki*
|
||||
1.  [Wikipedia (https://en.wikipedia.org/)](https://en.wikipedia.org/)*: top 50, wiki*, search is disabled
|
||||
1.  [Reddit (https://www.reddit.com/)](https://www.reddit.com/)*: top 50, discussion, news*
|
||||
1.  [social.msdn.microsoft.com (https://social.msdn.microsoft.com)](https://social.msdn.microsoft.com)*: top 50, us*, search is disabled
|
||||
1.  [MicrosoftTechNet (https://social.technet.microsoft.com)](https://social.technet.microsoft.com)*: top 50, us*, search is disabled
|
||||
1.  [MicrosoftLearn (https://learn.microsoft.com)](https://learn.microsoft.com)*: top 50, tech, us*
|
||||
1.  [Weibo (https://weibo.com)](https://weibo.com)*: top 50, cn, networking*
|
||||
1.  [GitHubGist (https://gist.github.com)](https://gist.github.com)*: top 50, coding, sharing*
|
||||
1.  [VK (https://vk.com/)](https://vk.com/)*: top 50, ru*
|
||||
@@ -52,7 +53,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [YandexBugbounty (https://yandex.ru/bugbounty/)](https://yandex.ru/bugbounty/)*: top 50, hacking, ru*, search is disabled
|
||||
1.  [YandexCollections API (by yandex_public_id) (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*
|
||||
1.  [YandexMarket (https://market.yandex.ru/)](https://market.yandex.ru/)*: top 50, ru*
|
||||
1.  [YandexMusic (https://music.yandex.ru/)](https://music.yandex.ru/)*: top 50, music, ru*
|
||||
1.  [YandexMusic (https://music.yandex.ru/)](https://music.yandex.ru/)*: top 50, music, ru*, search is disabled
|
||||
1.  [YandexZnatoki (https://yandex.ru/q/)](https://yandex.ru/q/)*: top 50, ru*
|
||||
1.  [YandexZenChannel (https://dzen.ru)](https://dzen.ru)*: top 50, ru*
|
||||
1.  [YandexZenUser (https://zen.yandex.ru)](https://zen.yandex.ru)*: top 50, ru*
|
||||
@@ -61,18 +62,18 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [OK (https://ok.ru/)](https://ok.ru/)*: top 100, ru*
|
||||
1.  [community.adobe.com (https://community.adobe.com)](https://community.adobe.com)*: top 100, us*
|
||||
1.  [TradingView (https://www.tradingview.com/)](https://www.tradingview.com/)*: top 100, trading, us*
|
||||
1.  [Aparat (https://www.aparat.com)](https://www.aparat.com)*: top 100, ir, video*
|
||||
1.  [Aparat (https://www.aparat.com)](https://www.aparat.com)*: top 100, ir, video*, search is disabled
|
||||
1.  [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 100, us*
|
||||
1.  [Medium (https://medium.com/)](https://medium.com/)*: top 100, blog, us*, search is disabled
|
||||
1.  [Livejasmin (https://www.livejasmin.com/)](https://www.livejasmin.com/)*: top 100, us, webcam*
|
||||
1.  [Pornhub (https://pornhub.com/)](https://pornhub.com/)*: top 100, porn*
|
||||
1.  [Pornhub (https://pornhub.com/)](https://pornhub.com/)*: top 100, porn*, search is disabled
|
||||
1.  [Imgur (https://imgur.com)](https://imgur.com)*: top 100, photo*
|
||||
1.  [Armchairgm (https://armchairgm.fandom.com/)](https://armchairgm.fandom.com/)*: top 100, us, wiki*
|
||||
1.  [Battleraprus (https://battleraprus.fandom.com/ru)](https://battleraprus.fandom.com/ru)*: top 100, ru, us, wiki*
|
||||
1.  [Battleraprus (https://battleraprus.fandom.com/ru)](https://battleraprus.fandom.com/ru)*: top 100, ru, us, wiki*, search is disabled
|
||||
1.  [BleachFandom (https://bleach.fandom.com/ru)](https://bleach.fandom.com/ru)*: top 100, ru, wiki*
|
||||
1.  [Fandom (https://www.fandom.com/)](https://www.fandom.com/)*: top 100, us*
|
||||
1.  [FandomCommunityCentral (https://community.fandom.com)](https://community.fandom.com)*: top 100, wiki*
|
||||
1.  [Etsy (https://www.etsy.com/)](https://www.etsy.com/)*: top 100, shopping, us*
|
||||
1.  [Etsy (https://www.etsy.com/)](https://www.etsy.com/)*: top 100, shopping, us*, search is disabled
|
||||
1.  [GitHub (https://www.github.com/)](https://www.github.com/)*: top 100, coding*
|
||||
1.  [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled
|
||||
1.  [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video*
|
||||
@@ -80,7 +81,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Tumblr (https://www.tumblr.com)](https://www.tumblr.com)*: top 500, blog*
|
||||
1.  [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us*
|
||||
1.  [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music*
|
||||
1.  [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*
|
||||
1.  [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*, search is disabled
|
||||
1.  [discourse.mozilla.org (https://discourse.mozilla.org)](https://discourse.mozilla.org)*: top 500*
|
||||
1.  [linktr.ee (https://linktr.ee)](https://linktr.ee)*: top 500, links*
|
||||
1.  [xHamster (https://xhamster.com)](https://xhamster.com)*: top 500, porn, us*
|
||||
@@ -106,7 +107,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, design*
|
||||
1.  [iStock (https://www.istockphoto.com)](https://www.istockphoto.com)*: top 500, photo, stock*
|
||||
1.  [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading*
|
||||
1.  [opensea.io (https://opensea.io)](https://opensea.io)*: top 500, us*
|
||||
1.  [opensea.io (https://opensea.io)](https://opensea.io)*: top 500, us*, search is disabled
|
||||
1.  [DailyMotion (https://www.dailymotion.com)](https://www.dailymotion.com)*: top 500, video*
|
||||
1.  [Behance (https://www.behance.net/)](https://www.behance.net/)*: top 500, business*
|
||||
1.  [Yelp (http://www.yelp.com)](http://www.yelp.com)*: top 500, review*, search is disabled
|
||||
@@ -114,23 +115,23 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Blogger (https://www.blogger.com/)](https://www.blogger.com/)*: top 500, blog*
|
||||
1.  [Patreon (https://www.patreon.com/)](https://www.patreon.com/)*: top 500, finance*
|
||||
1.  [GoodReads (https://www.goodreads.com/)](https://www.goodreads.com/)*: top 500, books, us*
|
||||
1.  [OP.GG [LeagueOfLegends] Brazil (https://www.op.gg/)](https://www.op.gg/)*: top 500, br, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] North America (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] Middle East (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] Europe Nordic & East (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] Europe West (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] Oceania (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] Korea (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, kr*
|
||||
1.  [OP.GG [LeagueOfLegends] Japan (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, jp*
|
||||
1.  [OP.GG [LeagueOfLegends] LAS (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] LAN (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
|
||||
1.  [OP.GG [LeagueOfLegends] Russia (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, ru*
|
||||
1.  [OP.GG [LeagueOfLegends] Turkey (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, tr*
|
||||
1.  [OP.GG [LeagueOfLegends] Singapore (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, sg*
|
||||
1.  [OP.GG [LeagueOfLegends] Phillippines (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, ph*
|
||||
1.  [OP.GG [LeagueOfLegends] Taiwan (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, tw*
|
||||
1.  [OP.GG [LeagueOfLegends] Vietnam (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, vn*
|
||||
1.  [OP.GG [LeagueOfLegends] Thailand (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, th*
|
||||
1.  [OP.GG [LeagueOfLegends] Brazil (https://www.op.gg/)](https://www.op.gg/)*: top 500, br, gaming*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] North America (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Middle East (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Europe Nordic & East (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Europe West (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Oceania (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Korea (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, kr*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Japan (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, jp*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] LAS (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] LAN (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Russia (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, ru*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Turkey (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, tr*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Singapore (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, sg*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Phillippines (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, ph*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Taiwan (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, tw*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Vietnam (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, vn*, search is disabled
|
||||
1.  [OP.GG [LeagueOfLegends] Thailand (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, th*, search is disabled
|
||||
1.  [Quora (https://www.quora.com/)](https://www.quora.com/)*: top 500, education*
|
||||
1.  [TripAdvisor (https://tripadvisor.com/)](https://tripadvisor.com/)*: top 500, travel*
|
||||
1.  [Academia.edu (https://www.academia.edu/)](https://www.academia.edu/)*: top 500, id*
|
||||
@@ -182,7 +183,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [note (https://note.com/)](https://note.com/)*: top 1K, jp*
|
||||
1.  [AfreecaTV (http://bjapi.afreecatv.com)](http://bjapi.afreecatv.com)*: top 1K, streaming*
|
||||
1.  [Redbubble (https://www.redbubble.com/)](https://www.redbubble.com/)*: top 1K, shopping, us*
|
||||
1.  [Tom's guide (http://forums.tomsguide.com)](http://forums.tomsguide.com)*: top 1K, forum, tech*
|
||||
1.  [Tom's guide (http://forums.tomsguide.com)](http://forums.tomsguide.com)*: top 1K, forum, tech*, search is disabled
|
||||
1.  [Yumpu (https://www.yumpu.com)](https://www.yumpu.com)*: top 1K, stock*, search is disabled
|
||||
1.  [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us*
|
||||
1.  [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us*
|
||||
@@ -194,7 +195,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Discogs (https://www.discogs.com/)](https://www.discogs.com/)*: top 5K, music, us*
|
||||
1.  [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us*
|
||||
1.  [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng*
|
||||
1.  [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us*
|
||||
1.  [Redtube (https://www.redtube.com/)](https://www.redtube.com/)*: top 5K, porn, us*
|
||||
1.  [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*, search is disabled
|
||||
1.  [Ameba (https://profile.ameba.jp)](https://profile.ameba.jp)*: top 5K, jp*
|
||||
1.  [adblockplus.org (https://adblockplus.org)](https://adblockplus.org)*: top 5K, us*
|
||||
@@ -210,9 +211,9 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [forums.drom.ru (https://www.forumsdrom.ru/)](https://www.forumsdrom.ru/)*: top 5K, forum, ru*
|
||||
1.  [SoftwareInformer (https://users.software.informer.com)](https://users.software.informer.com)*: top 5K, in*
|
||||
1.  [Freecodecamp (https://www.freecodecamp.org/forum/)](https://www.freecodecamp.org/forum/)*: top 5K, coding, education, forum*
|
||||
1.  [Zomato (https://www.zomato.com/)](https://www.zomato.com/)*: top 5K, geosocial, in*
|
||||
1.  [Zomato (https://www.zomato.com/)](https://www.zomato.com/)*: top 5K, geosocial, in*, search is disabled
|
||||
1.  [Wowhead (https://www.wowhead.com)](https://www.wowhead.com)*: top 5K, gaming, us*
|
||||
1.  [Kaskus (https://www.kaskus.co.id)](https://www.kaskus.co.id)*: top 5K, id*
|
||||
1.  [Kaskus (https://www.kaskus.co.id)](https://www.kaskus.co.id)*: top 5K, id*, search is disabled
|
||||
1.  [PCGamer (https://pcgamer.com)](https://pcgamer.com)*: top 5K, gaming, news*
|
||||
1.  [Artstation (https://www.artstation.com)](https://www.artstation.com)*: top 5K, art, stock*
|
||||
1.  [Pikabu (https://pikabu.ru/)](https://pikabu.ru/)*: top 5K, ru, sharing*
|
||||
@@ -240,7 +241,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Otzovik (https://otzovik.com/)](https://otzovik.com/)*: top 5K, ru*
|
||||
1.  [LiveInternet (https://www.liveinternet.ru)](https://www.liveinternet.ru)*: top 5K, ru*
|
||||
1.  [LeetCode (https://leetcode.com/)](https://leetcode.com/)*: top 5K, coding*, search is disabled
|
||||
1.  [Kaggle (https://www.kaggle.com/)](https://www.kaggle.com/)*: top 5K, tech*
|
||||
1.  [Kaggle (https://www.kaggle.com/)](https://www.kaggle.com/)*: top 5K, tech*, search is disabled
|
||||
1.  [Codepen (https://codepen.io/)](https://codepen.io/)*: top 5K, coding, in*
|
||||
1.  [Rajce.net (https://www.rajce.idnes.cz/)](https://www.rajce.idnes.cz/)*: top 5K, cz*
|
||||
1.  [TomsHardware (https://forums.tomshardware.com/)](https://forums.tomshardware.com/)*: top 5K, forum, us*
|
||||
@@ -286,9 +287,9 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Warrior Forum (https://www.warriorforum.com/)](https://www.warriorforum.com/)*: top 5K, forum, us*
|
||||
1.  [Docker Hub (https://hub.docker.com/)](https://hub.docker.com/)*: top 5K, coding*
|
||||
1.  [forums.docker.com (https://forums.docker.com)](https://forums.docker.com)*: top 5K, forum, us*
|
||||
1.  [AdultFriendFinder (https://adultfriendfinder.com)](https://adultfriendfinder.com)*: top 5K, dating, us*
|
||||
1.  [AdultFriendFinder (https://adultfriendfinder.com)](https://adultfriendfinder.com)*: top 5K, dating, us*, search is disabled
|
||||
1.  [500px (https://500px.com/)](https://500px.com/)*: top 5K, photo*
|
||||
1.  [Livemaster (https://www.livemaster.ru)](https://www.livemaster.ru)*: top 5K, ru*
|
||||
1.  [Livemaster (https://www.livemaster.ru)](https://www.livemaster.ru)*: top 5K, ru*, search is disabled
|
||||
1.  [www.tagged.com (http://www.tagged.com)](http://www.tagged.com)*: top 5K, networking*
|
||||
1.  [Photobucket (https://photobucket.com/)](https://photobucket.com/)*: top 5K, photo, us*, search is disabled
|
||||
1.  [Eurogamer (https://www.eurogamer.net)](https://www.eurogamer.net)*: top 5K, us*, search is disabled
|
||||
@@ -318,13 +319,13 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Cracked (https://www.cracked.com/)](https://www.cracked.com/)*: top 5K, us*
|
||||
1.  [We Heart It (https://weheartit.com/)](https://weheartit.com/)*: top 5K, blog, in, photo*, search is disabled
|
||||
1.  [FilmWeb (https://www.filmweb.pl/user/adam)](https://www.filmweb.pl/user/adam)*: top 5K, movies, pl*
|
||||
1.  [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*
|
||||
1.  [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*, search is disabled
|
||||
1.  [videohive.net (https://videohive.net)](https://videohive.net)*: top 5K, video*
|
||||
1.  [ImgInn (https://imginn.com)](https://imginn.com)*: top 5K, photo*
|
||||
1.  [BoardGameGeek (https://boardgamegeek.com)](https://boardgamegeek.com)*: top 5K, gaming, us*
|
||||
1.  [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 5K, us*
|
||||
1.  [Pluralsight (https://app.pluralsight.com)](https://app.pluralsight.com)*: top 5K, in, us*
|
||||
1.  [TechPowerUp (https://www.techpowerup.com)](https://www.techpowerup.com)*: top 5K, us*
|
||||
1.  [TechPowerUp (https://www.techpowerup.com)](https://www.techpowerup.com)*: top 5K, us*, search is disabled
|
||||
1.  [Guru (https://www.guru.com)](https://www.guru.com)*: top 5K, in*
|
||||
1.  [AllTrails (https://www.alltrails.com/)](https://www.alltrails.com/)*: top 5K, us*
|
||||
1.  [Cheezburger (https://profile.cheezburger.com)](https://profile.cheezburger.com)*: top 5K, us*
|
||||
@@ -336,11 +337,11 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Jigsawplanet (https://www.jigsawplanet.com)](https://www.jigsawplanet.com)*: top 5K, fr, us*
|
||||
1.  [hackernoon.com (https://hackernoon.com)](https://hackernoon.com)*: top 5K, news, us*
|
||||
1.  [PCPartPicker (https://pcpartpicker.com)](https://pcpartpicker.com)*: top 5K, us*, search is disabled
|
||||
1.  [AskFM (https://ask.fm/)](https://ask.fm/)*: top 5K, eg, in, ru*
|
||||
1.  [AskFM (https://ask.fm/)](https://ask.fm/)*: top 5K, eg, in, ru*, search is disabled
|
||||
1.  [GitLab (https://gitlab.com/)](https://gitlab.com/)*: top 5K, coding*
|
||||
1.  [DEV Community (https://dev.to/)](https://dev.to/)*: top 5K, coding*
|
||||
1.  [Gumroad (https://www.gumroad.com/)](https://www.gumroad.com/)*: top 5K, us*
|
||||
1.  [Gramho (https://gramho.com/)](https://gramho.com/)*: top 5K, photo*
|
||||
1.  [Gramho (https://gramho.com/)](https://gramho.com/)*: top 5K, photo*, search is disabled
|
||||
1.  [Taplink (https://taplink.cc/)](https://taplink.cc/)*: top 5K, links, ru*, search is disabled
|
||||
1.  [BuyMeACoffee (https://www.buymeacoffee.com/)](https://www.buymeacoffee.com/)*: top 5K, in*
|
||||
1.  [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, us*
|
||||
@@ -349,7 +350,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [IFTTT (https://www.ifttt.com/)](https://www.ifttt.com/)*: top 5K, tech*
|
||||
1.  [www.minds.com (https://www.minds.com)](https://www.minds.com)*: top 5K, in*
|
||||
1.  [forums.imore.com (https://forums.imore.com)](https://forums.imore.com)*: top 5K, forum, us*, search is disabled
|
||||
1.  [iXBT (https://forum.ixbt.com)](https://forum.ixbt.com)*: top 10K, forum, ru*
|
||||
1.  [iXBT (https://forum.ixbt.com)](https://forum.ixbt.com)*: top 10K, forum, ru*, search is disabled
|
||||
1.  [Stihi.ru (https://www.stihi.ru/)](https://www.stihi.ru/)*: top 10K, ru, writing*
|
||||
1.  [Gitee (https://gitee.com/)](https://gitee.com/)*: top 10K, cn*
|
||||
1.  [VirusTotal (https://www.virustotal.com/)](https://www.virustotal.com/)*: top 10K, in*, search is disabled
|
||||
@@ -361,7 +362,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Newgrounds (https://newgrounds.com)](https://newgrounds.com)*: top 10K, art, forum, gaming*
|
||||
1.  [Gravatar (http://en.gravatar.com/)](http://en.gravatar.com/)*: top 10K, photo*
|
||||
1.  [Digitalspy (https://forums.digitalspy.com/)](https://forums.digitalspy.com/)*: top 10K, forum, gb, us*, search is disabled
|
||||
1.  [Bibsonomy (https://www.bibsonomy.org)](https://www.bibsonomy.org)*: top 10K, in*
|
||||
1.  [Bibsonomy (https://www.bibsonomy.org)](https://www.bibsonomy.org)*: top 10K, in*, search is disabled
|
||||
1.  [Slashdot (https://slashdot.org)](https://slashdot.org)*: top 10K, news*
|
||||
1.  [Netvibes (https://www.netvibes.com)](https://www.netvibes.com)*: top 10K, business, fr*, search is disabled
|
||||
1.  [opensource (https://opensource.com/)](https://opensource.com/)*: top 10K, in, us*
|
||||
@@ -371,9 +372,9 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Smugmug (https://smugmug.com/)](https://smugmug.com/)*: top 10K, us*
|
||||
1.  [NPM (https://www.npmjs.com/)](https://www.npmjs.com/)*: top 10K, coding*
|
||||
1.  [NPM-Package (https://www.npmjs.com/)](https://www.npmjs.com/)*: top 10K, coding*
|
||||
1.  [authorSTREAM (http://www.authorstream.com/)](http://www.authorstream.com/)*: top 10K, documents, in, sharing*
|
||||
1.  [authorSTREAM (http://www.authorstream.com/)](http://www.authorstream.com/)*: top 10K, documents, in, sharing*, search is disabled
|
||||
1.  [rapidapi.com (https://rapidapi.com)](https://rapidapi.com)*: top 10K, in*
|
||||
1.  [forums.serebii.net (https://forums.serebii.net)](https://forums.serebii.net)*: top 10K, forum, us*
|
||||
1.  [forums.serebii.net (https://forums.serebii.net)](https://forums.serebii.net)*: top 10K, forum, us*, search is disabled
|
||||
1.  [3dnews (http://forum.3dnews.ru/)](http://forum.3dnews.ru/)*: top 10K, forum, ru*, search is disabled
|
||||
1.  [VSCO (https://vsco.co/)](https://vsco.co/)*: top 10K, us*
|
||||
1.  [LonelyPlanet (https://www.lonelyplanet.com)](https://www.lonelyplanet.com)*: top 10K, us*, search is disabled
|
||||
@@ -420,7 +421,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Diigo (https://www.diigo.com/)](https://www.diigo.com/)*: top 10K, in*
|
||||
1.  [Yummly (https://www.yummly.com)](https://www.yummly.com)*: top 10K, us*
|
||||
1.  [TheStudentRoom (https://www.thestudentroom.co.uk)](https://www.thestudentroom.co.uk)*: top 10K, forum, gb*
|
||||
1.  [getmyuni (https://getmyuni.com/)](https://getmyuni.com/)*: top 10K, in*
|
||||
1.  [getmyuni (https://getmyuni.com/)](https://getmyuni.com/)*: top 10K, in*, search is disabled
|
||||
1.  [www.itemfix.com (https://www.itemfix.com)](https://www.itemfix.com)*: top 10K, us*
|
||||
1.  [WikimapiaProfile (http://wikimapia.org)](http://wikimapia.org)*: top 10K, maps, ru*
|
||||
1.  [WikimapiaSearch (http://wikimapia.org)](http://wikimapia.org)*: top 10K, maps, ru*
|
||||
@@ -462,14 +463,14 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Vivino (https://www.vivino.com/)](https://www.vivino.com/)*: top 100K, us*
|
||||
1.  [Freesound (https://freesound.org/)](https://freesound.org/)*: top 100K, music, us*
|
||||
1.  [Namepros (https://www.namepros.com/)](https://www.namepros.com/)*: top 100K, forum, in, us*
|
||||
1.  [Artsy (https://www.artsy.net)](https://www.artsy.net)*: top 100K, us*
|
||||
1.  [Artsy (https://www.artsy.net)](https://www.artsy.net)*: top 100K, us*, search is disabled
|
||||
1.  [ProductHunt (https://www.producthunt.com/)](https://www.producthunt.com/)*: top 100K, tech, us*
|
||||
1.  [forums.visual-paradigm.com (https://forums.visual-paradigm.com)](https://forums.visual-paradigm.com)*: top 100K, forum, in*
|
||||
1.  [MoneySavingExpert (https://forums.moneysavingexpert.com)](https://forums.moneysavingexpert.com)*: top 100K, forum, gb*
|
||||
1.  [Packagist (https://packagist.org/)](https://packagist.org/)*: top 100K, in, jp*
|
||||
1.  [Advego (https://advego.com/)](https://advego.com/)*: top 100K, ru*
|
||||
1.  [hi5 (http://www.hi5.com)](http://www.hi5.com)*: top 100K, networking*
|
||||
1.  [3ddd (https://3ddd.ru)](https://3ddd.ru)*: top 100K, ru*
|
||||
1.  [hi5 (http://www.hi5.com)](http://www.hi5.com)*: top 100K, networking*, search is disabled
|
||||
1.  [3ddd (https://3ddd.ru)](https://3ddd.ru)*: top 100K, ru*, search is disabled
|
||||
1.  [NameMC (https://namemc.com/)](https://namemc.com/)*: top 100K, us*
|
||||
1.  [B17 (https://www.b17.ru/)](https://www.b17.ru/)*: top 100K, ru*
|
||||
1.  [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*, search is disabled
|
||||
@@ -494,7 +495,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [joyreactor.cc (http://joyreactor.cc)](http://joyreactor.cc)*: top 100K, art, nl, ru*
|
||||
1.  [Speakerdeck (https://speakerdeck.com)](https://speakerdeck.com)*: top 100K, in, us*
|
||||
1.  [Postila (https://postila.ru/)](https://postila.ru/)*: top 100K, ru*, search is disabled
|
||||
1.  [Pbase (https://pbase.com/)](https://pbase.com/)*: top 100K, in*
|
||||
1.  [Pbase (https://pbase.com/)](https://pbase.com/)*: top 100K, in*, search is disabled
|
||||
1.  [NICommunityForum (https://www.native-instruments.com/forum/)](https://www.native-instruments.com/forum/)*: top 100K, forum*
|
||||
1.  [spletnik (https://spletnik.ru/)](https://spletnik.ru/)*: top 100K, ru*
|
||||
1.  [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*, search is disabled
|
||||
@@ -505,7 +506,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [forum.snapcraft.io (https://forum.snapcraft.io)](https://forum.snapcraft.io)*: top 100K, forum, in*
|
||||
1.  [forums.destructoid.com (https://forums.destructoid.com)](https://forums.destructoid.com)*: top 100K, forum, us*
|
||||
1.  [7dach (https://7dach.ru/)](https://7dach.ru/)*: top 100K, ru*
|
||||
1.  [BikeRadar (https://forum.bikeradar.com)](https://forum.bikeradar.com)*: top 100K, forum, gb, us*
|
||||
1.  [BikeRadar (https://forum.bikeradar.com)](https://forum.bikeradar.com)*: top 100K, forum, gb, us*, search is disabled
|
||||
1.  [lnk.bio (https://lnk.bio)](https://lnk.bio)*: top 100K, links*
|
||||
1.  [hashnode (https://hashnode.com)](https://hashnode.com)*: top 100K, in*
|
||||
1.  [Giantbomb (https://www.giantbomb.com)](https://www.giantbomb.com)*: top 100K, us*
|
||||
@@ -519,15 +520,14 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Metal-archives (https://www.metal-archives.com)](https://www.metal-archives.com)*: top 100K, de, music, pl, us*
|
||||
1.  [forums.steinberg.net (https://forums.steinberg.net)](https://forums.steinberg.net)*: top 100K, forum, in*
|
||||
1.  [mel.fm (https://mel.fm)](https://mel.fm)*: top 100K, ru*
|
||||
1.  [Influenster (https://www.influenster.com/)](https://www.influenster.com/)*: top 100K, us*
|
||||
1.  [forums.indiegala.com (https://forums.indiegala.com)](https://forums.indiegala.com)*: top 100K, forum, us*
|
||||
1.  [Picarto (https://ptvintern.picarto.tv)](https://ptvintern.picarto.tv)*: top 100K, art, streaming*
|
||||
1.  [Neoseeker (https://www.neoseeker.com)](https://www.neoseeker.com)*: top 100K, us*
|
||||
1.  [InfosecInstitute (https://community.infosecinstitute.com)](https://community.infosecinstitute.com)*: top 100K, us*, search is disabled
|
||||
1.  [Armorgames (https://armorgames.com)](https://armorgames.com)*: top 100K, gaming, us*
|
||||
1.  [giters.com (https://giters.com)](https://giters.com)*: top 100K, coding*
|
||||
1.  [giters.com (https://giters.com)](https://giters.com)*: top 100K, coding*, search is disabled
|
||||
1.  [teamtreehouse.com (https://teamtreehouse.com)](https://teamtreehouse.com)*: top 100K, us*
|
||||
1.  [Blu-ray (https://forum.blu-ray.com/)](https://forum.blu-ray.com/)*: top 100K, forum, us*
|
||||
1.  [Blu-ray (https://forum.blu-ray.com/)](https://forum.blu-ray.com/)*: top 100K, forum, us*, search is disabled
|
||||
1.  [TheOdysseyOnline (https://www.theodysseyonline.com)](https://www.theodysseyonline.com)*: top 100K, blog*
|
||||
1.  [DTF (https://dtf.ru)](https://dtf.ru)*: top 100K, ru*, search is disabled
|
||||
1.  [TRASHBOX.RU (https://trashbox.ru/)](https://trashbox.ru/)*: top 100K, az, ru*
|
||||
@@ -585,7 +585,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Facenama (https://facenama.com/)](https://facenama.com/)*: top 100K, ir*, search is disabled
|
||||
1.  [PushSquare (http://www.pushsquare.com)](http://www.pushsquare.com)*: top 100K, gaming, news, us*
|
||||
1.  [Myinstants (https://www.myinstants.com)](https://www.myinstants.com)*: top 100K, music*
|
||||
1.  [MoscowFlamp (https://moscow.flamp.ru/)](https://moscow.flamp.ru/)*: top 100K, ru*
|
||||
1.  [MoscowFlamp (https://moscow.flamp.ru/)](https://moscow.flamp.ru/)*: top 100K, ru*, search is disabled
|
||||
1.  [xenforo.com (https://xenforo.com/community/)](https://xenforo.com/community/)*: top 100K, forum, in, jp, tr, us*
|
||||
1.  [TheVillage.ru (https://www.the-village.ru/)](https://www.the-village.ru/)*: top 100K, ru*, search is disabled
|
||||
1.  [GameRevolution (https://forums.gamerevolution.com)](https://forums.gamerevolution.com)*: top 100K, forum, gaming, us*
|
||||
@@ -642,9 +642,9 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [tripit.com (https://tripit.com)](https://tripit.com)*: top 100K, us*, search is disabled
|
||||
1.  [Rust-lang (https://users.rust-lang.org)](https://users.rust-lang.org)*: top 100K, coding, forum, us*
|
||||
1.  [Au (https://au.ru)](https://au.ru)*: top 100K, freelance, ru, shopping*
|
||||
1.  [Pepper (https://www.pepper.ru/)](https://www.pepper.ru/)*: top 100K, ru*
|
||||
1.  [Pepper (https://www.pepper.ru/)](https://www.pepper.ru/)*: top 100K, ru*, search is disabled
|
||||
1.  [Carbonmade (https://carbonmade.com/)](https://carbonmade.com/)*: top 100K, in, us*
|
||||
1.  [Wanelo (https://wanelo.co/adam)](https://wanelo.co/adam)*: top 100K, in, us*
|
||||
1.  [Wanelo (https://wanelo.com/)](https://wanelo.com/)*: top 100K, in, us*, search is disabled
|
||||
1.  [Proshkolu (https://proshkolu.ru)](https://proshkolu.ru)*: top 100K, ru*
|
||||
1.  [Nhl (https://nhl.ru)](https://nhl.ru)*: top 100K, by, cn, ru*, search is disabled
|
||||
1.  [Avforums (https://www.avforums.com)](https://www.avforums.com)*: top 100K, forum, gb, us*
|
||||
@@ -678,7 +678,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Weblancer (https://www.weblancer.net)](https://www.weblancer.net)*: top 100K, freelance, ru*
|
||||
1.  [viewbug (https://www.viewbug.com)](https://www.viewbug.com)*: top 100K, photo*
|
||||
1.  [amateurvoyeurforum.com (https://www.amateurvoyeurforum.com)](https://www.amateurvoyeurforum.com)*: top 100K, forum, us*
|
||||
1.  [Pinboard (http://pinboard.in)](http://pinboard.in)*: top 100K, in, us*
|
||||
1.  [Pinboard (http://pinboard.in)](http://pinboard.in)*: top 100K, in, us*, search is disabled
|
||||
1.  [lomography (https://www.lomography.com)](https://www.lomography.com)*: top 100K, photo*
|
||||
1.  [forums.stevehoffman.tv (https://forums.stevehoffman.tv)](https://forums.stevehoffman.tv)*: top 100K, forum, us*
|
||||
1.  [Ask Fedora (https://ask.fedoraproject.org/)](https://ask.fedoraproject.org/)*: top 100K, forum, in, us*
|
||||
@@ -701,7 +701,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [club.7ya.ru (https://club.7ya.ru)](https://club.7ya.ru)*: top 100K, ru*
|
||||
1.  [gloria.tv (https://gloria.tv)](https://gloria.tv)*: top 100K, ar, mx, pl, sk, us*
|
||||
1.  [GaiaOnline (https://www.gaiaonline.com/)](https://www.gaiaonline.com/)*: top 100K, ro, us*
|
||||
1.  [forum.oneclickchicks.com (https://forum.oneclickchicks.com)](https://forum.oneclickchicks.com)*: top 100K*
|
||||
1.  [forum.oneclickchicks.com (https://forum.oneclickchicks.com)](https://forum.oneclickchicks.com)*: top 100K*, search is disabled
|
||||
1.  [Datpiff (https://www.datpiff.com)](https://www.datpiff.com)*: top 100K, us*
|
||||
1.  [Anobii (https://www.anobii.com)](https://www.anobii.com)*: top 100K, books*
|
||||
1.  [Trinixy (https://trinixy.ru)](https://trinixy.ru)*: top 100K, news, ru*
|
||||
@@ -717,7 +717,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [TheFastlaneForum (https://www.thefastlaneforum.com)](https://www.thefastlaneforum.com)*: top 100K, forum, us*, search is disabled
|
||||
1.  [shor.by (https://shor.by)](https://shor.by)*: top 100K, links*
|
||||
1.  [Liveexpert (https://www.liveexpert.ru)](https://www.liveexpert.ru)*: top 100K, ru*
|
||||
1.  [Infura (https://community.infura.io)](https://community.infura.io)*: top 100K, forum, kr, us*
|
||||
1.  [Infura (https://community.infura.io)](https://community.infura.io)*: top 100K, forum, kr, us*, search is disabled
|
||||
1.  [Spark (https://spark.ru)](https://spark.ru)*: top 100K, ru*
|
||||
1.  [Suomi24 (https://www.suomi24.fi)](https://www.suomi24.fi)*: top 100K, fi, jp*
|
||||
1.  [Freelancehunt (https://freelancehunt.com)](https://freelancehunt.com)*: top 100K, freelance, ru, ua*
|
||||
@@ -750,7 +750,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [forum.eksmo.ru (http://forum.eksmo.ru)](http://forum.eksmo.ru)*: top 100K, forum, ru*
|
||||
1.  [Davesgarden (https://davesgarden.com)](https://davesgarden.com)*: top 100K, us*
|
||||
1.  [forum.cxem.net (https://forum.cxem.net/)](https://forum.cxem.net/)*: top 100K, forum, ru*, search is disabled
|
||||
1.  [ICQ (https://icq.com)](https://icq.com)*: top 100K, ch, ru, tr*
|
||||
1.  [ICQ (https://icq.com)](https://icq.com)*: top 100K, ch, ru, tr*, search is disabled
|
||||
1.  [d3 (https://d3.ru/)](https://d3.ru/)*: top 100K, ru*
|
||||
1.  [dwg (https://forum.dwg.ru/)](https://forum.dwg.ru/)*: top 100K, forum, ru*
|
||||
1.  [Fotki (https://fotki.com)](https://fotki.com)*: top 100K, photo*
|
||||
@@ -774,7 +774,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [appleinsider.ru (https://appleinsider.ru)](https://appleinsider.ru)*: top 100K, news, ru, tech*
|
||||
1.  [Hr (https://www.hr.com)](https://www.hr.com)*: top 100K, in, us*
|
||||
1.  [Funnyordie (https://www.funnyordie.com)](https://www.funnyordie.com)*: top 100K, in, us*, search is disabled
|
||||
1.  [Dev.by (https://id.dev.by)](https://id.dev.by)*: top 100K, by, news, tech*
|
||||
1.  [Dev.by (https://id.dev.by)](https://id.dev.by)*: top 100K, by, news, tech*, search is disabled
|
||||
1.  [hochu (http://forum.hochu.ua)](http://forum.hochu.ua)*: top 100K, forum, ru, ua*, search is disabled
|
||||
1.  [boards.straightdope.com (https://boards.straightdope.com)](https://boards.straightdope.com)*: top 100K, forum, us*
|
||||
1.  [24open (https://24open.ru)](https://24open.ru)*: top 100K, dating, ru, us*, search is disabled
|
||||
@@ -815,10 +815,10 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Twitter Shadowban (https://shadowban.eu)](https://shadowban.eu)*: top 100K, jp, sa*, search is disabled
|
||||
1.  [Psyera (https://psyera.ru)](https://psyera.ru)*: top 100K, ru*
|
||||
1.  [mfd (http://forum.mfd.ru)](http://forum.mfd.ru)*: top 100K, forum, ru*
|
||||
1.  [mirf (https://forum.mirf.ru/)](https://forum.mirf.ru/)*: top 100K, forum, ru*
|
||||
1.  [mirf (https://forum.mirf.ru/)](https://forum.mirf.ru/)*: top 100K, forum, ru*, search is disabled
|
||||
1.  [Fredmiranda (https://www.fredmiranda.com)](https://www.fredmiranda.com)*: top 100K, de, us*
|
||||
1.  [Bigsoccer (https://www.bigsoccer.com)](https://www.bigsoccer.com)*: top 100K, forum, us*
|
||||
1.  [VKMOnline (http://forums.vkmonline.com)](http://forums.vkmonline.com)*: top 100K, forum, ru*
|
||||
1.  [VKMOnline (http://forums.vkmonline.com)](http://forums.vkmonline.com)*: top 100K, forum, ru*, search is disabled
|
||||
1.  [fl (https://www.fl.ru/)](https://www.fl.ru/)*: top 100K, ru*
|
||||
1.  [Huntingnet (https://www.huntingnet.com)](https://www.huntingnet.com)*: top 100K, in, us*
|
||||
1.  [Realmeye (https://www.realmeye.com/)](https://www.realmeye.com/)*: top 100K, gaming*
|
||||
@@ -880,7 +880,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Sprashivai (http://sprashivai.ru)](http://sprashivai.ru)*: top 100K, ru*
|
||||
1.  [Lenov (https://lenov.ru)](https://lenov.ru)*: top 100K, ru*
|
||||
1.  [Travelblog (https://www.travelblog.org)](https://www.travelblog.org)*: top 100K, blog, travel*
|
||||
1.  [PacketStormSecurity (https://packetstormsecurity.com)](https://packetstormsecurity.com)*: top 100K, in, tr, us*
|
||||
1.  [PacketStormSecurity (https://packetstormsecurity.com)](https://packetstormsecurity.com)*: top 100K, in, tr, us*, search is disabled
|
||||
1.  [Avtomarket (https://avtomarket.ru)](https://avtomarket.ru)*: top 100K, ru*
|
||||
1.  [tv.ucoz.club (http://tv.ucoz.club)](http://tv.ucoz.club)*: top 100K, ru*
|
||||
1.  [fanat1k (https://forum.fanat1k.ru)](https://forum.fanat1k.ru)*: top 100K, forum, ru*, search is disabled
|
||||
@@ -904,7 +904,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [AllTheLyrics (https://www.allthelyrics.com)](https://www.allthelyrics.com)*: top 100K, forum, music*
|
||||
1.  [Ccmixter (http://ccmixter.org/)](http://ccmixter.org/)*: top 100K, music*
|
||||
1.  [swedroid.se (http://swedroid.se/forum)](http://swedroid.se/forum)*: top 100K, forum, se*
|
||||
1.  [Vero (https://vero.co)](https://vero.co)*: top 100K, in, us*
|
||||
1.  [Vero (https://vero.co)](https://vero.co)*: top 100K, in, us*, search is disabled
|
||||
1.  [subaruforester.org (https://subaruforester.org)](https://subaruforester.org)*: top 100K, forum, us*
|
||||
1.  [Gvectors (https://gvectors.com)](https://gvectors.com)*: top 100K, in, us*
|
||||
1.  [Redcafe (https://www.redcafe.net)](https://www.redcafe.net)*: top 100K, forum, gb, sg, us*
|
||||
@@ -946,11 +946,11 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [savingadvice.com (https://savingadvice.com)](https://savingadvice.com)*: top 10M, in, us*
|
||||
1.  [Pbnation (https://www.pbnation.com/)](https://www.pbnation.com/)*: top 10M, ca, us*, search is disabled
|
||||
1.  [community.sphero.com (https://community.sphero.com)](https://community.sphero.com)*: top 10M, forum, tech, us*
|
||||
1.  [Pinme (https://www.pinme.ru)](https://www.pinme.ru)*: top 10M, ru*
|
||||
1.  [Pinme (https://www.pinme.ru)](https://www.pinme.ru)*: top 10M, ru*, search is disabled
|
||||
1.  [Showme (https://www.showme.com)](https://www.showme.com)*: top 10M, in, us*
|
||||
1.  [devRant (https://devrant.com/)](https://devrant.com/)*: top 10M, coding, in*
|
||||
1.  [forum.endeavouros.com (https://forum.endeavouros.com)](https://forum.endeavouros.com)*: top 10M, forum, in*
|
||||
1.  [php.ru (https://php.ru/forum/)](https://php.ru/forum/)*: top 10M, forum, ru*
|
||||
1.  [php.ru (https://php.ru/forum/)](https://php.ru/forum/)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [Noblogs (https://noblogs.org/)](https://noblogs.org/)*: top 10M, blog*
|
||||
1.  [forum.rzn.info (https://forum.rzn.info)](https://forum.rzn.info)*: top 10M, forum, ru*
|
||||
1.  [forums.eagle.ru (https://forums.eagle.ru)](https://forums.eagle.ru)*: top 10M, ca, forum, gaming, gb, in, us*, search is disabled
|
||||
@@ -964,11 +964,11 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Quibblo (https://www.quibblo.com/)](https://www.quibblo.com/)*: top 10M, in*, search is disabled
|
||||
1.  [Riftgame (http://forums.riftgame.com)](http://forums.riftgame.com)*: top 10M, cr, forum, us*
|
||||
1.  [ForumOdUa (https://forumodua.com)](https://forumodua.com)*: top 10M, forum, ro, ua*, search is disabled
|
||||
1.  [IRC-Galleria (https://irc-galleria.net)](https://irc-galleria.net)*: top 10M, fi, us*
|
||||
1.  [IRC-Galleria (https://irc-galleria.net)](https://irc-galleria.net)*: top 10M, fi, us*, search is disabled
|
||||
1.  [Rapforce (http://www.rapforce.net)](http://www.rapforce.net)*: top 10M, fr, ru*
|
||||
1.  [GunsAndAmmo (https://gunsandammo.com/)](https://gunsandammo.com/)*: top 10M, us*, search is disabled
|
||||
1.  [Mybuilder (https://www.mybuilder.com)](https://www.mybuilder.com)*: top 10M, gb, hk, in, us*
|
||||
1.  [ContactInBio (domain) (http://username.contactin.bio)](http://username.contactin.bio)*: top 10M, links*
|
||||
1.  [ContactInBio (domain) (http://username.contactin.bio)](http://username.contactin.bio)*: top 10M, links*, search is disabled
|
||||
1.  [forum.ubuntu-it.org (https://forum.ubuntu-it.org)](https://forum.ubuntu-it.org)*: top 10M, ch, forum, in, it*
|
||||
1.  [support.ilovegrowingmarijuana.com (https://support.ilovegrowingmarijuana.com)](https://support.ilovegrowingmarijuana.com)*: top 10M, forum, us*
|
||||
1.  [free-otvet.ru (https://free-otvet.ru)](https://free-otvet.ru)*: top 10M, q&a*
|
||||
@@ -977,7 +977,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Touristlink (https://www.touristlink.com)](https://www.touristlink.com)*: top 10M, in*
|
||||
1.  [Rmmedia (https://rmmedia.ru)](https://rmmedia.ru)*: top 10M, forum, ru*
|
||||
1.  [forum.mxlinux.org (https://forum.mxlinux.org)](https://forum.mxlinux.org)*: top 10M, forum*
|
||||
1.  [Glav (https://glav.su)](https://glav.su)*: top 10M, ru*
|
||||
1.  [Glav (https://glav.su)](https://glav.su)*: top 10M, ru*, search is disabled
|
||||
1.  [board.phpbuilder.com (https://board.phpbuilder.com)](https://board.phpbuilder.com)*: top 10M, in*
|
||||
1.  [Mylespaul (https://www.mylespaul.com)](https://www.mylespaul.com)*: top 10M, cl, us*
|
||||
1.  [forum.palemoon.org (https://forum.palemoon.org)](https://forum.palemoon.org)*: top 10M, forum, in*
|
||||
@@ -986,12 +986,12 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [forum.exkavator.ru (https://forum.exkavator.ru)](https://forum.exkavator.ru)*: top 10M, forum, ru*
|
||||
1.  [navi (http://forum.navi.gg/)](http://forum.navi.gg/)*: top 10M, forum, ru*
|
||||
1.  [The AnswerBank (https://www.theanswerbank.co.uk)](https://www.theanswerbank.co.uk)*: top 10M, gb, q&a*
|
||||
1.  [picturepush.com (https://picturepush.com)](https://picturepush.com)*: top 10M, photo*
|
||||
1.  [picturepush.com (https://picturepush.com)](https://picturepush.com)*: top 10M, photo*, search is disabled
|
||||
1.  [Mobile-files (https://www.mobile-files.com/)](https://www.mobile-files.com/)*: top 10M, forum, ru, us*
|
||||
1.  [Fluther (https://www.fluther.com/)](https://www.fluther.com/)*: top 10M, in, us*
|
||||
1.  [Comedy (https://www.comedy.co.uk)](https://www.comedy.co.uk)*: top 10M, gb, in, movies, pk, us*
|
||||
1.  [sessionize.com (https://sessionize.com)](https://sessionize.com)*: top 10M, business*
|
||||
1.  [Fireworktv (https://fireworktv.com)](https://fireworktv.com)*: top 10M, in, jp*
|
||||
1.  [Fireworktv (https://fireworktv.com)](https://fireworktv.com)*: top 10M, in, jp*, search is disabled
|
||||
1.  [Expono (http://www.expono.com)](http://www.expono.com)*: top 10M, photo*
|
||||
1.  [funcom (https://forums.funcom.com)](https://forums.funcom.com)*: top 10M, forum, us*
|
||||
1.  [rt20.getbb.ru (http://www.rt20.getbb.ru)](http://www.rt20.getbb.ru)*: top 10M, forum, ru*
|
||||
@@ -1010,7 +1010,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [TamTam (https://tamtam.chat/)](https://tamtam.chat/)*: top 10M, ru*
|
||||
1.  [Velomania (https://forum.velomania.ru/)](https://forum.velomania.ru/)*: top 10M, forum, ru*
|
||||
1.  [ITVDN Forum (https://forum.itvdn.com)](https://forum.itvdn.com)*: top 10M, forum, ru, ua*
|
||||
1.  [Videosift (https://videosift.com)](https://videosift.com)*: top 10M, us*
|
||||
1.  [Videosift (https://videosift.com)](https://videosift.com)*: top 10M, us*, search is disabled
|
||||
1.  [forum.spyderco.com (https://forum.spyderco.com)](https://forum.spyderco.com)*: top 10M, forum, us*
|
||||
1.  [Rlocman (https://www.rlocman.ru)](https://www.rlocman.ru)*: top 10M, forum, ru*
|
||||
1.  [Vxzone (https://www.vxzone.com)](https://www.vxzone.com)*: top 10M, ru*, search is disabled
|
||||
@@ -1033,9 +1033,9 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Bdoutdoors (https://www.bdoutdoors.com)](https://www.bdoutdoors.com)*: top 10M, us*
|
||||
1.  [Fcdin (http://fcdin.com)](http://fcdin.com)*: top 10M, forum, ru*
|
||||
1.  [Mixupload (https://mixupload.com/)](https://mixupload.com/)*: top 10M, ru*
|
||||
1.  [OnanistovNet (https://onanistov.net)](https://onanistov.net)*: top 10M, ru*
|
||||
1.  [OnanistovNet (https://onanistov.net)](https://onanistov.net)*: top 10M, ru*, search is disabled
|
||||
1.  [Storycorps (https://archive.storycorps.org)](https://archive.storycorps.org)*: top 10M, us*
|
||||
1.  [VegasCreativeSoftware (https://www.vegascreativesoftware.info)](https://www.vegascreativesoftware.info)*: top 10M, us*
|
||||
1.  [VegasCreativeSoftware (https://www.vegascreativesoftware.info)](https://www.vegascreativesoftware.info)*: top 10M, us*, search is disabled
|
||||
1.  [ForumKinopoisk (https://forumkinopoisk.ru)](https://forumkinopoisk.ru)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [Ethresear (https://ethresear.ch)](https://ethresear.ch)*: top 10M, ch, cr, forum, us*
|
||||
1.  [Sysadmins (https://sysadmins.ru)](https://sysadmins.ru)*: top 10M, forum, ru, tech*
|
||||
@@ -1053,8 +1053,8 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [ResidentAdvisor (https://www.residentadvisor.net)](https://www.residentadvisor.net)*: top 10M, us*
|
||||
1.  [Weburg (https://weburg.net)](https://weburg.net)*: top 10M, ru*
|
||||
1.  [Blast (https://www.blast.hk)](https://www.blast.hk)*: top 10M, forum, ru*
|
||||
1.  [Hubski (https://hubski.com/)](https://hubski.com/)*: top 10M, blog*
|
||||
1.  [Magix (https://www.magix.info)](https://www.magix.info)*: top 10M*
|
||||
1.  [Hubski (https://hubski.com/)](https://hubski.com/)*: top 10M, blog*, search is disabled
|
||||
1.  [Magix (https://www.magix.info)](https://www.magix.info)*: top 10M*, search is disabled
|
||||
1.  [Crevado (https://crevado.com/)](https://crevado.com/)*: top 10M, in, us*
|
||||
1.  [Msofficeforums (https://www.msofficeforums.com)](https://www.msofficeforums.com)*: top 10M, forum, ir, us*
|
||||
1.  [Lushstories (https://www.lushstories.com)](https://www.lushstories.com)*: top 10M, us*
|
||||
@@ -1086,7 +1086,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [artinvestment (https://forum.artinvestment.ru/)](https://forum.artinvestment.ru/)*: top 10M, forum, ru*
|
||||
1.  [www.marykay.ru (https://www.marykay.ru)](https://www.marykay.ru)*: top 10M, ru*, search is disabled
|
||||
1.  [Golangbridge (https://forum.golangbridge.org/)](https://forum.golangbridge.org/)*: top 10M, forum, in, sa, ua, us, vn*
|
||||
1.  [Rcforum (http://www.rcforum.ru)](http://www.rcforum.ru)*: top 10M, forum, ru*
|
||||
1.  [Rcforum (http://www.rcforum.ru)](http://www.rcforum.ru)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [samesound.ru (https://samesound.ru)](https://samesound.ru)*: top 10M, ru*
|
||||
1.  [Windows10forums (https://www.windows10forums.com/)](https://www.windows10forums.com/)*: top 10M, forum, in, us*
|
||||
1.  [writingforums.org (http://www.writingforums.org/)](http://www.writingforums.org/)*: top 10M, ca, forum*
|
||||
@@ -1105,7 +1105,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Bobrdobr (https://bobrdobr.ru)](https://bobrdobr.ru)*: top 10M, az, in, ru, tr, ua*
|
||||
1.  [F3.cool (https://f3.cool/)](https://f3.cool/)*: top 10M, ru*
|
||||
1.  [cowboyszone.com (https://cowboyszone.com)](https://cowboyszone.com)*: top 10M, forum, us*
|
||||
1.  [Filmwatch (https://filmwatch.com)](https://filmwatch.com)*: top 10M, ca, in, pk, us*
|
||||
1.  [Filmwatch (https://filmwatch.com)](https://filmwatch.com)*: top 10M, ca, in, pk, us*, search is disabled
|
||||
1.  [RussianFI (http://www.russian.fi/)](http://www.russian.fi/)*: top 10M, forum, ru*
|
||||
1.  [AreKamrbb (https://are.kamrbb.ru)](https://are.kamrbb.ru)*: top 10M, ru*
|
||||
1.  [Hyundaitruckclub (https://hyundaitruckclub.kamrbb.ru)](https://hyundaitruckclub.kamrbb.ru)*: top 10M, ru*
|
||||
@@ -1120,10 +1120,10 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [commons.ishtar-collective.net (https://commons.ishtar-collective.net)](https://commons.ishtar-collective.net)*: top 10M, forum, gaming*
|
||||
1.  [4cheat (https://4cheat.ru)](https://4cheat.ru)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [svtperformance.com (https://svtperformance.com)](https://svtperformance.com)*: top 10M, forum, us*
|
||||
1.  [githubplus.com (https://githubplus.com)](https://githubplus.com)*: top 10M, coding*
|
||||
1.  [githubplus.com (https://githubplus.com)](https://githubplus.com)*: top 10M, coding*, search is disabled
|
||||
1.  [Runitonce (https://www.runitonce.com/)](https://www.runitonce.com/)*: top 10M, ca, us*
|
||||
1.  [Paypal (https://www.paypal.me)](https://www.paypal.me)*: top 10M, finance*
|
||||
1.  [Seatracker (https://seatracker.ru/)](https://seatracker.ru/)*: top 10M, ru*
|
||||
1.  [Seatracker (https://seatracker.ru/)](https://seatracker.ru/)*: top 10M, ru*, search is disabled
|
||||
1.  [Hctorpedo (http://hctorpedo.ru)](http://hctorpedo.ru)*: top 10M, ru*
|
||||
1.  [forums.zooclub.ru (https://forums.zooclub.ru)](https://forums.zooclub.ru)*: top 10M*
|
||||
1.  [getmakerlog.com (https://getmakerlog.com)](https://getmakerlog.com)*: top 10M, business*
|
||||
@@ -1131,13 +1131,13 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [popjustice (https://forum.popjustice.com)](https://forum.popjustice.com)*: top 10M, co, forum, in, sg, us*
|
||||
1.  [forums.scummvm.org (https://forums.scummvm.org)](https://forums.scummvm.org)*: top 10M, au, forum*
|
||||
1.  [hozpitality (https://www.hozpitality.com)](https://www.hozpitality.com)*: top 10M*
|
||||
1.  [RPGGeek (https://rpggeek.com)](https://rpggeek.com)*: top 10M, gaming, us*
|
||||
1.  [RPGGeek (https://rpggeek.com)](https://rpggeek.com)*: top 10M, gaming, us*, search is disabled
|
||||
1.  [www.freelancejob.ru (https://www.freelancejob.ru)](https://www.freelancejob.ru)*: top 10M, ru*
|
||||
1.  [Ecoustics (https://www.ecoustics.com/)](https://www.ecoustics.com/)*: top 10M, hk, in, us*
|
||||
1.  [Hellboundhackers (https://www.hellboundhackers.org)](https://www.hellboundhackers.org)*: top 10M, in*
|
||||
1.  [SportsTracker (https://www.sports-tracker.com/)](https://www.sports-tracker.com/)*: top 10M, pt, ru*
|
||||
1.  [Chpoking (http://chpoking.ru)](http://chpoking.ru)*: top 10M, ru*
|
||||
1.  [Root-me (https://www.root-me.org)](https://www.root-me.org)*: top 10M, hacking, in, ir, pk, us*
|
||||
1.  [Root-me (https://www.root-me.org)](https://www.root-me.org)*: top 10M, hacking, in, ir, pk, us*, search is disabled
|
||||
1.  [forum.languagelearningwithnetflix.com (https://forum.languagelearningwithnetflix.com)](https://forum.languagelearningwithnetflix.com)*: top 10M, forum, jp*
|
||||
1.  [Italia (http://italia-ru.com/)](http://italia-ru.com/)*: top 10M, it, ru, ua*
|
||||
1.  [crafta.ua (https://crafta.ua)](https://crafta.ua)*: top 10M, ua*
|
||||
@@ -1152,7 +1152,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [instaprofi.ru (https://instaprofi.ru)](https://instaprofi.ru)*: top 10M, photo*
|
||||
1.  [Lobsters (https://lobste.rs/)](https://lobste.rs/)*: top 10M, in, us, vn*
|
||||
1.  [Whonix Forum (https://forums.whonix.org/)](https://forums.whonix.org/)*: top 10M, forum, in, ir, tech, us*
|
||||
1.  [Pesiq (http://pesiq.ru/)](http://pesiq.ru/)*: top 10M, forum, ru*
|
||||
1.  [Pesiq (http://pesiq.ru/)](http://pesiq.ru/)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [www.kinokopilka.pro (https://www.kinokopilka.pro)](https://www.kinokopilka.pro)*: top 10M, il*
|
||||
1.  [Appearoo (http://appearoo.com)](http://appearoo.com)*: top 10M, in*, search is disabled
|
||||
1.  [forum.rmnt.ru (https://forum.rmnt.ru)](https://forum.rmnt.ru)*: top 10M, forum, ru*
|
||||
@@ -1196,7 +1196,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Steamid (by id) (https://steamid.uk/)](https://steamid.uk/)*: top 10M, gaming*
|
||||
1.  [Eightbit (http://eightbit.me/)](http://eightbit.me/)*: top 10M*, search is disabled
|
||||
1.  [Desu (https://desu.me)](https://desu.me)*: top 10M, by, forum, ru*
|
||||
1.  [Shoppingzone (http://shoppingzone.ru)](http://shoppingzone.ru)*: top 10M, ru*
|
||||
1.  [Shoppingzone (http://shoppingzone.ru)](http://shoppingzone.ru)*: top 10M, ru*, search is disabled
|
||||
1.  [gcup.ru (http://gcup.ru)](http://gcup.ru)*: top 10M, ru*
|
||||
1.  [si-sv.com (http://si-sv.com)](http://si-sv.com)*: top 10M, ru*
|
||||
1.  [Orbys (https://orbys.net)](https://orbys.net)*: top 10M, us*
|
||||
@@ -1213,11 +1213,11 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Rpgwatch (https://www.rpgwatch.com)](https://www.rpgwatch.com)*: top 10M, ca, forum, in, ru, us*, search is disabled
|
||||
1.  [Weasyl (https://www.weasyl.com)](https://www.weasyl.com)*: top 10M, in*
|
||||
1.  [Kerch Forum (http://forum.kerch.com.ru)](http://forum.kerch.com.ru)*: top 10M, forum, ru, ua*, search is disabled
|
||||
1.  [Mbclub (https://www.mbclub.ru/)](https://www.mbclub.ru/)*: top 10M, ru*
|
||||
1.  [Mbclub (https://www.mbclub.ru/)](https://www.mbclub.ru/)*: top 10M, ru*, search is disabled
|
||||
1.  [only-paper.ru (http://only-paper.ru)](http://only-paper.ru)*: top 10M, ru*
|
||||
1.  [Quartertothree (https://forum.quartertothree.com)](https://forum.quartertothree.com)*: top 10M, forum, us*
|
||||
1.  [Aminus3 (https://aminus3.com)](https://aminus3.com)*: top 10M, photo*
|
||||
1.  [Chessclub (https://www.chessclub.com)](https://www.chessclub.com)*: top 10M, us*
|
||||
1.  [Chessclub (https://www.chessclub.com)](https://www.chessclub.com)*: top 10M, us*, search is disabled
|
||||
1.  [Finforum (https://finforum.net)](https://finforum.net)*: top 10M, forum, ru, us, vn*
|
||||
1.  [sanatorii (http://forum.sanatorii.by)](http://forum.sanatorii.by)*: top 10M, by, forum, ru*
|
||||
1.  [YaPishu.net (https://yapishu.net)](https://yapishu.net)*: top 10M, ru*
|
||||
@@ -1239,7 +1239,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Faqusha (https://faqusha.ru)](https://faqusha.ru)*: top 10M, ru*
|
||||
1.  [Skyrimforums (https://skyrimforums.org)](https://skyrimforums.org)*: top 10M, forum, in, us*
|
||||
1.  [juce (https://forum.juce.com)](https://forum.juce.com)*: top 10M, ca, forum, us*
|
||||
1.  [rblx.trade (https://rblx.trade)](https://rblx.trade)*: top 10M, gaming*
|
||||
1.  [rblx.trade (https://rblx.trade)](https://rblx.trade)*: top 10M, gaming*, search is disabled
|
||||
1.  [quik (https://forum.quik.ru)](https://forum.quik.ru)*: top 10M, forum, ru*
|
||||
1.  [navimba.com (https://navimba.com)](https://navimba.com)*: top 10M*
|
||||
1.  [Gardenstew (https://www.gardenstew.com)](https://www.gardenstew.com)*: top 10M, forum, in, us*, search is disabled
|
||||
@@ -1251,7 +1251,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [IssueHunt (https://issuehunt.io)](https://issuehunt.io)*: top 10M, dz, finance, in, ir, tr, us*
|
||||
1.  [mywishboard.com (https://mywishboard.com)](https://mywishboard.com)*: top 10M, in*
|
||||
1.  [cs-strikez.org (http://cs-strikez.org)](http://cs-strikez.org)*: top 10M, by, ru, ua*
|
||||
1.  [Pogovorim (https://pogovorim.by)](https://pogovorim.by)*: top 10M, by, ru*
|
||||
1.  [Pogovorim (https://pogovorim.by)](https://pogovorim.by)*: top 10M, by, ru*, search is disabled
|
||||
1.  [Qbn (https://www.qbn.com/)](https://www.qbn.com/)*: top 10M, in, us*
|
||||
1.  [Mobrep (https://www.mobrep.ru)](https://www.mobrep.ru)*: top 10M, ru*
|
||||
1.  [Hipforums (https://www.hipforums.com/)](https://www.hipforums.com/)*: top 10M, forum, in, ru, us*, search is disabled
|
||||
@@ -1302,7 +1302,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [kazamuza.net (http://kazamuza.net)](http://kazamuza.net)*: top 10M, kz*
|
||||
1.  [2d-3d (https://www.2d-3d.ru)](https://www.2d-3d.ru)*: top 10M, ru*
|
||||
1.  [Ethereum-magicians (https://ethereum-magicians.org)](https://ethereum-magicians.org)*: top 10M, cr, forum*
|
||||
1.  [bbs.evony.com (http://bbs.evony.com)](http://bbs.evony.com)*: top 10M, forum, in, pk, tr, us*
|
||||
1.  [bbs.evony.com (http://bbs.evony.com)](http://bbs.evony.com)*: top 10M, forum, in, pk, tr, us*, search is disabled
|
||||
1.  [Animeforum (https://www.animeforum.com)](https://www.animeforum.com)*: top 10M, forum, pk, us, vn*
|
||||
1.  [Kinooh (https://kinooh.ru)](https://kinooh.ru)*: top 10M, ru*, search is disabled
|
||||
1.  [forum.web.ru (https://forum.web.ru)](https://forum.web.ru)*: top 10M, forum, ru*
|
||||
@@ -1360,7 +1360,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [yka.kz (http://yka.kz)](http://yka.kz)*: top 10M, kz*
|
||||
1.  [android-gameworld.ru (http://android-gameworld.ru)](http://android-gameworld.ru)*: top 10M, ru*
|
||||
1.  [codeseller.ru (https://codeseller.ru)](https://codeseller.ru)*: top 10M, kz, ru*
|
||||
1.  [Runnersworld (https://forums.runnersworld.co.uk/)](https://forums.runnersworld.co.uk/)*: top 10M, forum, sport*
|
||||
1.  [Runnersworld (https://forums.runnersworld.co.uk/)](https://forums.runnersworld.co.uk/)*: top 10M, forum, sport*, search is disabled
|
||||
1.  [Partyflock (https://partyflock.nl)](https://partyflock.nl)*: top 10M, in, nl*
|
||||
1.  [Bratsk Forum (http://forum.bratsk.org)](http://forum.bratsk.org)*: top 10M, forum, ru*
|
||||
1.  [Armtorg (https://armtorg.ru/)](https://armtorg.ru/)*: top 10M, forum, ru*
|
||||
@@ -1368,7 +1368,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Damochka (https://www.damochka.ru)](https://www.damochka.ru)*: top 10M, kz, ru*, search is disabled
|
||||
1.  [Player (http://player.ru)](http://player.ru)*: top 10M, forum, ru, shopping*
|
||||
1.  [python.su (https://python.su/)](https://python.su/)*: top 10M, ru*
|
||||
1.  [forum.setcombg.com (https://forum.setcombg.com)](https://forum.setcombg.com)*: top 10M*
|
||||
1.  [forum.setcombg.com (https://forum.setcombg.com)](https://forum.setcombg.com)*: top 10M*, search is disabled
|
||||
1.  [School-school (https://school-school.ru)](https://school-school.ru)*: top 10M, ru*
|
||||
1.  [modnaya (https://forum.modnaya.org/)](https://forum.modnaya.org/)*: top 10M, forum, ru, ua*
|
||||
1.  [v-twinforum.com (https://v-twinforum.com)](https://v-twinforum.com)*: top 10M, auto, forum*
|
||||
@@ -1427,7 +1427,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [sign-forum.ru (https://sign-forum.ru)](https://sign-forum.ru)*: top 10M*
|
||||
1.  [Vlmi (https://vlmi.biz)](https://vlmi.biz)*: top 10M, forum, ru, ua*
|
||||
1.  [sciax2.it (https://www.sciax2.it/forum/)](https://www.sciax2.it/forum/)*: top 10M, forum, tr*
|
||||
1.  [Solaris-club (https://solaris-club.net)](https://solaris-club.net)*: top 10M, forum, ru*
|
||||
1.  [Solaris-club (https://solaris-club.net)](https://solaris-club.net)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [Megane2 (http://megane2.ru/)](http://megane2.ru/)*: top 10M, ru*
|
||||
1.  [Volkodavcaoko (https://volkodavcaoko.forum24.ru)](https://volkodavcaoko.forum24.ru)*: top 10M, forum, kz, ru, ua*
|
||||
1.  [Caduser (https://www.caduser.ru/)](https://www.caduser.ru/)*: top 10M, ru, ua*
|
||||
@@ -1438,7 +1438,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Cad (https://cad.ru)](https://cad.ru)*: top 10M, ru*
|
||||
1.  [Texasguntalk (https://www.texasguntalk.com)](https://www.texasguntalk.com)*: top 10M, forum, us*
|
||||
1.  [ttsport.ru (https://www.ttsport.ru/forum)](https://www.ttsport.ru/forum)*: top 10M, forum, ru*
|
||||
1.  [VideogameGeek (https://videogamegeek.com)](https://videogamegeek.com)*: top 10M, gaming, news*
|
||||
1.  [VideogameGeek (https://videogamegeek.com)](https://videogamegeek.com)*: top 10M, gaming, news*, search is disabled
|
||||
1.  [Omoimot (https://omoimot.ru/)](https://omoimot.ru/)*: top 10M, ru*
|
||||
1.  [memoriam.ru (https://memoriam.ru/forum)](https://memoriam.ru/forum)*: top 10M, forum, ru*
|
||||
1.  [nikoncafe.com (https://www.nikoncafe.com/)](https://www.nikoncafe.com/)*: top 10M, forum, photo*
|
||||
@@ -1460,7 +1460,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Dcpg (https://dcpg.ru/)](https://dcpg.ru/)*: top 10M, ru, ua*
|
||||
1.  [Whyislam (https://www.whyislam.to)](https://www.whyislam.to)*: top 10M, forum, ru*
|
||||
1.  [hevc-club.ucoz.net (http://hevc-club.ucoz.net)](http://hevc-club.ucoz.net)*: top 10M*
|
||||
1.  [forum.zone-game.info (https://forum.zone-game.info)](https://forum.zone-game.info)*: top 10M, forum*
|
||||
1.  [forum.zone-game.info (https://forum.zone-game.info)](https://forum.zone-game.info)*: top 10M, forum*, search is disabled
|
||||
1.  [Tabun (https://tabun.everypony.ru)](https://tabun.everypony.ru)*: top 10M, ru*
|
||||
1.  [ShitpostBot5000 (https://www.shitpostbot.com/)](https://www.shitpostbot.com/)*: top 10M, us*
|
||||
1.  [forum.1796web.com (https://forum.1796web.com)](https://forum.1796web.com)*: top 10M, forum, ru*
|
||||
@@ -1505,7 +1505,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [deeptor.ws (https://deeptor.ws)](https://deeptor.ws)*: top 10M, forum, tr*
|
||||
1.  [Nygunforum (https://nygunforum.com)](https://nygunforum.com)*: top 10M, forum, us*
|
||||
1.  [Astra-club (http://www.astra-club.ru)](http://www.astra-club.ru)*: top 10M, ru, ua*, search is disabled
|
||||
1.  [Phrack (http://phrack.org)](http://phrack.org)*: top 10M*
|
||||
1.  [Phrack (http://phrack.org)](http://phrack.org)*: top 10M*, search is disabled
|
||||
1.  [Esate (http://esate.ru)](http://esate.ru)*: top 10M, ru*
|
||||
1.  [phorum.armavir.ru (http://phorum.armavir.ru)](http://phorum.armavir.ru)*: top 10M, forum*
|
||||
1.  [Ieoc (https://ieoc.com/)](https://ieoc.com/)*: top 10M, us*, search is disabled
|
||||
@@ -1556,7 +1556,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [onanizm.club (http://onanizm.club)](http://onanizm.club)*: top 10M, forum*, search is disabled
|
||||
1.  [PeopleAndCountries (http://peopleandcountries.com)](http://peopleandcountries.com)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [33bru (http://33bru.com/)](http://33bru.com/)*: top 10M, ru, ua*, search is disabled
|
||||
1.  [Astrogalaxy (https://astrogalaxy.ru)](https://astrogalaxy.ru)*: top 10M, ru*
|
||||
1.  [Astrogalaxy (https://astrogalaxy.ru)](https://astrogalaxy.ru)*: top 10M, ru*, search is disabled
|
||||
1.  [Maccentre (https://maccentre.ru)](https://maccentre.ru)*: top 10M, ru*
|
||||
1.  [arcolinuxforum.com (https://arcolinuxforum.com)](https://arcolinuxforum.com)*: top 10M, forum*
|
||||
1.  [hikvision.msk.ru (http://hikvision.msk.ru)](http://hikvision.msk.ru)*: top 10M, ru*
|
||||
@@ -1619,7 +1619,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [mstdn.io (https://mstdn.io/)](https://mstdn.io/)*: top 10M*
|
||||
1.  [devushka (https://devushka.ru/)](https://devushka.ru/)*: top 10M, forum, ru*
|
||||
1.  [AllTheSoft (http://www.allthesoft.com)](http://www.allthesoft.com)*: top 10M, in*, search is disabled
|
||||
1.  [Starvault (https://starvault.se)](https://starvault.se)*: top 10M, gaming*
|
||||
1.  [Starvault (https://starvault.se)](https://starvault.se)*: top 10M, gaming*, search is disabled
|
||||
1.  [theprodigy (https://forum.theprodigy.ru/)](https://forum.theprodigy.ru/)*: top 10M, forum, ru, ua*, search is disabled
|
||||
1.  [forum.dusterclub.ru (http://forum.dusterclub.ru)](http://forum.dusterclub.ru)*: top 10M, forum*, search is disabled
|
||||
1.  [Footballforums (http://www.footballforums.net)](http://www.footballforums.net)*: top 10M, forum, gb*, search is disabled
|
||||
@@ -1661,7 +1661,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [chaos.social (https://chaos.social/)](https://chaos.social/)*: top 10M, networking*
|
||||
1.  [mastodon.social (https://chaos.social/)](https://chaos.social/)*: top 10M, networking*
|
||||
1.  [forum.nemodniy.ru (http://forum.nemodniy.ru)](http://forum.nemodniy.ru)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [bluesystem (http://forum.bluesystem.online)](http://forum.bluesystem.online)*: top 10M, forum, ru*
|
||||
1.  [bluesystem (http://forum.bluesystem.online)](http://forum.bluesystem.online)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [Gamblejoe (https://www.gamblejoe.com)](https://www.gamblejoe.com)*: top 10M, de, mk, ua*
|
||||
1.  [Prosvetlenie (http://www.prosvetlenie.org)](http://www.prosvetlenie.org)*: top 10M, kg, ru*
|
||||
1.  [Derevnyaonline (https://derevnyaonline.ru)](https://derevnyaonline.ru)*: top 10M, ru*
|
||||
@@ -1716,10 +1716,10 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [discuss.bootstrapped.fm (https://discuss.bootstrapped.fm)](https://discuss.bootstrapped.fm)*: top 10M*
|
||||
1.  [TEENUS (http://www.teenus.info)](http://www.teenus.info)*: top 10M, business, ee*, search is disabled
|
||||
1.  [Uwr1 (http://uwr1.de)](http://uwr1.de)*: top 10M, de*
|
||||
1.  [Wot-game (https://wot-game.com)](https://wot-game.com)*: top 10M, ru*
|
||||
1.  [Wot-game (https://wot-game.com)](https://wot-game.com)*: top 10M, ru*, search is disabled
|
||||
1.  [Chatujme.cz (https://chatujme.cz/)](https://chatujme.cz/)*: top 10M, cz, dating*
|
||||
1.  [mark.szenprogs.ru (http://mark.szenprogs.ru)](http://mark.szenprogs.ru)*: top 10M*
|
||||
1.  [Railfan (http://forums.railfan.net)](http://forums.railfan.net)*: top 10M, forum, us*
|
||||
1.  [Railfan (http://forums.railfan.net)](http://forums.railfan.net)*: top 10M, forum, us*, search is disabled
|
||||
1.  [lithotherapy (https://forum.lithotherapy.ru)](https://forum.lithotherapy.ru)*: top 10M, forum, ru*
|
||||
1.  [club-fiat.org.ua (http://club-fiat.org.ua)](http://club-fiat.org.ua)*: top 10M, ua*
|
||||
1.  [ForumJizni (http://www.forumjizni.ru)](http://www.forumjizni.ru)*: top 10M, forum, ru*
|
||||
@@ -1730,7 +1730,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Mastersofcrypto (https://mastersofcrypto.com)](https://mastersofcrypto.com)*: top 10M, forum*, search is disabled
|
||||
1.  [Wuz (http://wuz.by)](http://wuz.by)*: top 10M, by, ru*, search is disabled
|
||||
1.  [Anarcho-punk (https://www.anarcho-punk.net/)](https://www.anarcho-punk.net/)*: top 10M, forum, us*
|
||||
1.  [caravelgames (http://forum.caravelgames.com)](http://forum.caravelgames.com)*: top 10M, forum*
|
||||
1.  [caravelgames (http://forum.caravelgames.com)](http://forum.caravelgames.com)*: top 10M, forum*, search is disabled
|
||||
1.  [Opelclub (http://www.opelclub.ru)](http://www.opelclub.ru)*: top 10M, ru*
|
||||
1.  [GDProfiles (https://gdprofiles.com/)](https://gdprofiles.com/)*: top 10M*
|
||||
1.  [safesurvival.net (https://www.safesurvival.net)](https://www.safesurvival.net)*: top 10M, forum*
|
||||
@@ -1806,7 +1806,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [forum.lancerx.ru (https://forum.lancerx.ru)](https://forum.lancerx.ru)*: top 10M*
|
||||
1.  [dapf.ru (https://dapf.ru)](https://dapf.ru)*: top 10M, forum*
|
||||
1.  [pv-afghan.ucoz.ru (http://pv-afghan.ucoz.ru)](http://pv-afghan.ucoz.ru)*: top 10M*
|
||||
1.  [mfarmer.ru (http://www.mfarmer.ru)](http://www.mfarmer.ru)*: top 10M*
|
||||
1.  [mfarmer.ru (http://www.mfarmer.ru)](http://www.mfarmer.ru)*: top 10M*, search is disabled
|
||||
1.  [Pyha (https://pyha.ru/)](https://pyha.ru/)*: top 10M, ru*
|
||||
1.  [Bookandreader (https://www.bookandreader.com)](https://www.bookandreader.com)*: top 10M, forum, us*
|
||||
1.  [Uaksu (https://uaksu.forum24.ru/)](https://uaksu.forum24.ru/)*: top 10M, forum, ru, ua*
|
||||
@@ -1827,7 +1827,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [pticevodov.ru (http://pticevodov.ru)](http://pticevodov.ru)*: top 10M*, search is disabled
|
||||
1.  [abho.ru (http://abho.ru)](http://abho.ru)*: top 10M*
|
||||
1.  [japanesedolls.ru (http://japanesedolls.ru)](http://japanesedolls.ru)*: top 10M*
|
||||
1.  [Boxing (http://boxing.ru/)](http://boxing.ru/)*: top 10M, ru*
|
||||
1.  [Boxing (http://boxing.ru/)](http://boxing.ru/)*: top 10M, ru*, search is disabled
|
||||
1.  [xn----7sbcctevcqafop1aviko5l.xn--p1ai (http://xn----7sbcctevcqafop1aviko5l.xn--p1ai)](http://xn----7sbcctevcqafop1aviko5l.xn--p1ai)*: top 10M*
|
||||
1.  [forumbebas.com (https://forumbebas.com)](https://forumbebas.com)*: top 10M, forum, id*
|
||||
1.  [lampoviedushi.hammarlund.ru (http://lampoviedushi.hammarlund.ru)](http://lampoviedushi.hammarlund.ru)*: top 10M, forum*
|
||||
@@ -1866,7 +1866,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [fforum.ru (http://www.fforum.ru)](http://www.fforum.ru)*: top 10M, forum, ru*
|
||||
1.  [Pro-cats (http://pro-cats.ru)](http://pro-cats.ru)*: top 10M, ru*
|
||||
1.  [sputnikkey.ru (http://sputnikkey.ru)](http://sputnikkey.ru)*: top 10M*
|
||||
1.  [Billkiene (https://www.billkiene.com)](https://www.billkiene.com)*: top 10M, forum, hobby*
|
||||
1.  [Billkiene (https://www.billkiene.com)](https://www.billkiene.com)*: top 10M, forum, hobby*, search is disabled
|
||||
1.  [fanacmilan.com (http://fanacmilan.com)](http://fanacmilan.com)*: top 10M*, search is disabled
|
||||
1.  [vento-club.com (http://vento-club.com)](http://vento-club.com)*: top 10M*
|
||||
1.  [bce-tyt.ru (http://bce-tyt.ru)](http://bce-tyt.ru)*: top 10M*
|
||||
@@ -1887,7 +1887,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [74507.ucoz.ru (http://74507.ucoz.ru)](http://74507.ucoz.ru)*: top 10M*
|
||||
1.  [animal-hope.ru (http://animal-hope.ru)](http://animal-hope.ru)*: top 10M*
|
||||
1.  [forum.heroesleague.ru (http://forum.heroesleague.ru)](http://forum.heroesleague.ru)*: top 10M, forum*
|
||||
1.  [Sexwin (https://sexforum.win)](https://sexforum.win)*: top 10M, forum, ru*
|
||||
1.  [Sexwin (https://sexforum.win)](https://sexforum.win)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [TikTok Online Viewer (https://ttonlineviewer.com)](https://ttonlineviewer.com)*: top 10M, us*, search is disabled
|
||||
1.  [tavr-obrazovanie.ru (http://tavr-obrazovanie.ru)](http://tavr-obrazovanie.ru)*: top 10M, ru*
|
||||
1.  [studentur.com.ua (http://studentur.com.ua)](http://studentur.com.ua)*: top 10M, ua*
|
||||
@@ -1951,7 +1951,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [news.toretsk.online (http://news.toretsk.online)](http://news.toretsk.online)*: top 10M, ru*
|
||||
1.  [alka-mine.at.ua (http://alka-mine.at.ua)](http://alka-mine.at.ua)*: top 10M*
|
||||
1.  [2el5.ucoz.ua (http://2el5.ucoz.ua)](http://2el5.ucoz.ua)*: top 10M, ru*
|
||||
1.  [Totseans (http://www.totseans.com/bbs/profile/Vizier)](http://www.totseans.com/bbs/profile/Vizier)*: top 10M, forum*
|
||||
1.  [Totseans (http://www.totseans.com/bbs/profile/Vizier)](http://www.totseans.com/bbs/profile/Vizier)*: top 10M, forum*, search is disabled
|
||||
1.  [TrackmaniaLadder (http://en.tm-ladder.com/index.php)](http://en.tm-ladder.com/index.php)*: top 10M, au*, search is disabled
|
||||
1.  [SwimmingForum (http://forumswimming.ru)](http://forumswimming.ru)*: top 10M, forum, ru*
|
||||
1.  [psy-dv.org (http://psy-dv.org)](http://psy-dv.org)*: top 10M*, search is disabled
|
||||
@@ -2000,9 +2000,9 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [sovgavan.ru (http://sovgavan.ru)](http://sovgavan.ru)*: top 10M, ru*
|
||||
1.  [ooo.do.am (http://ooo.do.am)](http://ooo.do.am)*: top 10M*
|
||||
1.  [FurryFandom (https://furry-fandom.ru/)](https://furry-fandom.ru/)*: top 10M, ru*, search is disabled
|
||||
1.  [Rugby-forum (http://rugby-forum.ru)](http://rugby-forum.ru)*: top 10M, forum, ru*
|
||||
1.  [Rugby-forum (http://rugby-forum.ru)](http://rugby-forum.ru)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [n-ataeva.ru (http://n-ataeva.ru)](http://n-ataeva.ru)*: top 10M*
|
||||
1.  [ForumProSport (https://forumprosport.ru/)](https://forumprosport.ru/)*: top 10M, forum, ru*
|
||||
1.  [ForumProSport (https://forumprosport.ru/)](https://forumprosport.ru/)*: top 10M, forum, ru*, search is disabled
|
||||
1.  [FCRubin (https://www.fcrubin.ru)](https://www.fcrubin.ru)*: top 10M, forum, ru*
|
||||
1.  [tuning.lviv.ua (http://tuning.lviv.ua/forum)](http://tuning.lviv.ua/forum)*: top 10M, forum, ua*
|
||||
1.  [Automania (https://automania.ru)](https://automania.ru)*: top 10M, ru*
|
||||
@@ -2102,16 +2102,16 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Bentbox ()]()*: top 100M*
|
||||
1.  [Bezuzyteczna ()]()*: top 100M*
|
||||
1.  [Biggerpockets ()]()*: top 100M*
|
||||
1.  [Bikemap ()]()*: top 100M*
|
||||
1.  [Bikemap ()]()*: top 100M*, search is disabled
|
||||
1.  [Bitwarden ()]()*: top 100M*
|
||||
1.  [Blogi.pl ()]()*: top 100M*
|
||||
1.  [Bluesky (https://bsky.app)](https://bsky.app)*: top 100M, messaging*
|
||||
1.  [Bugcrowd ()]()*: top 100M*
|
||||
1.  [Buzznet ()]()*: top 100M*
|
||||
1.  [Buzznet ()]()*: top 100M*, search is disabled
|
||||
1.  [Caringbridge ()]()*: top 100M*
|
||||
1.  [Carrd.co ()]()*: top 100M*
|
||||
1.  [Cash.app ()]()*: top 100M*
|
||||
1.  [Castingcallclub ()]()*: top 100M*
|
||||
1.  [Castingcallclub ()]()*: top 100M*, search is disabled
|
||||
1.  [CD-Action ()]()*: top 100M*
|
||||
1.  [Cda.pl ()]()*: top 100M*
|
||||
1.  [Chamsko.pl ()]()*: top 100M*
|
||||
@@ -2124,7 +2124,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Club-comedy.clan.su (https://club-comedy.clan.su)](https://club-comedy.clan.su)*: top 100M, ru*
|
||||
1.  [CryptomatorForum ()]()*: top 100M*
|
||||
1.  [Cults3d ()]()*: top 100M*
|
||||
1.  [Cyberclock (https://cyberclock.cc)](https://cyberclock.cc)*: top 100M, ru*
|
||||
1.  [Cyberclock (https://cyberclock.cc)](https://cyberclock.cc)*: top 100M, ru*, search is disabled
|
||||
1.  [Cydak (http://www.cydak.ru)](http://www.cydak.ru)*: top 100M, ru*
|
||||
1.  [Cytoid.io ()]()*: top 100M*
|
||||
1.  [d3.ru ()]()*: top 100M*
|
||||
@@ -2149,7 +2149,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Fancentro ()]()*: top 100M*
|
||||
1.  [Fansly ()]()*: top 100M*
|
||||
1.  [Fedi.lewactwo.pl ()]()*: top 100M*, search is disabled
|
||||
1.  [Forumprawne.org ()]()*: top 100M*
|
||||
1.  [Forumprawne.org ()]()*: top 100M*, search is disabled
|
||||
1.  [Fosstodon ()]()*: top 100M*
|
||||
1.  [Fotka ()]()*: top 100M*
|
||||
1.  [Friendfinder ()]()*: top 100M*, search is disabled
|
||||
@@ -2159,11 +2159,12 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Fegatch (http://www.fegatch.com/)](http://www.fegatch.com/)*: top 100M, ru*, search is disabled
|
||||
1.  [Filmogs (https://www.filmo.gs/)](https://www.filmo.gs/)*: top 100M, movies*, search is disabled
|
||||
1.  [Finanzfrage ()]()*: top 100M*
|
||||
1.  [Forum.quake2.com.ru (http://forum.quake2.com.ru/)](http://forum.quake2.com.ru/)*: top 100M, forum, ru*
|
||||
1.  [Forum.quake2.com.ru (http://forum.quake2.com.ru/)](http://forum.quake2.com.ru/)*: top 100M, forum, ru*, search is disabled
|
||||
1.  [ForumTauck (https://forums.tauck.com)](https://forums.tauck.com)*: top 100M, forum, us*
|
||||
1.  [Framapiaf (https://framapiaf.org)](https://framapiaf.org)*: top 100M, mastodon*
|
||||
1.  [G2g.com ()]()*: top 100M*
|
||||
1.  [Gam1ng (https://gam1ng.com.br)](https://gam1ng.com.br)*: top 100M, br, webcam*, search is disabled
|
||||
1.  [GeniusArtists ()]()*: top 100M*
|
||||
1.  [GeniusArtists ()]()*: top 100M*, search is disabled
|
||||
1.  [Gesundheitsfrage ()]()*: top 100M*
|
||||
1.  [Gitbook ()]()*: top 100M*
|
||||
1.  [Gliger (http://www.gliger.ru)](http://www.gliger.ru)*: top 100M, ru*, search is disabled
|
||||
@@ -2202,6 +2203,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Macqa (https://macqa.ru)](https://macqa.ru)*: top 100M, ru*, search is disabled
|
||||
1.  [Maga-Chat ()]()*: top 100M*
|
||||
1.  [Magabook ()]()*: top 100M*
|
||||
1.  [Mamot (https://mamot.fr)](https://mamot.fr)*: top 100M, mastodon*
|
||||
1.  [Mapify.travel ()]()*: top 100M*
|
||||
1.  [MapMyTracks ()]()*: top 100M*
|
||||
1.  [Marshmallow ()]()*: top 100M*
|
||||
@@ -2218,16 +2220,18 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [MurmanskLife (http://murmansk-life.ru)](http://murmansk-life.ru)*: top 100M, ru*, search is disabled
|
||||
1.  [Mym.fans ()]()*: top 100M*
|
||||
1.  [Naturalnews ()]()*: top 100M*
|
||||
1.  [Ninjakiwi ()]()*: top 100M*
|
||||
1.  [Ninjakiwi ()]()*: top 100M*, search is disabled
|
||||
1.  [Needrom ()]()*: top 100M*
|
||||
1.  [Nyaa.si ()]()*: top 100M*
|
||||
1.  [Oglaszamy24h ()]()*: top 100M*
|
||||
1.  [Olx.pl ()]()*: top 100M*
|
||||
1.  [Ourfreedombook ()]()*: top 100M*
|
||||
1.  [Outgress (https://outgress.com/)](https://outgress.com/)*: top 100M*
|
||||
1.  [Ow.ly ()]()*: top 100M*
|
||||
1.  [Patronite ()]()*: top 100M*
|
||||
1.  [Pewex.pl ()]()*: top 100M*
|
||||
1.  [Piekielni ()]()*: top 100M*
|
||||
1.  [pixelfed.social (https://pixelfed.social/)](https://pixelfed.social/)*: top 100M, art, pixelfed*
|
||||
1.  [Pol.social ()]()*: top 100M*
|
||||
1.  [Polczat.pl ()]()*: top 100M*
|
||||
1.  [Policja2009 ()]()*: top 100M*
|
||||
@@ -2238,6 +2242,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [PalexaRankru (https://palexaRankru.net/)](https://palexaRankru.net/)*: top 100M, forum, ru*, search is disabled
|
||||
1.  [ProgrammersForum (https://www.programmersforum)](https://www.programmersforum)*: top 100M, forum, ru*, search is disabled
|
||||
1.  [Prv.pl ()]()*: top 100M*
|
||||
1.  [programming.dev (https://programming.dev)](https://programming.dev)*: top 100M, lemmy*
|
||||
1.  [Quitter.pl ()]()*: top 100M*
|
||||
1.  [Quizlet ()]()*: top 100M*
|
||||
1.  [Rammclan (http://www.rammclan.ru)](http://www.rammclan.ru)*: top 100M, ru*
|
||||
@@ -2261,11 +2266,11 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [Solikick ()]()*: top 100M*, search is disabled
|
||||
1.  [Spankpay ()]()*: top 100M*, search is disabled
|
||||
1.  [SpiceWorks (https://community.spiceworks.co)](https://community.spiceworks.co)*: top 100M, forum, tech*
|
||||
1.  [Splice ()]()*: top 100M*
|
||||
1.  [Splice ()]()*: top 100M*, search is disabled
|
||||
1.  [Sportlerfrage ()]()*: top 100M*
|
||||
1.  [Star Citizens Community (https://robertsspaceindustries.com/)](https://robertsspaceindustries.com/)*: top 100M, de, us*
|
||||
1.  [Statistika (http://statistika.ru)](http://statistika.ru)*: top 100M, forum, ru*, search is disabled
|
||||
1.  [Suzuri.jp ()]()*: top 100M*
|
||||
1.  [Suzuri.jp ()]()*: top 100M*, search is disabled
|
||||
1.  [Swapd ()]()*: top 100M*
|
||||
1.  [Syktforum (http://syktforum.ru)](http://syktforum.ru)*: top 100M, forum, ru*
|
||||
1.  [SyktyvkarOnline (http://syktyvkar-online.ru)](http://syktyvkar-online.ru)*: top 100M, ru*, search is disabled
|
||||
@@ -2886,7 +2891,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [DarkNet Trust (http://dntrustmucd4mwec.onion)](http://dntrustmucd4mwec.onion)*: top 100M, tor*
|
||||
1.  [i2pforum (http://i2pforum.i2p)](http://i2pforum.i2p)*: top 100M, i2p*
|
||||
1.  [kazanlashkigalab.com (https://kazanlashkigalab.com)](https://kazanlashkigalab.com)*: top 100M, kz*
|
||||
1.  [airlinepilot.life ()]()*: top 100M*
|
||||
1.  [airlinepilot.life ()]()*: top 100M*, search is disabled
|
||||
1.  [algowiki-project.org ()]()*: top 100M*
|
||||
1.  [alimero.ru ()]()*: top 100M*
|
||||
1.  [baseball-reference.com ()]()*: top 100M*
|
||||
@@ -2956,7 +2961,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [exploretalent.com ()]()*: top 100M*
|
||||
1.  [fandalism.com ()]()*: top 100M*, search is disabled
|
||||
1.  [fanfiktion.de ()]()*: top 100M*
|
||||
1.  [ffm.bio ()]()*: top 100M*
|
||||
1.  [ffm.bio ()]()*: top 100M*, search is disabled
|
||||
1.  [finmessage.com ()]()*: top 100M*
|
||||
1.  [flipsnack.com ()]()*: top 100M*
|
||||
1.  [flirtic.ee ()]()*: top 100M*
|
||||
@@ -3043,7 +3048,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [splatoonwiki.org ()]()*: top 100M*
|
||||
1.  [spreadshirt.com ()]()*: top 100M*
|
||||
1.  [ssbwiki.com ()]()*: top 100M*
|
||||
1.  [stackshare.io ()]()*: top 100M*
|
||||
1.  [stackshare.io ()]()*: top 100M*, search is disabled
|
||||
1.  [starfywiki.org ()]()*: top 100M*
|
||||
1.  [steller.co ()]()*: top 100M*
|
||||
1.  [strategywiki.org ()]()*: top 100M*
|
||||
@@ -3088,7 +3093,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [HuggingFace (https://huggingface.co/)](https://huggingface.co/)*: top 100M*
|
||||
1.  [ManifoldMarkets (https://manifold.markets/)](https://manifold.markets/)*: top 100M*
|
||||
1.  [lyricsTraining ()]()*: top 100M, music*
|
||||
1.  [expoForum ()]()*: top 100M, coding, forum*
|
||||
1.  [expoForum ()]()*: top 100M, coding, forum*, search is disabled
|
||||
1.  [rawg.io ()]()*: top 100M, gaming*
|
||||
1.  [SchemeColor ()]()*: top 100M, art, design*
|
||||
1.  [aetherhub ()]()*: top 100M, gaming*
|
||||
@@ -3113,7 +3118,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [peername.com/tor (https://peername.com/)](https://peername.com/)*: top 100M, crypto*
|
||||
1.  [PromptBase (https://promptbase.com)](https://promptbase.com)*: top 100M, ai*
|
||||
1.  [ngl.link (https://ngl.link)](https://ngl.link)*: top 100M, q&a*
|
||||
1.  [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto*
|
||||
1.  [bitpapa.com (https://bitpapa.com)](https://bitpapa.com)*: top 100M, crypto*, search is disabled
|
||||
1.  [sst.hiberworld.com (https://sst.hiberworld.com/user/{username})](https://sst.hiberworld.com/user/{username})*: top 100M*
|
||||
1.  [DeepDreamGenerator (https://deepdreamgenerator.com)](https://deepdreamgenerator.com)*: top 100M*
|
||||
1.  [PeriscopeTv (https://www.pscp.tv)](https://www.pscp.tv)*: top 100M*
|
||||
@@ -3122,7 +3127,7 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [DimensionalMe (https://www.dimensional.me)](https://www.dimensional.me)*: top 100M*
|
||||
1.  [www.portal-pisarski.pl (https://www.portal-pisarski.pl)](https://www.portal-pisarski.pl)*: top 100M*
|
||||
1.  [www.dateamillionaire.com (https://www.dateamillionaire.com)](https://www.dateamillionaire.com)*: top 100M*
|
||||
1.  [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M*
|
||||
1.  [www.stopstalk.com (https://www.stopstalk.com)](https://www.stopstalk.com)*: top 100M*, search is disabled
|
||||
1.  [www.polywork.com (https://www.polywork.com)](https://www.polywork.com)*: top 100M*
|
||||
1.  [oshwlab.com (https://oshwlab.com)](https://oshwlab.com)*: top 100M*
|
||||
1.  [www.xshaker.net (https://www.xshaker.net)](https://www.xshaker.net)*: top 100M*
|
||||
@@ -3134,41 +3139,42 @@ Rank data fetched from Alexa by domains.
|
||||
1.  [www.gta-multiplayer.cz (https://www.gta-multiplayer.cz)](https://www.gta-multiplayer.cz)*: top 100M*
|
||||
1.  [www.inaturalist.org (https://www.inaturalist.org)](https://www.inaturalist.org)*: top 100M*
|
||||
1.  [archive.transformativeworks.org (https://archive.transformativeworks.org)](https://archive.transformativeworks.org)*: top 100M*
|
||||
1.  [www.tnaflix.com (https://www.tnaflix.com)](https://www.tnaflix.com)*: top 100M*
|
||||
1.  [www.tnaflix.com (https://www.tnaflix.com)](https://www.tnaflix.com)*: top 100M*, search is disabled
|
||||
1.  [massagerepublic.com (https://massagerepublic.com)](https://massagerepublic.com)*: top 100M*
|
||||
1.  [mynickname.com (https://mynickname.com)](https://mynickname.com)*: top 100M*
|
||||
1.  [Substack (https://substack.com)](https://substack.com)*: top 100M, blog*
|
||||
1.  [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
|
||||
1.  [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
|
||||
1.  [write.as (https://write.as)](https://write.as)*: top 100M, writefreely*
|
||||
|
||||
The list was updated at (2024-12-13)
|
||||
The list was updated at (2026-03-22)
|
||||
## Statistics
|
||||
|
||||
Enabled/total sites: 2684/3137 = 85.56%
|
||||
Enabled/total sites: 2569/3144 = 81.71%
|
||||
|
||||
Incomplete message checks: 394/2684 = 14.68% (false positive risks)
|
||||
Incomplete message checks: 342/2569 = 13.31% (false positive risks)
|
||||
|
||||
Status code checks: 615/2684 = 22.91% (false positive risks)
|
||||
Status code checks: 586/2569 = 22.81% (false positive risks)
|
||||
|
||||
False positive risk (total): 37.59%
|
||||
False positive risk (total): 36.12%
|
||||
|
||||
Sites with probing: 500px, Aparat, BinarySearch (disabled), BongaCams, BuyMeACoffee, Cent, Disqus, Docker Hub, Duolingo, Gab, GitHub, GitLab, Google Plus (archived), Gravatar, Imgur, Issuu, Keybase, Livejasmin, LocalCryptos (disabled), MixCloud, Niftygateway, Reddit Search (Pushshift) (disabled), SportsTracker, Spotify (disabled), TAP'D, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Weibo, Yapisal (disabled), YouNow, nightbot, notabug.org, polarsteps, qiwi.me (disabled)
|
||||
Sites with probing: 500px, Aparat (disabled), Armchairgm, BinarySearch (disabled), BleachFandom, BongaCams, BuyMeACoffee, Cent, Chess, Disqus, Docker Hub, Duolingo, FandomCommunityCentral, Gab, GitHub, GitLab, Google Plus (archived), Gravatar, Imgur, Issuu, Keybase, Livejasmin, LocalCryptos (disabled), MicrosoftLearn, MixCloud, Niftygateway, Picsart, Reddit, Reddit Search (Pushshift) (disabled), SportsTracker, Spotify (disabled), Substack, TAP'D, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Weibo, Yapisal (disabled), YouNow, hashnode, nightbot, notabug.org, polarsteps, qiwi.me (disabled)
|
||||
|
||||
Sites with activation: Spotify (disabled), Twitter, Vimeo, Weibo
|
||||
|
||||
Top 20 profile URLs:
|
||||
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
|
||||
- (301) `/{username}`
|
||||
- (303) `/{username}`
|
||||
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
|
||||
- (161) `/user/{username}`
|
||||
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
|
||||
- (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)`
|
||||
- (118) `/profile/{username}`
|
||||
- (111) `/u/{username}`
|
||||
- (88) `/users/{username}`
|
||||
- (112) `/u/{username}`
|
||||
- (87) `/users/{username}`
|
||||
- (87) `{urlMain}/u/{username}/summary (Discourse)`
|
||||
- (54) `/@{username}`
|
||||
- (54) `/wiki/User:{username}`
|
||||
- (52) `/@{username}`
|
||||
- (41) `/members/?username={username}`
|
||||
- (41) `SUBDOMAIN`
|
||||
- (32) `/members/{username}`
|
||||
@@ -3180,20 +3186,20 @@ Top 20 profile URLs:
|
||||
|
||||
|
||||
Top 20 tags:
|
||||
- (1105) `NO_TAGS` (non-standard)
|
||||
- (1106) `NO_TAGS` (non-standard)
|
||||
- (735) `forum`
|
||||
- (92) `gaming`
|
||||
- (48) `photo`
|
||||
- (41) `coding`
|
||||
- (30) `tech`
|
||||
- (31) `tech`
|
||||
- (29) `news`
|
||||
- (28) `blog`
|
||||
- (23) `music`
|
||||
- (19) `finance`
|
||||
- (18) `crypto`
|
||||
- (16) `sharing`
|
||||
- (16) `art`
|
||||
- (16) `freelance`
|
||||
- (15) `art`
|
||||
- (15) `shopping`
|
||||
- (13) `sport`
|
||||
- (13) `business`
|
||||
|
||||
@@ -7,7 +7,7 @@ description: |
|
||||
|
||||
Currently supports more than 3000 sites; by default, the search is launched against the 500 most popular sites in descending order of popularity. Also supports checking Tor sites, I2P sites, and domains (via DNS resolving).
|
||||
|
||||
version: 0.5.0a1
|
||||
version: 0.5.0
|
||||
license: MIT
|
||||
base: core22
|
||||
confinement: strict
|
||||
|
||||
|
Before Width: | Height: | Size: 1.6 MiB After Width: | Height: | Size: 1.6 MiB |
|
After Width: | Height: | Size: 501 KiB |
|
After Width: | Height: | Size: 312 KiB |
@@ -5,11 +5,13 @@ from typing import Dict, Any
|
||||
|
||||
DEFAULT_ARGS: Dict[str, Any] = {
|
||||
'all_sites': False,
|
||||
'auto_disable': False,
|
||||
'connections': 100,
|
||||
'cookie_file': None,
|
||||
'csv': False,
|
||||
'db_file': 'resources/data.json',
|
||||
'debug': False,
|
||||
'diagnose': False,
|
||||
'disable_extracting': False,
|
||||
'disable_recursive_search': False,
|
||||
'folderoutput': 'reports',
|
||||
@@ -42,6 +44,7 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
||||
'use_disabled_sites': False,
|
||||
'username': [],
|
||||
'verbose': False,
|
||||
'web': None,
|
||||
'with_domains': False,
|
||||
'xmind': False,
|
||||
}
|
||||
@@ -55,7 +58,8 @@ def test_args_search_mode(argparser):
|
||||
want_args = dict(DEFAULT_ARGS)
|
||||
want_args.update({'username': ['username']})
|
||||
|
||||
assert args == Namespace(**want_args)
|
||||
for arg in vars(args):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
|
||||
def test_args_search_mode_several_usernames(argparser):
|
||||
@@ -66,7 +70,8 @@ def test_args_search_mode_several_usernames(argparser):
|
||||
want_args = dict(DEFAULT_ARGS)
|
||||
want_args.update({'username': ['username1', 'username2']})
|
||||
|
||||
assert args == Namespace(**want_args)
|
||||
for arg in vars(args):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
|
||||
def test_args_self_check_mode(argparser):
|
||||
@@ -81,7 +86,8 @@ def test_args_self_check_mode(argparser):
|
||||
}
|
||||
)
|
||||
|
||||
assert args == Namespace(**want_args)
|
||||
for arg in vars(args):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
|
||||
def test_args_multiple_sites(argparser):
|
||||
@@ -97,4 +103,5 @@ def test_args_multiple_sites(argparser):
|
||||
}
|
||||
)
|
||||
|
||||
assert args == Namespace(**want_args)
|
||||
for arg in vars(args):
|
||||
assert getattr(args, arg) == want_args[arg]
|
||||
|
||||
@@ -8,6 +8,7 @@ from maigret.executors import (
|
||||
AsyncioProgressbarExecutor,
|
||||
AsyncioProgressbarSemaphoreExecutor,
|
||||
AsyncioProgressbarQueueExecutor,
|
||||
AsyncioQueueGeneratorExecutor,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -76,3 +77,35 @@ async def test_asyncio_progressbar_queue_executor():
|
||||
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
|
||||
assert executor.execution_time > 0.2
|
||||
assert executor.execution_time < 0.4
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_asyncio_queue_generator_executor():
    """Check result order and timing of ``AsyncioQueueGeneratorExecutor``.

    The same ten jobs are streamed through a fresh executor for several
    ``in_parallel`` settings. For each setting the collected results must
    equal one of the accepted sequences, and ``execution_time`` must lie
    strictly between the listed bounds.
    """
    jobs = [(func, [n], {}) for n in range(10)]

    # (in_parallel, accepted result orders, lower time bound, upper time bound)
    scenarios = (
        (2, ([0, 1, 3, 2, 4, 6, 7, 5, 9, 8],), 0.5, 0.6),
        (3, ([0, 3, 1, 4, 6, 2, 7, 9, 5, 8],), 0.4, 0.5),
        (
            5,
            (
                [0, 3, 6, 1, 4, 7, 9, 2, 5, 8],
                [0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
            ),
            0.3,
            0.4,
        ),
        (10, ([0, 3, 6, 9, 1, 4, 7, 2, 5, 8],), 0.2, 0.3),
    )

    for workers, accepted_orders, min_time, max_time in scenarios:
        executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=workers)

        collected = []
        async for item in executor.run(jobs):
            collected.append(item)

        assert collected in accepted_orders
        assert executor.execution_time > min_time
        assert executor.execution_time < max_time
|
||||
|
||||
@@ -27,7 +27,9 @@ async def test_self_check_db(test_db):
|
||||
assert test_db.sites_dict['ValidActive'].disabled is False
|
||||
assert test_db.sites_dict['InvalidInactive'].disabled is True
|
||||
|
||||
await self_check(test_db, test_db.sites_dict, logger, silent=False)
|
||||
await self_check(
|
||||
test_db, test_db.sites_dict, logger, silent=False, auto_disable=True
|
||||
)
|
||||
|
||||
assert test_db.sites_dict['InvalidActive'].disabled is True
|
||||
assert test_db.sites_dict['ValidInactive'].disabled is False
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
import unittest
|
||||
from unittest.mock import patch, mock_open
|
||||
|
||||
from maigret.settings import Settings
|
||||
|
||||
|
||||
class TestSettings(unittest.TestCase):
    """Tests for ``Settings.load`` with ``open`` and ``json.load`` patched out."""

    @patch('json.load')
    @patch('builtins.open', new_callable=mock_open)
    def test_settings_cascade_and_override(self, mock_file, mock_json_load):
        """Loading three files yields the merged values asserted below."""
        # Successive json.load calls hand back these payloads one by one.
        mock_json_load.side_effect = [
            {"timeout": 10, "retries_count": 3, "proxy_url": "http://proxy1"},
            {"timeout": 20, "recursive_search": True},
            {"proxy_url": "http://proxy3", "print_not_found": False},
        ]

        settings = Settings()
        was_inited, _ = settings.load(['file1.json', 'file2.json', 'file3.json'])

        self.assertTrue(was_inited)

        # Present only in the first payload.
        self.assertEqual(settings.retries_count, 3)
        # Present in the first and second payloads; the second value is expected.
        self.assertEqual(settings.timeout, 20)
        self.assertTrue(settings.recursive_search)
        # Present in the first and third payloads; the third value is expected.
        self.assertEqual(settings.proxy_url, "http://proxy3")
        self.assertFalse(settings.print_not_found)

    @patch('builtins.open')
    def test_settings_file_not_found(self, mock_open_func):
        """When ``open`` raises ``FileNotFoundError``, load reports failure."""
        mock_open_func.side_effect = FileNotFoundError()

        loader = Settings()
        was_inited, msg = loader.load(['nonexistent.json'])

        self.assertFalse(was_inited)
        self.assertIn('None of the default settings files found', msg)

    @patch('json.load')
    @patch('builtins.open', new_callable=mock_open)
    def test_settings_invalid_json(self, mock_file, mock_json_load):
        """When ``json.load`` raises ``ValueError``, load returns a ``ValueError``."""
        mock_json_load.side_effect = ValueError("Expecting value")

        loader = Settings()
        was_inited, msg = loader.load(['invalid.json'])

        self.assertFalse(was_inited)
        self.assertIsInstance(msg, ValueError)
        self.assertIn('Problem with parsing json contents', str(msg))
|
||||
@@ -182,6 +182,49 @@ def test_ranked_sites_dict_id_type():
|
||||
assert len(db.ranked_sites_dict(id_type='gaia_id')) == 1
|
||||
|
||||
|
||||
def test_ranked_sites_dict_mirrors_disabled_parent():
    """Mirror is included when parent ranks in top N but parent is disabled."""
    # (site name, raw site information) in the order they are registered.
    site_specs = (
        (
            'ParentPlatform',
            {'alexaRank': 5, 'disabled': True, 'type': 'username'},
        ),
        (
            'OtherSite',
            {'alexaRank': 100, 'type': 'username'},
        ),
        (
            'MirrorSite',
            {
                'alexaRank': 99999999,
                'source': 'ParentPlatform',
                'type': 'username',
            },
        ),
    )

    db = MaigretDatabase()
    for site_name, site_info in site_specs:
        db.update_site(MaigretSite(site_name, site_info))

    ranked = db.ranked_sites_dict(top=1, disabled=False, id_type='username')
    assert list(ranked.keys()) == ['OtherSite', 'MirrorSite']
|
||||
|
||||
|
||||
def test_ranked_sites_dict_mirrors_no_extra_without_parent_in_top():
    """A site whose ``source`` is not in the database adds nothing beyond the top N."""
    db = MaigretDatabase()

    top_site = MaigretSite('A', {'alexaRank': 1, 'type': 'username'})
    orphan_mirror = MaigretSite(
        'B',
        {'alexaRank': 2, 'source': 'NotInDb', 'type': 'username'},
    )
    db.update_site(top_site)
    db.update_site(orphan_mirror)

    ranked = db.ranked_sites_dict(top=1, id_type='username')

    assert list(ranked.keys()) == ['A']
|
||||
|
||||
|
||||
def test_get_url_template():
|
||||
site = MaigretSite(
|
||||
"test",
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from maigret.submit import Submitter, MaigretSite, MaigretEngine
|
||||
from unittest.mock import MagicMock, patch
|
||||
from maigret.submit import Submitter
|
||||
from aiohttp import ClientSession
|
||||
from maigret.sites import MaigretDatabase
|
||||
from maigret.settings import Settings
|
||||
import logging
|
||||
|
||||
|
||||
@@ -272,7 +271,7 @@ async def test_dialog_adds_site_negative(settings):
|
||||
]
|
||||
|
||||
with patch('builtins.input', side_effect=user_inputs):
|
||||
result = await submitter.dialog("https://icq.im/sokrat", None)
|
||||
result = await submitter.dialog("https://icq.com/sokrat", None)
|
||||
await submitter.close()
|
||||
|
||||
assert result is False
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
"""Tests for the Twitter / X site entry and GraphQL probe."""
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from maigret.sites import MaigretSite
|
||||
|
||||
|
||||
def _twitter_site(site: MaigretSite) -> None:
    """Assert that *site* carries the configuration expected of the Twitter entry."""
    assert site.name == "Twitter"
    assert site.disabled is False
    assert site.check_type == "message"

    # The probe URL must be a username template aimed at the GraphQL user lookup.
    probe = site.url_probe
    assert probe and "{username}" in probe
    assert "UserByScreenName" in probe or "graphql" in probe

    # Both reference usernames have to satisfy the site's own username regex.
    pattern = site.regex_check
    assert pattern
    for sample in (site.username_claimed, site.username_unclaimed):
        assert re.fullmatch(pattern, sample)

    assert site.absence_strs

    activation = site.activation
    assert activation.get("method") == "twitter"
    assert activation.get("url")

    # Header names are compared case-insensitively.
    assert any(key.lower() == "authorization" for key in site.headers)
|
||||
|
||||
|
||||
def test_twitter_site_entry_config(default_db):
    """Twitter entry in data.json must define probe URL, regex, and activation."""
    twitter = default_db.sites_dict["Twitter"]

    assert isinstance(twitter, MaigretSite)
    # Delegate the field-by-field checks to the shared helper.
    _twitter_site(twitter)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_twitter_graphql_probe_claimed_vs_unclaimed(default_db):
    """
    Live check: guest activation + UserByScreenName GraphQL returns a user for
    usernameClaimed and no user for usernameUnclaimed (same flow as urlProbe).
    """
    site = default_db.sites_dict["Twitter"]
    _twitter_site(site)

    # Start from the configured headers, minus any guest token stored with them.
    request_headers = dict(site.headers)
    request_headers.pop("x-guest-token", None)

    # Step 1: obtain a fresh guest token from the activation endpoint.
    activation_resp = requests.post(
        site.activation["url"], headers=request_headers, timeout=45
    )
    assert activation_resp.status_code == 200, activation_resp.text[:500]
    activation_body = activation_resp.json()
    assert "guest_token" in activation_body
    request_headers["x-guest-token"] = activation_body["guest_token"]

    def fetch(username: str) -> dict:
        """Query the probe URL for *username* and return the decoded JSON body."""
        probe_url = site.url_probe.format(username=username)
        probe_resp = requests.get(probe_url, headers=request_headers, timeout=45)
        probe_resp.raise_for_status()
        return probe_resp.json()

    # Step 2: the claimed username must resolve to a user object.
    claimed_json = fetch(site.username_claimed)
    assert "data" in claimed_json
    assert claimed_json["data"].get("user") is not None

    # Step 3: the unclaimed username must yield no data or no user.
    unclaimed_data = fetch(site.username_unclaimed).get("data") or {}
    assert unclaimed_data == {} or unclaimed_data.get("user") is None
|
||||
@@ -0,0 +1,480 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Mass site checking utility for Maigret development.
|
||||
Check top-N sites from data.json and generate a report.
|
||||
|
||||
Usage:
|
||||
python utils/check_top_n.py --top 100 # Check top 100 sites
|
||||
python utils/check_top_n.py --top 50 --parallel 10 # Check with 10 parallel requests
|
||||
python utils/check_top_n.py --top 100 --output report.json
|
||||
python utils/check_top_n.py --top 100 --only-broken # Show only broken/error/timeout sites
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
# Add parent dir for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
except ImportError:
|
||||
print("aiohttp not installed. Run: pip install aiohttp")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class Colors:
    """ANSI escape sequences used to colorize console output."""

    # Bright foreground colors.
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"

    # Text attributes.
    BOLD = "\033[1m"
    RESET = "\033[0m"
|
||||
|
||||
|
||||
def color(text: str, c: str) -> str:
    """Return *text* prefixed with the escape code *c* and followed by a reset."""
    return "{}{}{}".format(c, text, Colors.RESET)
|
||||
|
||||
|
||||
@dataclass
class SiteCheckResult:
    """Outcome of probing one site with its claimed and unclaimed usernames."""

    # --- Identity of the site entry (required) ---
    site_name: str
    alexa_rank: int
    disabled: bool
    check_type: str

    # --- Overall verdict ---
    # One of: working, broken, timeout, error, anti_bot, disabled (or "unknown").
    status: str = "unknown"

    # --- Raw HTTP outcome of the two probe requests ---
    claimed_http_status: Optional[int] = None
    unclaimed_http_status: Optional[int] = None
    claimed_error: Optional[str] = None
    unclaimed_error: Optional[str] = None

    # --- Findings ---
    issues: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    # --- Suggested configuration changes ---
    recommendations: List[str] = field(default_factory=list)

    # --- Duration of the check, in milliseconds ---
    check_time_ms: int = 0
|
||||
|
||||
|
||||
# Browser-like request headers sent with every probe unless a site overrides them.
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
}
|
||||
|
||||
|
||||
async def check_url(url: str, headers: dict, timeout: int = 15) -> dict:
    """Fetch *url* once and summarize the response.

    Returns a dict with the HTTP status, the final URL after redirects, the body
    text and its length, a set of boolean content markers, and ``error`` /
    ``error_type`` fields that stay ``None`` when the request succeeded.
    Failures are reported through those fields rather than raised.
    """

    def mentions_any(haystack: str, needles: list) -> bool:
        # True when at least one needle occurs in the (lower-cased) body.
        return any(needle in haystack for needle in needles)

    outcome = {
        "status": None,
        "final_url": None,
        "content_length": 0,
        "error": None,
        "error_type": None,
        "content": None,
        "markers": {},
    }

    try:
        session = aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(ssl=False),
            timeout=aiohttp.ClientTimeout(total=timeout),
        )
        async with session, session.get(url, headers=headers, allow_redirects=True) as resp:
            outcome["status"] = resp.status
            outcome["final_url"] = str(resp.url)

            # A body that cannot be read is recorded but does not void the status.
            try:
                body = await resp.text()
                outcome["content_length"] = len(body)
                outcome["content"] = body

                lowered = body.lower()
                outcome["markers"] = {
                    "404_text": mentions_any(lowered, ["not found", "404", "doesn't exist"]),
                    "captcha": mentions_any(lowered, ["captcha", "recaptcha", "challenge"]),
                    "cloudflare": "cloudflare" in lowered,
                    "login": mentions_any(lowered, ["log in", "login", "sign in"]),
                }
            except Exception as exc:
                outcome["error"] = f"Content error: {exc}"
                outcome["error_type"] = "content"

    except asyncio.TimeoutError:
        outcome["error"] = "Timeout"
        outcome["error_type"] = "timeout"
    except aiohttp.ClientError as exc:
        outcome["error"] = str(exc)
        outcome["error_type"] = "client"
    except Exception as exc:
        outcome["error"] = str(exc)
        outcome["error_type"] = "unknown"

    return outcome
|
||||
|
||||
|
||||
async def check_site(site_name: str, config: dict, timeout: int = 15) -> SiteCheckResult:
    """Check a single site and return a detailed result.

    The profile URL is requested concurrently for ``usernameClaimed`` and
    ``usernameUnclaimed``; the two responses are then compared according to the
    site's ``checkType`` (``status_code``, ``response_url`` or ``message``).

    Args:
        site_name: Name of the site entry.
        config: The site's raw configuration dict (keys such as ``url``,
            ``urlMain``, ``checkType``, ``presenseStrs``, ``absenceStrs``).
        timeout: Per-request timeout in seconds.

    Returns:
        A ``SiteCheckResult`` whose ``status`` is one of ``disabled``, ``error``,
        ``timeout``, ``anti_bot``, ``broken``, ``working`` or ``unknown``.
        Every ``broken`` result carries at least one entry in ``issues``, and
        ``check_time_ms`` is filled in on every return path.
    """
    start_time = time.time()

    result = SiteCheckResult(
        site_name=site_name,
        alexa_rank=config.get("alexaRank", 999999),
        disabled=config.get("disabled", False),
        check_type=config.get("checkType", "status_code"),
    )

    def finish() -> SiteCheckResult:
        # Single exit point so the elapsed time is recorded even on early returns.
        result.check_time_ms = int((time.time() - start_time) * 1000)
        return result

    # Skip disabled sites
    if result.disabled:
        result.status = "disabled"
        return finish()

    # Build URL
    url_template = (
        config.get("url", "")
        .replace("{urlMain}", config.get("urlMain", ""))
        .replace("{urlSubpath}", config.get("urlSubpath", ""))
    )

    claimed = config.get("usernameClaimed")
    unclaimed = config.get("usernameUnclaimed", "noonewouldeverusethis7")

    if not claimed:
        result.status = "error"
        result.issues.append("No usernameClaimed defined")
        return finish()

    # Prepare headers: site-specific headers override the defaults.
    headers = DEFAULT_HEADERS.copy()
    if config.get("headers"):
        headers.update(config["headers"])

    # Check both URLs
    url_claimed = url_template.replace("{username}", claimed)
    url_unclaimed = url_template.replace("{username}", unclaimed)

    try:
        claimed_result, unclaimed_result = await asyncio.gather(
            check_url(url_claimed, headers, timeout),
            check_url(url_unclaimed, headers, timeout),
        )
    except Exception as e:
        result.status = "error"
        result.issues.append(f"Check failed: {e}")
        return finish()

    result.claimed_http_status = claimed_result["status"]
    result.unclaimed_http_status = unclaimed_result["status"]
    result.claimed_error = claimed_result.get("error")
    result.unclaimed_error = unclaimed_result.get("error")

    claimed_markers = claimed_result.get("markers", {})

    # Categorize result. Transport-level and anti-bot outcomes take precedence
    # over the checkType comparison, which is only meaningful for clean responses.
    if claimed_result["error_type"] == "timeout" or unclaimed_result["error_type"] == "timeout":
        result.status = "timeout"
        result.issues.append("Request timeout")

    elif claimed_result["status"] in (403, 429):
        result.status = "anti_bot"
        result.issues.append(f"Anti-bot protection (HTTP {claimed_result['status']})")

    elif claimed_markers.get("captcha"):
        result.status = "anti_bot"
        result.issues.append("Captcha detected")

    elif claimed_markers.get("cloudflare"):
        result.status = "anti_bot"
        result.warnings.append("Cloudflare protection detected")

    elif claimed_result["error"] or unclaimed_result["error"]:
        result.status = "error"
        if claimed_result["error"]:
            result.issues.append(f"Claimed error: {claimed_result['error']}")
        if unclaimed_result["error"]:
            result.issues.append(f"Unclaimed error: {unclaimed_result['error']}")

    else:
        # Validate check type
        check_type = result.check_type
        same_status = claimed_result["status"] == unclaimed_result["status"]
        same_final_url = claimed_result["final_url"] == unclaimed_result["final_url"]

        if check_type == "status_code":
            if same_status:
                result.status = "broken"
                result.issues.append(f"Same status code ({claimed_result['status']}) for both")
                # Suggest fix: the redirect target may still tell the cases apart.
                if not same_final_url:
                    result.recommendations.append("Switch to checkType: response_url")
            else:
                result.status = "working"

        elif check_type == "response_url":
            if same_final_url:
                result.status = "broken"
                result.issues.append("Same final URL for both")
                if not same_status:
                    result.recommendations.append("Switch to checkType: status_code")
            else:
                result.status = "working"

        elif check_type == "message":
            # "or []" also covers keys that are present but set to null.
            presense_strs = config.get("presenseStrs") or []
            absence_strs = config.get("absenceStrs") or []

            claimed_content = claimed_result.get("content") or ""
            unclaimed_content = unclaimed_result.get("content") or ""

            presense_found = any(s in claimed_content for s in presense_strs)
            absence_in_claimed = any(s in claimed_content for s in absence_strs)
            absence_in_unclaimed = any(s in unclaimed_content for s in absence_strs)

            if presense_strs and not presense_found:
                result.status = "broken"
                result.issues.append(f"presenseStrs not found: {presense_strs}")
                # Check if status_code would work
                if not same_status:
                    result.recommendations.append(
                        f"Switch to checkType: status_code ({claimed_result['status']} vs {unclaimed_result['status']})"
                    )
            elif absence_in_claimed:
                result.status = "broken"
                result.issues.append("absenceStrs found in claimed page")
            elif absence_strs and not absence_in_unclaimed:
                result.status = "broken"
                # Recorded as an issue (not a warning) so that a broken site
                # always reports the reason it was classified as broken.
                result.issues.append("absenceStrs not found in unclaimed page")
            else:
                result.status = "working"

        else:
            result.status = "unknown"
            result.warnings.append(f"Unknown checkType: {check_type}")

    return finish()
|
||||
|
||||
|
||||
def load_sites(db_path: Path) -> Dict[str, dict]:
    """Load all sites from data.json.

    The file is read as UTF-8 explicitly so that non-ASCII content decodes the
    same way regardless of the platform's default encoding.

    Args:
        db_path: Path to the JSON database file.

    Returns:
        The mapping stored under the top-level ``"sites"`` key, or an empty dict
        when that key is absent.
    """
    with open(db_path, encoding="utf-8") as f:
        data = json.load(f)
    return data.get("sites", {})
|
||||
|
||||
|
||||
def get_top_sites(sites: Dict[str, dict], n: int) -> List[Tuple[str, dict]]:
    """Return the first *n* ``(name, config)`` pairs ordered by ascending Alexa rank.

    Sites without an ``alexaRank`` are ranked as 999999; ties keep the order in
    which the sites appear in *sites*.
    """
    by_rank = sorted(
        sites.items(),
        key=lambda entry: entry[1].get("alexaRank", 999999),
    )
    return by_rank[:n]
|
||||
|
||||
|
||||
async def check_sites_batch(sites: List[Tuple[str, dict]], parallel: int = 5,
                            timeout: int = 15, progress_callback=None) -> List[SiteCheckResult]:
    """Check multiple sites with parallelism control.

    Args:
        sites: ``(name, config)`` pairs to check.
        parallel: Maximum number of sites checked at the same time. Values
            below 1 are treated as 1, because a semaphore with no permits
            would block every task forever.
        timeout: Per-request timeout in seconds, forwarded to ``check_site``.
        progress_callback: Optional callable invoked as
            ``progress_callback(position, total, site_name)`` right before a
            site's check starts. ``position`` is 1-based so that the last site
            reports ``total`` of ``total``.

    Returns:
        One ``SiteCheckResult`` per input site, in the same order as *sites*.
    """
    semaphore = asyncio.Semaphore(max(1, parallel))
    total = len(sites)

    async def check_with_semaphore(name, config, position):
        # The semaphore caps how many checks are in flight at once.
        async with semaphore:
            if progress_callback:
                progress_callback(position, total, name)
            return await check_site(name, config, timeout)

    tasks = [
        check_with_semaphore(name, config, position)
        for position, (name, config) in enumerate(sites, start=1)
    ]

    # gather() preserves the order of its awaitables in the returned list.
    return await asyncio.gather(*tasks)
|
||||
|
||||
|
||||
def print_progress(current: int, total: int, site_name: str):
    """Print a single-line progress indicator, overwriting the previous one.

    Args:
        current: Number of items reached so far.
        total: Total number of items. A non-positive total is rendered as an
            empty bar at 0% instead of raising ``ZeroDivisionError``.
        site_name: Name shown next to the bar (left-aligned, padded to 30 chars).
    """
    bar_width = 30
    if total > 0:
        pct = int(current / total * 100)
        filled = int(bar_width * current / total)
    else:
        pct = 0
        filled = 0
    # Keep the bar exactly bar_width cells wide even if current is out of range.
    filled = max(0, min(bar_width, filled))
    bar = "█" * filled + "░" * (bar_width - filled)
    # "\r" returns to the start of the line so successive calls redraw in place.
    print(f"\r[{bar}] {pct:3d}% ({current}/{total}) {site_name:<30}", end="", flush=True)
|
||||
|
||||
|
||||
def generate_report(results: List[SiteCheckResult]) -> dict:
    """Aggregate individual check results into a summary report.

    The report holds per-status counters (``summary``), site names grouped by
    status (``by_status``), and the issues / recommendations of every site that
    has any.
    """
    known_statuses = ("working", "broken", "disabled", "timeout", "anti_bot", "error", "unknown")
    summary = {"total": len(results), **dict.fromkeys(known_statuses, 0)}
    by_status = defaultdict(list)
    issues = []
    recommendations = []

    for item in results:
        # Statuses outside the predefined set still get a counter of their own.
        summary[item.status] = summary.get(item.status, 0) + 1
        by_status[item.status].append(item.site_name)

        if item.issues:
            issues.append({
                "site": item.site_name,
                "rank": item.alexa_rank,
                "issues": item.issues,
            })

        if item.recommendations:
            recommendations.append({
                "site": item.site_name,
                "rank": item.alexa_rank,
                "recommendations": item.recommendations,
            })

    return {
        "summary": summary,
        "by_status": by_status,
        "issues": issues,
        "recommendations": recommendations,
    }
|
||||
|
||||
|
||||
def print_report(report: dict, results: List[SiteCheckResult]):
    """Print a formatted report to console.

    Args:
        report: Report dict as produced by ``generate_report`` (reads the
            ``summary``, ``by_status`` and ``recommendations`` keys).
        results: The individual results the report was built from; used to look
            up the rank and issues of the sites listed in each section.
    """
    summary = report["summary"]

    # Index results by site name once instead of scanning the list for every
    # printed site. setdefault keeps the first result if a name repeats.
    by_name = {}
    for item in results:
        by_name.setdefault(item.site_name, item)

    def print_section(status, title, title_color, limit, describe):
        # Print up to `limit` sites with the given status, then a "more" footer.
        names = report["by_status"][status]
        if not names:
            return
        print(f"\n{color(title, title_color)}")
        for site in names[:limit]:
            print(describe(site))
        if len(names) > limit:
            print(f" ... and {len(names) - limit} more")

    print(f"\n{'='*60}")
    print(f"{color('SITE CHECK REPORT', Colors.CYAN)}")
    print(f"{'='*60}\n")

    print(f"{color('SUMMARY:', Colors.BOLD)}")
    print(f" Total sites checked: {summary['total']}")
    print(f" {color('Working:', Colors.GREEN)} {summary['working']}")
    print(f" {color('Broken:', Colors.RED)} {summary['broken']}")
    print(f" {color('Disabled:', Colors.YELLOW)} {summary['disabled']}")
    print(f" {color('Timeout:', Colors.YELLOW)} {summary['timeout']}")
    print(f" {color('Anti-bot:', Colors.YELLOW)} {summary['anti_bot']}")
    print(f" {color('Error:', Colors.RED)} {summary['error']}")

    # Broken sites
    print_section(
        "broken", "BROKEN SITES:", Colors.RED, 20,
        lambda site: f" - {site} (rank {by_name[site].alexa_rank}): {', '.join(by_name[site].issues)}",
    )

    # Timeout sites
    print_section(
        "timeout", "TIMEOUT SITES:", Colors.YELLOW, 10,
        lambda site: f" - {site}",
    )

    # Anti-bot sites
    print_section(
        "anti_bot", "ANTI-BOT PROTECTED:", Colors.YELLOW, 10,
        lambda site: f" - {site}: {', '.join(by_name[site].issues)}",
    )

    # Recommendations
    recommendations = report["recommendations"]
    if recommendations:
        print(f"\n{color('RECOMMENDATIONS:', Colors.CYAN)}")
        for rec in recommendations[:15]:
            print(f" {rec['site']} (rank {rec['rank']}):")
            for suggestion in rec["recommendations"]:
                print(f" -> {suggestion}")
        if len(recommendations) > 15:
            print(f" ... and {len(recommendations) - 15} more")
|
||||
|
||||
|
||||
async def main():
    """Parse CLI arguments, check the top-N sites and emit the report.

    With ``--json`` only the JSON document is written to stdout; status messages
    (such as the "Report saved" notice) go to stderr so the output stays
    parseable. Exits with status 1 when the database file is missing and with
    argparse's usage error when a numeric option is not positive.
    """
    parser = argparse.ArgumentParser(
        description="Mass site checking for Maigret",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--top", "-n", type=int, default=100,
                        help="Check top N sites by Alexa rank (default: 100)")
    parser.add_argument("--parallel", "-p", type=int, default=5,
                        help="Number of parallel requests (default: 5)")
    parser.add_argument("--timeout", "-t", type=int, default=15,
                        help="Request timeout in seconds (default: 15)")
    parser.add_argument("--output", "-o", help="Output JSON report to file")
    parser.add_argument("--include-disabled", action="store_true",
                        help="Include disabled sites in results")
    parser.add_argument("--only-broken", action="store_true",
                        help="Only show broken sites")
    parser.add_argument("--json", action="store_true",
                        help="Output as JSON only")

    args = parser.parse_args()

    # Reject values that would select nothing, stall the run, or disable timeouts.
    for option, value in (("--top", args.top), ("--parallel", args.parallel),
                          ("--timeout", args.timeout)):
        if value < 1:
            parser.error(f"{option} must be a positive integer")

    # Load sites
    db_path = Path(__file__).parent.parent / "maigret" / "resources" / "data.json"
    if not db_path.exists():
        print(f"Database not found: {db_path}")
        sys.exit(1)

    sites = load_sites(db_path)
    top_sites = get_top_sites(sites, args.top)

    if not args.json:
        print(f"Checking top {len(top_sites)} sites (parallel={args.parallel}, timeout={args.timeout}s)...")
        print()

    # Run checks
    progress = print_progress if not args.json else None
    results = await check_sites_batch(top_sites, args.parallel, args.timeout, progress)

    if not args.json:
        print()  # Clear progress line

    # Filter results
    if not args.include_disabled:
        results = [r for r in results if r.status != "disabled"]
    if args.only_broken:
        results = [r for r in results if r.status in ("broken", "error", "timeout")]

    # Generate report
    report = generate_report(results)

    # Built once and shared by the stdout and file outputs.
    output = {
        "report": report,
        "results": [asdict(r) for r in results],
    }

    # Output
    if args.json:
        print(json.dumps(output, indent=2))
    else:
        print_report(report, results)

    # Save to file
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2)
        # In JSON mode stdout must carry nothing but the JSON document.
        notice_stream = sys.stderr if args.json else sys.stdout
        print(f"\nReport saved to: {args.output}", file=notice_stream)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Probe likely false-positive sites among the top-N Alexa-ranked entries.
|
||||
|
||||
For each of K random *distinct* usernames taken from ``usernameClaimed`` fields in
|
||||
the Maigret database, runs a clean ``maigret`` scan (``--top-sites N --json simple|ndjson``).
|
||||
Sites that return CLAIMED in *every* run are reported: unrelated random claimed
|
||||
handles are unlikely to all exist on the same third-party site, so such sites are
|
||||
candidates for broken checks.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import random
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def repo_root() -> Path:
    """Return the directory two levels above this file (the script's grandparent)."""
    this_file = Path(__file__).resolve()
    return this_file.parents[1]
|
||||
|
||||
|
||||
def load_username_claimed_pool(db_path: Path) -> list[str]:
    """Collect the distinct ``usernameClaimed`` values found in the database file.

    Values are whitespace-stripped; non-string or blank values are ignored and
    duplicates are dropped, keeping first-seen order.
    """
    with db_path.open(encoding="utf-8") as f:
        database = json.load(f)

    # A dict doubles as an insertion-ordered set of the usernames seen so far.
    unique_names: dict[str, None] = {}
    for entry in (database.get("sites") or {}).values():
        candidate = (entry or {}).get("usernameClaimed")
        if not isinstance(candidate, str):
            continue
        candidate = candidate.strip()
        if candidate:
            unique_names.setdefault(candidate, None)
    return list(unique_names)
|
||||
|
||||
|
||||
def run_maigret(
    *,
    username: str,
    db_path: Path,
    out_dir: Path,
    top_sites: int,
    json_format: str,
    quiet: bool,
) -> Path:
    """Run maigret subprocess; return path to the written JSON report.

    Args:
        username: Username to scan for.
        db_path: Database file passed to maigret via ``--db``.
        out_dir: Directory passed via ``--folderoutput``; the report is expected there.
        top_sites: Value for ``--top-sites``.
        json_format: Value for ``--json``; also part of the expected report file name.
        quiet: When true, the child's stdout is discarded and its stderr is
            captured (not shown) so it can be attached to a failure message.

    Returns:
        Path of the JSON report written by the child process.

    Raises:
        RuntimeError: The child exited with a non-zero status. In quiet mode the
            message includes the tail of the child's stderr.
        FileNotFoundError: The child succeeded but the expected report is missing.
    """
    # NOTE(review): the expected file name must match how maigret names its
    # reports; only "/" is sanitized here — confirm against maigret's own rule.
    safe = username.replace("/", "_")
    report_path = out_dir / f"report_{safe}_{json_format}.json"

    cmd = [
        sys.executable,
        "-m",
        "maigret",
        username,
        "--db",
        str(db_path),
        "--top-sites",
        str(top_sites),
        "--json",
        json_format,
        "--folderoutput",
        str(out_dir),
        "--no-progressbar",
        "--no-color",
        "--no-recursion",
        "--no-extracting",
    ]
    proc = subprocess.run(
        cmd,
        cwd=str(repo_root()),
        text=True,
        # Undecodable bytes in the child's output must not crash the probe.
        errors="replace",
        stdout=subprocess.DEVNULL if quiet else None,
        # Captured rather than discarded so that failures can be diagnosed.
        stderr=subprocess.PIPE if quiet else None,
    )
    if proc.returncode != 0:
        message = f"maigret exited with {proc.returncode} for username {username!r}"
        stderr_tail = (proc.stderr or "").strip()[-2000:]
        if stderr_tail:
            message += f"\n--- maigret stderr (tail) ---\n{stderr_tail}"
        raise RuntimeError(message)
    if not report_path.is_file():
        raise FileNotFoundError(f"Expected report missing: {report_path}")
    return report_path
|
||||
|
||||
|
||||
def claimed_sites_from_report(path: Path, json_format: str) -> set[str]:
    """Return the site names listed in a maigret JSON report.

    For the ``simple`` format the names are the keys of the top-level object
    (an empty set if the document is not an object). Any other format is read
    as ndjson: one object per non-blank line, named by its ``"sitename"`` field.
    """
    if json_format != "simple":
        found: set[str] = set()
        with path.open(encoding="utf-8") as f:
            for raw_line in f:
                record = raw_line.strip()
                if record:
                    site_name = json.loads(record).get("sitename")
                    if isinstance(site_name, str) and site_name:
                        found.add(site_name)
        return found

    with path.open(encoding="utf-8") as f:
        document = json.load(f)
    return set(document) if isinstance(document, dict) else set()
|
||||
|
||||
|
||||
def main() -> int:
    """Run the false-positive probe and print the sites claimed in every run.

    Returns:
        0 on success, 1 when a maigret run fails or produces no report, 2 when
        the database is missing or holds too few distinct usernames. Invalid
        option values end the process through argparse's usage error.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Pick random distinct usernameClaimed values, run maigret --top-sites N "
            "with JSON reports, and list sites that claimed all of them (suspicious FP)."
        )
    )
    parser.add_argument(
        "--db",
        "-b",
        type=Path,
        default=repo_root() / "maigret" / "resources" / "data.json",
        help="Path to Maigret data.json (a temp copy is used for runs).",
    )
    parser.add_argument(
        "--top-sites",
        "-n",
        type=int,
        default=500,
        metavar="N",
        help="Value for maigret --top-sites (default: 500).",
    )
    parser.add_argument(
        "--samples",
        "-k",
        type=int,
        default=5,
        metavar="K",
        help="How many distinct random usernames to draw (default: 5).",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="RNG seed for reproducible username selection.",
    )
    parser.add_argument(
        "--json",
        dest="json_format",
        default="simple",
        choices=["simple", "ndjson"],
        help="JSON report type passed to maigret -J (default: simple).",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        default=False,
        help="Print maigret stdout/stderr (default: suppress child output).",
    )
    args = parser.parse_args()

    # A negative sample size would crash random.sample(); zero proves nothing.
    if args.samples < 1:
        parser.error("--samples must be a positive integer")
    if args.top_sites < 1:
        parser.error("--top-sites must be a positive integer")

    quiet = not args.verbose

    db_src = args.db.resolve()
    if not db_src.is_file():
        print(f"Database not found: {db_src}", file=sys.stderr)
        return 2

    pool = load_username_claimed_pool(db_src)
    if len(pool) < args.samples:
        print(
            f"Need at least {args.samples} distinct usernameClaimed entries, "
            f"found {len(pool)}.",
            file=sys.stderr,
        )
        return 2

    rng = random.Random(args.seed)
    picked = rng.sample(pool, args.samples)

    print(f"Database: {db_src}")
    print(f"--top-sites {args.top_sites}, {args.samples} random usernameClaimed:")
    for i, u in enumerate(picked, 1):
        print(f" {i}. {u}")

    site_sets: list[set[str]] = []
    with tempfile.TemporaryDirectory(prefix="maigret_fp_probe_") as tmp:
        tmp_path = Path(tmp)
        # Runs use a copy of the database placed in the temporary directory.
        db_work = tmp_path / "data.json"
        shutil.copyfile(db_src, db_work)

        for u in picked:
            print(f"\nRunning maigret for {u!r} ...", flush=True)
            try:
                report = run_maigret(
                    username=u,
                    db_path=db_work,
                    out_dir=tmp_path,
                    top_sites=args.top_sites,
                    json_format=args.json_format,
                    quiet=quiet,
                )
            except (RuntimeError, FileNotFoundError) as exc:
                # An incomplete set of runs would make the intersection
                # misleading, so stop with a clear message instead.
                print(f"Aborting: {exc}", file=sys.stderr)
                return 1
            sites = claimed_sites_from_report(report, args.json_format)
            site_sets.append(sites)
            print(f" -> {len(sites)} positive site(s) in JSON", flush=True)

    always = set.intersection(*site_sets) if site_sets else set()
    print("\n--- Sites with CLAIMED in all runs (candidates for false positives) ---")
    if not always:
        print("(none)")
    else:
        for name in sorted(always):
            print(name)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -0,0 +1,750 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Site check utility for Maigret development.
|
||||
Quickly test site availability, find valid usernames, and diagnose check issues.
|
||||
|
||||
Usage:
|
||||
python utils/site_check.py --site "SiteName" --check-claimed
|
||||
python utils/site_check.py --site "SiteName" --maigret # Test via Maigret
|
||||
python utils/site_check.py --site "SiteName" --compare-methods # aiohttp vs Maigret
|
||||
python utils/site_check.py --url "https://example.com/user/{username}" --test "john"
|
||||
python utils/site_check.py --site "SiteName" --find-user
|
||||
python utils/site_check.py --site "SiteName" --diagnose # Full diagnosis
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
# Add parent dir for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
except ImportError:
|
||||
print("aiohttp not installed. Run: pip install aiohttp")
|
||||
sys.exit(1)
|
||||
|
||||
# Maigret imports (optional, for --maigret mode)
|
||||
MAIGRET_AVAILABLE = False
|
||||
try:
|
||||
from maigret.sites import MaigretDatabase, MaigretSite
|
||||
from maigret.checking import (
|
||||
SimpleAiohttpChecker,
|
||||
check_site_for_username,
|
||||
process_site_result,
|
||||
make_site_result,
|
||||
)
|
||||
from maigret.notify import QueryNotifyPrint
|
||||
from maigret.result import QueryStatus
|
||||
MAIGRET_AVAILABLE = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# Browser-like request headers used when the caller supplies none.
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
}

# Short, widely used handles.
COMMON_USERNAMES = [
    "blue",
    "test",
    "admin",
    "user",
    "john",
    "alex",
    "david",
    "mike",
    "chris",
    "dan",
]
|
||||
|
||||
|
||||
class Colors:
    """ANSI escape sequences for colored terminal output."""

    # Bright foreground colors.
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    CYAN = "\033[96m"

    # Text attributes.
    BOLD = "\033[1m"
    RESET = "\033[0m"
|
||||
|
||||
|
||||
def color(text: str, c: str) -> str:
    """Return *text* preceded by the color code *c* and followed by a reset code."""
    return "{}{}{}".format(c, text, Colors.RESET)
|
||||
|
||||
|
||||
# Keyword groups matched against the lower-cased response body. A marker is
# True when any keyword of its group occurs anywhere in the page text, so these
# are coarse heuristics, not proofs.
_MARKER_KEYWORDS = {
    "404_text": ("not found", "404", "doesn't exist", "does not exist"),
    "profile_markers": ("profile", "user", "member", "account"),
    "error_markers": ("error", "banned", "suspended", "blocked"),
    "login_required": ("log in", "login", "sign in", "signin"),
    "captcha": ("captcha", "recaptcha", "challenge", "verify you"),
    "cloudflare": ("cloudflare", "cf-ray"),
    "rate_limit": ("rate limit", "too many requests", "429"),
}

# Accepts attributes on the opening tag (e.g. <title data-rh="true">), which a
# plain "<title>" literal would miss.
_TITLE_RE = re.compile(r'<title\b[^>]*>([^<]*)</title>', re.IGNORECASE)


def _extract_title(text: str) -> Optional[str]:
    """Return the stripped <title> text truncated to 100 characters, or None."""
    found = _TITLE_RE.search(text)
    return found.group(1).strip()[:100] if found else None


def _detect_markers(text: str) -> dict:
    """Map every marker name to whether one of its keywords occurs in *text*."""
    lowered = text.lower()
    return {
        name: any(keyword in lowered for keyword in keywords)
        for name, keywords in _MARKER_KEYWORDS.items()
    }


async def check_url_aiohttp(url: str, headers: dict = None, follow_redirects: bool = True,
                            timeout: int = 15, ssl_verify: bool = False) -> dict:
    """Fetch *url* with aiohttp and return a dict describing the response.

    Args:
        url: Fully expanded URL to request.
        headers: Request headers; a copy of DEFAULT_HEADERS is used when falsy.
        follow_redirects: Passed to aiohttp as ``allow_redirects``.
        timeout: Total request timeout in seconds.
        ssl_verify: Passed to ``aiohttp.TCPConnector(ssl=...)``; False skips
            certificate verification.

    Returns:
        A dict with the keys ``method``, ``url``, ``status``, ``final_url``,
        ``redirects``, ``content_length`` (length of the decoded text),
        ``content``, ``title``, ``error``, ``error_type``, ``markers`` and
        ``body_preview``. The function never raises: failures are reported
        through ``error`` / ``error_type`` (``timeout``, ``client_error``,
        ``content_error`` or ``unknown``).
    """
    headers = headers or DEFAULT_HEADERS.copy()
    result = {
        "method": "aiohttp",
        "url": url,
        "status": None,
        "final_url": None,
        "redirects": [],
        "content_length": 0,
        "content": None,
        "title": None,
        "error": None,
        "error_type": None,
        "markers": {},
        # Always present so consumers see the same schema on error paths.
        "body_preview": None,
    }

    try:
        connector = aiohttp.TCPConnector(ssl=ssl_verify)
        timeout_obj = aiohttp.ClientTimeout(total=timeout)

        async with aiohttp.ClientSession(connector=connector, timeout=timeout_obj) as session:
            async with session.get(url, headers=headers, allow_redirects=follow_redirects) as resp:
                result["status"] = resp.status
                result["final_url"] = str(resp.url)
                # Intermediate responses, oldest first; empty without redirects.
                result["redirects"] = [str(hop.url) for hop in resp.history]

                try:
                    text = await resp.text()
                    result["content_length"] = len(text)
                    result["content"] = text
                    result["title"] = _extract_title(text)
                    result["markers"] = _detect_markers(text)
                    # First 500 chars of the body, flattened to one line.
                    result["body_preview"] = text[:500].replace("\n", " ").strip()
                except asyncio.TimeoutError:
                    # The total timeout also covers reading the body; let the
                    # outer handler classify it as a timeout, not a content error.
                    raise
                except Exception as e:
                    result["error"] = f"Content read error: {e}"
                    result["error_type"] = "content_error"

    except asyncio.TimeoutError:
        result["error"] = "Timeout"
        result["error_type"] = "timeout"
    except aiohttp.ClientError as e:
        result["error"] = f"Client error: {e}"
        result["error_type"] = "client_error"
    except Exception as e:
        result["error"] = f"Error: {e}"
        result["error_type"] = "unknown"

    return result
|
||||
|
||||
|
||||
class _SilentNotify:
    """Notifier stub whose callbacks do nothing, so the Maigret check prints nothing."""

    def start(self, msg=None):
        pass

    def update(self, status, similar=False):
        pass

    def finish(self, msg=None, status=None):
        pass


async def check_url_maigret(site: 'MaigretSite', username: str, logger=None, timeout: int = 15) -> dict:
    """Check *username* on *site* through Maigret's own checking routine.

    Args:
        site: MaigretSite object to check.
        username: Username to look up.
        logger: Logger handed to Maigret; when None the "site_check" logger is
            used and set to WARNING level.
        timeout: Timeout in seconds placed in the Maigret query options
            (defaults to the previously hard-coded 15).

    Returns:
        A dict with ``method``, ``url``, ``status``, ``status_str``,
        ``http_status``, ``final_url`` (never populated here), ``error``,
        ``error_type`` and ``ids_data``. When Maigret is not importable only
        ``error`` and ``method`` are returned. Exceptions raised by the check
        are reported with ``error_type == "exception"`` instead of propagating.
    """
    if not MAIGRET_AVAILABLE:
        return {"error": "Maigret not available", "method": "maigret"}

    if logger is None:
        logger = logging.getLogger("site_check")
        logger.setLevel(logging.WARNING)

    result = {
        "method": "maigret",
        "url": None,
        "status": None,
        "status_str": None,
        "http_status": None,
        "final_url": None,
        "error": None,
        "error_type": None,
        "ids_data": None,
    }

    try:
        # Query options consumed by Maigret's checker.
        options = {
            "parsing": False,
            "cookie_jar": None,
            "timeout": timeout,
        }

        # The site name returned alongside the result is not needed here.
        _site_name, site_result = await check_site_for_username(
            site, username, options, logger, _SilentNotify()
        )

        status = site_result.get("status")
        result["url"] = site_result.get("url_user")
        result["status"] = status
        result["status_str"] = str(status)
        result["http_status"] = site_result.get("http_status")
        result["ids_data"] = site_result.get("ids_data")

        # Surface an error attached to the status object, if there is one.
        status_error = getattr(status, "error", None) if status else None
        if status_error:
            result["error"] = f"{status_error.type}: {status_error.desc}"
            result["error_type"] = str(status_error.type)

    except Exception as e:
        result["error"] = str(e)
        result["error_type"] = "exception"

    return result
|
||||
|
||||
|
||||
async def find_valid_username(url_template: str, usernames: list = None, headers: dict = None) -> Optional[str]:
    """Probe candidate usernames one by one and return the first convincing hit.

    A candidate is returned when its page answers 200, shows no "not found"
    text and contains profile markers. Candidates that only answer 200 without
    "not found" text are reported as "might work" but not returned. Returns
    None when no candidate qualifies.
    """
    candidates = usernames or COMMON_USERNAMES
    request_headers = headers or DEFAULT_HEADERS.copy()

    print(f"Testing {len(candidates)} usernames on {url_template}...")

    for candidate in candidates:
        probe = await check_url_aiohttp(
            url_template.replace("{username}", candidate), request_headers
        )
        code = probe["status"]
        flags = probe.get("markers", {})
        page_exists = code == 200 and not flags.get("404_text")

        if not page_exists:
            print(f" {color('[-]', Colors.RED)} {candidate}: status={code}")
            continue

        if flags.get("profile_markers"):
            print(f" {color('[+]', Colors.GREEN)} {candidate}: status={code}, has profile markers")
            return candidate

        print(f" {color('[?]', Colors.YELLOW)} {candidate}: status={code}, might work")

    return None
|
||||
|
||||
|
||||
async def compare_users_aiohttp(url_template: str, claimed: str, unclaimed: str = "noonewouldeverusethis7",
                                headers: dict = None) -> Tuple[dict, dict]:
    """Fetch the claimed and unclaimed profile URLs concurrently and report differences.

    Prints both responses, the fields in which they differ, warnings derived
    from the claimed page's markers and the first suggested checkType.
    Returns the two result dicts as ``(claimed, unclaimed)``.
    """
    request_headers = headers or DEFAULT_HEADERS.copy()
    rule = "=" * 60

    print(f"\n{rule}")
    print(f"Comparing: {color(claimed, Colors.GREEN)} vs {color(unclaimed, Colors.RED)}")
    print(f"URL template: {url_template}")
    print("Method: aiohttp")
    print(f"{rule}\n")

    result_claimed, result_unclaimed = await asyncio.gather(
        check_url_aiohttp(url_template.replace("{username}", claimed), request_headers),
        check_url_aiohttp(url_template.replace("{username}", unclaimed), request_headers),
    )

    # Per-response report, emitted in the same field order for both usernames.
    reports = (
        (f"CLAIMED ({claimed})", result_claimed, Colors.GREEN),
        (f"UNCLAIMED ({unclaimed})", result_unclaimed, Colors.RED),
    )
    for heading, res, tint in reports:
        status_tint = Colors.GREEN if res["status"] == 200 else Colors.RED
        lines = [
            f"--- {color(heading, tint)} ---",
            f" URL: {res['url']}",
            f" Status: {color(str(res['status']), status_tint)}",
        ]
        if res["redirects"]:
            lines.append(f" Redirects: {' -> '.join(res['redirects'])} -> {res['final_url']}")
        lines.append(f" Final URL: {res['final_url']}")
        lines.append(f" Content length: {res['content_length']}")
        lines.append(f" Title: {res['title']}")
        if res["error"]:
            lines.append(f" Error: {color(res['error'], Colors.RED)}")
        lines.append(f" Markers: {res['markers']}")
        print("\n".join(lines))
        print()

    # Analysis: collect (checkType, reason) pairs in priority order.
    print(f"--- {color('ANALYSIS', Colors.CYAN)} ---")
    recommendations = []

    def differs(field):
        return result_claimed[field] != result_unclaimed[field]

    if differs("status"):
        pair = f"{result_claimed['status']} vs {result_unclaimed['status']}"
        print(f" [!] Status codes differ: {pair}")
        recommendations.append(("status_code", f"Status codes: {pair}"))

    if differs("final_url"):
        print(" [!] Final URLs differ")
        recommendations.append(("response_url", "Final URLs differ"))

    if differs("content_length"):
        diff = abs(result_claimed["content_length"] - result_unclaimed["content_length"])
        print(f" [!] Content length differs by {diff} bytes")
        recommendations.append(("message", f"Content differs by {diff} bytes"))

    if differs("title"):
        print(" [!] Titles differ:")
        print(f" Claimed: {result_claimed['title']}")
        print(f" Unclaimed: {result_unclaimed['title']}")
        recommendations.append(
            ("message", f"Titles differ: '{result_claimed['title']}' vs '{result_unclaimed['title']}'")
        )

    # Obstacles are only looked for on the claimed page.
    claimed_markers = result_claimed.get("markers", {})
    obstacle_notes = (
        ("captcha", "Captcha detected on claimed page"),
        ("cloudflare", "Cloudflare protection detected"),
        ("login_required", "Login may be required"),
    )
    for marker, note in obstacle_notes:
        if claimed_markers.get(marker):
            print(f" {color('[WARN]', Colors.YELLOW)} {note}")

    if not recommendations:
        print(f" {color('[!]', Colors.RED)} No clear difference found - site may need special handling")
    else:
        print(f"\n {color('Recommended checkType:', Colors.BOLD)} {recommendations[0][0]}")

    return result_claimed, result_unclaimed
|
||||
|
||||
|
||||
async def compare_methods(site: 'MaigretSite', claimed: str, unclaimed: str) -> dict:
    """Run the aiohttp probe and Maigret's checker side by side for one site.

    All four checks (claimed/unclaimed x aiohttp/Maigret) run concurrently.
    A comparison table and a discrepancy list are printed.

    Returns:
        An empty dict when Maigret is unavailable; otherwise a dict with the
        four raw results (``aiohttp_claimed``, ``aiohttp_unclaimed``,
        ``maigret_claimed``, ``maigret_unclaimed``) and ``issues``, the list of
        discrepancy messages.
    """
    if not MAIGRET_AVAILABLE:
        print(color("Maigret not available for comparison", Colors.RED))
        return {}

    print(f"\n{'='*60}")
    print(f"{color('METHOD COMPARISON', Colors.CYAN)}: aiohttp vs Maigret")
    print(f"Site: {site.name}")
    print(f"Claimed: {claimed}, Unclaimed: {unclaimed}")
    print(f"{'='*60}\n")

    # Expand the site-level placeholders; {username} is filled in per request.
    url_template = site.url
    url_template = url_template.replace("{urlMain}", site.url_main or "")
    url_template = url_template.replace("{urlSubpath}", getattr(site, 'url_subpath', '') or "")

    headers = DEFAULT_HEADERS.copy()
    if hasattr(site, 'headers') and site.headers:
        headers.update(site.headers)

    url_claimed = url_template.replace("{username}", claimed)
    url_unclaimed = url_template.replace("{username}", unclaimed)

    # Run all checks in parallel
    aiohttp_claimed, aiohttp_unclaimed, maigret_claimed, maigret_unclaimed = await asyncio.gather(
        check_url_aiohttp(url_claimed, headers),
        check_url_aiohttp(url_unclaimed, headers),
        check_url_maigret(site, claimed),
        check_url_maigret(site, unclaimed),
    )

    status_width = 12  # matches the 'HTTP Status' header column

    def status_icon(status):
        # Pad the visible text BEFORE adding color: format-spec padding counts
        # the invisible ANSI escape characters and would misalign the column.
        cell = f"{str(status):<{status_width}}"
        if status == 200:
            return color(cell, Colors.GREEN)
        if status == 404:
            return color(cell, Colors.YELLOW)
        if status and status >= 400:
            return color(cell, Colors.RED)
        return cell

    def maigret_status_icon(status_str):
        label = str(status_str)
        if "Claimed" in label:
            return color("Claimed", Colors.GREEN)
        if "Available" in label:
            return color("Available", Colors.YELLOW)
        return color(label, Colors.RED)

    def aiohttp_outcome(res):
        # Errors are truncated so a long message does not wrap the table row.
        return 'OK' if not res['error'] else res['error'][:20]

    print(f"{'Method':<12} {'Username':<25} {'HTTP Status':<12} {'Result':<20}")
    print("-" * 70)
    rows = (
        ("aiohttp", claimed, aiohttp_claimed['status'], aiohttp_outcome(aiohttp_claimed)),
        ("aiohttp", unclaimed, aiohttp_unclaimed['status'], aiohttp_outcome(aiohttp_unclaimed)),
        ("Maigret", claimed, maigret_claimed.get('http_status'),
         maigret_status_icon(maigret_claimed.get('status_str'))),
        ("Maigret", unclaimed, maigret_unclaimed.get('http_status'),
         maigret_status_icon(maigret_unclaimed.get('status_str'))),
    )
    for method, username, http_status, outcome in rows:
        print(f"{method:<12} {username:<25} {status_icon(http_status)} {outcome}")

    # Check for discrepancies
    print(f"\n--- {color('DISCREPANCY ANALYSIS', Colors.CYAN)} ---")
    issues = []

    if aiohttp_claimed['status'] != maigret_claimed.get('http_status'):
        issues.append(
            f"HTTP status mismatch for claimed: aiohttp={aiohttp_claimed['status']}, "
            f"Maigret={maigret_claimed.get('http_status')}"
        )

    if aiohttp_unclaimed['status'] != maigret_unclaimed.get('http_status'):
        issues.append(
            f"HTTP status mismatch for unclaimed: aiohttp={aiohttp_unclaimed['status']}, "
            f"Maigret={maigret_unclaimed.get('http_status')}"
        )

    # Check Maigret detection correctness via the textual form of its status.
    claimed_detected = "Claimed" in str(maigret_claimed.get('status_str', ''))
    unclaimed_detected = "Available" in str(maigret_unclaimed.get('status_str', ''))

    if not claimed_detected:
        issues.append(f"Maigret did NOT detect claimed user '{claimed}' as Claimed")
    if not unclaimed_detected:
        issues.append(f"Maigret did NOT detect unclaimed user '{unclaimed}' as Available")

    if issues:
        for issue in issues:
            print(f" {color('[!]', Colors.RED)} {issue}")
    else:
        print(f" {color('[OK]', Colors.GREEN)} Both methods agree on results")

    return {
        "aiohttp_claimed": aiohttp_claimed,
        "aiohttp_unclaimed": aiohttp_unclaimed,
        "maigret_claimed": maigret_claimed,
        "maigret_unclaimed": maigret_unclaimed,
        "issues": issues,
    }
|
||||
|
||||
|
||||
async def diagnose_site(site_config: dict, site_name: str) -> dict:
    """Run a full diagnosis of one site configuration and print a report.

    Steps: print the configuration, request the claimed and unclaimed profile
    URLs concurrently, validate that the configured ``checkType`` can tell the
    two apart, then suggest alternatives when it cannot.

    Args:
        site_config: Raw site entry (keys such as ``checkType``, ``url``,
            ``urlMain``, ``usernameClaimed``, ``presenseStrs``, ``absenceStrs``).
        site_name: Display name used in the report.

    Returns:
        A dict with ``site_name``, ``issues``, ``warnings``,
        ``recommendations`` (lists of strings) and ``working`` (bool). When no
        ``usernameClaimed`` is configured the diagnosis stops after step 1.
    """
    print(f"\n{'='*60}")
    print(f"{color('FULL SITE DIAGNOSIS', Colors.CYAN)}: {site_name}")
    print(f"{'='*60}\n")

    diagnosis = {
        "site_name": site_name,
        "issues": [],
        "warnings": [],
        "recommendations": [],
        "working": False,
    }

    # 1. Config analysis
    print(f"--- {color('1. CONFIGURATION', Colors.BOLD)} ---")
    check_type = site_config.get("checkType", "status_code")
    url = site_config.get("url", "")
    url_main = site_config.get("urlMain", "")
    claimed = site_config.get("usernameClaimed")
    unclaimed = site_config.get("usernameUnclaimed", "noonewouldeverusethis7")
    disabled = site_config.get("disabled", False)

    print(f" checkType: {check_type}")
    print(f" URL: {url}")
    print(f" urlMain: {url_main}")
    print(f" usernameClaimed: {claimed}")
    print(f" disabled: {disabled}")

    if disabled:
        diagnosis["issues"].append("Site is disabled")
        print(f" {color('[!]', Colors.YELLOW)} Site is disabled")

    if not claimed:
        diagnosis["issues"].append("No usernameClaimed defined")
        print(f" {color('[!]', Colors.RED)} No usernameClaimed defined")
        return diagnosis

    # Build full URL
    url_template = url.replace("{urlMain}", url_main).replace("{urlSubpath}", site_config.get("urlSubpath", ""))

    headers = DEFAULT_HEADERS.copy()
    if site_config.get("headers"):
        headers.update(site_config["headers"])

    # 2. Connectivity test
    print(f"\n--- {color('2. CONNECTIVITY TEST', Colors.BOLD)} ---")
    url_claimed = url_template.replace("{username}", claimed)
    url_unclaimed = url_template.replace("{username}", unclaimed)

    result_claimed, result_unclaimed = await asyncio.gather(
        check_url_aiohttp(url_claimed, headers),
        check_url_aiohttp(url_unclaimed, headers)
    )

    print(f" Claimed ({claimed}): status={result_claimed['status']}, error={result_claimed['error']}")
    print(f" Unclaimed ({unclaimed}): status={result_unclaimed['status']}, error={result_unclaimed['error']}")

    # Record request failures. A request without an HTTP status (timeout,
    # connection error, ...) gives nothing to compare against.
    for label, res in (("claimed", result_claimed), ("unclaimed", result_unclaimed)):
        if res["error_type"] == "timeout":
            diagnosis["issues"].append(f"Timeout on {label} username")
        elif res["status"] is None:
            diagnosis["issues"].append(f"Request failed on {label} username: {res['error']}")
    got_responses = result_claimed["status"] is not None and result_unclaimed["status"] is not None

    if result_claimed.get("markers", {}).get("cloudflare"):
        diagnosis["warnings"].append("Cloudflare protection detected")
    if result_claimed.get("markers", {}).get("captcha"):
        diagnosis["warnings"].append("Captcha detected")
    if result_claimed["status"] == 403:
        diagnosis["issues"].append("403 Forbidden - possible anti-bot protection")
    if result_claimed["status"] == 429:
        diagnosis["issues"].append("429 Rate Limited")

    # 3. Check type validation
    print(f"\n--- {color('3. CHECK TYPE VALIDATION', Colors.BOLD)} ---")

    if not got_responses:
        # Without this guard a failed request (status None) against a
        # successful one would count as "status codes differ" and the site
        # would be reported as working.
        print(f" {color('[FAIL]', Colors.RED)} Cannot validate checkType: at least one request returned no response")

    elif check_type == "status_code":
        if result_claimed["status"] == result_unclaimed["status"]:
            diagnosis["issues"].append(f"status_code check but same status ({result_claimed['status']}) for both")
            print(f" {color('[FAIL]', Colors.RED)} Same status code for claimed and unclaimed: {result_claimed['status']}")
        else:
            print(f" {color('[OK]', Colors.GREEN)} Status codes differ: {result_claimed['status']} vs {result_unclaimed['status']}")
            diagnosis["working"] = True

    elif check_type == "response_url":
        if result_claimed["final_url"] == result_unclaimed["final_url"]:
            diagnosis["issues"].append("response_url check but same final URL for both")
            print(f" {color('[FAIL]', Colors.RED)} Same final URL for both")
        else:
            print(f" {color('[OK]', Colors.GREEN)} Final URLs differ")
            diagnosis["working"] = True

    elif check_type == "message":
        presense_strs = site_config.get("presenseStrs", [])
        absence_strs = site_config.get("absenceStrs", [])

        print(f" presenseStrs: {presense_strs}")
        print(f" absenceStrs: {absence_strs}")

        claimed_content = result_claimed.get("content", "") or ""
        unclaimed_content = result_unclaimed.get("content", "") or ""

        # With no presenseStrs configured, presence counts as satisfied.
        presense_found_claimed = any(s in claimed_content for s in presense_strs) if presense_strs else True
        presense_found_unclaimed = any(s in unclaimed_content for s in presense_strs) if presense_strs else True

        # With no absenceStrs configured, absence is never detected.
        absence_found_claimed = any(s in claimed_content for s in absence_strs) if absence_strs else False
        absence_found_unclaimed = any(s in unclaimed_content for s in absence_strs) if absence_strs else False

        print(f" Claimed - presenseStrs found: {presense_found_claimed}, absenceStrs found: {absence_found_claimed}")
        print(f" Unclaimed - presenseStrs found: {presense_found_unclaimed}, absenceStrs found: {absence_found_unclaimed}")

        if presense_strs and not presense_found_claimed:
            diagnosis["issues"].append(f"presenseStrs {presense_strs} not found in claimed page")
            print(f" {color('[FAIL]', Colors.RED)} presenseStrs not found in claimed page")
        if absence_strs and absence_found_claimed:
            diagnosis["issues"].append(f"absenceStrs {absence_strs} found in claimed page (should not be)")
            print(f" {color('[FAIL]', Colors.RED)} absenceStrs found in claimed page")
        if absence_strs and not absence_found_unclaimed:
            diagnosis["warnings"].append("absenceStrs not found in unclaimed page")
            print(f" {color('[WARN]', Colors.YELLOW)} absenceStrs not found in unclaimed page")

        claimed_accepted = presense_found_claimed and not absence_found_claimed
        # The unclaimed page is told apart either by an absence string being
        # present or by the configured presence strings being missing, so a
        # config that only defines presenseStrs can still work.
        unclaimed_rejected = absence_found_unclaimed or (bool(presense_strs) and not presense_found_unclaimed)

        if claimed_accepted and unclaimed_rejected:
            print(f" {color('[OK]', Colors.GREEN)} Message check should work correctly")
            diagnosis["working"] = True
        elif claimed_accepted:
            diagnosis["issues"].append("message check cannot tell the unclaimed page apart from the claimed page")
            print(f" {color('[FAIL]', Colors.RED)} Unclaimed page is not distinguishable with the configured strings")

    else:
        diagnosis["warnings"].append(f"Unknown checkType '{check_type}' - no validation performed")
        print(f" {color('[WARN]', Colors.YELLOW)} Unknown checkType '{check_type}' - no validation performed")

    # 4. Recommendations
    print(f"\n--- {color('4. RECOMMENDATIONS', Colors.BOLD)} ---")

    if not diagnosis["working"] and got_responses:
        # Suggest alternatives based on what actually differs between the pages.
        if result_claimed["status"] != result_unclaimed["status"]:
            diagnosis["recommendations"].append(f"Switch to checkType: status_code (status {result_claimed['status']} vs {result_unclaimed['status']})")
        if result_claimed["final_url"] != result_unclaimed["final_url"]:
            diagnosis["recommendations"].append("Switch to checkType: response_url")
        if result_claimed["title"] != result_unclaimed["title"]:
            diagnosis["recommendations"].append(f"Use title as marker: presenseStrs=['{result_claimed['title']}'] or absenceStrs=['{result_unclaimed['title']}']")

    if diagnosis["recommendations"]:
        for rec in diagnosis["recommendations"]:
            print(f" -> {rec}")
    elif diagnosis["working"]:
        print(f" {color('Site appears to be working correctly', Colors.GREEN)}")
    elif not got_responses:
        print(f" {color('Requests failed - resolve the connectivity issues before changing the configuration', Colors.RED)}")
    else:
        print(f" {color('No clear fix found - site may need special handling or should be disabled', Colors.RED)}")

    # Summary
    print(f"\n--- {color('SUMMARY', Colors.BOLD)} ---")
    if diagnosis["issues"]:
        print(f" Issues: {len(diagnosis['issues'])}")
        for issue in diagnosis["issues"]:
            print(f" - {issue}")
    if diagnosis["warnings"]:
        print(f" Warnings: {len(diagnosis['warnings'])}")
        for warn in diagnosis["warnings"]:
            print(f" - {warn}")
    print(f" Working: {color('YES', Colors.GREEN) if diagnosis['working'] else color('NO', Colors.RED)}")

    return diagnosis
|
||||
|
||||
|
||||
def load_site_from_db(site_name: str) -> Tuple[Optional[dict], Optional['MaigretSite']]:
    """Load a site's configuration from Maigret's bundled data.json.

    The name is matched exactly first, then case-insensitively.

    Args:
        site_name: Site name as it appears under the "sites" key of data.json.

    Returns:
        ``(config_dict, maigret_site)``. Both are None when the site is not
        found (or its config is empty). ``maigret_site`` is also None when
        Maigret is not importable or its database cannot be loaded.

    Raises:
        OSError / json.JSONDecodeError: if data.json cannot be read or parsed.
    """
    db_path = Path(__file__).parent.parent / "maigret" / "resources" / "data.json"

    # JSON files are UTF-8; do not rely on the platform's default encoding.
    with open(db_path, encoding="utf-8") as f:
        data = json.load(f)

    sites = data["sites"]
    config = None
    if site_name in sites:
        config = sites[site_name]
    else:
        # Fall back to a case-insensitive match, adopting the canonical name
        # so the MaigretSite lookup below uses the database's spelling.
        wanted = site_name.lower()
        for name, cfg in sites.items():
            if name.lower() == wanted:
                config = cfg
                site_name = name
                break

    if not config:
        return None, None

    # Also load MaigretSite if available. This is best effort: the plain
    # config is still useful when the Maigret object cannot be built.
    maigret_site = None
    if MAIGRET_AVAILABLE:
        try:
            db = MaigretDatabase().load_from_path(db_path)
            maigret_site = db.sites_dict.get(site_name)
        except Exception as exc:
            # Keep going, but leave a trace instead of failing silently.
            logging.getLogger("site_check").warning(
                "Could not load MaigretSite for %s: %s", site_name, exc
            )

    return config, maigret_site
|
||||
|
||||
|
||||
def _drop_content(result):
    """Remove the bulky "content" entry from a check-result dict in place, if present."""
    if isinstance(result, dict):
        result.pop("content", None)


def _require_maigret_site(maigret_site, claimed):
    """Exit with a message unless a MaigretSite and a claimed username are available."""
    if not maigret_site:
        if not MAIGRET_AVAILABLE:
            print("Maigret imports not available")
        else:
            print("Could not load MaigretSite object")
        sys.exit(1)
    if not claimed:
        print("Site has no usernameClaimed defined")
        sys.exit(1)


async def main():
    """Parse command-line arguments and run the selected site-check action.

    Actions are tried in this order: --diagnose, --compare-methods, --maigret,
    --find-user, --compare, --check-claimed, --test. With none of them, the
    claimed/unclaimed comparison runs when a site supplies both a URL and a
    claimed username; otherwise the help text is printed. Exits with status 1
    on invalid input.
    """
    parser = argparse.ArgumentParser(
        description="Site check utility for Maigret development",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s --site "VK" --check-claimed # Test site with aiohttp
%(prog)s --site "VK" --maigret # Test site with Maigret
%(prog)s --site "VK" --compare-methods # Compare aiohttp vs Maigret
%(prog)s --site "VK" --diagnose # Full diagnosis
%(prog)s --url "https://vk.com/{username}" --compare blue nobody123
%(prog)s --site "VK" --find-user # Find a valid username
"""
    )
    parser.add_argument("--site", "-s", help="Site name from data.json")
    parser.add_argument("--url", "-u", help="URL template with {username}")
    parser.add_argument("--test", "-t", help="Username to test")
    parser.add_argument("--compare", "-c", nargs=2, metavar=("CLAIMED", "UNCLAIMED"),
                        help="Compare two usernames")
    parser.add_argument("--find-user", "-f", action="store_true",
                        help="Find a valid username")
    parser.add_argument("--check-claimed", action="store_true",
                        help="Check if claimed username still works (aiohttp)")
    parser.add_argument("--maigret", "-m", action="store_true",
                        help="Test using Maigret's checker instead of aiohttp")
    parser.add_argument("--compare-methods", action="store_true",
                        help="Compare aiohttp vs Maigret results")
    parser.add_argument("--diagnose", "-d", action="store_true",
                        help="Full diagnosis of site configuration")
    parser.add_argument("--headers", help="Custom headers as JSON")
    parser.add_argument("--timeout", type=int, default=15, help="Request timeout in seconds")
    parser.add_argument("--json", action="store_true", help="Output results as JSON")

    args = parser.parse_args()

    url_template = None
    claimed = None
    unclaimed = "noonewouldeverusethis7"
    headers = DEFAULT_HEADERS.copy()
    site_config = None
    maigret_site = None

    # Load from site name
    if args.site:
        site_config, maigret_site = load_site_from_db(args.site)
        if not site_config:
            print(f"Site '{args.site}' not found in database")
            sys.exit(1)

        url_template = site_config.get("url", "")
        url_main = site_config.get("urlMain", "")
        url_subpath = site_config.get("urlSubpath", "")
        url_template = url_template.replace("{urlMain}", url_main).replace("{urlSubpath}", url_subpath)

        claimed = site_config.get("usernameClaimed")
        unclaimed = site_config.get("usernameUnclaimed", unclaimed)

        if site_config.get("headers"):
            headers.update(site_config["headers"])

        if not args.json:
            print(f"Loaded site: {args.site}")
            print(f" URL: {url_template}")
            print(f" Claimed: {claimed}")
            print(f" CheckType: {site_config.get('checkType', 'unknown')}")
            print(f" Disabled: {site_config.get('disabled', False)}")

    # Override with explicit URL
    if args.url:
        url_template = args.url

    # Custom headers: report malformed input instead of crashing with a traceback.
    if args.headers:
        try:
            custom_headers = json.loads(args.headers)
        except json.JSONDecodeError as exc:
            print(f"--headers is not valid JSON: {exc}")
            sys.exit(1)
        if not isinstance(custom_headers, dict):
            print("--headers must be a JSON object")
            sys.exit(1)
        headers.update(custom_headers)

    # Actions
    if args.diagnose:
        if not site_config:
            print("--diagnose requires --site")
            sys.exit(1)
        result = await diagnose_site(site_config, args.site)
        if args.json:
            print(json.dumps(result, indent=2, default=str))

    elif args.compare_methods:
        _require_maigret_site(maigret_site, claimed)
        result = await compare_methods(maigret_site, claimed, unclaimed)
        if args.json:
            # Remove content fields for JSON output (too large), as --compare does.
            for entry in result.values():
                _drop_content(entry)
            print(json.dumps(result, indent=2, default=str))

    elif args.maigret:
        _require_maigret_site(maigret_site, claimed)

        print("\n--- Testing with Maigret ---")
        for username in [claimed, unclaimed]:
            result = await check_url_maigret(maigret_site, username)
            print(f" {username}: status={result.get('status_str')}, http={result.get('http_status')}, error={result.get('error')}")

    elif args.find_user:
        if not url_template:
            print("--find-user requires --site or --url")
            sys.exit(1)
        result = await find_valid_username(url_template, headers=headers)
        if result:
            print(f"\n{color('Found valid username:', Colors.GREEN)} {result}")
        else:
            print(f"\n{color('No valid username found', Colors.RED)}")

    elif args.compare:
        if not url_template:
            print("--compare requires --site or --url")
            sys.exit(1)
        result = await compare_users_aiohttp(url_template, args.compare[0], args.compare[1], headers)
        if args.json:
            # Remove content field for JSON output (too large)
            for entry in result:
                _drop_content(entry)
            print(json.dumps(result, indent=2, default=str))

    elif args.check_claimed and claimed:
        await compare_users_aiohttp(url_template, claimed, unclaimed, headers)

    elif args.test:
        if not url_template:
            print("--test requires --site or --url")
            sys.exit(1)
        url = url_template.replace("{username}", args.test)
        result = await check_url_aiohttp(url, headers, timeout=args.timeout)
        _drop_content(result)  # Too large for display
        print(json.dumps(result, indent=2, default=str))

    else:
        # Default: check claimed username if available
        if url_template and claimed:
            await compare_users_aiohttp(url_template, claimed, unclaimed, headers)
        else:
            parser.print_help()


# Script entry point: run the async CLI when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
||||