Compare commits


16 Commits

Author SHA1 Message Date
Soxoj bc8f23b005 Replaced to an own workflow 2024-12-04 01:39:03 +01:00
Soxoj 9c4ae21465 Trying to remove by tag 2024-12-04 01:29:24 +01:00
Soxoj 20e32772b4 Repeat the release 2024-12-04 01:27:50 +01:00
Soxoj 042f053bb7 Repeat the build 2024-12-04 01:25:40 +01:00
Soxoj 48c9363f6c Unique release 2024-12-04 01:23:53 +01:00
Soxoj db69eaa290 Update name again 2024-12-04 01:22:30 +01:00
Soxoj 3e6cad63f0 Revert full release name 2024-12-04 01:21:14 +01:00
Soxoj 0ab12d95d8 Added GitHub token 2024-12-04 01:19:52 +01:00
Soxoj b26a711ace Remove release by tag 2024-12-04 01:17:59 +01:00
Soxoj 94e1e8e22e Remove another release 2024-12-04 01:16:47 +01:00
Soxoj 0e77ee47b4 Remove old release 2024-12-04 01:15:26 +01:00
Soxoj 003247453b Merge steps in one 2024-12-04 01:11:49 +01:00
Soxoj 373f40dee8 Syntax fix 2024-12-04 01:10:09 +01:00
Soxoj dd485e8d9c Test steps, some fixes 2024-12-04 01:07:54 +01:00
Soxoj a57f9734a1 Updated step names and ref tags 2024-12-04 00:51:53 +01:00
Soxoj 0bdc49f493 Workflow to update PyInstaller Windows binary each commit in main and dev 2024-12-04 00:42:55 +01:00
65 changed files with 1884 additions and 5400 deletions
+1 -2
@@ -1,3 +1,2 @@
 #!/bin/sh
-echo 'Activating update_sitesmd hook script...'
-poetry run update_sitesmd
+python3 ./utils/update_site_data.py
.github/workflows/close-invalid-telegram-prs.yml (deleted)
@@ -1,61 +0,0 @@
-name: Close Invalid Telegram PRs
-
-on:
-  schedule:
-    # Run daily at 2 AM UTC
-    - cron: '0 2 * * *'
-  workflow_dispatch:
-    # Allow manual triggering
-    inputs:
-      dry_run:
-        description: 'Run in dry-run mode (show what would be closed without closing)'
-        required: false
-        default: 'false'
-        type: boolean
-
-jobs:
-  close-invalid-prs:
-    runs-on: ubuntu-latest
-    permissions:
-      # Need write permissions for pull requests and issues
-      pull-requests: write
-      issues: write
-      contents: read
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.10'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install requests
-
-      - name: Make script executable
-        run: chmod +x utils/close_invalid_telegram_prs.py
-
-      - name: Run PR closer script (dry-run for manual trigger)
-        if: github.event_name == 'workflow_dispatch' && github.event.inputs.dry_run == 'true'
-        run: |
-          python utils/close_invalid_telegram_prs.py --dry-run
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Run PR closer script (live for manual trigger)
-        if: github.event_name == 'workflow_dispatch' && github.event.inputs.dry_run == 'false'
-        run: |
-          python utils/close_invalid_telegram_prs.py
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Run PR closer script (automated daily)
-        if: github.event_name == 'schedule'
-        run: |
-          python utils/close_invalid_telegram_prs.py --dry-run
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+46 -29
@@ -11,45 +11,62 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
-      - name: PyInstaller Windows Build
-        uses: JackMcKew/pyinstaller-action-windows@main
-        with:
-          path: pyinstaller
-      - name: Upload PyInstaller Binary to Workflow as Artifact
+      - name: TEST PyInstaller Windows Build
+        shell: bash
+        run: |
+          echo "test" > maigret_standalone_win32
+      - name: TEST Upload PyInstaller Binary to Workflow as Artifact
         uses: actions/upload-artifact@v4
         with:
           name: maigret_standalone_win32
-          path: pyinstaller/dist/windows
+          path: maigret_standalone_win32
+      # - name: PyInstaller Windows Build
+      #   uses: JackMcKew/pyinstaller-action-windows@main
+      #   with:
+      #     path: pyinstaller
+      # - name: Upload PyInstaller Binary to Workflow as Artifact
+      #   uses: actions/upload-artifact@v4
+      #   with:
+      #     name: maigret_standalone_win32
+      #     path: pyinstaller/dist/windows
       - name: Download PyInstaller Binary
         uses: actions/download-artifact@v4
         with:
           name: maigret_standalone_win32
-      - name: Create New Release and Upload PyInstaller Binary to Release
-        uses: ncipollo/release-action@v1.14.0
-        id: create_release
+      - name: Remove Previous Release
+        uses: soxoj/delete-release-action@v1
         with:
-          allowUpdates: true
-          draft: false
-          prerelease: false
-          artifactErrorsFailBuild: true
-          makeLatest: true
-          replacesArtifacts: true
-          artifacts: maigret_standalone.exe
-          name: Development Windows Release [${{ github.ref_name }}]
-          tag: ${{ github.ref_name }}
-          body: |
-            This is a development release built from the **${{ github.ref_name }}** branch.
-            Take into account that `dev` releases may be unstable.
-            Please, use [the development release](https://github.com/soxoj/maigret/releases/tag/main) build from the **main** branch.
-            Instructions:
-            - Download the attached file `maigret_standalone.exe` to get the Windows executable.
-            - Video guide on how to run it: https://youtu.be/qIgwTZOmMmM
-            - For detailed documentation, visit: https://maigret.readthedocs.io/en/latest/
+          release_name: ${{ github.ref_name }}
         env:
           GITHUB_TOKEN: ${{ github.token }}
+      # test change
+      - name: Create New Release
+        uses: actions/create-release@v1
+        id: create_release
+        with:
+          draft: false
+          prerelease: true
+          release_name: Windows Release [${{ github.ref_name }}]
+          tag_name: ${{ github.ref_name }}-${{ github.run_number }}
+          body: |
+            This is a development release, built from the branch **${{ github.ref_name }}**.
+            Download the attached file "maigret_standalone_win32.zip" to get the Windows executable.
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+      - name: Upload PyInstaller Binary to Release
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        with:
+          upload_url: ${{ steps.create_release.outputs.upload_url }}
+          asset_path: ./maigret_standalone_win32
+          asset_name: maigret_standalone_win32
+          asset_content_type: application/zip
+4 -12
@@ -13,11 +13,10 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12"]
     steps:
-      - name: Checkout
-        uses: actions/checkout@v2
+      - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v2
         with:
@@ -27,13 +26,6 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install poetry
           python -m poetry install --with dev
-      - name: Test with Coverage and Pytest (Fail if coverage is low)
+      - name: Test with pytest
         run: |
-          poetry run coverage run --source=./maigret -m pytest --reruns 3 --reruns-delay 5 tests
-          poetry run coverage report --fail-under=60
-          poetry run coverage html
-      - name: Upload coverage report
-        uses: actions/upload-artifact@v4
-        with:
-          name: htmlcov-${{ strategy.job-index }}
-          path: htmlcov
+          poetry run pytest --reruns 3 --reruns-delay 5
+22 -15
@@ -1,21 +1,28 @@
-name: Upload Python Package to PyPI when a Release is Created
+name: Upload Python Package
 
 on:
   release:
     types: [created]
-  push:
-    tags:
-      - "v*"
-
-permissions:
-  id-token: write
-  contents: read
 
 jobs:
-  build-and-publish:
+  deploy:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
-      - uses: astral-sh/setup-uv@v3
-      - run: uv build
-      - name: Publish to PyPI (Trusted Publishing)
-        uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          packages-dir: dist
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools wheel twine
+      - name: Build and publish
+        env:
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          python setup.py sdist bdist_wheel
+          twine upload dist/*
+1 -1
@@ -42,4 +42,4 @@ settings.json
# other
 *.egg-info
 build
+1 -249
@@ -1,254 +1,6 @@
 # Changelog
 
-## [0.5.0] - 2025-08-10
+## [Unreleased]
* Site Supression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/627
* Bump yarl from 1.7.2 to 1.8.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/626
* Streaming sites by @soxoj in https://github.com/soxoj/maigret/pull/628
* Mirrors by @fen0s in https://github.com/soxoj/maigret/pull/630
* Added Instagram scrapers by @soxoj in https://github.com/soxoj/maigret/pull/633
* Bump psutil from 5.9.1 to 5.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/624
* Bump pypdf2 from 2.10.4 to 2.10.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/625
* Invalid results fixes by @soxoj in https://github.com/soxoj/maigret/pull/634
* Bump pytest-httpserver from 1.0.5 to 1.0.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/638
* Bump pypdf2 from 2.10.5 to 2.10.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/641
* Bump certifi from 2022.6.15 to 2022.9.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/644
* Bump idna from 3.3 to 3.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/640
* fix false positives from bot by @fen0s in https://github.com/soxoj/maigret/pull/663
* Add pre commit hook by @fen0s in https://github.com/soxoj/maigret/pull/664
* site deletion by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/648
* Changed docker run to interactive and remove on exit by @dr-BEat in https://github.com/soxoj/maigret/pull/675
* Corrected grammar in README.md by @Trkzi-Omar in https://github.com/soxoj/maigret/pull/674
* fix sites from issues by @fen0s in https://github.com/soxoj/maigret/pull/680
* correct username in usage examples by @LeonGr in https://github.com/soxoj/maigret/pull/673
* Update README.md by @johanburati in https://github.com/soxoj/maigret/pull/669
* Fix typos by @LorenzoSapora in https://github.com/soxoj/maigret/pull/681
* Build docker images for arm64 and amd64 by @krydos in https://github.com/soxoj/maigret/pull/687
* Bump certifi from 2022.9.14 to 2022.9.24 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/652
* Bump aiohttp from 3.8.1 to 3.8.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/651
* Bump arabic-reshaper from 2.1.3 to 2.1.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/650
* Update README.md, Repl.it -> Replit with new badge by @PeterDaveHello in https://github.com/soxoj/maigret/pull/692
* Refactor Dockerfile with best practices by @PeterDaveHello in https://github.com/soxoj/maigret/pull/691
* Improve README.md Installation section by @PeterDaveHello in https://github.com/soxoj/maigret/pull/690
* Bump pytest-cov from 3.0.0 to 4.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/688
* Bump stem from 1.8.0 to 1.8.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/689
* Bump typing-extensions from 4.3.0 to 4.4.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/698
* Typo fixes in error.py by @Ben-Chapman in https://github.com/soxoj/maigret/pull/711
* Fixed docs about tags by @soxoj in https://github.com/soxoj/maigret/pull/715
* Fixed lightstalking.com by @soxoj in https://github.com/soxoj/maigret/pull/716
* Fixed YouTube by @soxoj in https://github.com/soxoj/maigret/pull/717
* Bump pytest-asyncio from 0.19.0 to 0.20.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/732
* Updated snapcraft yaml by @kz6fittycent in https://github.com/soxoj/maigret/pull/720
* Bump colorama from 0.4.5 to 0.4.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/733
* Bump pytest from 7.1.3 to 7.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/734
* disable not working sites by @fen0s in https://github.com/soxoj/maigret/pull/739
* disable broken sites by @fen0s in https://github.com/soxoj/maigret/pull/756
* Bump cloudscraper from 1.2.64 to 1.2.66 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/769
* fix opensea and shutterstock, disable a few dead sites by @fen0s in https://github.com/soxoj/maigret/pull/798
* Fixed documentation URL by @soxoj in https://github.com/soxoj/maigret/pull/799
* Small readme fix by @soxoj in https://github.com/soxoj/maigret/pull/857
* docs spelling error by @Nadeem-05 in https://github.com/soxoj/maigret/pull/866
* Fix Pinterest false positive by @therealchiendat in https://github.com/soxoj/maigret/pull/862
* Added new Websites by @codyMar30 in https://github.com/soxoj/maigret/pull/838
* Update "future" package to v0.18.3 by @PeterDaveHello in https://github.com/soxoj/maigret/pull/834
* Bump certifi from 2022.9.24 to 2022.12.7 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/793
* Update dependency - networkx from v2.5.1 to v2.6 by @PeterDaveHello in https://github.com/soxoj/maigret/pull/738
* Bump reportlab from 3.6.11 to 3.6.12 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/735
* Bump typing-extensions from 4.4.0 to 4.5.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/888
* Bump psutil from 5.9.2 to 5.9.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/741
* Bump attrs from 22.1.0 to 22.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/892
* Bump multidict from 6.0.2 to 6.0.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/891
* Fixed false positives, updated networkx dep, some lint fixes by @soxoj in https://github.com/soxoj/maigret/pull/894
* Bump lxml from 4.9.1 to 4.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/900
* Bump yarl from 1.8.1 to 1.8.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/899
* Fixed false positives on Mastodon sites by @soxoj in https://github.com/soxoj/maigret/pull/901
* Added valid regex for Mastodon instances (#848) by @soxoj in https://github.com/soxoj/maigret/pull/906
* Fix missing Mastodon Regex on #906 by @therealchiendat in https://github.com/soxoj/maigret/pull/908
* Bump tqdm from 4.64.1 to 4.65.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/905
* Bump requests from 2.28.1 to 2.28.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/904
* Bump psutil from 5.9.4 to 5.9.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/910
* fix deployment of tests by @noraj in https://github.com/soxoj/maigret/pull/933
* Added 26 ENS and similar domains with tag `crypto` by @soxoj in https://github.com/soxoj/maigret/pull/942
* Bump requests from 2.28.2 to 2.31.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/957
* Update wizard.py by @engNoori in https://github.com/soxoj/maigret/pull/1016
* Improved search through UnstoppableDomains by @soxoj in https://github.com/soxoj/maigret/pull/1040
* Added memory.lol (Twitter usernames archive) by @soxoj in https://github.com/soxoj/maigret/pull/1067
* Disabled and fixed several sites by @soxoj in https://github.com/soxoj/maigret/pull/1132
* Fixed some sites (again) by @soxoj in https://github.com/soxoj/maigret/pull/1133
* fix(sec): upgrade reportlab to 3.6.13 by @realize096 in https://github.com/soxoj/maigret/pull/1051
* Add compatibility with pytest >= 7.3.0 by @tjni in https://github.com/soxoj/maigret/pull/1117
* Additionally fixed sites, win32 build fix by @soxoj in https://github.com/soxoj/maigret/pull/1148
* Sites fixes 250823 by @soxoj in https://github.com/soxoj/maigret/pull/1149
* Bump reportlab from 3.6.12 to 4.0.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1160
* Bump certifi from 2022.12.7 to 2023.7.22 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1070
* fix(sec): upgrade certifi to 2022.12.07 by @realize096 in https://github.com/soxoj/maigret/pull/1173
* Bump cloudscraper from 1.2.66 to 1.2.71 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/914
* Some sites fixed & cloudflare detection by @soxoj in https://github.com/soxoj/maigret/pull/1178
* EasyInstaller because everyone likes saving time :) by @CatchySmile in https://github.com/soxoj/maigret/pull/1212
* Tests fixes + last updates by @soxoj in https://github.com/soxoj/maigret/pull/1228
* Bump pypdf2 from 2.10.8 to 3.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/815
* Bump pyvis from 0.2.1 to 0.3.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/861
* Bump xhtml2pdf from 0.2.8 to 0.2.11 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/935
* Bump flake8 from 5.0.4 to 6.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1091
* Bump aiohttp from 3.8.3 to 3.8.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1222
* Specified pyinstaller version by @soxoj in https://github.com/soxoj/maigret/pull/1230
* Pyinstaller fix by @soxoj in https://github.com/soxoj/maigret/pull/1231
* Test pyinstaller on dev branch by @soxoj in https://github.com/soxoj/maigret/pull/1233
* Update main from dev again by @soxoj in https://github.com/soxoj/maigret/pull/1234
* Bump typing-extensions from 4.5.0 to 4.8.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1239
* Bump pytest-rerunfailures from 10.2 to 12.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1237
* Bump async-timeout from 4.0.2 to 4.0.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1238
* Changed pyinstaller dir by @soxoj in https://github.com/soxoj/maigret/pull/1245
* Bump tqdm from 4.65.0 to 4.66.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1235
* Updating site checkers, disabling suspended sites by @MeowyPouncer in https://github.com/soxoj/maigret/pull/1266
* Updated site statistics by @soxoj in https://github.com/soxoj/maigret/pull/1273
* Compat RegataOS (Opensuse) by @Jeiel0rbit in https://github.com/soxoj/maigret/pull/1308
* fix reddit by @hhhtylerw in https://github.com/soxoj/maigret/pull/1296
* Added Telegram bot link by @soxoj in https://github.com/soxoj/maigret/pull/1321
* Added SOWEL classification by @soxoj in https://github.com/soxoj/maigret/pull/1453
* Bump jinja2 from 3.1.2 to 3.1.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1358
* Fixed/Disabled sites. Update requirements.txt by @rly0nheart in https://github.com/soxoj/maigret/pull/1517
* Fixed 4 sites, added 6 sites, disabled 27 sites by @rly0nheart in https://github.com/soxoj/maigret/pull/1536
* Fixed 3 sites, disabed 3, added by @rly0nheart in https://github.com/soxoj/maigret/pull/1539
* Bump socid-extractor from 0.0.24 to 0.0.26 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1546
* Added code conventions to CONTRIBUTING.md by @Lord-Topa in https://github.com/soxoj/maigret/pull/1589
* Readme by @Lord-Topa in https://github.com/soxoj/maigret/pull/1588
* Update data.json by @ranlo in https://github.com/soxoj/maigret/pull/1559
* Adding permutator feature for usernames by @balestek in https://github.com/soxoj/maigret/pull/1575
* Alik.cz indirectly requests removal by @ppfeister in https://github.com/soxoj/maigret/pull/1671
* Fixed 1 site, PyInstaller workflow, Google Colab example by @Ixve in https://github.com/soxoj/maigret/pull/1558
* Bump soupsieve from 2.5 to 2.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1708
* Added dev documentation, fixed some sites, removed GitHub issue links… by @soxoj in https://github.com/soxoj/maigret/pull/1869
* Bump cryptography from 42.0.7 to 43.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1870
* Bump requests-futures from 1.0.1 to 1.0.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1868
* Bump werkzeug from 3.0.3 to 3.0.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1846
* Added .readthedocs.yaml, fixed Pyinstaller and Docker workflows by @soxoj in https://github.com/soxoj/maigret/pull/1874
* Added GitHub and BuyMeACoffee sponsorships by @soxoj in https://github.com/soxoj/maigret/pull/1875
* Bump psutil from 5.9.5 to 6.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1839
* Bump flake8 from 6.1.0 to 7.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1692
* Bump future from 0.18.3 to 1.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1545
* Bump urllib3 from 2.2.1 to 2.2.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1600
* Bump certifi from 2023.11.17 to 2024.8.30 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1840
* Fixed test for aiohttp 3.10 by @soxoj in https://github.com/soxoj/maigret/pull/1876
* Bump aiohttp from 3.9.5 to 3.10.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1721
* Added new badges to README by @soxoj in https://github.com/soxoj/maigret/pull/1877
* Show detailed error statistics for `-v` by @soxoj in https://github.com/soxoj/maigret/pull/1879
* Disabled unavailable sites by @soxoj in https://github.com/soxoj/maigret/pull/1880
* Added 7 sites, implemented integration with Marple, docs update by @soxoj in https://github.com/soxoj/maigret/pull/1881
* Bump pefile from 2022.5.30 to 2024.8.26 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1883
* Bump lxml from 4.9.4 to 5.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1884
* New sites added by @soxoj in https://github.com/soxoj/maigret/pull/1888
* Improved self-check mode, added 15 sites by @soxoj in https://github.com/soxoj/maigret/pull/1887
* Bump pyinstaller from 6.1 to 6.11.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1882
* Bump pytest-asyncio from 0.23.7 to 0.23.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1885
* Pyinstaller bump & pefile fix by @soxoj in https://github.com/soxoj/maigret/pull/1890
* Bump python-bidi from 0.4.2 to 0.6.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1886
* Sites checks fixes by @soxoj in https://github.com/soxoj/maigret/pull/1896
* Parallel execution optimization by @soxoj in https://github.com/soxoj/maigret/pull/1897
* Maigret bot support (custom progress function fixed) by @soxoj in https://github.com/soxoj/maigret/pull/1898
* Bump markupsafe from 2.1.5 to 3.0.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1895
* Retries set to 0 by default, refactored code of executor with progress by @soxoj in https://github.com/soxoj/maigret/pull/1899
* Bump aiohttp-socks from 0.7.1 to 0.9.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1900
* Bump pycountry from 23.12.11 to 24.6.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1903
* Bump pytest-cov from 4.1.0 to 6.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1902
* Bump pyvis from 0.2.1 to 0.3.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1893
* Close http connections (#1595) by @soxoj in https://github.com/soxoj/maigret/pull/1905
* New logo by @soxoj in https://github.com/soxoj/maigret/pull/1906
* Fixed dateutil parsing error for CDT timezone by @soxoj in https://github.com/soxoj/maigret/pull/1907
* Bump alive-progress from 2.4.1 to 3.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1910
* Permutator output and documentation updates by @soxoj in https://github.com/soxoj/maigret/pull/1914
* Bump aiohttp from 3.11.7 to 3.11.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1912
* Bump async-timeout from 4.0.3 to 5.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1909
* An recursive search animation in README has been updated by @soxoj in https://github.com/soxoj/maigret/pull/1915
* Bump pytest-rerunfailures from 12.0 to 15.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1911
* Bump attrs from 22.2.0 to 24.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1913
* Sites fixes by @soxoj in https://github.com/soxoj/maigret/pull/1917
* Update README.md by @soxoj in https://github.com/soxoj/maigret/pull/1919
* Refactored sites module, updated documentation by @soxoj in https://github.com/soxoj/maigret/pull/1918
* Bump aiohttp from 3.11.8 to 3.11.9 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1920
* Bump pytest from 7.4.4 to 8.3.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1923
* Bump yarl from 1.18.0 to 1.18.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1922
* Bump pytest-asyncio from 0.23.8 to 0.24.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1925
* Documentation update by @soxoj in https://github.com/soxoj/maigret/pull/1926
* Bump mock from 4.0.3 to 5.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1921
* Bump pywin32-ctypes from 0.2.1 to 0.2.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1924
* Installation docs update by @soxoj in https://github.com/soxoj/maigret/pull/1927
* Disabled Figma check by @soxoj in https://github.com/soxoj/maigret/pull/1928
* Put Windows executable in Releases for each dev and main commit by @soxoj in https://github.com/soxoj/maigret/pull/1929
* Updated PyInstaller workflow by @soxoj in https://github.com/soxoj/maigret/pull/1930
* Documentation update by @soxoj in https://github.com/soxoj/maigret/pull/1931
* Fixed Figma check and some bugs by @soxoj in https://github.com/soxoj/maigret/pull/1932
* Bump six from 1.16.0 to 1.17.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1933
* Activation mechanism documentation added by @soxoj in https://github.com/soxoj/maigret/pull/1935
* Readme/docs update based on GH discussions by @soxoj in https://github.com/soxoj/maigret/pull/1936
* Bump aiohttp from 3.11.9 to 3.11.10 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1937
* Weibo site check fix, activation mechanism added by @soxoj in https://github.com/soxoj/maigret/pull/1938
* Fixed Ebay and BongaCams checks by @soxoj in https://github.com/soxoj/maigret/pull/1939
* Sites fixes by @soxoj in https://github.com/soxoj/maigret/pull/1940
* Fixed Linktr and discourse.mozilla.org by @soxoj in https://github.com/soxoj/maigret/pull/1941
* Refactored self-check method, code formatting, small lint fixes by @soxoj in https://github.com/soxoj/maigret/pull/1942
* Refactoring, test coverage increased to 60% by @soxoj in https://github.com/soxoj/maigret/pull/1943
* Added a test for submitter by @soxoj in https://github.com/soxoj/maigret/pull/1944
* Update README.md by @soxoj in https://github.com/soxoj/maigret/pull/1949
* Updated OP.GG checks by @soxoj in https://github.com/soxoj/maigret/pull/1950
* Fixed ProductHunt check by @soxoj in https://github.com/soxoj/maigret/pull/1951
* Improved check feature extraction function, added tests by @soxoj in https://github.com/soxoj/maigret/pull/1952
* Submit improvements and site check fixes by @soxoj in https://github.com/soxoj/maigret/pull/1956
* chore: update submit.py by @eltociear in https://github.com/soxoj/maigret/pull/1957
* Fixed Gravatar parsing (socid_extractor) by @soxoj in https://github.com/soxoj/maigret/pull/1958
* Site check fixes by @soxoj in https://github.com/soxoj/maigret/pull/1962
* fix bad linux filename generation by @overcuriousity in https://github.com/soxoj/maigret/pull/1961
* Bump pytest-asyncio from 0.24.0 to 0.25.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1963
* Fixed flaky tests to check cookies by @soxoj in https://github.com/soxoj/maigret/pull/1965
* Preparation of 0.5.0 alpha version by @soxoj in https://github.com/soxoj/maigret/pull/1966
* Created web frontend launched via --web flag by @overcuriousity in https://github.com/soxoj/maigret/pull/1967
* Bump certifi from 2024.8.30 to 2024.12.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1969
* Bump attrs from 24.2.0 to 24.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1970
* Added web interface docs by @soxoj in https://github.com/soxoj/maigret/pull/1972
* Small docs and parameters fixes for web interface mode by @soxoj in https://github.com/soxoj/maigret/pull/1973
* [ImgBot] Optimize images by @imgbot[bot] in https://github.com/soxoj/maigret/pull/1974
* Improving the web interface by @overcuriousity in https://github.com/soxoj/maigret/pull/1975
* make graph more meaningful by @overcuriousity in https://github.com/soxoj/maigret/pull/1977
* Async generator-executor for site checks by @soxoj in https://github.com/soxoj/maigret/pull/1978
* Bump aiohttp from 3.11.10 to 3.11.11 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1979
* Bump psutil from 6.1.0 to 6.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1980
* Bump aiohttp-socks from 0.9.1 to 0.10.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1985
* Bump mypy from 1.13.0 to 1.14.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1983
* Bump aiohttp-socks from 0.10.0 to 0.10.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1987
* Bump jinja2 from 3.1.4 to 3.1.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1982
* Bump coverage from 7.6.9 to 7.6.10 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1986
* Bump pytest-asyncio from 0.25.0 to 0.25.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1989
* Bump mypy from 1.14.0 to 1.14.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1988
* Bump pytest-asyncio from 0.25.1 to 0.25.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1990
* docs: update usage-examples.rst by @eltociear in https://github.com/soxoj/maigret/pull/1996
* upload-artifact action in python test workflow updated to v4 by @soxoj in https://github.com/soxoj/maigret/pull/2024
* Pass db_file configuration to web interface by @pykereaper in https://github.com/soxoj/maigret/pull/2019
* Fix usage of data.json files from web by @pykereaper in https://github.com/soxoj/maigret/pull/2020
* Bump black from 24.10.0 to 25.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2001
* Important Update Installer.bat by @CatchySmile in https://github.com/soxoj/maigret/pull/1994
* Bump cryptography from 44.0.0 to 44.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2005
* Bump jinja2 from 3.1.5 to 3.1.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2011
* [#2010] Add 6 more websites to manage by @pylapp in https://github.com/soxoj/maigret/pull/2009
* Bump flask from 3.1.0 to 3.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2028
* Bump requests from 2.32.3 to 2.32.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2026
* Bump pycares from 4.5.0 to 4.9.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2025
* Bump pytest-asyncio from 0.25.2 to 0.26.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2016
* Bump urllib3 from 2.2.3 to 2.5.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2027
* Disable ICQ site by @Echo-Darlyson in https://github.com/soxoj/maigret/pull/1993
* Bump attrs from 24.3.0 to 25.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2014
* Bump certifi from 2024.12.14 to 2025.1.31 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2004
* Bump typing-extensions from 4.12.2 to 4.14.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2038
* Disable AskFM by @MR-VL in https://github.com/soxoj/maigret/pull/2037
* Bump platformdirs from 4.3.6 to 4.3.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2033
* Bump coverage from 7.6.10 to 7.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2039
* Bump aiohttp from 3.11.11 to 3.12.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2041
* Bump yarl from 1.18.3 to 1.20.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2032
* Fixed test dialog_adds_site_negative by @soxoj in https://github.com/soxoj/maigret/pull/2107
* Bump reportlab from 4.2.5 to 4.4.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2063
* Bump asgiref from 3.8.1 to 3.9.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2040
* Bump multidict from 6.1.0 to 6.6.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2034
* Bump pytest-rerunfailures from 15.0 to 15.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2030
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.4...v0.5.0
 ## [0.4.4] - 2022-09-03
 
 * Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433
+78 -68
@@ -1,61 +1,85 @@
 @echo off
+REM check if running as admin
 goto check_Permissions
 :check_Permissions
-echo Administrative permissions required. Detecting permissions...
 net session >nul 2>&1
 if %errorLevel% == 0 (
-    echo Success: Elevated permissions granted.
+    goto 1
 ) else (
-    echo Failure: Requires elevated permissions.
-    pause >nul
+    cls
+    echo Failure: You MUST run this as administator, otherwise commands will fail.
 )
-cls
+pause >nul
-echo --------------------------------------------------------
-echo Python 3.8 or higher and pip3 required.
-echo --------------------------------------------------------
-echo Press [I] to begin installation.
-echo Press [R] If already installed.
-echo --------------------------------------------------------
-choice /c IR
-if %errorlevel%==1 goto check_python
-if %errorlevel%==2 goto after
-:check_python
-cls
-for /f "tokens=2 delims= " %%i in ('python --version 2^>nul') do (
-    for /f "tokens=1,2 delims=." %%j in ("%%i") do (
-        if %%j GEQ 3 (
-            if %%k GEQ 8 (
-                goto check_pip
-            )
-        )
-    )
-)
-echo Python 3.8 or higher is required. Please install it first.
-pause
-exit /b
-:check_pip
-pip --version 2>nul | findstr /r /c:"pip" >nul
+REM Step 2: Check if Python and pip3 are installed
+python --version >nul 2>&1
 if %errorlevel% neq 0 (
-    echo pip is required. Please install it first.
+    echo Python is not installed. Please install Python 3.8 or higher.
     pause
     exit /b
 )
-goto install1
+pip3 --version >nul 2>&1
+if %errorlevel% neq 0 (
+    echo pip3 is not installed. Please install pip3.
+    pause
+    exit /b
+)
+REM Step 3: Check Python version
+python -c "import sys; exit(0) if sys.version_info >= (3,8) else exit(1)"
+if %errorlevel% neq 0 (
+    echo Python version 3.8 or higher is required.
+    pause
+    exit /b
+)
+:1
+cls
+:::===============================================================
+::: ______ __ __ _ _
+::: | ____| | \/ | (_) | |
+::: | |__ __ _ ___ _ _ | \ / | __ _ _ __ _ _ __ ___| |_
+::: | __| / _` / __| | | | | |\/| |/ _` | |/ _` | '__/ _ \ __|
+::: | |___| (_| \__ \ |_| | | | | | (_| | | (_| | | | __/ |_
+::: |______\__,_|___/\__, | |_| |_|\__,_|_|\__, |_| \___|\__|
+::: __/ | __/ |
+::: |___/ |___/
+:::
+:::===============================================================
+echo.
+for /f "delims=: tokens=*" %%A in ('findstr /b ::: "%~f0"') do @echo(%%A
+echo.
+echo ----------------------------------------------------------------
+echo Python 3.8 or higher and pip3 required.
+echo ----------------------------------------------------------------
+echo Press [I] to begin installation.
+echo Press [R] If already installed.
+echo ----------------------------------------------------------------
+choice /c IR
+if %errorlevel%==1 goto install1
+if %errorlevel%==2 goto after
 :install1
 cls
 echo ========================================================
-echo Maigret Installation
+echo Maigret Installation Script
 echo ========================================================
 echo.
 echo --------------------------------------------------------
 echo If your pip installation is outdated, it could cause
 echo cryptography to fail on installation.
 echo --------------------------------------------------------
-echo Check for and install pip 23.3.2 now?
+echo check for and install pip updates now?
 echo --------------------------------------------------------
 choice /c YN
 if %errorlevel%==1 goto install2
@@ -63,56 +87,42 @@ if %errorlevel%==2 goto install3
 :install2
 cls
-python -m pip install --upgrade pip==23.3.2
-if %errorlevel% neq 0 (
-    echo Failed to update pip to version 23.3.2. Please check your installation.
-    pause
-    exit /b
-)
-goto install3
+python -m pip install --upgrade pip
+goto:install3
 :install3
 cls
 echo ========================================================
-echo Maigret Installation
+echo Maigret Installation Script
 echo ========================================================
 echo.
 echo --------------------------------------------------------
-echo Installing Maigret...
-python -m pip install maigret
-if %errorlevel% neq 0 (
-    echo Failed to install Maigret. Please check your installation.
-    pause
-    exit /b
-)
-echo.
-echo +------------------------------------------------------+
-echo Maigret installed successfully.
-echo +------------------------------------------------------+
-pause
-goto after
+echo Install requirements and maigret?
+echo --------------------------------------------------------
+choice /c YN
+if %errorlevel%==1 goto install4
+if %errorlevel%==2 goto 1
+:install4
+cls
+pip install .
+pip install maigret
+goto:after
 :after
 cls
 echo ========================================================
-echo Maigret Usage
+echo Maigret Background Search
 echo ========================================================
 echo.
-echo +--------------------------------------------------------+
-echo To use Maigret, you can run the following command:
+echo --------------------------------------------------------
+echo Please Enter Username / Email
+echo --------------------------------------------------------
+set /p input=
+maigret %input%
 echo.
-echo maigret [options] [username]
 echo.
-echo For example, to search for a username:
 echo.
-echo maigret example_username
 echo.
-echo For more options and usage details, refer to the Maigret documentation.
-echo.
-echo https://github.com/soxoj/maigret/blob/5b3b81b4822f6deb2e9c31eb95039907f25beb5e/README.md
-echo +--------------------------------------------------------+
-echo.
-cmd
 pause
-exit /b
+goto:after
+exit /b
+1 -1
@@ -1,7 +1,7 @@
 LINT_FILES=maigret wizard.py tests
 
 test:
-	coverage run --source=./maigret,./maigret/web -m pytest tests
+	coverage run --source=./maigret -m pytest tests
 	coverage report -m
 	coverage html
+9 -52
@@ -29,41 +29,29 @@
 ## About
 
-**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys are required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
+**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
 
-Currently supports more than 3000 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking Tor sites, I2P sites, and domains (via DNS resolving).
+Currently supported more than 3000 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
 
-## Powered By Maigret
-
-These are professional tools for social media content analysis and OSINT investigations that use Maigret (banners are clickable).
-
-<a href="https://github.com/SocialLinks-IO/sociallinks-api"><img height="60" alt="Social Links API" src="https://github.com/user-attachments/assets/789747b2-d7a0-4d4e-8868-ffc4427df660"></a>
-<a href="https://sociallinks.io/products/sl-crimewall"><img height="60" alt="Social Links Crimewall" src="https://github.com/user-attachments/assets/0b18f06c-2f38-477b-b946-1be1a632a9d1"></a>
-<a href="https://usersearch.ai/"><img height="60" alt="UserSearch" src="https://github.com/user-attachments/assets/66daa213-cf7d-40cf-9267-42f97cf77580"></a>
-
 ## Main features
 
-* Profile page parsing, [extraction](https://github.com/soxoj/socid_extractor) of personal info, links to other profiles, etc.
-* Recursive search by new usernames and other IDs found
+* Profile pages parsing, [extraction](https://github.com/soxoj/socid_extractor) of personal info, links to other profiles, etc.
+* Recursive search by new usernames and other ids found
 * Search by tags (site categories, countries)
 * Censorship and captcha detection
 * Requests retries
 
-See the full description of Maigret features [in the documentation](https://maigret.readthedocs.io/en/latest/features.html).
+See full description of Maigret features [in the documentation](https://maigret.readthedocs.io/en/latest/features.html).
 
 ## Installation
 
-‼️ Maigret is available online via [official Telegram bot](https://t.me/osint_maigret_bot). Consider using it if you don't want to install anything.
+‼️ Maigret is available online via [official Telegram bot](https://t.me/osint_maigret_bot).
 
-### Windows
+Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
 
 Standalone EXE-binaries for Windows are located in [Releases section](https://github.com/soxoj/maigret/releases) of GitHub repository.
 
-Video guide on how to run it: https://youtu.be/qIgwTZOmMmM.
-
-### Installation in Cloud Shells
-
-You can launch Maigret using cloud shells and Jupyter notebooks. Press one of the buttons below and follow the instructions to launch it in your browser.
+Also, you can run Maigret using cloud shells and Jupyter notebooks (see buttons below).
 
 [![Open in Cloud Shell](https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
 <a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
@@ -71,10 +59,7 @@ You can launch Maigret using cloud shells and Jupyter notebooks. Press one of th
 <a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
 <a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
 
-### Local installation
-
-Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
+### Package installing
 
 **NOTE**: Python 3.10 or higher and pip is required, **Python 3.11 is recommended.**
@@ -132,30 +117,6 @@ maigret user1 user2 user3 -a
 Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
 
-### Web interface
-
-You can run Maigret with a web interface, where you can view the graph with results and download reports of all formats on a single page.
-
-<details>
-  <summary>Web Interface Screenshots</summary>
-
-![Web interface: how to start](https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot_start.png)
-
-![Web interface: results](https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot.png)
-
-</details>
-
-Instructions:
-
-1. Run Maigret with the ``--web`` flag and specify the port number.
-
-```console
-maigret --web 5000
-```
-
-2. Open http://127.0.0.1:5000 in your browser and enter one or more usernames to make a search.
-
-3. Wait a bit for the search to complete and view the graph with results, the table with all accounts found, and download reports of all formats.
-
 ## Contributing
 
 Maigret has open-source code, so you may contribute your own sites by adding them to `data.json` file, or bring changes to it's code!
@@ -188,10 +149,6 @@ It is your sole responsibility to ensure that your use of this tool complies wit
 The authors and developers of this tool bear no responsibility for any misuse or unlawful activities conducted by its users.
 
-## Feedback
-
-If you have any questions, suggestions, or feedback, please feel free to [open an issue](https://github.com/soxoj/maigret/issues), create a [GitHub discussion](https://github.com/soxoj/maigret/discussions), or contact the author directly via [Telegram](https://t.me/soxoj).
-
 ## SOWEL classification
 
 This tool uses the following OSINT techniques:
-121
@@ -1,121 +0,0 @@
# Invalid Telegram PR Auto-Closer
This repository includes an automated solution to identify and close pull requests with titles matching the pattern "Invalid result https://t.me/...". These PRs are typically auto-generated or spam submissions that should not be processed.
## Components
### 1. Python Script (`utils/close_invalid_telegram_prs.py`)
A utility script that:
- Searches for open PRs matching the pattern "Invalid result https://t.me/..."
- Optionally closes them with a descriptive comment
- Supports dry-run mode for testing
- Uses the GitHub API to interact with the repository
#### Usage
```bash
# Dry run (show what would be closed without closing)
python utils/close_invalid_telegram_prs.py --dry-run
# Close matching PRs interactively
python utils/close_invalid_telegram_prs.py
# Close PRs with custom comment
python utils/close_invalid_telegram_prs.py --comment "Custom closure message"
# Use with different repository
python utils/close_invalid_telegram_prs.py --owner username --repo repository
```
#### Requirements
- Python 3.6+
- `requests` library: `pip install requests`
- GitHub personal access token with repository access
#### Authentication
Set your GitHub token via:
- Command line: `--token YOUR_TOKEN`
- Environment variable: `export GITHUB_TOKEN=YOUR_TOKEN`
### 2. GitHub Actions Workflow (`.github/workflows/close-invalid-telegram-prs.yml`)
An automated workflow that:
- Runs daily at 2 AM UTC (in dry-run mode by default)
- Can be manually triggered with option to actually close PRs
- Uses the repository's `GITHUB_TOKEN` for authentication
#### Manual Trigger
1. Go to the Actions tab in your GitHub repository
2. Select "Close Invalid Telegram PRs" workflow
3. Click "Run workflow"
4. Choose whether to run in dry-run mode or actually close PRs
### 3. Tests (`tests/test_close_invalid_telegram_prs.py`)
Unit tests that verify:
- Correct identification of matching PR titles
- Proper rejection of non-matching titles
- Case-insensitive pattern matching
- Whitespace handling
Run tests with:
```bash
python tests/test_close_invalid_telegram_prs.py
```
## Pattern Detection
The script identifies PRs with titles matching:
- `Invalid result https://t.me/...` (case insensitive)
- Various whitespace and formatting variations
- Any Telegram URL after the pattern
### Examples of Matching Titles
- "Invalid result https://t.me/someuser"
- "INVALID RESULT https://t.me/channel123"
- "Invalid Result https://t.me/bot_name"
- " Invalid result https://t.me/user/123 " (with whitespace)
### Examples of Non-Matching Titles
- "Valid result https://t.me/someuser" (not "Invalid")
- "Invalid results https://t.me/someuser" (plural "results")
- "Fix invalid result https://t.me/someuser" (extra words)
- "Invalid result http://t.me/someuser" (http instead of https)
## Security
- The GitHub Actions workflow only has the minimum required permissions
- The script requires explicit confirmation before closing PRs (except in automated mode)
- All actions are logged and can be audited
- Dry-run mode is available for testing
## Customization
You can customize the behavior by:
- Modifying the regex pattern in `is_invalid_telegram_pr()` function
- Changing the default comment message
- Adjusting the GitHub Actions schedule
- Adding additional validation logic
## Troubleshooting
### Common Issues
1. **Permission Denied**: Ensure your GitHub token has the required permissions
2. **No PRs Found**: This is normal if there are no matching PRs
3. **Rate Limiting**: The script handles GitHub API rate limits automatically
### Debug Mode
Run with verbose output:
```bash
python utils/close_invalid_telegram_prs.py --dry-run
```
This will show exactly which PRs match the pattern without closing them.
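The pattern rules documented above can be captured in a few lines. Below is an editor's minimal sketch of a matcher consistent with the listed matching and non-matching examples; the actual `is_invalid_telegram_pr()` in `utils/close_invalid_telegram_prs.py` (removed in this compare) may differ in details.

```python
import re

# Sketch only: a regex consistent with the examples documented above
# (case-insensitive, surrounding whitespace tolerated, https-only,
# singular "result", no extra leading words).
INVALID_PR_TITLE = re.compile(
    r"^\s*invalid result\s+https://t\.me/\S+\s*$", re.IGNORECASE
)

def is_invalid_telegram_pr(title: str) -> bool:
    """Return True for titles like 'Invalid result https://t.me/...'."""
    return bool(INVALID_PR_TITLE.match(title))

# Matching titles per the examples above
assert is_invalid_telegram_pr("Invalid result https://t.me/someuser")
assert is_invalid_telegram_pr("  INVALID RESULT https://t.me/channel123  ")
# Non-matching titles per the examples above
assert not is_invalid_telegram_pr("Fix invalid result https://t.me/someuser")
assert not is_invalid_telegram_pr("Invalid result http://t.me/someuser")
```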
+3 -3
@@ -3,11 +3,11 @@
 # -- Project information
 
 project = 'Maigret'
-copyright = '2025, soxoj'
+copyright = '2024, soxoj'
 author = 'soxoj'
 
-release = '0.5.0'
-version = '0.5'
+release = '0.4.4'
+version = '0.4.4'
 
 # -- General configuration
+2 -65
@@ -47,9 +47,6 @@ Use the following commands to check Maigret:
# - mypy checks
 make lint
 
-# run black formatter
-make format
-
 # run testing with coverage html report
 # current test coverage is 58%
 make test
@@ -113,65 +110,6 @@ There are few options for sites data.json helpful in various cases:
 - ``requestHeadOnly`` - set to ``true`` if it's enough to make a HEAD request to the site
 - ``regexCheck`` - a regex to check if the username is valid, in case of frequent false-positives
 
-.. _activation-mechanism:
-
-Activation mechanism
---------------------
-
-The activation mechanism helps make requests to sites requiring additional authentication like cookies, JWT tokens, or custom headers.
-
-Let's study the Vimeo site check record from the Maigret database:
-
-.. code-block:: json
-
-    "Vimeo": {
-      "tags": [
-        "us",
-        "video"
-      ],
-      "headers": {
-        "Authorization": "jwt eyJ0..."
-      },
-      "activation": {
-        "url": "https://vimeo.com/_rv/viewer",
-        "marks": [
-          "Something strange occurred. Please get in touch with the app's creator."
-        ],
-        "method": "vimeo"
-      },
-      "urlProbe": "https://api.vimeo.com/users/{username}?fields=name...",
-      "checkType": "status_code",
-      "alexaRank": 148,
-      "urlMain": "https://vimeo.com/",
-      "url": "https://vimeo.com/{username}",
-      "usernameClaimed": "blue",
-      "usernameUnclaimed": "noonewouldeverusethis7"
-    },
-
-The activation method is:
-
-.. code-block:: python
-
-    def vimeo(site, logger, cookies={}):
-        headers = dict(site.headers)
-        if "Authorization" in headers:
-            del headers["Authorization"]
-        import requests
-        r = requests.get(site.activation["url"], headers=headers)
-        jwt_token = r.json()["jwt"]
-        site.headers["Authorization"] = "jwt " + jwt_token
-
-Here's how the activation process works when a JWT token becomes invalid:
-
-1. The site check makes an HTTP request to ``urlProbe`` with the invalid token
-2. The response contains an error message specified in the ``activation``/``marks`` field
-3. When this error is detected, the ``vimeo`` activation function is triggered
-4. The activation function obtains a new JWT token and updates it in the site check record
-5. On the next site check (either through retry or a new Maigret run), the valid token is used and the check succeeds
-
-Examples of activation mechanism implementation are available in `activation.py <https://github.com/soxoj/maigret/blob/main/maigret/activation.py>`_ file.
 How to publish new version of Maigret
 -------------------------------------
@@ -194,10 +132,9 @@ PyPi package.
 2. Update Maigret version in three files manually:
 
-- pyproject.toml
+- setup.py
 - maigret/__version__.py
 - docs/source/conf.py
-- snapcraft.yaml
 
 3. Create a new empty text section in the beginning of the file `CHANGELOG.md` with a current date:
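For readers following the activation mechanism removed in the documentation hunk above, here is an editor's simplified sketch of the described flow (probe, detect an activation mark, refresh headers, retry). It is illustrative only: Maigret's real checker is asynchronous, and `check_with_activation` and its arguments are hypothetical names, not Maigret's API.

```python
import requests  # illustration only; Maigret's real checker is async

def check_with_activation(site: dict, activate) -> requests.Response:
    """Hypothetical helper mirroring the documented activation flow.

    `site` mimics a database record like the Vimeo one quoted above,
    with "urlProbe" (already formatted for a username), "headers" and
    "activation"/"marks" fields; `activate` is a callable that stores a
    fresh token in site["headers"] (cf. the vimeo() function above).
    """
    response = requests.get(site["urlProbe"], headers=site["headers"])
    marks = site.get("activation", {}).get("marks", [])
    if any(mark in response.text for mark in marks):
        activate(site)  # obtain a new token and update the site record
        # Retry with refreshed authentication (step 5 in the docs above).
        response = requests.get(site["urlProbe"], headers=site["headers"])
    return response
```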
+4 -48
@@ -5,34 +5,6 @@ Features
 This is the list of Maigret features.
 
-.. _web-interface:
-
-Web Interface
--------------
-
-You can run Maigret with a web interface, where you can view the graph with results and download reports of all formats on a single page.
-
-.. image:: https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot_start.png
-   :alt: Web interface: how to start
-
-.. image:: https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot.png
-   :alt: Web interface: results
-
-Instructions:
-
-1. Run Maigret with the ``--web`` flag and specify the port number.
-
-.. code-block:: console
-
-   maigret --web 5000
-
-2. Open http://127.0.0.1:5000 in your browser and enter one or more usernames to make a search.
-
-3. Wait a bit for the search to complete and view the graph with results, the table with all accounts found, and download reports of all formats.
-
 Personal info gathering
 -----------------------
@@ -175,32 +147,16 @@ Archives and mirrors checking
The Maigret database contains not only the original websites, but also mirrors, archives, and aggregators. For example: The Maigret database contains not only the original websites, but also mirrors, archives, and aggregators. For example:
- `Reddit BigData search <https://camas.github.io/reddit-search/>`_
- `Picuki <https://www.picuki.com/>`_, Instagram mirror - `Picuki <https://www.picuki.com/>`_, Instagram mirror
- (no longer available) `Reddit BigData search <https://camas.github.io/reddit-search/>`_ - `Twitter shadowban <https://shadowban.eu/>`_ checker
- (no longer available) `Twitter shadowban <https://shadowban.eu/>`_ checker
It allows getting additional info about the person and checking the existence of the account even if the main site is unavailable (bot protection, captcha, etc.) It allows getting additional info about the person and checking the existence of the account even if the main site is unavailable (bot protection, captcha, etc.)
Activation
----------
The activation mechanism helps make requests to sites requiring additional authentication like cookies, JWT tokens, or custom headers.
It works by implementing a custom function that:
1. Makes a specialized HTTP request to a specific website endpoint
2. Processes the response
3. Updates the headers/cookies for that site in the local Maigret database
Since activation only triggers after encountering specific errors, a retry (or another Maigret run) is needed to obtain a valid response with the updated authentication.
The activation mechanism is enabled by default and cannot currently be disabled.
See :ref:`activation-mechanism` in the Development section for more details.
.. _extracting-information-from-pages: .. _extracting-information-from-pages:
Extraction of information from account pages Extractiion of information from account pages
-------------------------------------------- ---------------------------------------------
Maigret can parse URLs and content of web pages by URLs to extract info about account owner and other meta information. Maigret can parse URLs and content of web pages by URLs to extract info about account owner and other meta information.
+33 -37
View File
@@ -7,43 +7,8 @@ Maigret can be installed using pip, Docker, or simply can be launched from the c
Also, it is available online via `official Telegram bot <https://t.me/osint_maigret_bot>`_, Also, it is available online via `official Telegram bot <https://t.me/osint_maigret_bot>`_,
source code of a bot is `available on GitHub <https://github.com/soxoj/maigret-tg-bot>`_. source code of a bot is `available on GitHub <https://github.com/soxoj/maigret-tg-bot>`_.
Windows Standalone EXE-binaries Package installing
------------------------------- ------------------
Standalone EXE-binaries for Windows are located in the `Releases section <https://github.com/soxoj/maigret/releases>`_ of the GitHub repository.
Currently, a new binary is created automatically after each commit to the **main** and **dev** branches.
Video guide on how to run it: https://youtu.be/qIgwTZOmMmM.
Cloud Shells and Jupyter notebooks
----------------------------------
If you don't want to install Maigret locally, you can use cloud shells and Jupyter notebooks.
Press one of the buttons below and follow the instructions to launch it in your browser.
.. image:: https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md
:alt: Open in Cloud Shell
.. image:: https://replit.com/badge/github/soxoj/maigret
:target: https://repl.it/github/soxoj/maigret
:alt: Run on Replit
:height: 50
.. image:: https://colab.research.google.com/assets/colab-badge.svg
:target: https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb
:alt: Open In Colab
:height: 45
.. image:: https://mybinder.org/badge_logo.svg
:target: https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD
:alt: Open In Binder
:height: 45
Local installation from PyPi
----------------------------
Please note that the sites database in the PyPI package may be outdated. Please note that the sites database in the PyPI package may be outdated.
If you encounter frequent false positive results, we recommend installing the latest development version from GitHub instead. If you encounter frequent false positive results, we recommend installing the latest development version from GitHub instead.
@@ -77,6 +42,37 @@ Development version (GitHub)
pip3 install poetry pip3 install poetry
poetry run maigret poetry run maigret
Cloud shells and Jupyter notebooks
----------------------------------
If you don't want to install Maigret locally, you can use cloud shells and Jupyter notebooks.
.. image:: https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md
:alt: Open in Cloud Shell
.. image:: https://replit.com/badge/github/soxoj/maigret
:target: https://repl.it/github/soxoj/maigret
:alt: Run on Replit
:height: 50
.. image:: https://colab.research.google.com/assets/colab-badge.svg
:target: https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb
:alt: Open In Colab
:height: 45
.. image:: https://mybinder.org/badge_logo.svg
:target: https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD
:alt: Open In Binder
:height: 45
Windows standalone EXE-binaries
-------------------------------
Standalone EXE-binaries for Windows are located in the `Releases section <https://github.com/soxoj/maigret/releases>`_ of the GitHub repository.
Currently, the new binary is created automatically after each commit to the main branch, but is not deployed to the Releases section automatically.
Docker Docker
------ ------
Binary image file changed, contents not shown (before: 234 KiB, after: 375 KiB).
+1 -11
View File
@@ -3,16 +3,6 @@
Usage examples Usage examples
============== ==============
You can use Maigret as:
- a command line tool: initial and a default mode
- a `web interface <#web-interface>`_: view the graph with results and download all report formats on a single page
- a library: integrate Maigret into your own project
Use Cases
---------
1. Search for accounts with username ``machine42`` on the top 500 sites (by default, according to Alexa rank) from the Maigret DB. 1. Search for accounts with username ``machine42`` on the top 500 sites (by default, according to Alexa rank) from the Maigret DB.
.. code-block:: console .. code-block:: console
@@ -64,7 +54,7 @@ or
.. code-block:: console .. code-block:: console
maigret machine42 --tags us,jp maigret machine42 --tags en,jp
7. Search for accounts with username ``machine42`` only on sites related to software development. 7. Search for accounts with username ``machine42`` only on sites related to software development.
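A likely invocation for this case (assuming ``coding`` is the tag used for software development sites):

.. code-block:: console

    maigret machine42 --tags coding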
+1 -1
View File
@@ -1,3 +1,3 @@
"""Maigret version file""" """Maigret version file"""
__version__ = '0.5.0' __version__ = '0.4.4'
-37
View File
@@ -1,4 +1,3 @@
import json
from http.cookiejar import MozillaCookieJar from http.cookiejar import MozillaCookieJar
from http.cookies import Morsel from http.cookies import Morsel
@@ -26,7 +25,6 @@ class ParsingActivator:
import requests import requests
r = requests.get(site.activation["url"], headers=headers) r = requests.get(site.activation["url"], headers=headers)
logger.debug(f"Vimeo viewer activation: {json.dumps(r.json(), indent=4)}")
jwt_token = r.json()["jwt"] jwt_token = r.json()["jwt"]
site.headers["Authorization"] = "jwt " + jwt_token site.headers["Authorization"] = "jwt " + jwt_token
@@ -41,41 +39,6 @@ class ParsingActivator:
bearer_token = r.json()["accessToken"] bearer_token = r.json()["accessToken"]
site.headers["authorization"] = f"Bearer {bearer_token}" site.headers["authorization"] = f"Bearer {bearer_token}"
@staticmethod
def weibo(site, logger):
headers = dict(site.headers)
import requests
session = requests.Session()
# 1 stage: get the redirect URL
r = session.get(
"https://weibo.com/clairekuo", headers=headers, allow_redirects=False
)
logger.debug(
f"1 stage: {'success' if r.status_code == 302 else 'no 302 redirect, fail!'}"
)
location = r.headers.get("Location")
# 2 stage: go to passport visitor page
headers["Referer"] = location
r = session.get(location, headers=headers)
logger.debug(
f"2 stage: {'success' if r.status_code == 200 else 'no 200 response, fail!'}"
)
# 3 stage: gen visitor token
headers["Referer"] = location
r = session.post(
"https://passport.weibo.com/visitor/genvisitor2",
headers=headers,
data={'cb': 'visitor_gray_callback', 'tid': '', 'from': 'weibo'},
)
cookies = r.headers.get('set-cookie')
logger.debug(
f"3 stage: {'success' if r.status_code == 200 and cookies else 'no 200 response and cookies, fail!'}"
)
site.headers["Cookie"] = cookies
def import_aiohttp_cookies(cookiestxt_filename): def import_aiohttp_cookies(cookiestxt_filename):
cookies_obj = MozillaCookieJar(cookiestxt_filename) cookies_obj = MozillaCookieJar(cookiestxt_filename)
+80 -114
View File
@@ -16,7 +16,6 @@ from aiohttp import ClientSession, TCPConnector, http_exceptions
from aiohttp.client_exceptions import ClientConnectorError, ServerDisconnectedError from aiohttp.client_exceptions import ClientConnectorError, ServerDisconnectedError
from python_socks import _errors as proxy_errors from python_socks import _errors as proxy_errors
from socid_extractor import extract from socid_extractor import extract
try: try:
from mock import Mock from mock import Mock
except ImportError: except ImportError:
@@ -26,8 +25,12 @@ except ImportError:
from . import errors from . import errors
from .activation import ParsingActivator, import_aiohttp_cookies from .activation import ParsingActivator, import_aiohttp_cookies
from .errors import CheckError from .errors import CheckError
from .executors import AsyncioQueueGeneratorExecutor
from .result import MaigretCheckResult, MaigretCheckStatus
from .executors import (
    AsyncExecutor,
    AsyncioSimpleExecutor,
    AsyncioProgressbarQueueExecutor,
)
from .result import QueryResult, QueryStatus
from .sites import MaigretDatabase, MaigretSite from .sites import MaigretDatabase, MaigretSite
from .types import QueryOptions, QueryResultWrapper from .types import QueryOptions, QueryResultWrapper
from .utils import ascii_data_display, get_random_user_agent from .utils import ascii_data_display, get_random_user_agent
@@ -74,9 +77,7 @@ class SimpleAiohttpChecker(CheckerBase):
async def close(self): async def close(self):
pass pass
async def _make_request( async def _make_request(self, session, url, headers, allow_redirects, timeout, method, logger) -> Tuple[str, int, Optional[CheckError]]:
self, session, url, headers, allow_redirects, timeout, method, logger
) -> Tuple[str, int, Optional[CheckError]]:
try: try:
request_method = session.get if method == 'get' else session.head request_method = session.get if method == 'get' else session.head
async with request_method( async with request_method(
@@ -119,19 +120,13 @@ class SimpleAiohttpChecker(CheckerBase):
async def check(self) -> Tuple[str, int, Optional[CheckError]]: async def check(self) -> Tuple[str, int, Optional[CheckError]]:
from aiohttp_socks import ProxyConnector from aiohttp_socks import ProxyConnector
connector = ProxyConnector.from_url(self.proxy) if self.proxy else TCPConnector(ssl=False)
connector = (
ProxyConnector.from_url(self.proxy)
if self.proxy
else TCPConnector(ssl=False)
)
connector.verify_ssl = False connector.verify_ssl = False
async with ClientSession( async with ClientSession(
connector=connector, connector=connector,
trust_env=True, trust_env=True,
# TODO: tests cookie_jar=self.cookie_jar.copy() if self.cookie_jar else None,
cookie_jar=self.cookie_jar if self.cookie_jar else None,
) as session: ) as session:
html_text, status_code, error = await self._make_request( html_text, status_code, error = await self._make_request(
session, session,
@@ -140,7 +135,7 @@ class SimpleAiohttpChecker(CheckerBase):
self.allow_redirects, self.allow_redirects,
self.timeout, self.timeout,
self.method, self.method,
self.logger, self.logger
) )
if error and str(error) == "Invalid proxy response": if error and str(error) == "Invalid proxy response":
@@ -281,16 +276,14 @@ def process_site_result(
) )
if site.activation and html_text and is_need_activation: if site.activation and html_text and is_need_activation:
logger.debug(f"Activation for {site.name}")
method = site.activation["method"] method = site.activation["method"]
try: try:
activate_fun = getattr(ParsingActivator(), method) activate_fun = getattr(ParsingActivator(), method)
# TODO: async call # TODO: async call
activate_fun(site, logger) activate_fun(site, logger)
except AttributeError as e: except AttributeError:
logger.warning( logger.warning(
f"Activation method {method} for site {site.name} not found!", f"Activation method {method} for site {site.name} not found!"
exc_info=True,
) )
except Exception as e: except Exception as e:
logger.warning( logger.warning(
@@ -318,7 +311,7 @@ def process_site_result(
break break
def build_result(status, **kwargs): def build_result(status, **kwargs):
return MaigretCheckResult( return QueryResult(
username, username,
site_name, site_name,
url, url,
@@ -330,11 +323,11 @@ def process_site_result(
if check_error: if check_error:
logger.warning(check_error) logger.warning(check_error)
result = MaigretCheckResult( result = QueryResult(
username, username,
site_name, site_name,
url, url,
MaigretCheckStatus.UNKNOWN, QueryStatus.UNKNOWN,
query_time=response_time, query_time=response_time,
error=check_error, error=check_error,
context=str(CheckError), context=str(CheckError),
@@ -346,15 +339,15 @@ def process_site_result(
[(absence_flag in html_text) for absence_flag in site.absence_strs] [(absence_flag in html_text) for absence_flag in site.absence_strs]
) )
if not is_absence_detected and is_presense_detected: if not is_absence_detected and is_presense_detected:
result = build_result(MaigretCheckStatus.CLAIMED) result = build_result(QueryStatus.CLAIMED)
else: else:
result = build_result(MaigretCheckStatus.AVAILABLE) result = build_result(QueryStatus.AVAILABLE)
elif check_type in "status_code": elif check_type in "status_code":
# Checks if the status code of the response is 2XX # Checks if the status code of the response is 2XX
if 200 <= status_code < 300: if 200 <= status_code < 300:
result = build_result(MaigretCheckStatus.CLAIMED) result = build_result(QueryStatus.CLAIMED)
else: else:
result = build_result(MaigretCheckStatus.AVAILABLE) result = build_result(QueryStatus.AVAILABLE)
elif check_type == "response_url": elif check_type == "response_url":
# For this detection method, we have turned off the redirect. # For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always # So, there is no need to check the response URL: it will always
@@ -362,9 +355,9 @@ def process_site_result(
# code indicates that the request was successful (i.e. no 404, or # code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect). # forward to some odd redirect).
if 200 <= status_code < 300 and is_presense_detected: if 200 <= status_code < 300 and is_presense_detected:
result = build_result(MaigretCheckStatus.CLAIMED) result = build_result(QueryStatus.CLAIMED)
else: else:
result = build_result(MaigretCheckStatus.AVAILABLE) result = build_result(QueryStatus.AVAILABLE)
else: else:
# It should be impossible to ever get here... # It should be impossible to ever get here...
raise ValueError( raise ValueError(
@@ -373,13 +366,33 @@ def process_site_result(
extracted_ids_data = {} extracted_ids_data = {}
if is_parsing_enabled and result.status == MaigretCheckStatus.CLAIMED: if is_parsing_enabled and result.status == QueryStatus.CLAIMED:
        extracted_ids_data = extract_ids_data(html_text, logger, site)

    if extracted_ids_data:
        new_usernames = parse_usernames(extracted_ids_data, logger)
        results_info = update_results_info(
            results_info, extracted_ids_data, new_usernames
        )

        try:
            extracted_ids_data = extract(html_text)
        except Exception as e:
            logger.warning(f"Error while parsing {site.name}: {e}", exc_info=True)

    if extracted_ids_data:
        new_usernames = {}
        for k, v in extracted_ids_data.items():
            if "username" in k and not "usernames" in k:
                new_usernames[v] = "username"
            elif "usernames" in k:
                try:
                    tree = ast.literal_eval(v)
                    if type(tree) == list:
                        for n in tree:
                            new_usernames[n] = "username"
                except Exception as e:
                    logger.warning(e)
            if k in SUPPORTED_IDS:
                new_usernames[v] = k
        results_info["ids_usernames"] = new_usernames
        links = ascii_data_display(extracted_ids_data.get("links", "[]"))
        if "website" in extracted_ids_data:
            links.append(extracted_ids_data["website"])
        results_info["ids_links"] = links
result.ids_data = extracted_ids_data result.ids_data = extracted_ids_data
# Save status of request # Save status of request
@@ -438,29 +451,29 @@ def make_site_result(
# site check is disabled # site check is disabled
if site.disabled and not options['forced']: if site.disabled and not options['forced']:
logger.debug(f"Site {site.name} is disabled, skipping...") logger.debug(f"Site {site.name} is disabled, skipping...")
results_site["status"] = MaigretCheckResult( results_site["status"] = QueryResult(
username, username,
site.name, site.name,
url, url,
MaigretCheckStatus.ILLEGAL, QueryStatus.ILLEGAL,
error=CheckError("Check is disabled"), error=CheckError("Check is disabled"),
) )
# current username type could not be applied # current username type could not be applied
elif site.type != options["id_type"]: elif site.type != options["id_type"]:
results_site["status"] = MaigretCheckResult( results_site["status"] = QueryResult(
username, username,
site.name, site.name,
url, url,
MaigretCheckStatus.ILLEGAL, QueryStatus.ILLEGAL,
error=CheckError('Unsupported identifier type', f'Want "{site.type}"'), error=CheckError('Unsupported identifier type', f'Want "{site.type}"'),
) )
# username is not allowed. # username is not allowed.
elif site.regex_check and re.search(site.regex_check, username) is None: elif site.regex_check and re.search(site.regex_check, username) is None:
results_site["status"] = MaigretCheckResult( results_site["status"] = QueryResult(
username, username,
site.name, site.name,
url, url,
MaigretCheckStatus.ILLEGAL, QueryStatus.ILLEGAL,
error=CheckError( error=CheckError(
'Unsupported username format', f'Want "{site.regex_check}"' 'Unsupported username format', f'Want "{site.regex_check}"'
), ),
@@ -533,7 +546,7 @@ async def check_site_for_username(
) )
# future = default_result.get("future") # future = default_result.get("future")
# if not future: # if not future:
# return site.name, default_result # return site.name, default_result
checker = default_result.get("checker") checker = default_result.get("checker")
if not checker: if not checker:
@@ -666,13 +679,17 @@ async def maigret(
await debug_ip_request(clearweb_checker, logger) await debug_ip_request(clearweb_checker, logger)
# setup parallel executor # setup parallel executor
    executor = AsyncioQueueGeneratorExecutor(
        logger=logger,
        in_parallel=max_connections,
        timeout=timeout + 0.5,
        *args,
        **kwargs,
    )
    executor: Optional[AsyncExecutor] = None
    if no_progressbar:
        executor = AsyncioSimpleExecutor(logger=logger)
    else:
        executor = AsyncioProgressbarQueueExecutor(
            logger=logger,
            in_parallel=max_connections,
            timeout=timeout + 0.5,
            *args,
            **kwargs,
        )
# make options objects for all the requests # make options objects for all the requests
options: QueryOptions = {} options: QueryOptions = {}
@@ -702,11 +719,11 @@ async def maigret(
continue continue
default_result: QueryResultWrapper = { default_result: QueryResultWrapper = {
'site': site, 'site': site,
'status': MaigretCheckResult( 'status': QueryResult(
username, username,
sitename, sitename,
'', '',
MaigretCheckStatus.UNKNOWN, QueryStatus.UNKNOWN,
error=CheckError('Request failed'), error=CheckError('Request failed'),
), ),
} }
@@ -719,17 +736,13 @@ async def maigret(
}, },
) )
        cur_results = []
        with alive_bar(
            len(tasks_dict), title="Searching", force_tty=True, disable=no_progressbar
        ) as progress:
            async for result in executor.run(tasks_dict.values()):
                cur_results.append(result)
                progress()

        cur_results = await executor.run(tasks_dict.values())

        # wait for executor timeout errors
        await asyncio.sleep(1)
all_results.update(cur_results) all_results.update(cur_results)
# rerun for failed sites
sites = get_failed_sites(dict(cur_results)) sites = get_failed_sites(dict(cur_results))
attempts -= 1 attempts -= 1
@@ -786,16 +799,14 @@ async def site_self_check(
proxy=None, proxy=None,
tor_proxy=None, tor_proxy=None,
i2p_proxy=None, i2p_proxy=None,
skip_errors=False,
cookies=None,
): ):
changes = { changes = {
"disabled": False, "disabled": False,
} }
check_data = [ check_data = [
(site.username_claimed, MaigretCheckStatus.CLAIMED), (site.username_claimed, QueryStatus.CLAIMED),
(site.username_unclaimed, MaigretCheckStatus.AVAILABLE), (site.username_unclaimed, QueryStatus.AVAILABLE),
] ]
logger.info(f"Checking {site.name}...") logger.info(f"Checking {site.name}...")
@@ -814,7 +825,6 @@ async def site_self_check(
proxy=proxy, proxy=proxy,
tor_proxy=tor_proxy, tor_proxy=tor_proxy,
i2p_proxy=i2p_proxy, i2p_proxy=i2p_proxy,
cookies=cookies,
) )
# don't disable entries with other ids types # don't disable entries with other ids types
@@ -834,21 +844,16 @@ async def site_self_check(
site_status = result.status site_status = result.status
if site_status != status: if site_status != status:
if site_status == MaigretCheckStatus.UNKNOWN: if site_status == QueryStatus.UNKNOWN:
msgs = site.absence_strs msgs = site.absence_strs
etype = site.check_type etype = site.check_type
logger.warning( logger.warning(
f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}" f"Error while searching {username} in {site.name}: {result.context}, {msgs}, type {etype}"
) )
# don't disable sites after an error:
# the site could be available but return an error for this particular check,
# e.g. many sites protected by Cloudflare are generally available
if skip_errors:
pass
# don't disable in case of available username # don't disable in case of available username
elif status == MaigretCheckStatus.CLAIMED: if status == QueryStatus.CLAIMED:
changes["disabled"] = True changes["disabled"] = True
elif status == MaigretCheckStatus.CLAIMED: elif status == QueryStatus.CLAIMED:
logger.warning( logger.warning(
f"Not found `{username}` in {site.name}, must be claimed" f"Not found `{username}` in {site.name}, must be claimed"
) )
@@ -863,7 +868,7 @@ async def site_self_check(
if changes["disabled"] != site.disabled: if changes["disabled"] != site.disabled:
site.disabled = changes["disabled"] site.disabled = changes["disabled"]
logger.info(f"Switching property 'disabled' for {site.name} to {site.disabled}") logger.info(f"Switching disabled status of {site.name} to {site.disabled}")
db.update_site(site) db.update_site(site)
if not silent: if not silent:
action = "Disabled" if site.disabled else "Enabled" action = "Disabled" if site.disabled else "Enabled"
@@ -894,14 +899,12 @@ async def self_check(
def disabled_count(lst): def disabled_count(lst):
return len(list(filter(lambda x: x.disabled, lst))) return len(list(filter(lambda x: x.disabled, lst)))
unchecked_old_count = len( unchecked_old_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
[site for site in all_sites.values() if "unchecked" in site.tags]
)
disabled_old_count = disabled_count(all_sites.values()) disabled_old_count = disabled_count(all_sites.values())
for _, site in all_sites.items(): for _, site in all_sites.items():
check_coro = site_self_check( check_coro = site_self_check(
site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy, skip_errors=True site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy
) )
future = asyncio.ensure_future(check_coro) future = asyncio.ensure_future(check_coro)
tasks.append(future) tasks.append(future)
@@ -912,9 +915,7 @@ async def self_check(
await f await f
progress() # Update the progress bar progress() # Update the progress bar
unchecked_new_count = len( unchecked_new_count = len([site for site in all_sites.values() if "unchecked" in site.tags])
[site for site in all_sites.values() if "unchecked" in site.tags]
)
disabled_new_count = disabled_count(all_sites.values()) disabled_new_count = disabled_count(all_sites.values())
total_disabled = disabled_new_count - disabled_old_count total_disabled = disabled_new_count - disabled_old_count
@@ -935,38 +936,3 @@ async def self_check(
print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}") print(f"Unchecked sites verified: {unchecked_old_count - unchecked_new_count}")
return total_disabled != 0 or unchecked_new_count != unchecked_old_count return total_disabled != 0 or unchecked_new_count != unchecked_old_count
def extract_ids_data(html_text, logger, site) -> Dict:
try:
return extract(html_text)
except Exception as e:
logger.warning(f"Error while parsing {site.name}: {e}", exc_info=True)
return {}
def parse_usernames(extracted_ids_data, logger) -> Dict:
new_usernames = {}
for k, v in extracted_ids_data.items():
if "username" in k and not "usernames" in k:
new_usernames[v] = "username"
elif "usernames" in k:
try:
tree = ast.literal_eval(v)
if type(tree) == list:
for n in tree:
new_usernames[n] = "username"
except Exception as e:
logger.warning(e)
if k in SUPPORTED_IDS:
new_usernames[v] = k
return new_usernames
def update_results_info(results_info, extracted_ids_data, new_usernames):
results_info["ids_usernames"] = new_usernames
links = ascii_data_display(extracted_ids_data.get("links", "[]"))
if "website" in extracted_ids_data:
links.append(extracted_ids_data["website"])
results_info["ids_links"] = links
return results_info
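# A rough illustration of how these helpers compose (the sample payload and
# logger are illustrative, not from the repository):
#
#     # payload as returned by socid_extractor's extract()
#     data = {"username": "machine42", "links": "['https://example.com']"}
#     new_usernames = parse_usernames(data, logger)  # {'machine42': 'username'}
#     results_info = update_results_info({}, data, new_usernames)
#     # results_info now contains 'ids_usernames' and 'ids_links'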
+7 -47
View File
@@ -1,6 +1,6 @@
from typing import Dict, List, Any, Tuple from typing import Dict, List, Any
from .result import MaigretCheckResult from .result import QueryResult
from .types import QueryResultWrapper from .types import QueryResultWrapper
@@ -58,10 +58,12 @@ COMMON_ERRORS = {
'Сайт заблокирован хостинг-провайдером': CheckError( 'Сайт заблокирован хостинг-провайдером': CheckError(
'Site-specific', 'Site is disabled (Beget)' 'Site-specific', 'Site is disabled (Beget)'
), ),
'Generated by cloudfront (CloudFront)': CheckError('Request blocked', 'Cloudflare'), 'Generated by cloudfront (CloudFront)': CheckError(
'Request blocked', 'Cloudflare'
),
'/cdn-cgi/challenge-platform/h/b/orchestrate/chl_page': CheckError( '/cdn-cgi/challenge-platform/h/b/orchestrate/chl_page': CheckError(
'Just a moment: bot redirect challenge', 'Cloudflare' 'Just a moment: bot redirect challenge', 'Cloudflare'
), )
} }
ERRORS_TYPES = { ERRORS_TYPES = {
@@ -114,7 +116,7 @@ def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]:
errors_counts: Dict[str, int] = {} errors_counts: Dict[str, int] = {}
for r in search_res.values(): for r in search_res.values():
if r and isinstance(r, dict) and r.get('status'): if r and isinstance(r, dict) and r.get('status'):
if not isinstance(r['status'], MaigretCheckResult): if not isinstance(r['status'], QueryResult):
continue continue
err = r['status'].error err = r['status'].error
@@ -133,45 +135,3 @@ def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]:
) )
return counts return counts
def notify_about_errors(
search_results: QueryResultWrapper, query_notify, show_statistics=False
) -> List[Tuple]:
"""
Prepare error notifications in search results, text + symbol,
to be displayed by notify object.
Example:
[
("Too many errors of type "timeout" (50.0%)", "!")
("Verbose error statistics:", "-")
]
"""
results = []
errs = extract_and_group(search_results)
was_errs_displayed = False
for e in errs:
if not is_important(e):
continue
text = f'Too many errors of type "{e["err"]}" ({round(e["perc"],2)}%)'
solution = solution_of(e['err'])
if solution:
text = '. '.join([text, solution.capitalize()])
results.append((text, '!'))
was_errs_displayed = True
if show_statistics:
results.append(('Verbose error statistics:', '-'))
for e in errs:
text = f'{e["err"]}: {round(e["perc"],2)}%'
results.append((text, '!'))
if was_errs_displayed:
results.append(
('You can see detailed site check errors with a flag `--print-errors`', '-')
)
return results
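# Intended consumption of these notifications, mirroring the call site
# in maigret.py in the same version:
#
#     errs = notify_about_errors(results, query_notify, show_statistics=True)
#     for text, symbol in errs:
#         query_notify.warning(text, symbol)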
+3 -74
View File
@@ -1,14 +1,13 @@
import asyncio import asyncio
import sys import sys
import time import time
from typing import Any, Iterable, List, Callable from typing import Any, Iterable, List
import alive_progress import alive_progress
from alive_progress import alive_bar from alive_progress import alive_bar
from .types import QueryDraft from .types import QueryDraft
def create_task_func(): def create_task_func():
if sys.version_info.minor > 6: if sys.version_info.minor > 6:
create_asyncio_task = asyncio.create_task create_asyncio_task = asyncio.create_task
@@ -19,7 +18,6 @@ def create_task_func():
class AsyncExecutor: class AsyncExecutor:
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.logger = kwargs['logger'] self.logger = kwargs['logger']
@@ -35,7 +33,6 @@ class AsyncExecutor:
class AsyncioSimpleExecutor(AsyncExecutor): class AsyncioSimpleExecutor(AsyncExecutor):
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 100)) self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 100))
@@ -50,7 +47,6 @@ class AsyncioSimpleExecutor(AsyncExecutor):
class AsyncioProgressbarExecutor(AsyncExecutor): class AsyncioProgressbarExecutor(AsyncExecutor):
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
@@ -74,7 +70,6 @@ class AsyncioProgressbarExecutor(AsyncExecutor):
class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor): class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor):
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1)) self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1))
@@ -161,9 +156,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
# Initialize the progress bar # Initialize the progress bar
if self.progress_func: if self.progress_func:
with self.progress_func( with self.progress_func(len(queries_list), title="Searching", force_tty=True) as bar:
len(queries_list), title="Searching", force_tty=True
) as bar:
self.progress = bar # Assign alive_bar's callable to self.progress self.progress = bar # Assign alive_bar's callable to self.progress
# Add tasks to the queue # Add tasks to the queue
@@ -177,68 +170,4 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
for w in workers: for w in workers:
w.cancel() w.cancel()
return self.results return self.results
class AsyncioQueueGeneratorExecutor:
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs):
self.workers_count = kwargs.get('in_parallel', 10)
self.queue = asyncio.Queue()
self.timeout = kwargs.get('timeout')
self.logger = kwargs['logger']
self._results = asyncio.Queue()
self._stop_signal = object()
async def worker(self):
"""Process tasks from the queue and put results into the results queue."""
while True:
task = await self.queue.get()
if task is self._stop_signal:
self.queue.task_done()
break
try:
f, args, kwargs = task
query_future = f(*args, **kwargs)
query_task = create_task_func()(query_future)
try:
result = await asyncio.wait_for(query_task, timeout=self.timeout)
except asyncio.TimeoutError:
result = kwargs.get('default')
await self._results.put(result)
except Exception as e:
self.logger.error(f"Error in worker: {e}")
finally:
self.queue.task_done()
async def run(self, queries: Iterable[Callable[..., Any]]):
"""Run workers to process queries in parallel."""
start_time = time.time()
# Add tasks to the queue
for t in queries:
await self.queue.put(t)
# Create workers
workers = [
asyncio.create_task(self.worker()) for _ in range(self.workers_count)
]
# Add stop signals
for _ in range(self.workers_count):
await self.queue.put(self._stop_signal)
try:
while any(w.done() is False for w in workers) or not self._results.empty():
try:
result = await asyncio.wait_for(self._results.get(), timeout=1)
yield result
except asyncio.TimeoutError:
pass
finally:
# Ensure all workers are awaited
await asyncio.gather(*workers)
self.execution_time = time.time() - start_time
self.logger.debug(f"Spent time: {self.execution_time}")
+35 -53
View File
@@ -1,14 +1,12 @@
""" """
Maigret main module Maigret main module
""" """
import ast import ast
import asyncio import asyncio
import logging import logging
import os import os
import sys import sys
import platform import platform
import re
from argparse import ArgumentParser, RawDescriptionHelpFormatter from argparse import ArgumentParser, RawDescriptionHelpFormatter
from typing import List, Tuple from typing import List, Tuple
import os.path as path import os.path as path
@@ -46,6 +44,31 @@ from .settings import Settings
from .permutator import Permute from .permutator import Permute
def notify_about_errors(search_results: QueryResultWrapper, query_notify, show_statistics=False):
errs = errors.extract_and_group(search_results)
was_errs_displayed = False
for e in errs:
if not errors.is_important(e):
continue
text = f'Too many errors of type "{e["err"]}" ({round(e["perc"],2)}%)'
solution = errors.solution_of(e['err'])
if solution:
text = '. '.join([text, solution.capitalize()])
query_notify.warning(text, '!')
was_errs_displayed = True
if show_statistics:
query_notify.warning(f'Verbose error statistics:')
for e in errs:
text = f'{e["err"]}: {round(e["perc"],2)}%'
query_notify.warning(text, '!')
if was_errs_displayed:
query_notify.warning(
'You can see detailed site check errors with a flag `--print-errors`'
)
def extract_ids_from_page(url, logger, timeout=5) -> dict: def extract_ids_from_page(url, logger, timeout=5) -> dict:
results = {} results = {}
# url, headers # url, headers
@@ -77,7 +100,7 @@ def extract_ids_from_page(url, logger, timeout=5) -> dict:
tree = ast.literal_eval(v) tree = ast.literal_eval(v)
if type(tree) == list: if type(tree) == list:
for n in tree: for n in tree:
results[n] = 'username' results[n] = 'username'
except Exception as e: except Exception as e:
logger.warning(e) logger.warning(e)
if k in SUPPORTED_IDS: if k in SUPPORTED_IDS:
@@ -324,15 +347,7 @@ def setup_arguments_parser(settings: Settings):
default=False, default=False,
help="Show database statistics (most frequent sites engines and tags).", help="Show database statistics (most frequent sites engines and tags).",
) )
modes_group.add_argument(
"--web",
metavar='PORT',
type=int,
nargs='?', # Optional PORT value
const=5000, # Default PORT if `--web` is provided without a value
default=None, # Explicitly set default to None
help="Launch the web interface on the specified port (default: 5000 if no PORT is provided).",
)
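# With the definition above, both invocations launch the web interface,
# the second one on the default port 5000:
#
#     maigret --web 8080
#     maigret --web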
output_group = parser.add_argument_group( output_group = parser.add_argument_group(
'Output options', 'Options to change verbosity and view of the console output' 'Output options', 'Options to change verbosity and view of the console output'
) )
@@ -520,9 +535,7 @@ async def main():
if args.tags: if args.tags:
args.tags = list(set(str(args.tags).split(','))) args.tags = list(set(str(args.tags).split(',')))
db_file = args.db_file \ db_file = path.join(path.dirname(path.realpath(__file__)), args.db_file)
if (args.db_file.startswith("http://") or args.db_file.startswith("https://")) \
else path.join(path.dirname(path.realpath(__file__)), args.db_file)
if args.top_sites == 0 or args.all_sites: if args.top_sites == 0 or args.all_sites:
args.top_sites = sys.maxsize args.top_sites = sys.maxsize
@@ -553,19 +566,14 @@ async def main():
is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file) is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
if is_submitted: if is_submitted:
db.save_to_file(db_file) db.save_to_file(db_file)
await submitter.close()
# Database self-checking # Database self-checking
if args.self_check: if args.self_check:
if len(site_data) == 0: if len(site_data) == 0:
query_notify.warning( query_notify.warning('No sites to self-check with the current filters! Exiting...')
'No sites to self-check with the current filters! Exiting...'
)
return return
query_notify.success( query_notify.success(f'Maigret sites database self-check started for {len(site_data)} sites...')
f'Maigret sites database self-check started for {len(site_data)} sites...'
)
is_need_update = await self_check( is_need_update = await self_check(
db, db,
site_data, site_data,
@@ -586,9 +594,7 @@ async def main():
print('Updates will be applied only for current search session.') print('Updates will be applied only for current search session.')
if args.verbose or args.debug: if args.verbose or args.debug:
query_notify.info( query_notify.info('Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)))
'Scan sessions flags stats: ' + str(db.get_scan_stats(site_data))
)
# Database statistics # Database statistics
if args.stats: if args.stats:
@@ -602,27 +608,15 @@ async def main():
# Define one report filename template # Define one report filename template
report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}') report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
# Web interface
if args.web is not None:
from maigret.web.app import app
app.config["MAIGRET_DB_FILE"] = db_file
port = (
args.web if args.web else 5000
) # args.web is either the specified port or 5000 by default
app.run(port=port)
return
if usernames == {}: if usernames == {}:
# magic params to exit after init # magic params to exit after init
query_notify.warning('No usernames to check, exiting.') query_notify.warning('No usernames to check, exiting.')
sys.exit(0) sys.exit(0)
if len(usernames) > 1 and args.permute and args.id_type == 'username': if len(usernames) > 1 and args.permute and args.id_type == 'username':
query_notify.warning( query_notify.warning(
f"{len(usernames)} permutations from {original_usernames} to check..." f"{len(usernames)} permutations from {original_usernames} to check..." +
+ get_dict_ascii_tree(usernames, prepend="\t") get_dict_ascii_tree(usernames, prepend="\t")
) )
if not site_data: if not site_data:
@@ -688,11 +682,7 @@ async def main():
check_domains=args.with_domains, check_domains=args.with_domains,
) )
errs = errors.notify_about_errors( notify_about_errors(results, query_notify, show_statistics=args.verbose)
results, query_notify, show_statistics=args.verbose
)
for e in errs:
query_notify.warning(*e)
if args.reports_sorting == "data": if args.reports_sorting == "data":
results = sort_report_by_data_points(results) results = sort_report_by_data_points(results)
@@ -702,30 +692,25 @@ async def main():
# TODO: tests # TODO: tests
if recursive_search_enabled: if recursive_search_enabled:
extracted_ids = extract_ids_from_results(results, db) extracted_ids = extract_ids_from_results(results, db)
query_notify.warning(f'Extracted IDs: {extracted_ids}')
usernames.update(extracted_ids) usernames.update(extracted_ids)
# reporting for a one username # reporting for a one username
if args.xmind: if args.xmind:
username = username.replace('/', '_')
filename = report_filepath_tpl.format(username=username, postfix='.xmind') filename = report_filepath_tpl.format(username=username, postfix='.xmind')
save_xmind_report(filename, username, results) save_xmind_report(filename, username, results)
query_notify.warning(f'XMind report for {username} saved in {filename}') query_notify.warning(f'XMind report for {username} saved in {filename}')
if args.csv: if args.csv:
username = username.replace('/', '_')
filename = report_filepath_tpl.format(username=username, postfix='.csv') filename = report_filepath_tpl.format(username=username, postfix='.csv')
save_csv_report(filename, username, results) save_csv_report(filename, username, results)
query_notify.warning(f'CSV report for {username} saved in {filename}') query_notify.warning(f'CSV report for {username} saved in {filename}')
if args.txt: if args.txt:
username = username.replace('/', '_')
filename = report_filepath_tpl.format(username=username, postfix='.txt') filename = report_filepath_tpl.format(username=username, postfix='.txt')
save_txt_report(filename, username, results) save_txt_report(filename, username, results)
query_notify.warning(f'TXT report for {username} saved in {filename}') query_notify.warning(f'TXT report for {username} saved in {filename}')
if args.json: if args.json:
username = username.replace('/', '_')
filename = report_filepath_tpl.format( filename = report_filepath_tpl.format(
username=username, postfix=f'_{args.json}.json' username=username, postfix=f'_{args.json}.json'
) )
@@ -743,7 +728,6 @@ async def main():
username = report_context['username'] username = report_context['username']
if args.html: if args.html:
username = username.replace('/', '_')
filename = report_filepath_tpl.format( filename = report_filepath_tpl.format(
username=username, postfix='_plain.html' username=username, postfix='_plain.html'
) )
@@ -751,13 +735,11 @@ async def main():
query_notify.warning(f'HTML report on all usernames saved in {filename}') query_notify.warning(f'HTML report on all usernames saved in {filename}')
if args.pdf: if args.pdf:
username = username.replace('/', '_')
filename = report_filepath_tpl.format(username=username, postfix='.pdf') filename = report_filepath_tpl.format(username=username, postfix='.pdf')
save_pdf_report(filename, report_context) save_pdf_report(filename, report_context)
query_notify.warning(f'PDF report on all usernames saved in {filename}') query_notify.warning(f'PDF report on all usernames saved in {filename}')
if args.graph: if args.graph:
username = username.replace('/', '_')
filename = report_filepath_tpl.format( filename = report_filepath_tpl.format(
username=username, postfix='_graph.html' username=username, postfix='_graph.html'
) )
+5 -6
View File
@@ -3,12 +3,11 @@
This module defines the objects for notifying the caller about the This module defines the objects for notifying the caller about the
results of queries. results of queries.
""" """
import sys import sys
from colorama import Fore, Style, init from colorama import Fore, Style, init
from .result import MaigretCheckStatus from .result import QueryStatus
from .utils import get_dict_ascii_tree from .utils import get_dict_ascii_tree
@@ -245,7 +244,7 @@ class QueryNotifyPrint(QueryNotify):
ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ") ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ")
# Output to the terminal is desired. # Output to the terminal is desired.
if result.status == MaigretCheckStatus.CLAIMED: if result.status == QueryStatus.CLAIMED:
color = Fore.BLUE if is_similar else Fore.GREEN color = Fore.BLUE if is_similar else Fore.GREEN
status = "?" if is_similar else "+" status = "?" if is_similar else "+"
notify = self.make_terminal_notify( notify = self.make_terminal_notify(
@@ -255,7 +254,7 @@ class QueryNotifyPrint(QueryNotify):
color, color,
result.site_url_user + ids_data_text, result.site_url_user + ids_data_text,
) )
elif result.status == MaigretCheckStatus.AVAILABLE: elif result.status == QueryStatus.AVAILABLE:
if not self.print_found_only: if not self.print_found_only:
notify = self.make_terminal_notify( notify = self.make_terminal_notify(
"-", "-",
@@ -264,7 +263,7 @@ class QueryNotifyPrint(QueryNotify):
Fore.YELLOW, Fore.YELLOW,
"Not found!" + ids_data_text, "Not found!" + ids_data_text,
) )
elif result.status == MaigretCheckStatus.UNKNOWN: elif result.status == QueryStatus.UNKNOWN:
if not self.skip_check_errors: if not self.skip_check_errors:
notify = self.make_terminal_notify( notify = self.make_terminal_notify(
"?", "?",
@@ -273,7 +272,7 @@ class QueryNotifyPrint(QueryNotify):
Fore.RED, Fore.RED,
str(self.result.error) + ids_data_text, str(self.result.error) + ids_data_text,
) )
elif result.status == MaigretCheckStatus.ILLEGAL: elif result.status == QueryStatus.ILLEGAL:
if not self.print_found_only: if not self.print_found_only:
text = "Illegal Username Format For This Site!" text = "Illegal Username Format For This Site!"
notify = self.make_terminal_notify( notify = self.make_terminal_notify(
+75 -110
View File
@@ -13,7 +13,7 @@ from dateutil.parser import parse as parse_datetime_str
from jinja2 import Template from jinja2 import Template
from .checking import SUPPORTED_IDS from .checking import SUPPORTED_IDS
from .result import MaigretCheckStatus from .result import QueryStatus
from .sites import MaigretDatabase from .sites import MaigretDatabase
from .utils import is_country_tag, CaseConverter, enrich_link_str from .utils import is_country_tag, CaseConverter, enrich_link_str
@@ -98,20 +98,21 @@ class MaigretGraph:
def __init__(self, graph): def __init__(self, graph):
self.G = graph self.G = graph
def add_node(self, key, value, color=None): def add_node(self, key, value):
node_name = f'{key}: {value}' node_name = f'{key}: {value}'
params = dict(self.other_params) params = self.other_params
if key in SUPPORTED_IDS: if key in SUPPORTED_IDS:
params = dict(self.username_params) params = self.username_params
elif value.startswith('http'): elif value.startswith('http'):
params = dict(self.site_params) params = self.site_params
        params['title'] = node_name
        if color:
            params['color'] = color
        self.G.add_node(node_name, **params)

        self.G.add_node(node_name, title=node_name, **params)
        if value != value.lower():
            normalized_node_name = self.add_node(key, value.lower())
            self.link(node_name, normalized_node_name)
return node_name return node_name
def link(self, node1_name, node2_name): def link(self, node1_name, node2_name):
@@ -119,126 +120,94 @@ class MaigretGraph:
def save_graph_report(filename: str, username_results: list, db: MaigretDatabase): def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
# moved here to speed up the launch of Maigret
import networkx as nx import networkx as nx
G = nx.Graph() G = nx.Graph()
graph = MaigretGraph(G) graph = MaigretGraph(G)
base_site_nodes = {}
site_account_nodes = {}
processed_values = {} # Track processed values to avoid duplicates
for username, id_type, results in username_results: for username, id_type, results in username_results:
        # Add username node, using normalized version directly if different
        norm_username = username.lower()
        username_node_name = graph.add_node(id_type, norm_username)

        for website_name, dictionary in results.items():
            if not dictionary or dictionary.get("is_similar"):
                continue

            status = dictionary.get("status")
            if not status or status.status != MaigretCheckStatus.CLAIMED:
                continue

            # base site node
            site_base_url = website_name
            if site_base_url not in base_site_nodes:
                base_site_nodes[site_base_url] = graph.add_node(
                    'site', site_base_url, color='#28a745'
                )  # Green color
            site_base_node_name = base_site_nodes[site_base_url]

            # account node
            account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}')
            account_node_id = f"{site_base_url}: {account_url}"
            if account_node_id not in site_account_nodes:
                site_account_nodes[account_node_id] = graph.add_node(
                    'account', account_url
                )
            account_node_name = site_account_nodes[account_node_id]

            # link username → account → site
            graph.link(username_node_name, account_node_name)
            graph.link(account_node_name, site_base_node_name)

        username_node_name = graph.add_node(id_type, username)

        for website_name in results:
            dictionary = results[website_name]
            # TODO: fix no site data issue
            if not dictionary:
                continue
            if dictionary.get("is_similar"):
                continue

            status = dictionary.get("status")
            if not status:  # FIXME: currently in case of timeout
                continue
            if dictionary["status"].status != QueryStatus.CLAIMED:
                continue

            site_fallback_name = dictionary.get(
                'url_user', f'{website_name}: {username.lower()}'
            )
            # site_node_name = dictionary.get('url_user', f'{website_name}: {username.lower()}')
            site_node_name = graph.add_node('site', site_fallback_name)
            graph.link(username_node_name, site_node_name)
        def process_ids(parent_node, ids):
            for k, v in ids.items():
                if (
                    k.endswith('_count')
                    or k.startswith('is_')
                    or k.endswith('_at')
                    or k in 'image'
                ):
                    continue

                # Normalize value if string
                norm_v = v.lower() if isinstance(v, str) else v
                value_key = f"{k}:{norm_v}"

                if value_key in processed_values:
                    ids_data_name = processed_values[value_key]
                else:
                    v_data = v
                    if isinstance(v, str) and v.startswith('['):
                        try:
                            v_data = ast.literal_eval(v)
                        except Exception as e:
                            logging.error(e)
                            continue

                    if isinstance(v_data, list):
                        list_node_name = graph.add_node(k, site_base_url)
                        processed_values[value_key] = list_node_name
                        for vv in v_data:
                            data_node_name = graph.add_node(vv, site_base_url)
                            graph.link(list_node_name, data_node_name)
                            add_ids = {
                                a: b for b, a in db.extract_ids_from_url(vv).items()
                            }
                            if add_ids:
                                process_ids(data_node_name, add_ids)
                        ids_data_name = list_node_name
                    else:
                        ids_data_name = graph.add_node(k, norm_v)
                        processed_values[value_key] = ids_data_name
                        if 'username' in k or k in SUPPORTED_IDS:
                            new_username_key = f"username:{norm_v}"
                            if new_username_key not in processed_values:
                                new_username_node_name = graph.add_node(
                                    'username', norm_v
                                )
                                processed_values[new_username_key] = (
                                    new_username_node_name
                                )
                                graph.link(ids_data_name, new_username_node_name)
                        add_ids = {
                            k: v for v, k in db.extract_ids_from_url(v).items()
                        }
                        if add_ids:
                            process_ids(ids_data_name, add_ids)

                graph.link(parent_node, ids_data_name)

        def process_ids(parent_node, ids):
            for k, v in ids.items():
                if k.endswith('_count') or k.startswith('is_') or k.endswith('_at'):
                    continue
                if k in 'image':
                    continue

                v_data = v
                if v.startswith('['):
                    try:
                        v_data = ast.literal_eval(v)
                    except Exception as e:
                        logging.error(e)

                # value is a list
                if isinstance(v_data, list):
                    list_node_name = graph.add_node(k, site_fallback_name)
                    for vv in v_data:
                        data_node_name = graph.add_node(vv, site_fallback_name)
                        graph.link(list_node_name, data_node_name)

                        add_ids = {
                            a: b for b, a in db.extract_ids_from_url(vv).items()
                        }
                        if add_ids:
                            process_ids(data_node_name, add_ids)
                else:
                    # value is just a string
                    # ids_data_name = f'{k}: {v}'
                    # if ids_data_name == parent_node:
                    #     continue

                    ids_data_name = graph.add_node(k, v)
                    # G.add_node(ids_data_name, size=10, title=ids_data_name, group=3)
                    graph.link(parent_node, ids_data_name)

                    # check for username
                    if 'username' in k or k in SUPPORTED_IDS:
                        new_username_node_name = graph.add_node('username', v)
                        graph.link(ids_data_name, new_username_node_name)

                    add_ids = {k: v for v, k in db.extract_ids_from_url(v).items()}
                    if add_ids:
                        process_ids(ids_data_name, add_ids)
if status.ids_data: if status.ids_data:
process_ids(account_node_name, status.ids_data) process_ids(site_node_name, status.ids_data)
    # Remove overly long nodes
    nodes_to_remove = [node for node in G.nodes if len(str(node)) > 100]
    G.remove_nodes_from(nodes_to_remove)

    # Remove site nodes with only one connection
    single_degree_sites = [
        n for n, deg in G.degree() if n.startswith("site:") and deg <= 1
    ]
    G.remove_nodes_from(single_degree_sites)

    nodes_to_remove = []
    for node in G.nodes:
        if len(str(node)) > 100:
            nodes_to_remove.append(node)
    [G.remove_node(node) for node in nodes_to_remove]
# Generate interactive visualization # moved here to speed up the launch of Maigret
from pyvis.network import Network from pyvis.network import Network
nt = Network(notebook=True, height="750px", width="100%") nt = Network(notebook=True, height="750px", width="100%")
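# For context, pyvis renders the pruned networkx graph along these lines
# (the output filename is illustrative):
#
#     nt = Network(notebook=True, height="750px", width="100%")
#     nt.from_nx(G)                  # import the graph built above
#     nt.show("report_graph.html")   # write an interactive HTML page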
@@ -326,12 +295,8 @@ def generate_report_context(username_results: list):
first_seen = created_at first_seen = created_at
else: else:
try: try:
known_time = parse_datetime_str( known_time = parse_datetime_str(first_seen, tzinfos=ADDITIONAL_TZINFO)
first_seen, tzinfos=ADDITIONAL_TZINFO new_time = parse_datetime_str(created_at, tzinfos=ADDITIONAL_TZINFO)
)
new_time = parse_datetime_str(
created_at, tzinfos=ADDITIONAL_TZINFO
)
if new_time < known_time: if new_time < known_time:
first_seen = created_at first_seen = created_at
except Exception as e: except Exception as e:
@@ -372,7 +337,7 @@ def generate_report_context(username_results: list):
new_ids.append((u, utype)) new_ids.append((u, utype))
usernames[u] = {"type": utype} usernames[u] = {"type": utype}
if status.status == MaigretCheckStatus.CLAIMED: if status.status == QueryStatus.CLAIMED:
found_accounts += 1 found_accounts += 1
dictionary["found"] = True dictionary["found"] = True
else: else:
@@ -452,7 +417,7 @@ def generate_txt_report(username: str, results: dict, file):
continue continue
if ( if (
dictionary.get("status") dictionary.get("status")
and dictionary["status"].status == MaigretCheckStatus.CLAIMED and dictionary["status"].status == QueryStatus.CLAIMED
): ):
exists_counter += 1 exists_counter += 1
file.write(dictionary["url_user"] + "\n") file.write(dictionary["url_user"] + "\n")
@@ -469,7 +434,7 @@ def generate_json_report(username: str, results: dict, file, report_type):
if not site_result or not site_result.get("status"): if not site_result or not site_result.get("status"):
continue continue
if site_result["status"].status != MaigretCheckStatus.CLAIMED: if site_result["status"].status != QueryStatus.CLAIMED:
continue continue
data = dict(site_result) data = dict(site_result)
@@ -530,7 +495,7 @@ def design_xmind_sheet(sheet, username, results):
continue continue
result_status = dictionary.get("status") result_status = dictionary.get("status")
# TODO: fix the reason # TODO: fix the reason
if not result_status or result_status.status != MaigretCheckStatus.CLAIMED: if not result_status or result_status.status != QueryStatus.CLAIMED:
continue continue
stripped_tags = list(map(lambda x: x.strip(), result_status.tags)) stripped_tags = list(map(lambda x: x.strip(), result_status.tags))
File diff suppressed because it is too large.
+1 -2
View File
@@ -53,6 +53,5 @@
"xmind_report": false, "xmind_report": false,
"graph_report": false, "graph_report": false,
"pdf_report": false, "pdf_report": false,
"html_report": false, "html_report": false
"web_interface_port": 5000
} }
+11 -10
View File
@@ -2,11 +2,10 @@
This module defines various objects for recording the results of queries. This module defines various objects for recording the results of queries.
""" """
from enum import Enum from enum import Enum
class MaigretCheckStatus(Enum): class QueryStatus(Enum):
"""Query Status Enumeration. """Query Status Enumeration.
Describes status of query about a given username. Describes status of query about a given username.
@@ -29,9 +28,10 @@ class MaigretCheckStatus(Enum):
return self.value return self.value
class MaigretCheckResult: class QueryResult:
""" """Query Result Object.
Describes result of checking a given username on a given site
Describes result of query about a given username.
""" """
def __init__( def __init__(
@@ -46,7 +46,11 @@ class MaigretCheckResult:
error=None, error=None,
tags=[], tags=[],
): ):
""" """Create Query Result Object.
Contains information about a specific method of detecting usernames on
a given type of web sites.
Keyword Arguments: Keyword Arguments:
self -- This object. self -- This object.
username -- String indicating username that query result username -- String indicating username that query result
@@ -93,10 +97,7 @@ class MaigretCheckResult:
} }
def is_found(self): def is_found(self):
return self.status == MaigretCheckStatus.CLAIMED return self.status == QueryStatus.CLAIMED
def __repr__(self):
return f"<{self.__str__()}>"
def __str__(self): def __str__(self):
"""Convert Object To String. """Convert Object To String.
-1
View File
@@ -42,7 +42,6 @@ class Settings:
pdf_report: bool pdf_report: bool
html_report: bool html_report: bool
graph_report: bool graph_report: bool
web_interface_port: int
# submit mode settings # submit mode settings
presence_strings: list presence_strings: list
+17 -61
View File
@@ -115,43 +115,26 @@ class MaigretSite:
lower_name = self.name.lower() lower_name = self.name.lower()
lower_url_main = self.url_main.lower() lower_url_main = self.url_main.lower()
        return (
            lower_name == lower_url_or_name_str
            or (lower_url_main and lower_url_main == lower_url_or_name_str)
            or (lower_url_main and lower_url_main in lower_url_or_name_str)
            or (lower_url_main and lower_url_or_name_str in lower_url_main)
            or (lower_url and lower_url_or_name_str in lower_url)
        )
        return \
            lower_name == lower_url_or_name_str or \
            (lower_url_main and lower_url_main == lower_url_or_name_str) or \
            (lower_url_main and lower_url_main in lower_url_or_name_str) or \
            (lower_url_main and lower_url_or_name_str in lower_url_main) or \
            (lower_url and lower_url_or_name_str in lower_url)
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, MaigretSite): if isinstance(other, MaigretSite):
# Compare only relevant attributes, not internal state like request_future # Compare only relevant attributes, not internal state like request_future
            attrs_to_compare = [
                'name',
                'url_main',
                'url_subpath',
                'type',
                'headers',
                'errors',
                'activation',
                'regex_check',
                'url_probe',
                'check_type',
                'request_head_only',
                'get_params',
                'presense_strs',
                'absence_strs',
                'stats',
                'engine',
                'engine_data',
                'alexa_rank',
                'source',
                'protocol',
            ]
            return all(
                getattr(self, attr) == getattr(other, attr) for attr in attrs_to_compare
            )

            attrs_to_compare = [
                'name', 'url_main', 'url_subpath', 'type', 'headers',
                'errors', 'activation', 'regex_check', 'url_probe',
                'check_type', 'request_head_only', 'get_params',
                'presense_strs', 'absence_strs', 'stats', 'engine',
                'engine_data', 'alexa_rank', 'source', 'protocol'
            ]
            return all(getattr(self, attr) == getattr(other, attr)
                       for attr in attrs_to_compare)
elif isinstance(other, str): elif isinstance(other, str):
# Compare only by name (exactly) or url_main (partial similarity) # Compare only by name (exactly) or url_main (partial similarity)
return self.__is_equal_by_url_or_name(other) return self.__is_equal_by_url_or_name(other)
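The multi-line condition reflowed in this hunk implements fuzzy site matching: exact name equality, or URL equality/containment in either direction. A standalone sketch of the same logic (helper name assumed; the extra user-page URL check from the original is omitted):

    def is_equal_by_url_or_name(name, url_main, query):
        """True if `query` matches a site by exact name or by (partial) URL."""
        name = name.lower()
        url_main = (url_main or "").lower()
        query = query.lower()
        return (
            name == query
            or (url_main and url_main == query)
            or (url_main and url_main in query)   # query contains the site URL
            or (url_main and query in url_main)   # site URL contains the query
        )

    # is_equal_by_url_or_name("GitHub", "https://github.com", "github.com") -> True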
@@ -560,19 +543,6 @@ class MaigretDatabase:
checks_perc = round(100 * message_checks_one_factor / enabled_count, 2) checks_perc = round(100 * message_checks_one_factor / enabled_count, 2)
status_checks_perc = round(100 * status_checks / enabled_count, 2) status_checks_perc = round(100 * status_checks / enabled_count, 2)
# Sites with probing and activation (kinda special cases, let's watch them)
site_with_probing = []
site_with_activation = []
for site in sites_dict.values():
def get_site_label(site):
return f"{site.name}{' (disabled)' if site.disabled else ''}"
if site.url_probe:
site_with_probing.append(get_site_label(site))
if site.activation:
site_with_activation.append(get_site_label(site))
# Format output # Format output
separator = "\n\n" separator = "\n\n"
output = [ output = [
@@ -580,32 +550,18 @@ class MaigretDatabase:
f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)", f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)",
f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)", f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)",
f"False positive risk (total): {checks_perc + status_checks_perc:.2f}%", f"False positive risk (total): {checks_perc + status_checks_perc:.2f}%",
f"Sites with probing: {', '.join(sorted(site_with_probing))}",
f"Sites with activation: {', '.join(sorted(site_with_activation))}",
self._format_top_items("profile URLs", urls, 20, is_markdown), self._format_top_items("profile URLs", urls, 20, is_markdown),
self._format_top_items("tags", tags, 20, is_markdown, self._tags), self._format_top_items("tags", tags, 20, is_markdown, self._tags),
] ]
return separator.join(output) return separator.join(output)
def _format_top_items( def _format_top_items(self, title, items_dict, limit, is_markdown, valid_items=None):
self, title, items_dict, limit, is_markdown, valid_items=None
):
"""Helper method to format top items lists""" """Helper method to format top items lists"""
output = f"Top {limit} {title}:\n" output = f"Top {limit} {title}:\n"
for item, count in sorted(items_dict.items(), key=lambda x: x[1], reverse=True)[ for item, count in sorted(items_dict.items(), key=lambda x: x[1], reverse=True)[:limit]:
:limit
]:
if count == 1: if count == 1:
break break
mark = ( mark = " (non-standard)" if valid_items is not None and item not in valid_items else ""
" (non-standard)" output += f"- ({count})\t`{item}`{mark}\n" if is_markdown else f"{count}\t{item}{mark}\n"
if valid_items is not None and item not in valid_items
else ""
)
output += (
f"- ({count})\t`{item}`{mark}\n"
if is_markdown
else f"{count}\t{item}{mark}\n"
)
return output return output
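The `_format_top_items` helper being reflowed here is a frequency-table printer; an equivalent self-contained sketch, with collections.Counter substituted for the raw dict sorting:

    from collections import Counter

    def format_top_items(title, counts, limit=20, is_markdown=False, valid_items=None):
        """Render the `limit` most frequent items, flagging non-standard ones."""
        lines = [f"Top {limit} {title}:"]
        for item, count in Counter(counts).most_common(limit):
            if count == 1:  # singletons are noise in a "top" list, as in the diff
                break
            mark = " (non-standard)" if valid_items is not None and item not in valid_items else ""
            lines.append(f"- ({count})\t`{item}`{mark}" if is_markdown else f"{count}\t{item}{mark}")
        return "\n".join(lines)

    print(format_top_items("tags", {"us": 40, "coding": 25, "rare": 1}, valid_items={"us", "coding"}))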
+210 -335
View File
@@ -1,22 +1,19 @@
import asyncio import asyncio
import json import json
import re import re
import os from typing import List
import logging from xml.etree import ElementTree
from typing import Any, Dict, List, Optional, Tuple from aiohttp import TCPConnector, ClientSession
import requests
from aiohttp import ClientSession, TCPConnector
from aiohttp_socks import ProxyConnector
import cloudscraper import cloudscraper
from colorama import Fore, Style from colorama import Fore, Style
from .activation import import_aiohttp_cookies from .activation import import_aiohttp_cookies
from .result import MaigretCheckResult from .checking import maigret
from .result import QueryStatus
from .settings import Settings from .settings import Settings
from .sites import MaigretDatabase, MaigretEngine, MaigretSite from .sites import MaigretDatabase, MaigretSite, MaigretEngine
from .utils import get_random_user_agent from .utils import get_random_user_agent, get_match_ratio
from .checking import site_self_check
from .utils import get_match_ratio, generate_random_username
class CloudflareSession: class CloudflareSession:
@@ -63,10 +60,7 @@ class Submitter:
proxy = self.args.proxy proxy = self.args.proxy
cookie_jar = None cookie_jar = None
if args.cookie_file: if args.cookie_file:
if not os.path.exists(args.cookie_file): cookie_jar = import_aiohttp_cookies(args.cookie_file)
logger.error(f"Cookie file {args.cookie_file} does not exist!")
else:
cookie_jar = import_aiohttp_cookies(args.cookie_file)
connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False) connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
connector.verify_ssl = False connector.verify_ssl = False
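Both sides of this hunk build the shared aiohttp session the same way: a SOCKS-aware connector when a proxy URL is given, plain TCP otherwise. A minimal sketch of that setup (the `verify_ssl` attribute assignment is deprecated in recent aiohttp, so the sketch passes `ssl=False` instead):

    from aiohttp import ClientSession, TCPConnector
    from aiohttp_socks import ProxyConnector

    def make_session(proxy=None) -> ClientSession:
        # A proxy URL (e.g. "socks5://127.0.0.1:9050") selects ProxyConnector;
        # otherwise plain TCP without certificate verification, as in the diff.
        connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
        return ClientSession(connector=connector, trust_env=True)  # caller must close it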
@@ -74,14 +68,8 @@ class Submitter:
connector=connector, trust_env=True, cookie_jar=cookie_jar connector=connector, trust_env=True, cookie_jar=cookie_jar
) )
async def close(self):
await self.session.close()
@staticmethod @staticmethod
def get_alexa_rank(site_url_main): def get_alexa_rank(site_url_main):
import requests
import xml.etree.ElementTree as ElementTree
url = f"http://data.alexa.com/data?cli=10&url={site_url_main}" url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
xml_data = requests.get(url).text xml_data = requests.get(url).text
root = ElementTree.fromstring(xml_data) root = ElementTree.fromstring(xml_data)
@@ -99,18 +87,78 @@ class Submitter:
return "/".join(url.split("/", 3)[:3]) return "/".join(url.split("/", 3)[:3])
async def site_self_check(self, site, semaphore, silent=False): async def site_self_check(self, site, semaphore, silent=False):
# Call the general function from the checking.py changes = {
changes = await site_self_check( "disabled": False,
site=site, }
logger=self.logger,
semaphore=semaphore, check_data = [
db=self.db, (site.username_claimed, QueryStatus.CLAIMED),
silent=silent, (site.username_unclaimed, QueryStatus.AVAILABLE),
proxy=self.args.proxy, ]
cookies=self.args.cookie_file,
# Don't skip errors in submit mode - we need to check both false positives/true negatives self.logger.info(f"Checking {site.name}...")
skip_errors=False,
) for username, status in check_data:
results_dict = await maigret(
username=username,
site_dict={site.name: site},
proxy=self.args.proxy,
logger=self.logger,
cookies=self.args.cookie_file,
timeout=30,
id_type=site.type,
forced=True,
no_progressbar=True,
)
# don't disable entries with other ids types
# TODO: make normal checking
if site.name not in results_dict:
self.logger.info(results_dict)
changes["disabled"] = True
continue
result = results_dict[site.name]["status"]
site_status = result.status
if site_status != status:
if site_status == QueryStatus.UNKNOWN:
msgs = site.absence_strs
etype = site.check_type
self.logger.warning(
"Error while searching '%s' in %s: %s, %s, check type %s",
username,
site.name,
result.context,
msgs,
etype,
)
# don't disable in case of available username
if status == QueryStatus.CLAIMED:
changes["disabled"] = True
elif status == QueryStatus.CLAIMED:
print(
f"{Fore.YELLOW}[!] Not found `{username}` in {site.name}, must be claimed{Style.RESET_ALL}"
)
self.logger.warning(site.json)
changes["disabled"] = True
else:
print(
f"{Fore.YELLOW}[!] Found `{username}` in {site.name}, must be available{Style.RESET_ALL}"
)
self.logger.warning(site.json)
changes["disabled"] = True
else:
print(f"{Fore.GREEN}[+] {username} is successfully checked: {status} in {site.name}{Style.RESET_ALL}")
self.logger.info(f"Site {site.name} checking is finished")
# remove service tag "unchecked"
if "unchecked" in site.tags:
site.tags.remove("unchecked")
changes["tags"] = site.tags
return changes return changes
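The self-check on the right of this hunk boils down to two probes: a username known to be claimed must come back CLAIMED, and a known-unclaimed one must come back AVAILABLE; any mismatch flags the site entry as disabled. A condensed sketch, where `check(username, site)` is an assumed stand-in for the full maigret() call:

    from enum import Enum

    class QueryStatus(Enum):
        CLAIMED = 1
        AVAILABLE = 2
        UNKNOWN = 3

    async def site_self_check(site, check):
        changes = {"disabled": False}
        expectations = [
            (site.username_claimed, QueryStatus.CLAIMED),
            (site.username_unclaimed, QueryStatus.AVAILABLE),
        ]
        for username, expected in expectations:
            actual = await check(username, site)
            if actual == expected:
                continue
            # as in the diff: a request error only disables the site when the
            # username was supposed to be claimed
            if actual == QueryStatus.UNKNOWN and expected != QueryStatus.CLAIMED:
                continue
            changes["disabled"] = True  # false positive or false negative detected
        return changes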
def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog): def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog):
@@ -126,13 +174,19 @@ class Submitter:
return fields return fields
async def detect_known_engine( async def detect_known_engine(
self, url_exists, url_mainpage, session, follow_redirects, headers self, url_exists, url_mainpage
) -> [List[MaigretSite], str]: ) -> [List[MaigretSite], str]:
resp_text = ''
session = session or self.session try:
resp_text, _ = await self.get_html_response_to_compare( r = await self.session.get(url_mainpage)
url_exists, session, follow_redirects, headers content = await r.content.read()
) charset = r.charset or "utf-8"
resp_text = content.decode(charset, "ignore")
self.logger.debug(resp_text)
except Exception as e:
self.logger.warning(e)
print("Some error while checking main page")
return [], resp_text
for engine in self.db.engines: for engine in self.db.engines:
strs_to_check = engine.__dict__.get("presenseStrs") strs_to_check = engine.__dict__.get("presenseStrs")
@@ -159,7 +213,7 @@ class Submitter:
for u in usernames_to_check: for u in usernames_to_check:
site_data = { site_data = {
"urlMain": url_mainpage, "urlMain": url_mainpage,
"name": url_mainpage.split("//")[1].split("/")[0], "name": url_mainpage.split("//")[1],
"engine": engine_name, "engine": engine_name,
"usernameClaimed": u, "usernameClaimed": u,
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
@@ -184,149 +238,127 @@ class Submitter:
url_parts = url.rstrip("/").split("/") url_parts = url.rstrip("/").split("/")
supposed_username = url_parts[-1].strip('@') supposed_username = url_parts[-1].strip('@')
entered_username = input( entered_username = input(
f"{Fore.GREEN}[?] Is \"{supposed_username}\" a valid username? If not, write it manually: {Style.RESET_ALL}" f'Is "{supposed_username}" a valid username? If not, write it manually: '
) )
return entered_username if entered_username else supposed_username return entered_username if entered_username else supposed_username
# TODO: replace with checking.py/SimpleAiohttpChecker call
@staticmethod
async def get_html_response_to_compare(
url: str, session: ClientSession = None, redirects=False, headers: Dict = None
):
async with session.get(
url, allow_redirects=redirects, headers=headers
) as response:
# Try different encodings or fallback to 'ignore' errors
try:
html_response = await response.text(encoding='utf-8')
except UnicodeDecodeError:
try:
html_response = await response.text(encoding='latin1')
except UnicodeDecodeError:
html_response = await response.text(errors='ignore')
return html_response, response.status
async def check_features_manually( async def check_features_manually(
self, self, url_exists, url_mainpage, cookie_file, redirects=False
username: str, ):
url_exists: str, custom_headers = {}
cookie_filename="", # TODO: use cookies while self.args.verbose:
session: ClientSession = None, header_key = input(
follow_redirects=False, 'Specify custom header if you need or just press Enter to skip. Header name: '
headers: dict = None,
) -> Tuple[List[str], List[str], str, str]:
random_username = generate_random_username()
url_of_non_existing_account = url_exists.lower().replace(
username.lower(), random_username
)
try:
session = session or self.session
first_html_response, first_status = await self.get_html_response_to_compare(
url_exists, session, follow_redirects, headers
) )
second_html_response, second_status = ( if not header_key:
await self.get_html_response_to_compare( break
url_of_non_existing_account, session, follow_redirects, headers header_value = input('Header value: ')
) custom_headers[header_key.strip()] = header_value.strip()
)
await session.close()
except Exception as e:
self.logger.error(
f"Error while getting HTTP response for username {username}: {e}",
exc_info=True,
)
return None, None, str(e), random_username
self.logger.info(f"URL with existing account: {url_exists}") supposed_username = self.extract_username_dialog(url_exists)
self.logger.info( non_exist_username = "noonewouldeverusethis7"
f"HTTP response status for URL with existing account: {first_status}"
)
self.logger.info(
f"HTTP response length URL with existing account: {len(first_html_response)}"
)
self.logger.debug(first_html_response)
self.logger.info(f"URL with existing account: {url_of_non_existing_account}") url_user = url_exists.replace(supposed_username, "{username}")
self.logger.info( url_not_exists = url_exists.replace(supposed_username, non_exist_username)
f"HTTP response status for URL with non-existing account: {second_status}"
)
self.logger.info(
f"HTTP response length URL with non-existing account: {len(second_html_response)}"
)
self.logger.debug(second_html_response)
# TODO: filter by errors, move to dialog function headers = dict(self.HEADERS)
if ( headers.update(custom_headers)
"/cdn-cgi/challenge-platform" in first_html_response
or "\t\t\t\tnow: " in first_html_response
or "Sorry, you have been blocked" in first_html_response
):
self.logger.info("Cloudflare detected, skipping")
return None, None, "Cloudflare detected, skipping", random_username
tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response)) exists_resp = await self.session.get(
tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response)) url_exists,
headers=headers,
allow_redirects=redirects,
)
exists_resp_text = await exists_resp.text()
self.logger.debug(url_exists)
self.logger.debug(exists_resp.status)
self.logger.debug(exists_resp_text)
non_exists_resp = await self.session.get(
url_not_exists,
headers=headers,
allow_redirects=redirects,
)
non_exists_resp_text = await non_exists_resp.text()
self.logger.debug(url_not_exists)
self.logger.debug(non_exists_resp.status)
self.logger.debug(non_exists_resp_text)
a = exists_resp_text
b = non_exists_resp_text
tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
a_minus_b = tokens_a.difference(tokens_b) a_minus_b = tokens_a.difference(tokens_b)
b_minus_a = tokens_b.difference(tokens_a) b_minus_a = tokens_b.difference(tokens_a)
a_minus_b = list(map(lambda x: x.strip('\\'), a_minus_b)) # additional filtering by html response
b_minus_a = list(map(lambda x: x.strip('\\'), b_minus_a)) a_minus_b = [t for t in a_minus_b if not t in non_exists_resp_text]
b_minus_a = [t for t in b_minus_a if not t in exists_resp_text]
# Filter out strings containing usernames
a_minus_b = [s for s in a_minus_b if username.lower() not in s.lower()]
b_minus_a = [s for s in b_minus_a if random_username.lower() not in s.lower()]
def filter_tokens(token: str, html_response: str) -> bool:
is_in_html = token in html_response
is_long_str = len(token) >= 50
is_number = re.match(r'^\d\.?\d+$', token) or re.match(r':^\d+$', token)
is_whitelisted_number = token in ['200', '404', '403']
return not (
is_in_html or is_long_str or (is_number and not is_whitelisted_number)
)
a_minus_b = list(
filter(lambda t: filter_tokens(t, second_html_response), a_minus_b)
)
b_minus_a = list(
filter(lambda t: filter_tokens(t, first_html_response), b_minus_a)
)
if len(a_minus_b) == len(b_minus_a) == 0: if len(a_minus_b) == len(b_minus_a) == 0:
return ( print("The pages for existing and non-existing account are the same!")
None,
None, top_features_count = int(
"HTTP responses for pages with existing and non-existing accounts are the same", input(
random_username, f"Specify count of features to extract [default {self.TOP_FEATURES}]: "
) )
or self.TOP_FEATURES
)
match_fun = get_match_ratio(self.settings.presence_strings) match_fun = get_match_ratio(self.settings.presence_strings)
presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[ presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[
: self.TOP_FEATURES :top_features_count
] ]
self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list])
print("Detected text features of existing account: " + ", ".join(presence_list))
features = input("If features was not detected correctly, write it manually: ")
if features:
presence_list = list(map(str.strip, features.split(",")))
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[ absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
: self.TOP_FEATURES :top_features_count
] ]
self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list])
self.logger.info(f"Detected presence features: {presence_list}") print(
self.logger.info(f"Detected absence features: {absence_list}") "Detected text features of non-existing account: " + ", ".join(absence_list)
)
features = input("If features was not detected correctly, write it manually: ")
return presence_list, absence_list, "Found", random_username if features:
absence_list = list(map(str.strip, features.split(",")))
site_data = {
"absenceStrs": absence_list,
"presenseStrs": presence_list,
"url": url_user,
"urlMain": url_mainpage,
"usernameClaimed": supposed_username,
"usernameUnclaimed": non_exist_username,
"checkType": "message",
}
if headers != self.HEADERS:
site_data['headers'] = headers
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
return site
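The feature-extraction core of `check_features_manually`, on both sides of this hunk, is a token set difference: split the "account exists" page and the "no such user" page on separator characters, and keep the tokens unique to each. A self-contained sketch (the separator set here is an assumption; the real one lives on the Submitter class):

    import re

    SEPARATORS = "\"',\n"  # assumed: quotes, comma and newline as token boundaries

    def extract_features(exists_html, missing_html):
        tokens_a = set(re.split(f"[{SEPARATORS}]", exists_html))
        tokens_b = set(re.split(f"[{SEPARATORS}]", missing_html))
        presence = tokens_a - tokens_b  # only on the existing-account page
        absence = tokens_b - tokens_a   # only on the "no such user" page
        # additional filtering, as in the diff: drop tokens that still occur
        # verbatim anywhere in the opposite response body
        presence = [t for t in presence if t and t not in missing_html]
        absence = [t for t in absence if t and t not in exists_html]
        return presence, absence

    extract_features('{"user": "alice", "posts": 3}', '{"error": "not found"}')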
async def add_site(self, site): async def add_site(self, site):
sem = asyncio.Semaphore(1) sem = asyncio.Semaphore(1)
print( print(f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}")
f"{Fore.BLUE}{Style.BRIGHT}[*] Adding site {site.name}, let's check it...{Style.RESET_ALL}"
)
result = await self.site_self_check(site, sem) result = await self.site_self_check(site, sem)
if result["disabled"]: if result["disabled"]:
print(f"Checks failed for {site.name}, please, verify them manually.") print(
f"Checks failed for {site.name}, please, verify them manually."
)
return { return {
"valid": False, "valid": False,
"reason": "checks_failed", "reason": "checks_failed",
@@ -373,9 +405,7 @@ class Submitter:
if choice in editable_fields: if choice in editable_fields:
field = editable_fields[choice] field = editable_fields[choice]
current_value = getattr(site, field) current_value = getattr(site, field)
new_value = input( new_value = input(f"Enter new value for {field} (current: {current_value}): ").strip()
f"Enter new value for {field} (current: {current_value}): "
).strip()
if field in ['tags', 'presense_strs', 'absence_strs']: if field in ['tags', 'presense_strs', 'absence_strs']:
new_value = list(map(str.strip, new_value.split(','))) new_value = list(map(str.strip, new_value.split(',')))
@@ -391,19 +421,6 @@ class Submitter:
} }
async def dialog(self, url_exists, cookie_file): async def dialog(self, url_exists, cookie_file):
"""
An implementation of the submit mode:
- User provides a URL of an existing social media account
- Maigret tries to detect the site engine and understand how to check
for account presence with HTTP responses analysis
- If detection succeeds, Maigret generates a new site entry or replaces the old one in the database
"""
old_site = None
additional_options_enabled = self.logger.level in (
logging.DEBUG,
logging.WARNING,
)
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/") domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
domain_raw = domain_raw.split("/")[0] domain_raw = domain_raw.split("/")[0]
self.logger.info('Domain is %s', domain_raw) self.logger.info('Domain is %s', domain_raw)
@@ -414,11 +431,9 @@ class Submitter:
) )
if matched_sites: if matched_sites:
# TODO: update the existing site
print( print(
f"{Fore.YELLOW}[!] Sites with domain \"{domain_raw}\" already exists in the Maigret database!{Style.RESET_ALL}" f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
) )
status = lambda s: "(disabled)" if s.disabled else "" status = lambda s: "(disabled)" if s.disabled else ""
url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}" url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
print( print(
@@ -430,130 +445,48 @@ class Submitter:
) )
) )
if ( if input("Do you want to continue? [yN] ").lower() in "n":
input(
f"{Fore.GREEN}[?] Do you want to continue? [yN] {Style.RESET_ALL}"
).lower()
in "n"
):
return False return False
site_names = [site.name for site in matched_sites]
site_name = (
input(
f"{Fore.GREEN}[?] Which site do you want to update in case of success? 1st by default. [{', '.join(site_names)}] {Style.RESET_ALL}"
)
or matched_sites[0].name
)
old_site = next(
(site for site in matched_sites if site.name == site_name), None
)
print(
f'{Fore.GREEN}[+] We will update site "{old_site.name}" in case of success.{Style.RESET_ALL}'
)
# Check if the site check is ordinary or not
if old_site and (old_site.url_probe or old_site.activation):
skip = input(
f"{Fore.RED}[!] The site check depends on activation / probing mechanism! Consider to update it manually. Continue? [yN]{Style.RESET_ALL}"
)
if skip.lower() in ['n', '']:
return False
# TODO: urlProbe support
# TODO: activation support
url_mainpage = self.extract_mainpage_url(url_exists) url_mainpage = self.extract_mainpage_url(url_exists)
# headers update
custom_headers = dict(self.HEADERS)
while additional_options_enabled:
header_key = input(
f'{Fore.GREEN}[?] Specify custom header if you need or just press Enter to skip. Header name: {Style.RESET_ALL}'
)
if not header_key:
break
header_value = input(f'{Fore.GREEN}[?] Header value: {Style.RESET_ALL}')
custom_headers[header_key.strip()] = header_value.strip()
# redirects settings update
redirects = False
if additional_options_enabled:
redirects = (
'y'
in input(
f'{Fore.GREEN}[?] Should we do redirects automatically? [yN] {Style.RESET_ALL}'
).lower()
)
print('Detecting site engine, please wait...') print('Detecting site engine, please wait...')
sites = [] sites = []
text = None text = None
try: try:
sites, text = await self.detect_known_engine( sites, text = await self.detect_known_engine(url_exists, url_exists)
url_exists,
url_exists,
session=None,
follow_redirects=redirects,
headers=custom_headers,
)
except KeyboardInterrupt: except KeyboardInterrupt:
print('Engine detect process is interrupted.') print('Engine detect process is interrupted.')
if 'cloudflare' in text.lower(): if 'cloudflare' in text.lower():
print( print(
'Cloudflare protection detected. I will use cloudscraper for further work' 'Cloudflare protection detected. I will use cloudscraper for further work'
) )
# self.session = CloudflareSession() # self.session = CloudflareSession()
if not sites: if not sites:
print("Unable to detect site engine, lets generate checking features") print("Unable to detect site engine, lets generate checking features")
supposed_username = self.extract_username_dialog(url_exists) redirects = False
self.logger.info(f"Supposed username: {supposed_username}") if self.args.verbose:
redirects = (
'y' in input('Should we do redirects automatically? [yN] ').lower()
)
# TODO: pass status_codes sites = [
# check it here and suggest to enable / auto-enable redirects
presence_list, absence_list, status, non_exist_username = (
await self.check_features_manually( await self.check_features_manually(
username=supposed_username, url_exists,
url_exists=url_exists, url_mainpage,
cookie_filename=cookie_file, cookie_file,
follow_redirects=redirects, redirects,
headers=custom_headers,
) )
) ]
if status == "Found":
site_data = {
"absenceStrs": absence_list,
"presenseStrs": presence_list,
"url": url_exists.replace(supposed_username, '{username}'),
"urlMain": url_mainpage,
"usernameClaimed": supposed_username,
"usernameUnclaimed": non_exist_username,
"headers": custom_headers,
"checkType": "message",
}
self.logger.info(json.dumps(site_data, indent=4))
if custom_headers != self.HEADERS:
site_data['headers'] = custom_headers
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
sites.append(site)
else:
print(
f"{Fore.RED}[!] The check for site failed! Reason: {status}{Style.RESET_ALL}"
)
return False
self.logger.debug(sites[0].__dict__) self.logger.debug(sites[0].__dict__)
sem = asyncio.Semaphore(1) sem = asyncio.Semaphore(1)
print(f"{Fore.GREEN}[*] Checking, please wait...{Style.RESET_ALL}") print("Checking, please wait...")
found = False found = False
chosen_site = None chosen_site = None
for s in sites: for s in sites:
@@ -575,7 +508,7 @@ class Submitter:
else: else:
if ( if (
input( input(
f"{Fore.GREEN}[?] Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] {Style.RESET_ALL}" f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
) )
.lower() .lower()
.strip("y") .strip("y")
@@ -583,82 +516,24 @@ class Submitter:
return False return False
if self.args.verbose: if self.args.verbose:
self.logger.info( source = input("Name the source site if it is mirror: ")
"Verbose mode is enabled, additional settings are available"
)
source = input(
f"{Fore.GREEN}[?] Name the source site if it is mirror: {Style.RESET_ALL}"
)
if source: if source:
chosen_site.source = source chosen_site.source = source
default_site_name = old_site.name if old_site else chosen_site.name chosen_site.name = input("Change site name if you want: ") or chosen_site.name
new_name = ( chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
input(
f"{Fore.GREEN}[?] Change site name if you want [{default_site_name}]: {Style.RESET_ALL}"
)
or default_site_name
)
if new_name != default_site_name:
self.logger.info(f"New site name is {new_name}")
chosen_site.name = new_name
default_tags_str = ""
if old_site:
default_tags_str = f' [{", ".join(old_site.tags)}]'
new_tags = input(
f"{Fore.GREEN}[?] Site tags{default_tags_str}: {Style.RESET_ALL}"
)
if new_tags:
chosen_site.tags = list(map(str.strip, new_tags.split(',')))
else:
chosen_site.tags = []
self.logger.info(f"Site tags are: {', '.join(chosen_site.tags)}")
# rank = Submitter.get_alexa_rank(chosen_site.url_main) # rank = Submitter.get_alexa_rank(chosen_site.url_main)
# if rank: # if rank:
# print(f'New alexa rank: {rank}') # print(f'New alexa rank: {rank}')
# chosen_site.alexa_rank = rank # chosen_site.alexa_rank = rank
self.logger.info(chosen_site.json) self.logger.debug(chosen_site.json)
site_data = chosen_site.strip_engine_data() site_data = chosen_site.strip_engine_data()
self.logger.info(site_data.json) self.logger.debug(site_data.json)
self.db.update_site(site_data)
if old_site: if self.args.db:
# Update old site with new values and log changes print(f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}")
fields_to_check = {
'url': 'URL',
'url_main': 'Main URL',
'username_claimed': 'Username claimed',
'username_unclaimed': 'Username unclaimed',
'check_type': 'Check type',
'presense_strs': 'Presence strings',
'absence_strs': 'Absence strings',
'tags': 'Tags',
'source': 'Source',
'headers': 'Headers',
}
for field, display_name in fields_to_check.items():
old_value = getattr(old_site, field)
new_value = getattr(site_data, field)
if field == 'tags' and not new_tags:
continue
if str(old_value) != str(new_value):
print(
f"{Fore.YELLOW}[*] '{display_name}' updated: {Fore.RED}{old_value} {Fore.YELLOW}to {Fore.GREEN}{new_value}{Style.RESET_ALL}"
)
old_site.__dict__[field] = new_value
# update the site
final_site = old_site if old_site else site_data
self.db.update_site(final_site)
# save the db in file
if self.args.db_file != self.settings.sites_db_path:
print(
f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}"
)
self.db.save_to_file(self.args.db) self.db.save_to_file(self.args.db)
return True return True
-5
View File
@@ -3,7 +3,6 @@ import ast
import difflib import difflib
import re import re
import random import random
import string
from typing import Any from typing import Any
@@ -120,7 +119,3 @@ def get_match_ratio(base_strs: list):
) )
return get_match_inner return get_match_inner
def generate_random_username():
return ''.join(random.choices(string.ascii_lowercase, k=10))
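`get_match_ratio`, used above to rank candidate features, closes over a list of known-good marker strings and scores a candidate by string similarity. A sketch of the assumed behavior using difflib (the real scoring may differ in details):

    import difflib

    def get_match_ratio(base_strs):
        def get_match_inner(candidate):
            # score a candidate by its best similarity to any known marker
            return max(
                difflib.SequenceMatcher(a=candidate.lower(), b=base.lower()).ratio()
                for base in base_strs
            )
        return get_match_inner

    score = get_match_ratio(["profile", "user not found"])
    sorted(["404", "profile-card", "random-token"], key=score, reverse=True)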
-341
View File
@@ -1,341 +0,0 @@
from flask import (
Flask,
render_template,
request,
send_file,
Response,
flash,
redirect,
url_for,
)
import logging
import os
import asyncio
from datetime import datetime
from threading import Thread
import maigret
import maigret.settings
from maigret.sites import MaigretDatabase
from maigret.report import generate_report_context
app = Flask(__name__)
app.secret_key = 'your-secret-key-here'
# add background job tracking
background_jobs = {}
job_results = {}
# Configuration
app.config["MAIGRET_DB_FILE"] = os.path.join('maigret', 'resources', 'data.json')
app.config["COOKIES_FILE"] = "cookies.txt"
app.config["UPLOAD_FOLDER"] = 'uploads'
app.config["REPORTS_FOLDER"] = os.path.abspath('/tmp/maigret_reports')
def setup_logger(log_level, name):
logger = logging.getLogger(name)
logger.setLevel(log_level)
return logger
async def maigret_search(username, options):
logger = setup_logger(logging.WARNING, 'maigret')
try:
db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
top_sites = int(options.get('top_sites') or 500)
if options.get('all_sites'):
top_sites = 999999999 # effectively all
tags = options.get('tags', [])
site_list = options.get('site_list', [])
logger.info(f"Filtering sites by tags: {tags}")
sites = db.ranked_sites_dict(
top=top_sites,
tags=tags,
names=site_list,
disabled=False,
id_type='username',
)
logger.info(f"Found {len(sites)} sites matching the tag criteria")
results = await maigret.search(
username=username,
site_dict=sites,
timeout=int(options.get('timeout', 30)),
logger=logger,
id_type='username',
cookies=app.config["COOKIES_FILE"] if options.get('use_cookies') else None,
is_parsing_enabled=(not options.get('disable_extracting', False)),
recursive_search_enabled=(
not options.get('disable_recursive_search', False)
),
check_domains=options.get('with_domains', False),
proxy=options.get('proxy', None),
tor_proxy=options.get('tor_proxy', None),
i2p_proxy=options.get('i2p_proxy', None),
)
return results
except Exception as e:
logger.error(f"Error during search: {str(e)}")
raise
async def search_multiple_usernames(usernames, options):
results = []
for username in usernames:
try:
search_results = await maigret_search(username.strip(), options)
results.append((username.strip(), 'username', search_results))
except Exception as e:
logging.error(f"Error searching username {username}: {str(e)}")
return results
def process_search_task(usernames, options, timestamp):
try:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
general_results = loop.run_until_complete(
search_multiple_usernames(usernames, options)
)
os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
session_folder = os.path.join(
app.config["REPORTS_FOLDER"], f"search_{timestamp}"
)
os.makedirs(session_folder, exist_ok=True)
graph_path = os.path.join(session_folder, "combined_graph.html")
maigret.report.save_graph_report(
graph_path,
general_results,
MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]),
)
individual_reports = []
for username, id_type, results in general_results:
report_base = os.path.join(session_folder, f"report_{username}")
csv_path = f"{report_base}.csv"
json_path = f"{report_base}.json"
pdf_path = f"{report_base}.pdf"
html_path = f"{report_base}.html"
context = generate_report_context(general_results)
maigret.report.save_csv_report(csv_path, username, results)
maigret.report.save_json_report(
json_path, username, results, report_type='ndjson'
)
maigret.report.save_pdf_report(pdf_path, context)
maigret.report.save_html_report(html_path, context)
claimed_profiles = []
for site_name, site_data in results.items():
if (
site_data.get('status')
and site_data['status'].status
== maigret.result.MaigretCheckStatus.CLAIMED
):
claimed_profiles.append(
{
'site_name': site_name,
'url': site_data.get('url_user', ''),
'tags': (
site_data.get('status').tags
if site_data.get('status')
else []
),
}
)
individual_reports.append(
{
'username': username,
'csv_file': os.path.join(
f"search_{timestamp}", f"report_{username}.csv"
),
'json_file': os.path.join(
f"search_{timestamp}", f"report_{username}.json"
),
'pdf_file': os.path.join(
f"search_{timestamp}", f"report_{username}.pdf"
),
'html_file': os.path.join(
f"search_{timestamp}", f"report_{username}.html"
),
'claimed_profiles': claimed_profiles,
}
)
# save results and mark job as complete using timestamp as key
job_results[timestamp] = {
'status': 'completed',
'session_folder': f"search_{timestamp}",
'graph_file': os.path.join(f"search_{timestamp}", "combined_graph.html"),
'usernames': usernames,
'individual_reports': individual_reports,
}
except Exception as e:
logging.error(f"Error in search task for timestamp {timestamp}: {str(e)}")
job_results[timestamp] = {'status': 'failed', 'error': str(e)}
finally:
background_jobs[timestamp]['completed'] = True
@app.route('/')
def index():
# load site data for autocomplete
db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
site_options = []
for site in db.sites:
# add main site name
site_options.append(site.name)
# add URL if different from name
if site.url_main and site.url_main not in site_options:
site_options.append(site.url_main)
# sort and deduplicate
site_options = sorted(set(site_options))
return render_template('index.html', site_options=site_options)
# Modified search route
@app.route('/search', methods=['POST'])
def search():
usernames_input = request.form.get('usernames', '').strip()
if not usernames_input:
flash('At least one username is required', 'danger')
return redirect(url_for('index'))
usernames = [
u.strip() for u in usernames_input.replace(',', ' ').split() if u.strip()
]
# Create timestamp for this search session
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Get selected tags - ensure it's a list
selected_tags = request.form.getlist('tags')
logging.info(f"Selected tags: {selected_tags}")
options = {
'top_sites': request.form.get('top_sites') or '500',
'timeout': request.form.get('timeout') or '30',
'use_cookies': 'use_cookies' in request.form,
'all_sites': 'all_sites' in request.form,
'disable_recursive_search': 'disable_recursive_search' in request.form,
'disable_extracting': 'disable_extracting' in request.form,
'with_domains': 'with_domains' in request.form,
'proxy': request.form.get('proxy', None) or None,
'tor_proxy': request.form.get('tor_proxy', None) or None,
'i2p_proxy': request.form.get('i2p_proxy', None) or None,
'permute': 'permute' in request.form,
'tags': selected_tags, # Pass selected tags as a list
'site_list': [
s.strip() for s in request.form.get('site', '').split(',') if s.strip()
],
}
logging.info(
f"Starting search for usernames: {usernames} with tags: {selected_tags}"
)
# Start background job
background_jobs[timestamp] = {
'completed': False,
'thread': Thread(
target=process_search_task, args=(usernames, options, timestamp)
),
}
background_jobs[timestamp]['thread'].start()
return redirect(url_for('status', timestamp=timestamp))
@app.route('/status/<timestamp>')
def status(timestamp):
logging.info(f"Status check for timestamp: {timestamp}")
# Validate timestamp
if timestamp not in background_jobs:
flash('Invalid search session.', 'danger')
logging.error(f"Invalid search session: {timestamp}")
return redirect(url_for('index'))
# Check if job is completed
if background_jobs[timestamp]['completed']:
result = job_results.get(timestamp)
if not result:
flash('No results found for this search session.', 'warning')
logging.error(f"No results found for completed session: {timestamp}")
return redirect(url_for('index'))
if result['status'] == 'completed':
# Note: use the session_folder from the results to redirect
return redirect(url_for('results', session_id=result['session_folder']))
else:
error_msg = result.get('error', 'Unknown error occurred.')
flash(f'Search failed: {error_msg}', 'danger')
logging.error(f"Search failed for session {timestamp}: {error_msg}")
return redirect(url_for('index'))
# If job is still running, show a status page
return render_template('status.html', timestamp=timestamp)
@app.route('/results/<session_id>')
def results(session_id):
# Find completed results that match this session_folder
result_data = next(
(
r
for r in job_results.values()
if r.get('status') == 'completed' and r['session_folder'] == session_id
),
None,
)
if not result_data:
flash('No results found for this session ID.', 'danger')
logging.error(f"Results for session {session_id} not found in job_results.")
return redirect(url_for('index'))
return render_template(
'results.html',
usernames=result_data['usernames'],
graph_file=result_data['graph_file'],
individual_reports=result_data['individual_reports'],
timestamp=session_id.replace('search_', ''),
)
@app.route('/reports/<path:filename>')
def download_report(filename):
try:
os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True)
file_path = os.path.normpath(
os.path.join(app.config["REPORTS_FOLDER"], filename)
)
if not file_path.startswith(app.config["REPORTS_FOLDER"]):
raise Exception("Invalid file path")
return send_file(file_path)
except Exception as e:
logging.error(f"Error serving file {filename}: {str(e)}")
return "File not found", 404
if __name__ == '__main__':
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
debug_mode = os.getenv('FLASK_DEBUG', 'False').lower() in ['true', '1', 't']
app.run(debug=debug_mode)
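The deleted web interface ran each search on a worker thread with its own asyncio event loop, tracking progress in two module-level dicts keyed by timestamp. The pattern, stripped to its essentials (`do_search` is a placeholder for the real maigret search pipeline):

    import asyncio
    from threading import Thread

    background_jobs, job_results = {}, {}

    async def do_search(usernames):
        await asyncio.sleep(0)  # stands in for the real async search
        return {u: "done" for u in usernames}

    def process_search_task(usernames, key):
        try:
            loop = asyncio.new_event_loop()  # a worker thread needs its own loop
            asyncio.set_event_loop(loop)
            data = loop.run_until_complete(do_search(usernames))
            job_results[key] = {"status": "completed", "data": data}
        except Exception as e:
            job_results[key] = {"status": "failed", "error": str(e)}
        finally:
            background_jobs[key]["completed"] = True  # polled by the status page

    def start_job(usernames, key):
        thread = Thread(target=process_search_task, args=(usernames, key))
        background_jobs[key] = {"completed": False, "thread": thread}
        thread.start()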
Binary file not shown (image removed; previously 45 KiB).
-118
View File
@@ -1,118 +0,0 @@
<!DOCTYPE html>
<html lang="en" data-bs-theme="dark">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Maigret Web Interface</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<style>
body {
min-height: 100vh;
display: flex;
flex-direction: column;
}
.main-container {
flex: 1;
padding-top: 2rem;
}
.form-container {
max-width: auto;
margin: auto;
padding-bottom: 2rem;
}
[data-bs-theme="dark"] {
--bs-body-bg: #212529;
--bs-body-color: #dee2e6;
}
.header {
padding: 1rem 0;
margin-bottom: 2rem;
border-bottom: 1px solid var(--bs-border-color);
}
.header-content {
display: flex;
align-items: center;
justify-content: space-between;
}
.logo-container {
display: flex;
align-items: center;
gap: 1rem;
}
.logo {
height: 40px;
width: auto;
}
.footer {
margin-top: auto;
padding: 1rem 0;
text-align: center;
border-top: 1px solid var(--bs-border-color);
font-size: 0.9rem;
}
.footer a {
color: inherit;
text-decoration: none;
}
.footer a:hover {
text-decoration: underline;
}
</style>
</head>
<body>
<div class="header">
<div class="container">
<div class="header-content">
<div class="logo-container">
<img src="{{ url_for('static', filename='maigret.png') }}" alt="Maigret Logo" class="logo">
<h1 class="h4 mb-0">Maigret Web Interface</h1>
</div>
<button class="btn btn-outline-secondary" id="theme-toggle">
Toggle Dark/Light Mode
</button>
</div>
</div>
</div>
<div class="main-container">
<div class="container">
{% block content %}{% endblock %}
</div>
</div>
<footer class="footer">
<div class="container">
<p class="mb-0">
Powered by <a href="https://github.com/soxoj/maigret" target="_blank">Maigret</a> |
Licensed under <a href="https://github.com/soxoj/maigret/blob/main/LICENSE" target="_blank">MIT
License</a>
</p>
</div>
</footer>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
<script>
document.getElementById('theme-toggle').addEventListener('click', function () {
const html = document.documentElement;
if (html.getAttribute('data-bs-theme') === 'dark') {
html.setAttribute('data-bs-theme', 'light');
} else {
html.setAttribute('data-bs-theme', 'dark');
}
});
</script>
</body>
</html>
-383
View File
@@ -1,383 +0,0 @@
{% extends "base.html" %}
{% block content %}
<style>
.tag-cloud {
display: flex;
flex-wrap: wrap;
gap: 8px;
padding: 15px;
border-radius: 8px;
background: rgba(0, 0, 0, 0.05);
margin-bottom: 20px;
}
.tag {
display: inline-block;
padding: 5px 10px;
border-radius: 15px;
background-color: #dc3545;
color: white;
cursor: pointer;
font-size: 14px;
transition: all 0.3s ease;
user-select: none;
}
.tag.selected {
background-color: #28a745;
}
.tag:hover {
transform: translateY(-2px);
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
}
.hidden-select {
display: none !important;
}
.site-input-container {
position: relative;
}
.site-input {
width: 100%;
}
.selected-sites {
display: flex;
flex-wrap: wrap;
gap: 8px;
padding: 10px 0;
}
.selected-site {
background-color: #214e7b;
padding: 2px 8px;
border-radius: 12px;
font-size: 14px;
display: inline-flex;
align-items: center;
gap: 5px;
}
.remove-site {
cursor: pointer;
color: #dc3545;
font-weight: bold;
}
.section-header {
cursor: pointer;
padding: 1rem;
background: rgba(255, 255, 255, 0.05);
border-radius: 4px;
margin-bottom: 0.5rem;
display: flex;
justify-content: space-between;
align-items: center;
}
.section-content {
padding: 1rem;
display: none;
}
.section-content.show {
display: block;
}
.chevron::after {
content: '▼';
transition: transform 0.2s;
}
.chevron.collapsed::after {
transform: rotate(-90deg);
}
.main-search-section {
background: rgba(255, 255, 255, 0.03);
padding: 2rem;
border-radius: 8px;
margin-bottom: 2rem;
}
.search-button {
width: 100%;
padding: 1rem;
font-size: 1.2rem;
margin-top: 2rem;
}
</style>
<div class="form-container">
{% if error %}
<div class="alert alert-danger">{{ error }}</div>
{% endif %}
<form method="POST" action="{{ url_for('search') }}" class="mb-4">
<!-- Main Search Section -->
<div class="main-search-section">
<div class="mb-4">
<label for="usernames" class="form-label h5">Usernames to Search</label>
<textarea class="form-control" id="usernames" name="usernames" rows="3" required
placeholder="Enter one or more usernames (separated by spaces or commas)..."></textarea>
</div>
<div class="row align-items-center">
<div class="col-md-6">
<label for="top_sites" class="form-label">Number of Sites</label>
<input type="number" class="form-control" id="top_sites" name="top_sites" min="1" max="10000"
placeholder="Default: 500">
</div>
<div class="col-md-6">
<label for="timeout" class="form-label">Timeout (seconds)</label>
<input type="number" class="form-control" id="timeout" name="timeout" min="1"
placeholder="Default: 30">
</div>
<div class="col-12 mt-3">
<div class="form-check">
<input type="checkbox" class="form-check-input" id="all_sites" name="all_sites"
onchange="document.getElementById('top_sites').disabled = this.checked;">
<label class="form-check-label" for="all_sites">Search All Sites</label>
</div>
</div>
</div>
</div>
<!-- Filters Section -->
<div class="mb-4">
<div class="section-header" onclick="toggleSection('filters')">
<h5 class="mb-0">Filters</h5>
<span class="chevron"></span>
</div>
<div id="filters" class="section-content">
<div class="mb-3 site-input-container">
<label for="site" class="form-label">Specify Sites (Optional)</label>
<input type="text" class="form-control site-input" id="siteInput"
placeholder="Type to search for sites..." list="siteOptions">
<input type="hidden" id="site" name="site">
<datalist id="siteOptions">
{% for site in site_options %}
<option value="{{ site }}">
{% endfor %}
</datalist>
<div class="selected-sites" id="selectedSites"></div>
</div>
<div class="mb-3">
<label class="form-label">Tags (click to select)</label>
<div class="tag-cloud" id="tagCloud"></div>
<select multiple class="hidden-select" id="tags" name="tags">
<option value="gaming">Gaming</option>
<option value="coding">Coding</option>
<option value="photo">Photo</option>
<option value="music">Music</option>
<option value="blog">Blog</option>
<option value="finance">Finance</option>
<option value="freelance">Freelance</option>
<option value="dating">Dating</option>
<option value="tech">Tech</option>
<option value="forum">Forum</option>
<option value="porn">Porn</option>
<option value="erotic">Erotic</option>
<option value="webcam">Webcam</option>
<option value="video">Video</option>
<option value="movies">Movies</option>
<option value="hacking">Hacking</option>
<option value="art">Art</option>
<option value="discussion">Discussion</option>
<option value="sharing">Sharing</option>
<option value="writing">Writing</option>
<option value="wiki">Wiki</option>
<option value="business">Business</option>
<option value="shopping">Shopping</option>
<option value="sport">Sport</option>
<option value="books">Books</option>
<option value="news">News</option>
<option value="documents">Documents</option>
<option value="travel">Travel</option>
<option value="maps">Maps</option>
<option value="hobby">Hobby</option>
<option value="apps">Apps</option>
<option value="classified">Classified</option>
<option value="career">Career</option>
<option value="geosocial">Geosocial</option>
<option value="streaming">Streaming</option>
<option value="education">Education</option>
<option value="networking">Networking</option>
<option value="torrent">Torrent</option>
<option value="science">Science</option>
<option value="medicine">Medicine</option>
<option value="reading">Reading</option>
<option value="stock">Stock</option>
<option value="messaging">Messaging</option>
<option value="trading">Trading</option>
<option value="links">Links</option>
<option value="fashion">Fashion</option>
<option value="tasks">Tasks</option>
<option value="military">Military</option>
<option value="auto">Auto</option>
<option value="gambling">Gambling</option>
<option value="cybercriminal">Cybercriminal</option>
<option value="review">Review</option>
<option value="bookmarks">Bookmarks</option>
<option value="design">Design</option>
<option value="tor">Tor</option>
<option value="i2p">I2P</option>
<option value="q&a">Q&A</option>
<option value="crypto">Crypto</option>
<option value="ai">AI</option>
</select>
</div>
</div>
</div>
<!-- Advanced Options Section -->
<div class="mb-4">
<div class="section-header" onclick="toggleSection('advanced')">
<h5 class="mb-0">Advanced Options</h5>
<span class="chevron"></span>
</div>
<div id="advanced" class="section-content">
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="permute" name="permute">
<label class="form-check-label" for="permute">Enable Username Permutations</label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="disable_recursive_search"
name="disable_recursive_search">
<label class="form-check-label" for="disable_recursive_search">Disable Recursive Search</label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="disable_extracting" name="disable_extracting">
<label class="form-check-label" for="disable_extracting">Disable Information Extraction</label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="with_domains" name="with_domains">
<label class="form-check-label" for="with_domains">Check Domains</label>
</div>
<div class="mb-3">
<label for="proxy" class="form-label">Proxy URL</label>
<input type="text" class="form-control" id="proxy" name="proxy"
placeholder="e.g., 127.0.0.1:1080">
</div>
<div class="mb-3">
<label for="tor_proxy" class="form-label">TOR Proxy URL</label>
<input type="text" class="form-control" id="tor_proxy" name="tor_proxy"
placeholder="Default: 127.0.0.1:9050">
</div>
<div class="mb-3">
<label for="i2p_proxy" class="form-label">I2P Proxy URL</label>
<input type="text" class="form-control" id="i2p_proxy" name="i2p_proxy"
placeholder="Default: 127.0.0.1:4444">
</div>
</div>
</div>
<button type="submit" class="btn search-button" style="background-color: rgb(249, 207, 0); color: black;">
Start Search
</button>
</form>
</div>
<script>
function toggleSection(sectionId) {
const content = document.getElementById(sectionId);
const header = content.previousElementSibling;
content.classList.toggle('show');
header.querySelector('.chevron').classList.toggle('collapsed');
}
document.addEventListener('DOMContentLoaded', function () {
// Tag cloud functionality
const tagCloud = document.getElementById('tagCloud');
const hiddenSelect = document.getElementById('tags');
const allTags = Array.from(hiddenSelect.options).map(opt => ({
value: opt.value,
label: opt.text
}));
allTags.forEach(tag => {
const tagElement = document.createElement('span');
tagElement.className = 'tag';
tagElement.textContent = tag.label;
tagElement.dataset.value = tag.value;
tagElement.addEventListener('click', function () {
const isSelected = this.classList.toggle('selected');
const option = Array.from(hiddenSelect.options).find(opt => opt.value === tag.value);
if (option) {
option.selected = isSelected;
}
});
tagCloud.appendChild(tagElement);
});
// Site selection functionality
const siteInput = document.getElementById('siteInput');
const hiddenInput = document.getElementById('site');
const selectedSitesContainer = document.getElementById('selectedSites');
let selectedSites = new Set();
function updateHiddenInput() {
hiddenInput.value = Array.from(selectedSites).join(',');
}
function addSite(site) {
if (site && !selectedSites.has(site)) {
selectedSites.add(site);
updateHiddenInput();
const siteElement = document.createElement('span');
siteElement.className = 'selected-site';
siteElement.innerHTML = `${site}<span class="remove-site" data-site="${site}">&times;</span>`;
selectedSitesContainer.appendChild(siteElement);
}
}
function removeSite(site) {
selectedSites.delete(site);
updateHiddenInput();
const siteElements = selectedSitesContainer.querySelectorAll('.selected-site');
siteElements.forEach(el => {
if (el.querySelector('.remove-site').dataset.site === site) {
el.remove();
}
});
}
siteInput.addEventListener('change', function (e) {
const value = this.value.trim();
if (value) {
addSite(value);
this.value = '';
}
});
selectedSitesContainer.addEventListener('click', function (e) {
if (e.target.classList.contains('remove-site')) {
removeSite(e.target.dataset.site);
}
});
siteInput.addEventListener('paste', function (e) {
e.preventDefault();
const paste = (e.clipboardData || window.clipboardData).getData('text');
const sites = paste.split(',').map(site => site.trim()).filter(site => site);
sites.forEach(addSite);
});
const form = document.querySelector('form');
form.addEventListener('submit', function (e) {
const selectedTags = Array.from(tagCloud.querySelectorAll('.tag.selected'));
Array.from(hiddenSelect.options).forEach(opt => {
opt.selected = selectedTags.some(tag => tag.dataset.value === opt.value);
});
updateHiddenInput();
});
});
</script>
{% endblock %}
-156
View File
@@ -1,156 +0,0 @@
{% extends "base.html" %}
{% block content %}
<style>
.tag-badge {
background-color: #214e7b;
padding: 2px 8px;
border-radius: 12px;
font-size: 14px;
display: inline-flex;
align-items: center;
gap: 5px;
margin: 2px;
color: white;
}
.profile-list {
list-style: none;
padding: 0;
}
.profile-item {
margin-bottom: 10px;
padding: 10px;
display: flex;
justify-content: space-between;
align-items: center;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
.profile-link {
display: flex;
align-items: center;
gap: 8px;
}
.favicon {
width: 16px;
height: 16px;
}
.tag-container {
display: flex;
flex-wrap: wrap;
gap: 5px;
justify-content: flex-end;
}
.report-container {
margin-bottom: 1rem;
}
.report-header {
cursor: pointer;
padding: 1rem;
background: rgba(255, 255, 255, 0.05);
border-radius: 4px;
margin-bottom: 0.5rem;
}
.report-content {
display: none;
}
.report-content.show {
display: block;
}
.chevron::after {
content: '▼';
margin-left: 8px;
transition: transform 0.2s;
}
.chevron.collapsed::after {
transform: rotate(-90deg);
}
</style>
<div class="form-container">
<h1 class="mb-4">Search Results</h1>
<!-- Flash messages -->
{% with messages = get_flashed_messages() %}
{% if messages %}
{% for message in messages %}
<div class="alert alert-info">{{ message }}</div>
{% endfor %}
{% endif %}
{% endwith %}
<p>The search has completed. <a href="{{ url_for('index')}}">Back to start.</a></p>
{% if graph_file %}
<h3>Combined Graph</h3>
<iframe src="{{ url_for('download_report', filename=graph_file) }}" style="width:100%; height:600px; border:none;"></iframe>
{% endif %}
<hr>
{% if individual_reports %}
<h3>Individual Reports</h3>
<div class="reports-list">
{% for report in individual_reports %}
<div class="report-container">
<div class="report-header" onclick="toggleReport(this)" data-target="report-{{ loop.index }}">
<h5 class="mb-0 d-flex align-items-center">
<span>{{ report.username }}</span>
<span class="chevron"></span>
</h5>
</div>
<div id="report-{{ loop.index }}" class="report-content">
<p>
<a href="{{ url_for('download_report', filename=report.csv_file) }}">CSV Report</a> |
<a href="{{ url_for('download_report', filename=report.json_file) }}">JSON Report</a> |
<a href="{{ url_for('download_report', filename=report.pdf_file) }}">PDF Report</a> |
<a href="{{ url_for('download_report', filename=report.html_file) }}">HTML Report</a>
</p>
{% if report.claimed_profiles %}
<strong>Claimed Profiles:</strong>
<ul class="profile-list">
{% for profile in report.claimed_profiles %}
<li class="profile-item">
<div class="profile-link">
<img class="favicon" src="https://www.google.com/s2/favicons?domain={{ profile.url }}" onerror="this.style.display='none'" alt="">
<a href="{{ profile.url }}" target="_blank">{{ profile.site_name }}</a>
</div>
{% if profile.tags %}
<div class="tag-container">
{% for tag in profile.tags %}
<span class="tag-badge">{{ tag }}</span>
{% endfor %}
</div>
{% endif %}
</li>
{% endfor %}
</ul>
{% else %}
<p>No claimed profiles found.</p>
{% endif %}
</div>
</div>
{% endfor %}
</div>
{% else %}
<p>No individual reports available.</p>
{% endif %}
</div>
<script>
function toggleReport(header) {
const reportId = header.getAttribute('data-target');
const content = document.getElementById(reportId);
content.classList.toggle('show');
header.querySelector('.chevron').classList.toggle('collapsed');
}
</script>
{% endblock %}
-16
View File
@@ -1,16 +0,0 @@
{% extends "base.html" %}
{% block content %}
<div class="container mt-4 text-center">
<h2>Search in progress...</h2>
<p>Your request is being processed in the background. This page will automatically redirect once the results are ready.</p>
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
<script>
// Auto-refresh the page every 5 seconds to check completion
setTimeout(function() {
window.location.reload();
}, 5000);
</script>
</div>
{% endblock %}
Generated
+836 -1168
View File
File diff suppressed because it is too large
+1 -1
View File
@@ -1,5 +1,5 @@
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
pefile==2023.2.7 # do not bump while pyinstaller is 6.11.1, there is a conflict pefile==2023.2.7 # do not bump while pyinstaller is 6.11.1, there is a conflict
psutil==6.1.1 psutil==6.1.0
pyinstaller==6.11.1 pyinstaller==6.11.1
pywin32-ctypes==0.2.3 pywin32-ctypes==0.2.3
+18 -24
View File
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry] [tool.poetry]
name = "maigret" name = "maigret"
version = "0.5.0" version = "0.4.4"
description = "🕵️‍♂️ Collect a dossier on a person by username from thousands of sites." description = "🕵️‍♂️ Collect a dossier on a person by username from thousands of sites."
authors = ["Soxoj <soxoj@protonmail.com>"] authors = ["Soxoj <soxoj@protonmail.com>"]
readme = "README.md" readme = "README.md"
@@ -32,47 +32,44 @@ classifiers = [
 # poetry install --with dev
 python = "^3.10"
 aiodns = "^3.0.0"
-aiohttp = "^3.12.14"
-aiohttp-socks = "^0.10.1"
+aiohttp = "^3.11.9"
+aiohttp-socks = "^0.9.1"
 arabic-reshaper = "^3.0.0"
 async-timeout = "^5.0.1"
-attrs = "^25.3.0"
-certifi = "^2025.6.15"
+attrs = "^24.2.0"
+certifi = "^2024.8.30"
 chardet = "^5.0.0"
 colorama = "^0.4.6"
 future = "^1.0.0"
 future-annotations= "^1.0.0"
 html5lib = "^1.1"
 idna = "^3.4"
-Jinja2 = "^3.1.6"
-lxml = ">=5.3,<7.0"
+Jinja2 = "^3.1.3"
+lxml = "^5.3.0"
 MarkupSafe = "^3.0.2"
 mock = "^5.1.0"
-multidict = "^6.6.3"
+multidict = "^6.0.4"
 pycountry = "^24.6.1"
 PyPDF2 = "^3.0.1"
 PySocks = "^1.7.1"
 python-bidi = "^0.6.3"
-requests = "^2.32.4"
+requests = "^2.31.0"
 requests-futures = "^1.0.2"
-six = "^1.17.0"
-socid-extractor = "^0.0.27"
+six = "^1.16.0"
+socid-extractor = "^0.0.26"
 soupsieve = "^2.6"
 stem = "^1.8.1"
 torrequest = "^0.1.0"
 alive_progress = "^3.2.0"
-typing-extensions = "^4.14.1"
+typing-extensions = "^4.8.0"
 webencodings = "^0.5.1"
 xhtml2pdf = "^0.2.11"
 XMind = "^1.2.0"
-yarl = "^1.20.1"
+yarl = "^1.18.3"
 networkx = "^2.6.3"
 pyvis = "^0.3.2"
-reportlab = "^4.4.3"
+reportlab = "^4.2.0"
 cloudscraper = "^1.2.71"
-flask = {extras = ["async"], version = "^3.1.1"}
-asgiref = "^3.9.1"
-platformdirs = "^4.3.8"
 [tool.poetry.group.dev.dependencies]
@@ -80,17 +77,14 @@ platformdirs = "^4.3.8"
 # Install dev dependencies with: poetry install --with dev
 flake8 = "^7.1.1"
 pytest = "^8.3.4"
-pytest-asyncio = "^1.0.0"
+pytest-asyncio = "^0.24.0"
 pytest-cov = "^6.0.0"
 pytest-httpserver = "^1.0.0"
-pytest-rerunfailures = "^15.1"
-reportlab = "^4.4.3"
-mypy = "^1.14.1"
+pytest-rerunfailures = "^15.0"
+reportlab = "^4.2.0"
+mypy = "^1.13.0"
 tuna = "^0.5.11"
-coverage = "^7.9.2"
-black = "^25.1.0"
 [tool.poetry.scripts]
 # Run with: poetry run maigret <username>
 maigret = "maigret.maigret:run"
-update_sitesmd = "utils.update_site_data:main"
+60 -80
View File
@@ -1,5 +1,5 @@
-## List of supported sites (search methods): total 3143
+## List of supported sites (search methods): total 3126
 Rank data fetched from Alexa by domains.
@@ -19,16 +19,16 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://gist.github.com) [GitHubGist (https://gist.github.com)](https://gist.github.com)*: top 50, coding, sharing*
 1. ![](https://www.google.com/s2/favicons?domain=https://vk.com/) [VK (https://vk.com/)](https://vk.com/)*: top 50, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://vk.com/) [VK (by id) (https://vk.com/)](https://vk.com/)*: top 50, ru*
-1. ![](https://www.google.com/s2/favicons?domain=https://sbongacams.com) [BongaCams (https://sbongacams.com)](https://sbongacams.com)*: top 50, cz, webcam*
+1. ![](https://www.google.com/s2/favicons?domain=https://pt.bongacams.com) [BongaCams (https://pt.bongacams.com)](https://pt.bongacams.com)*: top 50, cz, webcam*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.instagram.com/) [Instagram (https://www.instagram.com/)](https://www.instagram.com/)*: top 50, photo*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://www.twitch.tv/) [Twitch (https://www.twitch.tv/)](https://www.twitch.tv/)*: top 50, streaming, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://yandex.ru/collections/) [YandexCollections API (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://yandex.ru/collections/) [YandexCollections API (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*
 1. ![](https://www.google.com/s2/favicons?domain=https://stackoverflow.com) [StackOverflow (https://stackoverflow.com)](https://stackoverflow.com)*: top 50, coding*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.ebay.com/) [Ebay (https://www.ebay.com/)](https://www.ebay.com/)*: top 50, shopping, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://naver.com) [Naver (https://naver.com)](https://naver.com)*: top 50, kr*
 1. ![](https://www.google.com/s2/favicons?domain=https://developer.apple.com/forums) [AppleDeveloper (https://developer.apple.com/forums)](https://developer.apple.com/forums)*: top 50, forum, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://discussions.apple.com/) [AppleDiscussions (https://discussions.apple.com/)](https://discussions.apple.com/)*: top 50, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://nitter.net/) [Nitter (https://nitter.net/)](https://nitter.net/)*: top 50, messaging*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://nitter.net/) [Nitter (https://nitter.net/)](https://nitter.net/)*: top 50, messaging*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.twitter.com/) [Twitter (https://www.twitter.com/)](https://www.twitter.com/)*: top 50, messaging*
 1. ![](https://www.google.com/s2/favicons?domain=https://allods.mail.ru) [Allods (https://allods.mail.ru)](https://allods.mail.ru)*: top 50, forum, gaming, ru*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://aa.mail.ru) [ArcheAge (https://aa.mail.ru)](https://aa.mail.ru)*: top 50, forum, gaming, ru*, search is disabled
@@ -63,7 +63,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://www.tradingview.com/) [TradingView (https://www.tradingview.com/)](https://www.tradingview.com/)*: top 100, trading, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.aparat.com) [Aparat (https://www.aparat.com)](https://www.aparat.com)*: top 100, ir, video*
 1. ![](https://www.google.com/s2/favicons?domain=https://chaturbate.com) [ChaturBate (https://chaturbate.com)](https://chaturbate.com)*: top 100, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://medium.com/) [Medium (https://medium.com/)](https://medium.com/)*: top 100, blog, us*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://medium.com/) [Medium (https://medium.com/)](https://medium.com/)*: top 100, blog, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.livejasmin.com/) [Livejasmin (https://www.livejasmin.com/)](https://www.livejasmin.com/)*: top 100, us, webcam*
 1. ![](https://www.google.com/s2/favicons?domain=https://pornhub.com/) [Pornhub (https://pornhub.com/)](https://pornhub.com/)*: top 100, porn*
 1. ![](https://www.google.com/s2/favicons?domain=https://imgur.com) [Imgur (https://imgur.com)](https://imgur.com)*: top 100, photo*
@@ -77,7 +77,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://open.spotify.com/) [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://www.tiktok.com/) [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video*
 1. ![](https://www.google.com/s2/favicons?domain=https://xvideos.com/) [Xvideos (https://xvideos.com/)](https://xvideos.com/)*: top 500, porn, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.tumblr.com) [Tumblr (https://www.tumblr.com)](https://www.tumblr.com)*: top 500, blog*
+1. ![](https://www.google.com/s2/favicons?domain=https://tumblr.com/) [Tumblr (https://tumblr.com/)](https://tumblr.com/)*: top 500, blog*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.roblox.com/) [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://soundcloud.com/) [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.udemy.com) [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*
@@ -88,49 +88,41 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock*
-1. ![](https://www.google.com/s2/favicons?domain=https://vimeo.com) [Vimeo (https://vimeo.com)](https://vimeo.com)*: top 500, video*
+1. ![](https://www.google.com/s2/favicons?domain=https://vimeo.com/) [Vimeo (https://vimeo.com/)](https://vimeo.com/)*: top 500, us, video*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.pinterest.com/) [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.fiverr.com/) [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://t.me/) [Telegram (https://t.me/)](https://t.me/)*: top 500, messaging*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.slideshare.net) [SlideShare (https://www.slideshare.net)](https://www.slideshare.net)*: top 500*
+1. ![](https://www.google.com/s2/favicons?domain=https://slideshare.net/) [SlideShare (https://slideshare.net/)](https://slideshare.net/)*: top 500, documents, sharing*
 1. ![](https://www.google.com/s2/favicons?domain=https://theguardian.com) [TheGuardian (https://theguardian.com)](https://theguardian.com)*: top 500, news, us*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://trello.com/) [Trello (https://trello.com/)](https://trello.com/)*: top 500, tasks*
 1. ![](https://www.google.com/s2/favicons?domain=https://support.mozilla.org) [Mozilla Support (https://support.mozilla.org)](https://support.mozilla.org)*: top 500, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.cnet.com) [CNET (https://www.cnet.com)](https://www.cnet.com)*: top 500, news, tech, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://www.cnet.com/) [CNET (https://www.cnet.com/)](https://www.cnet.com/)*: top 500, news, tech, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.shutterstock.com) [Shutterstock (https://www.shutterstock.com)](https://www.shutterstock.com)*: top 500, music, photo, stock, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://wix.com/) [Wix (https://wix.com/)](https://wix.com/)*: top 500, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://slack.com) [Slack (https://slack.com)](https://slack.com)*: top 500, messaging*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.chess.com) [Chess (https://www.chess.com)](https://www.chess.com)*: top 500, gaming, hobby*
+1. ![](https://www.google.com/s2/favicons?domain=https://www.chess.com/) [Chess (https://www.chess.com/)](https://www.chess.com/)*: top 500, gaming, hobby*
 1. ![](https://www.google.com/s2/favicons?domain=https://upwork.com) [upwork.com (https://upwork.com)](https://upwork.com)*: top 500, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://archive.org) [Archive.org (https://archive.org)](https://archive.org)*: top 500*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://archive.org) [Archive.org (https://archive.org)](https://archive.org)*: top 500*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.figma.com/) [Figma (https://www.figma.com/)](https://www.figma.com/)*: top 500, design*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.istockphoto.com) [iStock (https://www.istockphoto.com)](https://www.istockphoto.com)*: top 500, photo, stock*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.scribd.com/) [Scribd (https://www.scribd.com/)](https://www.scribd.com/)*: top 500, reading*
 1. ![](https://www.google.com/s2/favicons?domain=https://opensea.io) [opensea.io (https://opensea.io)](https://opensea.io)*: top 500, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.dailymotion.com) [DailyMotion (https://www.dailymotion.com)](https://www.dailymotion.com)*: top 500, video*
+1. ![](https://www.google.com/s2/favicons?domain=https://www.dailymotion.com/) [DailyMotion (https://www.dailymotion.com/)](https://www.dailymotion.com/)*: top 500, us, video*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.behance.net/) [Behance (https://www.behance.net/)](https://www.behance.net/)*: top 500, business*
 1. ![](https://www.google.com/s2/favicons?domain=http://www.yelp.com) [Yelp (http://www.yelp.com)](http://www.yelp.com)*: top 500, review*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://www.yelp.com) [Yelp (by id) (https://www.yelp.com)](https://www.yelp.com)*: top 500, review*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com/) [Blogger (https://www.blogger.com/)](https://www.blogger.com/)*: top 500, blog*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.patreon.com/) [Patreon (https://www.patreon.com/)](https://www.patreon.com/)*: top 500, finance*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.goodreads.com/) [GoodReads (https://www.goodreads.com/)](https://www.goodreads.com/)*: top 500, books, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Brazil (https://www.op.gg/)](https://www.op.gg/)*: top 500, br, gaming*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] North America (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Middle East (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Europe Nordic & East (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Europe West (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Oceania (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Korea (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, kr*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Japan (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, jp*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] LAS (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] LAN (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Russia (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, ru*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Turkey (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, tr*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Singapore (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, sg*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Phillippines (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, ph*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Taiwan (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, tw*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Vietnam (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, vn*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.op.gg/) [OP.GG [LeagueOfLegends] Thailand (https://www.op.gg/)](https://www.op.gg/)*: top 500, gaming, th*
+1. ![](https://www.google.com/s2/favicons?domain=https://br.op.gg/) [br.op.gg (https://br.op.gg/)](https://br.op.gg/)*: top 500, br, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://eune.op.gg/) [eune.op.gg (https://eune.op.gg/)](https://eune.op.gg/)*: top 500, eu, gaming, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://euw.op.gg/) [euw.op.gg (https://euw.op.gg/)](https://euw.op.gg/)*: top 500, gaming, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://lan.op.gg/) [lan.op.gg (https://lan.op.gg/)](https://lan.op.gg/)*: top 500, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://las.op.gg/) [las.op.gg (https://las.op.gg/)](https://las.op.gg/)*: top 500, gaming, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://na.op.gg/) [na.op.gg (https://na.op.gg/)](https://na.op.gg/)*: top 500, gaming*
+1. ![](https://www.google.com/s2/favicons?domain=https://oce.op.gg/) [oce.op.gg (https://oce.op.gg/)](https://oce.op.gg/)*: top 500, au, gaming, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://ru.op.gg/) [ru.op.gg (https://ru.op.gg/)](https://ru.op.gg/)*: top 500, gaming, ru, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://tr.op.gg/) [tr.op.gg (https://tr.op.gg/)](https://tr.op.gg/)*: top 500, gaming, tr, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.quora.com/) [Quora (https://www.quora.com/)](https://www.quora.com/)*: top 500, education*
 1. ![](https://www.google.com/s2/favicons?domain=https://tripadvisor.com/) [TripAdvisor (https://tripadvisor.com/)](https://tripadvisor.com/)*: top 500, travel*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.academia.edu/) [Academia.edu (https://www.academia.edu/)](https://www.academia.edu/)*: top 500, id*
@@ -187,7 +179,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://community.brave.com) [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://tinder.com/) [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://community.cloudflare.com/) [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, forum, tech*
-1. ![](https://www.google.com/s2/favicons?domain=https://eksisozluk.com) [Eksisozluk (https://eksisozluk.com)](https://eksisozluk.com)*: top 1K, tr*
+1. ![](https://www.google.com/s2/favicons?domain=https://eksisozluk.com/biri/) [Eksisozluk (https://eksisozluk.com/biri/)](https://eksisozluk.com/biri/)*: top 1K, tr*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.allrecipes.com/) [AllRecipes (https://www.allrecipes.com/)](https://www.allrecipes.com/)*: top 1K, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://support.t-mobile.com) [T-MobileSupport (https://support.t-mobile.com)](https://support.t-mobile.com)*: top 1K, us*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://www.tinkoff.ru/invest/) [Tinkoff Invest (https://www.tinkoff.ru/invest/)](https://www.tinkoff.ru/invest/)*: top 5K, ru*
@@ -195,7 +187,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://discuss.python.org/) [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.nairaland.com/) [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng*
 1. ![](https://www.google.com/s2/favicons?domain=https://ru.redtube.com/) [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.strava.com/) [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://www.strava.com/) [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://profile.ameba.jp) [Ameba (https://profile.ameba.jp)](https://profile.ameba.jp)*: top 5K, jp*
 1. ![](https://www.google.com/s2/favicons?domain=https://adblockplus.org) [adblockplus.org (https://adblockplus.org)](https://adblockplus.org)*: top 5K, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://houzz.com/) [Houzz (https://houzz.com/)](https://houzz.com/)*: top 5K, us*, search is disabled
@@ -265,7 +257,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://lichess.org) [Lichess (https://lichess.org)](https://lichess.org)*: top 5K, gaming, hobby*
 1. ![](https://www.google.com/s2/favicons?domain=https://jsfiddle.net) [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing*
 1. ![](https://www.google.com/s2/favicons?domain=https://ru.pathofexile.com) [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.metacritic.com/) [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://www.digitalocean.com/) [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech*
 1. ![](https://www.google.com/s2/favicons?domain=http://www.jeuxvideo.com) [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming*
@@ -281,7 +273,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://archiveofourown.org) [ArchiveOfOurOwn (https://archiveofourown.org)](https://archiveofourown.org)*: top 5K, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://bit.ly) [Bit.ly (https://bit.ly)](https://bit.ly)*: top 5K, links*
 1. ![](https://www.google.com/s2/favicons?domain=https://infourok.ru) [Infourok (https://infourok.ru)](https://infourok.ru)*: top 5K, ru*
-1. ![](https://www.google.com/s2/favicons?domain=https://community.cbr.com) [Cbr (https://community.cbr.com)](https://community.cbr.com)*: top 5K, forum, us*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://community.cbr.com) [Cbr (https://community.cbr.com)](https://community.cbr.com)*: top 5K, forum, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://segmentfault.com/) [segmentfault (https://segmentfault.com/)](https://segmentfault.com/)*: top 5K, cn*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://www.warriorforum.com/) [Warrior Forum (https://www.warriorforum.com/)](https://www.warriorforum.com/)*: top 5K, forum, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://hub.docker.com/) [Docker Hub (https://hub.docker.com/)](https://hub.docker.com/)*: top 5K, coding*
@@ -295,7 +287,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://creativemarket.com/) [CreativeMarket (https://creativemarket.com/)](https://creativemarket.com/)*: top 5K, art, stock*
 1. ![](https://www.google.com/s2/favicons?domain=https://bitbucket.org/) [BitBucket (https://bitbucket.org/)](https://bitbucket.org/)*: top 5K, coding*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.techrepublic.com) [Techrepublic (https://www.techrepublic.com)](https://www.techrepublic.com)*: top 5K, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://aminoapps.com/) [aminoapp (https://aminoapps.com/)](https://aminoapps.com/)*: top 5K, br, us*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://aminoapps.com/) [aminoapp (https://aminoapps.com/)](https://aminoapps.com/)*: top 5K, br, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.mixcloud.com/) [MixCloud (https://www.mixcloud.com/)](https://www.mixcloud.com/)*: top 5K, music*
 1. ![](https://www.google.com/s2/favicons?domain=https://forum.xda-developers.com) [XDA (https://forum.xda-developers.com)](https://forum.xda-developers.com)*: top 5K, apps, forum*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://thechive.com/) [Thechive (https://thechive.com/)](https://thechive.com/)*: top 5K, us*
@@ -321,7 +313,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=http://forums.bulbagarden.net) [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://videohive.net) [videohive.net (https://videohive.net)](https://videohive.net)*: top 5K, video*
 1. ![](https://www.google.com/s2/favicons?domain=https://imginn.com) [ImgInn (https://imginn.com)](https://imginn.com)*: top 5K, photo*
-1. ![](https://www.google.com/s2/favicons?domain=https://boardgamegeek.com) [BoardGameGeek (https://boardgamegeek.com)](https://boardgamegeek.com)*: top 5K, gaming, us*
+1. ![](https://www.google.com/s2/favicons?domain=https://www.boardgamegeek.com) [BoardGameGeek (https://www.boardgamegeek.com)](https://www.boardgamegeek.com)*: top 5K, gaming, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://osu.ppy.sh/) [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 5K, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://app.pluralsight.com) [Pluralsight (https://app.pluralsight.com)](https://app.pluralsight.com)*: top 5K, in, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.techpowerup.com) [TechPowerUp (https://www.techpowerup.com)](https://www.techpowerup.com)*: top 5K, us*
@@ -336,11 +328,11 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://www.jigsawplanet.com) [Jigsawplanet (https://www.jigsawplanet.com)](https://www.jigsawplanet.com)*: top 5K, fr, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://hackernoon.com) [hackernoon.com (https://hackernoon.com)](https://hackernoon.com)*: top 5K, news, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://pcpartpicker.com) [PCPartPicker (https://pcpartpicker.com)](https://pcpartpicker.com)*: top 5K, us*, search is disabled
-1. ![](https://www.google.com/s2/favicons?domain=https://ask.fm/) [AskFM (https://ask.fm/)](https://ask.fm/)*: top 5K, eg, in, ru*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://ask.fm/) [AskFM (https://ask.fm/)](https://ask.fm/)*: top 5K, eg, in, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://gitlab.com/) [GitLab (https://gitlab.com/)](https://gitlab.com/)*: top 5K, coding*
 1. ![](https://www.google.com/s2/favicons?domain=https://dev.to/) [DEV Community (https://dev.to/)](https://dev.to/)*: top 5K, coding*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.gumroad.com/) [Gumroad (https://www.gumroad.com/)](https://www.gumroad.com/)*: top 5K, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://gramho.com/) [Gramho (https://gramho.com/)](https://gramho.com/)*: top 5K, photo*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://gramho.com/) [Gramho (https://gramho.com/)](https://gramho.com/)*: top 5K, photo*
 1. ![](https://www.google.com/s2/favicons?domain=https://taplink.cc/) [Taplink (https://taplink.cc/)](https://taplink.cc/)*: top 5K, links, ru*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://www.buymeacoffee.com/) [BuyMeACoffee (https://www.buymeacoffee.com/)](https://www.buymeacoffee.com/)*: top 5K, in*
 1. ![](https://www.google.com/s2/favicons?domain=https://muckrack.com) [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, us*
@@ -406,7 +398,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://www.reverbnation.com/) [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.glavbukh.ru) [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://www.trakt.tv/) [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr*
-1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, finance*
+1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, au*
 1. ![](https://www.google.com/s2/favicons?domain=https://pandia.ru) [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://forums.majorgeeks.com) [forums.majorgeeks.com (https://forums.majorgeeks.com)](https://forums.majorgeeks.com)*: top 10K, forum, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.hackerearth.com) [Hackerearth (https://www.hackerearth.com)](https://www.hackerearth.com)*: top 10K, freelance*
@@ -472,7 +464,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://3ddd.ru) [3ddd (https://3ddd.ru)](https://3ddd.ru)*: top 100K, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://namemc.com/) [NameMC (https://namemc.com/)](https://namemc.com/)*: top 100K, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.b17.ru/) [B17 (https://www.b17.ru/)](https://www.b17.ru/)*: top 100K, ru*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.beermoneyforum.com) [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://www.beermoneyforum.com) [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*
 1. ![](https://www.google.com/s2/favicons?domain=https://diary.ru) [Diary.ru (https://diary.ru)](https://diary.ru)*: top 100K, blog, nl, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.americanthinker.com/) [Americanthinker (https://www.americanthinker.com/)](https://www.americanthinker.com/)*: top 100K*
 1. ![](https://www.google.com/s2/favicons?domain=https://contently.com/) [Contently (https://contently.com/)](https://contently.com/)*: top 100K, freelance, in*
@@ -497,7 +489,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://pbase.com/) [Pbase (https://pbase.com/)](https://pbase.com/)*: top 100K, in*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.native-instruments.com/forum/) [NICommunityForum (https://www.native-instruments.com/forum/)](https://www.native-instruments.com/forum/)*: top 100K, forum*
 1. ![](https://www.google.com/s2/favicons?domain=https://spletnik.ru/) [spletnik (https://spletnik.ru/)](https://spletnik.ru/)*: top 100K, ru*
-1. ![](https://www.google.com/s2/favicons?domain=http://www.folkd.com/profile/) [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=http://www.folkd.com/profile/) [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.iphones.ru) [Iphones.ru (https://www.iphones.ru)](https://www.iphones.ru)*: top 100K, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.oper.ru/) [Oper (https://www.oper.ru/)](https://www.oper.ru/)*: top 100K, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.interpals.net/) [interpals (https://www.interpals.net/)](https://www.interpals.net/)*: top 100K, dating*
@@ -750,7 +742,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=http://forum.eksmo.ru) [forum.eksmo.ru (http://forum.eksmo.ru)](http://forum.eksmo.ru)*: top 100K, forum, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://davesgarden.com) [Davesgarden (https://davesgarden.com)](https://davesgarden.com)*: top 100K, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://forum.cxem.net/) [forum.cxem.net (https://forum.cxem.net/)](https://forum.cxem.net/)*: top 100K, forum, ru*, search is disabled
-1. ![](https://www.google.com/s2/favicons?domain=https://icq.com) [ICQ (https://icq.com)](https://icq.com)*: top 100K, ch, ru, tr*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://icq.com) [ICQ (https://icq.com)](https://icq.com)*: top 100K, ch, ru, tr*
 1. ![](https://www.google.com/s2/favicons?domain=https://d3.ru/) [d3 (https://d3.ru/)](https://d3.ru/)*: top 100K, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://forum.dwg.ru/) [dwg (https://forum.dwg.ru/)](https://forum.dwg.ru/)*: top 100K, forum, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://fotki.com) [Fotki (https://fotki.com)](https://fotki.com)*: top 100K, photo*
@@ -770,7 +762,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://tellonym.me/) [Tellonym.me (https://tellonym.me/)](https://tellonym.me/)*: top 100K, de, fr, sa, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://spaces.im) [Spaces (https://spaces.im)](https://spaces.im)*: top 100K, blog, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.ethicalhacker.net) [EthicalHacker (https://www.ethicalhacker.net)](https://www.ethicalhacker.net)*: top 100K, in, us*
-1. ![](https://www.google.com/s2/favicons?domain=https://www.playstationtrophies.org) [PlaystationTrophies (https://www.playstationtrophies.org)](https://www.playstationtrophies.org)*: top 100K, forum, gaming*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://www.playstationtrophies.org) [PlaystationTrophies (https://www.playstationtrophies.org)](https://www.playstationtrophies.org)*: top 100K, forum, gaming*
 1. ![](https://www.google.com/s2/favicons?domain=https://appleinsider.ru) [appleinsider.ru (https://appleinsider.ru)](https://appleinsider.ru)*: top 100K, news, ru, tech*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.hr.com) [Hr (https://www.hr.com)](https://www.hr.com)*: top 100K, in, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.funnyordie.com) [Funnyordie (https://www.funnyordie.com)](https://www.funnyordie.com)*: top 100K, in, us*, search is disabled
@@ -812,7 +804,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://forums.gentoo.org) [gentoo (https://forums.gentoo.org)](https://forums.gentoo.org)*: top 100K, fi, forum, in*
 1. ![](https://www.google.com/s2/favicons?domain=https://community.asterisk.org) [community.asterisk.org (https://community.asterisk.org)](https://community.asterisk.org)*: top 100K, forum, in, ir, jp, us*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.gapyear.com) [Gapyear (https://www.gapyear.com)](https://www.gapyear.com)*: top 100K, gb, in*
-1. ![](https://www.google.com/s2/favicons?domain=https://shadowban.eu) [Twitter Shadowban (https://shadowban.eu)](https://shadowban.eu)*: top 100K, jp, sa*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://shadowban.eu) [Twitter Shadowban (https://shadowban.eu)](https://shadowban.eu)*: top 100K, jp, sa*
 1. ![](https://www.google.com/s2/favicons?domain=https://psyera.ru) [Psyera (https://psyera.ru)](https://psyera.ru)*: top 100K, ru*
 1. ![](https://www.google.com/s2/favicons?domain=http://forum.mfd.ru) [mfd (http://forum.mfd.ru)](http://forum.mfd.ru)*: top 100K, forum, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://forum.mirf.ru/) [mirf (https://forum.mirf.ru/)](https://forum.mirf.ru/)*: top 100K, forum, ru*
@@ -1256,7 +1248,7 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://www.mobrep.ru) [Mobrep (https://www.mobrep.ru)](https://www.mobrep.ru)*: top 10M, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://www.hipforums.com/) [Hipforums (https://www.hipforums.com/)](https://www.hipforums.com/)*: top 10M, forum, in, ru, us*, search is disabled
 1. ![](https://www.google.com/s2/favicons?domain=https://induste.com/) [induste.com (https://induste.com/)](https://induste.com/)*: top 10M, forum, ma, re*
-1. ![](https://www.google.com/s2/favicons?domain=https://minecraftonly.ru) [MinecraftOnly (https://minecraftonly.ru)](https://minecraftonly.ru)*: top 10M, forum, gaming, ru*, search is disabled
+1. ![](https://www.google.com/s2/favicons?domain=https://minecraftonly.ru) [MinecraftOnly (https://minecraftonly.ru)](https://minecraftonly.ru)*: top 10M, forum, gaming, ru*
 1. ![](https://www.google.com/s2/favicons?domain=http://www.vauxhallownersnetwork.co.uk) [vauxhallownersnetwork.co.uk (http://www.vauxhallownersnetwork.co.uk)](http://www.vauxhallownersnetwork.co.uk)*: top 10M, forum, tr*
 1. ![](https://www.google.com/s2/favicons?domain=https://forum.astralinux.ru) [Astralinux (https://forum.astralinux.ru)](https://forum.astralinux.ru)*: top 10M, forum, ru*
 1. ![](https://www.google.com/s2/favicons?domain=https://forum.podolsk.ru) [podolsk (https://forum.podolsk.ru)](https://forum.podolsk.ru)*: top 10M, forum, ru*
@@ -2161,7 +2153,6 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=) [Finanzfrage ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Finanzfrage ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=http://forum.quake2.com.ru/) [Forum.quake2.com.ru (http://forum.quake2.com.ru/)](http://forum.quake2.com.ru/)*: top 100M, forum, ru* 1. ![](https://www.google.com/s2/favicons?domain=http://forum.quake2.com.ru/) [Forum.quake2.com.ru (http://forum.quake2.com.ru/)](http://forum.quake2.com.ru/)*: top 100M, forum, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forums.tauck.com) [ForumTauck (https://forums.tauck.com)](https://forums.tauck.com)*: top 100M, forum, us* 1. ![](https://www.google.com/s2/favicons?domain=https://forums.tauck.com) [ForumTauck (https://forums.tauck.com)](https://forums.tauck.com)*: top 100M, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://framapiaf.org) [Framapiaf (https://framapiaf.org)](https://framapiaf.org)*: top 100M, mastodon*
1. ![](https://www.google.com/s2/favicons?domain=) [G2g.com ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [G2g.com ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://gam1ng.com.br) [Gam1ng (https://gam1ng.com.br)](https://gam1ng.com.br)*: top 100M, br, webcam*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://gam1ng.com.br) [Gam1ng (https://gam1ng.com.br)](https://gam1ng.com.br)*: top 100M, br, webcam*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=) [GeniusArtists ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [GeniusArtists ()]()*: top 100M*
@@ -2203,7 +2194,6 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://macqa.ru) [Macqa (https://macqa.ru)](https://macqa.ru)*: top 100M, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://macqa.ru) [Macqa (https://macqa.ru)](https://macqa.ru)*: top 100M, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=) [Maga-Chat ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Maga-Chat ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Magabook ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Magabook ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://mamot.fr) [Mamot (https://mamot.fr)](https://mamot.fr)*: top 100M, mastodon*
1. ![](https://www.google.com/s2/favicons?domain=) [Mapify.travel ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Mapify.travel ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [MapMyTracks ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [MapMyTracks ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Marshmallow ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Marshmallow ()]()*: top 100M*
@@ -2226,12 +2216,10 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=) [Oglaszamy24h ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Oglaszamy24h ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Olx.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Olx.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Ourfreedombook ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Ourfreedombook ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://outgress.com/) [Outgress (https://outgress.com/)](https://outgress.com/)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Ow.ly ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Ow.ly ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Patronite ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Patronite ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Pewex.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Pewex.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Piekielni ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Piekielni ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://pixelfed.social/) [pixelfed.social (https://pixelfed.social/)](https://pixelfed.social/)*: top 100M, art, pixelfed*
1. ![](https://www.google.com/s2/favicons?domain=) [Pol.social ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Pol.social ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Polczat.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Polczat.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Policja2009 ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Policja2009 ()]()*: top 100M*
@@ -2242,7 +2230,6 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://palexaRankru.net/) [PalexaRankru (https://palexaRankru.net/)](https://palexaRankru.net/)*: top 100M, forum, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://palexaRankru.net/) [PalexaRankru (https://palexaRankru.net/)](https://palexaRankru.net/)*: top 100M, forum, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.programmersforum) [ProgrammersForum (https://www.programmersforum)](https://www.programmersforum)*: top 100M, forum, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.programmersforum) [ProgrammersForum (https://www.programmersforum)](https://www.programmersforum)*: top 100M, forum, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=) [Prv.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Prv.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://programming.dev) [programming.dev (https://programming.dev)](https://programming.dev)*: top 100M, lemmy*
1. ![](https://www.google.com/s2/favicons?domain=) [Quitter.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Quitter.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Quizlet ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Quizlet ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=http://www.rammclan.ru) [Rammclan (http://www.rammclan.ru)](http://www.rammclan.ru)*: top 100M, ru* 1. ![](https://www.google.com/s2/favicons?domain=http://www.rammclan.ru) [Rammclan (http://www.rammclan.ru)](http://www.rammclan.ru)*: top 100M, ru*
@@ -3101,13 +3088,13 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=) [universocraft ()]()*: top 100M, gaming* 1. ![](https://www.google.com/s2/favicons?domain=) [universocraft ()]()*: top 100M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://fragment.com) [fragment.com (https://fragment.com)](https://fragment.com)*: top 100M, crypto* 1. ![](https://www.google.com/s2/favicons?domain=https://fragment.com) [fragment.com (https://fragment.com)](https://fragment.com)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://ud.me) [UnstoppableDomains (https://ud.me)](https://ud.me)*: top 100M, crypto* 1. ![](https://www.google.com/s2/favicons?domain=https://ud.me) [UnstoppableDomains (https://ud.me)](https://ud.me)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/meta (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/meta (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/music (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/music (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/ass (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/ass (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/404 (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/404 (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/sandbox (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/sandbox (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/web3 (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/web3 (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/gamefi (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/gamefi (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/iotex (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto* 1. ![](https://www.google.com/s2/favicons?domain=https://api.edns.domains) [edns.domains/iotex (https://api.edns.domains)](https://api.edns.domains)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://peername.com/) [peername.com/bit (https://peername.com/)](https://peername.com/)*: top 100M, crypto* 1. ![](https://www.google.com/s2/favicons?domain=https://peername.com/) [peername.com/bit (https://peername.com/)](https://peername.com/)*: top 100M, crypto*
1. ![](https://www.google.com/s2/favicons?domain=https://peername.com/) [peername.com/coin (https://peername.com/)](https://peername.com/)*: top 100M, crypto* 1. ![](https://www.google.com/s2/favicons?domain=https://peername.com/) [peername.com/coin (https://peername.com/)](https://peername.com/)*: top 100M, crypto*
@@ -3142,41 +3129,34 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.tnaflix.com) [www.tnaflix.com (https://www.tnaflix.com)](https://www.tnaflix.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://www.tnaflix.com) [www.tnaflix.com (https://www.tnaflix.com)](https://www.tnaflix.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://massagerepublic.com) [massagerepublic.com (https://massagerepublic.com)](https://massagerepublic.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://massagerepublic.com) [massagerepublic.com (https://massagerepublic.com)](https://massagerepublic.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://mynickname.com) [mynickname.com (https://mynickname.com)](https://mynickname.com)*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=https://mynickname.com) [mynickname.com (https://mynickname.com)](https://mynickname.com)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://substack.com) [Substack (https://substack.com)](https://substack.com)*: top 100M, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://pubg.op.gg) [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://valorant.op.gg) [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://write.as) [write.as (https://write.as)](https://write.as)*: top 100M, writefreely*
The list was updated at (2025-08-10) The list was updated at (2024-11-30)
## Statistics ## Statistics
Enabled/total sites: 2687/3143 = 85.49% Enabled/total sites: 2693/3126 = 86.15%
Incomplete message checks: 394/2687 = 14.66% (false positive risks) Incomplete message checks: 404/2693 = 15.0% (false positive risks)
Status code checks: 618/2687 = 23.0% (false positive risks) Status code checks: 618/2694 = 22.94% (false positive risks)
False positive risk (total): 37.66% False positive risk (total): 37.97%
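(For orientation, the total appears to be the two risk shares summed: 14.66% + 23.0% = 37.66% on one side. On the other side 15.0% + 22.94% gives 37.94% rather than the listed 37.97%, and the denominator flips between 2693 and 2694 — both look like rounding/off-by-one quirks of the stats generator rather than real data.)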
Sites with probing: 500px, Aparat, BinarySearch (disabled), BongaCams, BuyMeACoffee, Cent, Disqus, Docker Hub, Duolingo, Gab, GitHub, GitLab, Google Plus (archived), Gravatar, Imgur, Issuu, Keybase, Livejasmin, LocalCryptos (disabled), MixCloud, Niftygateway, Reddit Search (Pushshift) (disabled), SportsTracker, Spotify (disabled), TAP'D, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Weibo, Yapisal (disabled), YouNow, nightbot, notabug.org, polarsteps, qiwi.me (disabled)
Sites with activation: Spotify (disabled), Twitter, Vimeo, Weibo
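Activation, at least, is pinned down by the activation tests later in this compare: a site-specific activator call refreshes the site's auth header before checks run (`Authorization` for Vimeo, `x-guest-token` for Twitter). A minimal sketch assuming that tests' API, with `db` and `logger` assumed to already exist:

```python
from maigret.activation import ParsingActivator

# sketch based on the activation tests below: the activator rotates
# the site's auth header before username checks are executed
site = db.sites_dict['Vimeo']         # `db` is an assumed loaded MaigretDatabase
ParsingActivator.vimeo(site, logger)  # refreshes site.headers['Authorization']
```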
Top 20 profile URLs: Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)` - (796) `{urlMain}/index/8-0-{username} (uCoz)`
- (303) `/{username}` - (302) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)` - (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (161) `/user/{username}` - (160) `/user/{username}`
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)` - (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
- (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)` - (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)`
- (118) `/profile/{username}` - (118) `/profile/{username}`
- (112) `/u/{username}` - (111) `/u/{username}`
- (88) `/users/{username}` - (88) `/users/{username}`
- (87) `{urlMain}/u/{username}/summary (Discourse)` - (87) `{urlMain}/u/{username}/summary (Discourse)`
- (54) `/@{username}`
- (54) `/wiki/User:{username}` - (54) `/wiki/User:{username}`
- (49) `/@{username}`
- (42) `SUBDOMAIN`
- (41) `/members/?username={username}` - (41) `/members/?username={username}`
- (41) `SUBDOMAIN`
- (32) `/members/{username}` - (32) `/members/{username}`
- (29) `/author/{username}` - (29) `/author/{username}`
- (27) `{urlMain}{urlSubpath}/memberlist.php?username={username} (phpBB)` - (27) `{urlMain}{urlSubpath}/memberlist.php?username={username} (phpBB)`
@@ -3186,20 +3166,20 @@ Top 20 profile URLs:
Top 20 tags: Top 20 tags:
- (1106) `NO_TAGS` (non-standard) - (1104) `NO_TAGS` (non-standard)
- (735) `forum` - (735) `forum`
- (92) `gaming` - (80) `gaming`
- (48) `photo` - (48) `photo`
- (41) `coding` - (41) `coding`
- (30) `tech` - (30) `tech`
- (29) `news` - (29) `news`
- (28) `blog` - (27) `blog`
- (23) `music` - (23) `music`
- (19) `finance` - (18) `finance`
- (18) `crypto` - (18) `crypto`
- (16) `sharing` - (17) `sharing`
- (16) `art`
- (16) `freelance` - (16) `freelance`
- (15) `art`
- (15) `shopping` - (15) `shopping`
- (13) `sport` - (13) `sport`
- (13) `business` - (13) `business`
+1 -1
View File
@@ -7,7 +7,7 @@ description: |
Currently more than 3000 sites are supported; by default, the search runs against the 500 most popular sites in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolving) is also supported. Currently more than 3000 sites are supported; by default, the search runs against the 500 most popular sites in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolving) is also supported.
version: 0.5.0 version: 0.4.4
license: MIT license: MIT
base: core22 base: core22
confinement: strict confinement: strict
File diff suppressed because one or more lines are too long
[Image changed] Before: 1.6 MiB → After: 1.6 MiB
Binary file not shown. Before: 501 KiB
Binary file not shown. Before: 312 KiB
-47
View File
@@ -8,11 +8,8 @@ from _pytest.mark import Mark
from maigret.sites import MaigretDatabase from maigret.sites import MaigretDatabase
from maigret.maigret import setup_arguments_parser from maigret.maigret import setup_arguments_parser
from maigret.settings import Settings from maigret.settings import Settings
from aiohttp import web
LOCAL_SERVER_PORT = 8080
CUR_PATH = os.path.dirname(os.path.realpath(__file__)) CUR_PATH = os.path.dirname(os.path.realpath(__file__))
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json') JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
SETTINGS_FILE = os.path.join(CUR_PATH, '../maigret/resources/settings.json') SETTINGS_FILE = os.path.join(CUR_PATH, '../maigret/resources/settings.json')
@@ -21,26 +18,6 @@ LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
empty_mark = Mark('', (), {}) empty_mark = Mark('', (), {})
RESULTS_EXAMPLE = {
'Reddit': {
'cookies': None,
'parsing_enabled': False,
'url_main': 'https://www.reddit.com/',
'username': 'Skyeng',
},
'GooglePlayStore': {
'cookies': None,
'http_status': 200,
'is_similar': False,
'parsing_enabled': False,
'rank': 1,
'url_main': 'https://play.google.com/store',
'url_user': 'https://play.google.com/store/apps/developer?id=Skyeng',
'username': 'Skyeng',
},
}
def by_slow_marker(item): def by_slow_marker(item):
return item.get_closest_marker('slow', default=empty_mark).name return item.get_closest_marker('slow', default=empty_mark).name
@@ -82,13 +59,6 @@ def reports_autoclean():
remove_test_reports() remove_test_reports()
@pytest.fixture(scope='session')
def settings():
settings = Settings()
settings.load([SETTINGS_FILE])
return settings
@pytest.fixture(scope='session') @pytest.fixture(scope='session')
def argparser(): def argparser():
settings = Settings() settings = Settings()
@@ -99,20 +69,3 @@ def argparser():
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def httpserver_listen_address(): def httpserver_listen_address():
return ("localhost", 8989) return ("localhost", 8989)
@pytest.fixture
async def cookie_test_server():
async def handle_cookies(request):
print(f"Received cookies: {request.cookies}")
cookies_dict = {k: v for k, v in request.cookies.items()}
return web.json_response({'cookies': cookies_dict})
app = web.Application()
app.router.add_get('/cookies', handle_cookies)
runner = web.AppRunner(app)
await runner.setup()
server = web.TCPSite(runner, port=LOCAL_SERVER_PORT)
await server.start()
yield server
await runner.cleanup()
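A hypothetical consumer of this fixture (not part of the compare) could look like the sketch below; the handler simply echoes the request's cookies back as JSON, so a test can verify exactly what the client sent:

```python
import aiohttp
import pytest

from tests.conftest import LOCAL_SERVER_PORT


@pytest.mark.asyncio
async def test_cookie_echo(cookie_test_server):
    # the fixture serves GET /cookies on localhost and echoes request cookies
    url = f'http://localhost:{LOCAL_SERVER_PORT}/cookies'
    async with aiohttp.ClientSession(cookies={'a': 'b'}) as session:
        async with session.get(url) as response:
            assert await response.json() == {'cookies': {'a': 'b'}}
```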
+5 -23
View File
@@ -1,23 +1,5 @@
{ {
"engines": { "engines": {},
"Discourse": {
"name": "Discourse",
"site": {
"presenseStrs": [
"<meta name=\"generator\" content=\"Discourse"
],
"absenceStrs": [
"Oops! That page doesn\u2019t exist or is private.",
"wrap not-found-container"
],
"checkType": "message",
"url": "{urlMain}/u/{username}/summary"
},
"presenseStrs": [
"<meta name=\"generator\" content=\"Discourse"
]
}
},
"sites": { "sites": {
"ValidActive": { "ValidActive": {
"tags": ["global", "us"], "tags": ["global", "us"],
@@ -26,7 +8,7 @@
"alexaRank": 1, "alexaRank": 1,
"url": "https://play.google.com/store/apps/developer?id={username}", "url": "https://play.google.com/store/apps/developer?id={username}",
"urlMain": "https://play.google.com/store", "urlMain": "https://play.google.com/store",
"usernameClaimed": "KONAMI", "usernameClaimed": "OpenAI",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"InvalidActive": { "InvalidActive": {
@@ -36,7 +18,7 @@
"alexaRank": 1, "alexaRank": 1,
"url": "https://play.google.com/store/apps/dev?id={username}", "url": "https://play.google.com/store/apps/dev?id={username}",
"urlMain": "https://play.google.com/store", "urlMain": "https://play.google.com/store",
"usernameClaimed": "KONAMI", "usernameClaimed": "OpenAI",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"ValidInactive": { "ValidInactive": {
@@ -46,7 +28,7 @@
"alexaRank": 1, "alexaRank": 1,
"url": "https://play.google.com/store/apps/developer?id={username}", "url": "https://play.google.com/store/apps/developer?id={username}",
"urlMain": "https://play.google.com/store", "urlMain": "https://play.google.com/store",
"usernameClaimed": "KONAMI", "usernameClaimed": "OpenAI",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"InvalidInactive": { "InvalidInactive": {
@@ -56,7 +38,7 @@
"alexaRank": 1, "alexaRank": 1,
"url": "https://play.google.com/store/apps/dev?id={username}", "url": "https://play.google.com/store/apps/dev?id={username}",
"urlMain": "https://play.google.com/store", "urlMain": "https://play.google.com/store",
"usernameClaimed": "KONAMI", "usernameClaimed": "OpenAI",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
} }
} }
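The Discourse engine block in this hunk shows the shape of a `message`-type check: `presenseStrs` (the database's own spelling) mark an existing profile, `absenceStrs` a missing or private one. A rough sketch of how such a check could classify a fetched page — the precedence order here is an assumption, not maigret's actual checking code:

```python
def classify_by_message(html: str, presense_strs, absence_strs) -> str:
    # hypothetical sketch of a "message"-type check
    if any(marker in html for marker in absence_strs):
        return 'available'  # page carries a "doesn't exist / private" marker
    if any(marker in html for marker in presense_strs):
        return 'claimed'    # page carries the engine's generator marker
    return 'unknown'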
+18 -19
View File
@@ -1,13 +1,10 @@
"""Maigret activation test functions""" """Maigret activation test functions"""
import json import json
import yarl
import aiohttp import aiohttp
import pytest import pytest
from mock import Mock from mock import Mock
from tests.conftest import LOCAL_SERVER_PORT
from maigret.activation import ParsingActivator, import_aiohttp_cookies from maigret.activation import ParsingActivator, import_aiohttp_cookies
COOKIES_TXT = """# HTTP Cookie File downloaded with cookies.txt by Genuinous @genuinous COOKIES_TXT = """# HTTP Cookie File downloaded with cookies.txt by Genuinous @genuinous
@@ -21,38 +18,40 @@ xss.is FALSE / TRUE 0 xf_csrf test
xss.is FALSE / TRUE 1642709308 xf_user tset xss.is FALSE / TRUE 1642709308 xf_user tset
.xss.is TRUE / FALSE 0 muchacho_cache test .xss.is TRUE / FALSE 0 muchacho_cache test
.xss.is TRUE / FALSE 1924905600 132_evc test .xss.is TRUE / FALSE 1924905600 132_evc test
localhost FALSE / FALSE 0 a b httpbin.org FALSE / FALSE 0 a b
""" """
@pytest.mark.skip("captcha") @pytest.mark.skip(reason="periodically fails")
@pytest.mark.slow @pytest.mark.slow
def test_vimeo_activation(default_db): def test_twitter_activation(default_db):
vimeo_site = default_db.sites_dict['Vimeo'] twitter_site = default_db.sites_dict['Twitter']
token1 = vimeo_site.headers['Authorization'] token1 = twitter_site.headers['x-guest-token']
ParsingActivator.vimeo(vimeo_site, Mock()) ParsingActivator.twitter(twitter_site, Mock())
token2 = vimeo_site.headers['Authorization'] token2 = twitter_site.headers['x-guest-token']
assert token1 != token2 assert token1 != token2
@pytest.mark.slow
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_import_aiohttp_cookies(cookie_test_server): async def test_import_aiohttp_cookies():
cookies_filename = 'cookies_test.txt' cookies_filename = 'cookies_test.txt'
with open(cookies_filename, 'w') as f: with open(cookies_filename, 'w') as f:
f.write(COOKIES_TXT) f.write(COOKIES_TXT)
cookie_jar = import_aiohttp_cookies(cookies_filename) cookie_jar = import_aiohttp_cookies(cookies_filename)
url = f'http://localhost:{LOCAL_SERVER_PORT}/cookies' # new aiohttp support
assert list(cookie_jar._cookies.keys()) in (['xss.is', 'httpbin.org'], [('xss.is', '/'), ('httpbin.org', '/')], [('xss.is', ''), ('httpbin.org', '')])
cookies = cookie_jar.filter_cookies(yarl.URL(url)) url = 'https://httpbin.org/cookies'
assert cookies['a'].value == 'b' connector = aiohttp.TCPConnector(ssl=False)
session = aiohttp.ClientSession(
connector=connector, trust_env=True, cookie_jar=cookie_jar
)
async with aiohttp.ClientSession(cookie_jar=cookie_jar) as session: response = await session.get(url=url)
async with session.get(url=url) as response: result = json.loads(await response.content.read())
result = await response.json() await session.close()
print(f"Server response: {result}")
assert result == {'cookies': {'a': 'b'}} assert result == {'cookies': {'a': 'b'}}
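The three accepted key shapes in the `_cookies` assertion above reflect aiohttp's internal cookie-jar indexing, which changed across releases (plain domain strings, later `(domain, path)` tuples); the tuple variants are a compatibility guard rather than behaviour under test.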
+4 -10
View File
@@ -1,5 +1,4 @@
"""Maigret command-line arguments parsing tests""" """Maigret command-line arguments parsing tests"""
from argparse import Namespace from argparse import Namespace
from typing import Dict, Any from typing import Dict, Any
@@ -42,7 +41,6 @@ DEFAULT_ARGS: Dict[str, Any] = {
'use_disabled_sites': False, 'use_disabled_sites': False,
'username': [], 'username': [],
'verbose': False, 'verbose': False,
'web': None,
'with_domains': False, 'with_domains': False,
'xmind': False, 'xmind': False,
} }
@@ -56,8 +54,7 @@ def test_args_search_mode(argparser):
want_args = dict(DEFAULT_ARGS) want_args = dict(DEFAULT_ARGS)
want_args.update({'username': ['username']}) want_args.update({'username': ['username']})
for arg in vars(args): assert args == Namespace(**want_args)
assert getattr(args, arg) == want_args[arg]
def test_args_search_mode_several_usernames(argparser): def test_args_search_mode_several_usernames(argparser):
@@ -68,8 +65,7 @@ def test_args_search_mode_several_usernames(argparser):
want_args = dict(DEFAULT_ARGS) want_args = dict(DEFAULT_ARGS)
want_args.update({'username': ['username1', 'username2']}) want_args.update({'username': ['username1', 'username2']})
for arg in vars(args): assert args == Namespace(**want_args)
assert getattr(args, arg) == want_args[arg]
def test_args_self_check_mode(argparser): def test_args_self_check_mode(argparser):
@@ -84,8 +80,7 @@ def test_args_self_check_mode(argparser):
} }
) )
for arg in vars(args): assert args == Namespace(**want_args)
assert getattr(args, arg) == want_args[arg]
def test_args_multiple_sites(argparser): def test_args_multiple_sites(argparser):
@@ -101,5 +96,4 @@ def test_args_multiple_sites(argparser):
} }
) )
for arg in vars(args): assert args == Namespace(**want_args)
assert getattr(args, arg) == want_args[arg]
-84
View File
@@ -1,84 +0,0 @@
"""Tests for the close_invalid_telegram_prs utility."""
import unittest
import sys
import os
# Add the utils directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'utils'))
from close_invalid_telegram_prs import is_invalid_telegram_pr
class TestCloseInvalidTelegramPRs(unittest.TestCase):
"""Test cases for the invalid Telegram PR detection."""
def test_valid_invalid_telegram_pr_titles(self):
"""Test that valid invalid Telegram PR titles are correctly identified."""
valid_titles = [
"Invalid result https://t.me/someuser",
"invalid result https://t.me/channel123",
"Invalid Result https://t.me/bot_name",
"INVALID RESULT https://t.me/test",
"Invalid result https://t.me/user/123",
"Invalid result https://t.me/s/channel_name",
]
for title in valid_titles:
with self.subTest(title=title):
self.assertTrue(is_invalid_telegram_pr(title),
f"Title should be identified as invalid: {title}")
def test_invalid_telegram_pr_titles_not_matching(self):
"""Test that non-matching titles are correctly rejected."""
invalid_titles = [
"Valid result https://t.me/someuser", # "Valid" instead of "Invalid"
"Invalid results https://t.me/someuser", # "results" instead of "result"
"Invalid result http://t.me/someuser", # "http" instead of "https"
"Invalid result https://telegram.me/someuser", # Wrong domain
"Fix invalid result https://t.me/someuser", # Extra words before
"Invalid result for https://t.me/someuser", # Extra words in between
"Added telegram site", # Completely different
"Fix false positives", # Unrelated
"", # Empty title
"Invalid result", # Missing URL
"https://t.me/someuser", # Missing "Invalid result"
]
for title in invalid_titles:
with self.subTest(title=title):
self.assertFalse(is_invalid_telegram_pr(title),
f"Title should NOT be identified as invalid: {title}")
def test_whitespace_handling(self):
"""Test that whitespace is handled correctly."""
titles_with_whitespace = [
" Invalid result https://t.me/someuser ", # Leading/trailing spaces
"\tInvalid result https://t.me/someuser\t", # Tabs
"Invalid\tresult\thttps://t.me/someuser", # Tabs between words
"Invalid result https://t.me/someuser", # Multiple spaces
]
for title in titles_with_whitespace:
with self.subTest(title=title):
self.assertTrue(is_invalid_telegram_pr(title),
f"Title with whitespace should be identified: {title}")
def test_case_insensitive(self):
"""Test that the pattern matching is case insensitive."""
case_variations = [
"invalid result https://t.me/someuser",
"Invalid Result https://t.me/someuser",
"INVALID RESULT https://t.me/someuser",
"Invalid result https://T.ME/someuser",
"iNvAlId ReSuLt https://t.me/someuser",
]
for title in case_variations:
with self.subTest(title=title):
self.assertTrue(is_invalid_telegram_pr(title),
f"Case variation should be identified: {title}")
if __name__ == '__main__':
unittest.main()
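These tests pin the single pattern that the utility script further down compiles: `^invalid\s+result\s+https://t\.me/.*`, matched case-insensitively against the stripped title.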
-2
View File
@@ -1,10 +1,8 @@
"""Maigret data test functions""" """Maigret data test functions"""
import pytest
from maigret.utils import is_country_tag from maigret.utils import is_country_tag
@pytest.mark.slow
def test_tags_validity(default_db): def test_tags_validity(default_db):
unknown_tags = set() unknown_tags = set()
-58
View File
@@ -1,58 +0,0 @@
import pytest
from maigret.errors import notify_about_errors, CheckError
from maigret.types import QueryResultWrapper
from maigret.result import MaigretCheckResult, MaigretCheckStatus
def test_notify_about_errors():
results = {
'site1': {
'status': MaigretCheckResult(
'', '', '', MaigretCheckStatus.UNKNOWN, error=CheckError('Captcha')
)
},
'site2': {
'status': MaigretCheckResult(
'',
'',
'',
MaigretCheckStatus.UNKNOWN,
error=CheckError('Bot protection'),
)
},
'site3': {
'status': MaigretCheckResult(
'',
'',
'',
MaigretCheckStatus.UNKNOWN,
error=CheckError('Access denied'),
)
},
'site4': {
'status': MaigretCheckResult(
'', '', '', MaigretCheckStatus.CLAIMED, error=None
)
},
}
results = notify_about_errors(results, query_notify=None, show_statistics=True)
# Check the output
expected_output = [
(
'Too many errors of type "Captcha" (25.0%). Try to switch to another ip address or to use service cookies',
'!',
),
(
'Too many errors of type "Bot protection" (25.0%). Try to switch to another ip address',
'!',
),
('Too many errors of type "Access denied" (25.0%)', '!'),
('Verbose error statistics:', '-'),
('Captcha: 25.0%', '!'),
('Bot protection: 25.0%', '!'),
('Access denied: 25.0%', '!'),
('You can see detailed site check errors with a flag `--print-errors`', '-'),
]
assert results == expected_output
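The 25.0% figures follow directly from the fixture's shape: four site results, one error of each type, so each error type accounts for 1/4 = 25.0% of the checks.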
-35
View File
@@ -1,5 +1,4 @@
"""Maigret checking logic test functions""" """Maigret checking logic test functions"""
import pytest import pytest
import asyncio import asyncio
import logging import logging
@@ -8,7 +7,6 @@ from maigret.executors import (
AsyncioProgressbarExecutor, AsyncioProgressbarExecutor,
AsyncioProgressbarSemaphoreExecutor, AsyncioProgressbarSemaphoreExecutor,
AsyncioProgressbarQueueExecutor, AsyncioProgressbarQueueExecutor,
AsyncioQueueGeneratorExecutor,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -50,7 +48,6 @@ async def test_asyncio_progressbar_semaphore_executor():
assert executor.execution_time < 0.4 assert executor.execution_time < 0.4
@pytest.mark.slow
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_asyncio_progressbar_queue_executor(): async def test_asyncio_progressbar_queue_executor():
tasks = [(func, [n], {}) for n in range(10)] tasks = [(func, [n], {}) for n in range(10)]
@@ -77,35 +74,3 @@ async def test_asyncio_progressbar_queue_executor():
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8] assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
assert executor.execution_time > 0.2 assert executor.execution_time > 0.2
assert executor.execution_time < 0.4 assert executor.execution_time < 0.4
@pytest.mark.asyncio
async def test_asyncio_queue_generator_executor():
tasks = [(func, [n], {}) for n in range(10)]
executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=2)
results = [result async for result in executor.run(tasks)]
assert results == [0, 1, 3, 2, 4, 6, 7, 5, 9, 8]
assert executor.execution_time > 0.5
assert executor.execution_time < 0.6
executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=3)
results = [result async for result in executor.run(tasks)]
assert results == [0, 3, 1, 4, 6, 2, 7, 9, 5, 8]
assert executor.execution_time > 0.4
assert executor.execution_time < 0.5
executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=5)
results = [result async for result in executor.run(tasks)]
assert results in (
[0, 3, 6, 1, 4, 7, 9, 2, 5, 8],
[0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
)
assert executor.execution_time > 0.3
assert executor.execution_time < 0.4
executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=10)
results = [result async for result in executor.run(tasks)]
assert results == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
assert executor.execution_time > 0.2
assert executor.execution_time < 0.3
+25 -8
View File
@@ -1,5 +1,4 @@
"""Maigret main module test functions""" """Maigret main module test functions"""
import asyncio import asyncio
import copy import copy
@@ -12,8 +11,27 @@ from maigret.maigret import (
extract_ids_from_results, extract_ids_from_results,
) )
from maigret.sites import MaigretSite from maigret.sites import MaigretSite
from maigret.result import MaigretCheckResult, MaigretCheckStatus from maigret.result import QueryResult, QueryStatus
from tests.conftest import RESULTS_EXAMPLE
RESULTS_EXAMPLE = {
'Reddit': {
'cookies': None,
'parsing_enabled': False,
'url_main': 'https://www.reddit.com/',
'username': 'Skyeng',
},
'GooglePlayStore': {
'cookies': None,
'http_status': 200,
'is_similar': False,
'parsing_enabled': False,
'rank': 1,
'url_main': 'https://play.google.com/store',
'url_user': 'https://play.google.com/store/apps/developer?id=Skyeng',
'username': 'Skyeng',
},
}
@pytest.mark.slow @pytest.mark.slow
@@ -67,12 +85,12 @@ def test_maigret_results(test_db):
del results['GooglePlayStore']['site'] del results['GooglePlayStore']['site']
reddit_status = results['Reddit']['status'] reddit_status = results['Reddit']['status']
assert isinstance(reddit_status, MaigretCheckResult) assert isinstance(reddit_status, QueryResult)
assert reddit_status.status == MaigretCheckStatus.ILLEGAL assert reddit_status.status == QueryStatus.ILLEGAL
playstore_status = results['GooglePlayStore']['status'] playstore_status = results['GooglePlayStore']['status']
assert isinstance(playstore_status, MaigretCheckResult) assert isinstance(playstore_status, QueryResult)
assert playstore_status.status == MaigretCheckStatus.CLAIMED assert playstore_status.status == QueryStatus.CLAIMED
del results['Reddit']['status'] del results['Reddit']['status']
del results['GooglePlayStore']['status'] del results['GooglePlayStore']['status']
@@ -84,7 +102,6 @@ def test_maigret_results(test_db):
assert results == RESULTS_EXAMPLE assert results == RESULTS_EXAMPLE
@pytest.mark.slow
def test_extract_ids_from_url(default_db): def test_extract_ids_from_url(default_db):
assert default_db.extract_ids_from_url('https://www.reddit.com/user/test') == { assert default_db.extract_ids_from_url('https://www.reddit.com/user/test') == {
'test': 'username' 'test': 'username'
+9 -9
View File
@@ -1,6 +1,6 @@
from maigret.errors import CheckError from maigret.errors import CheckError
from maigret.notify import QueryNotifyPrint from maigret.notify import QueryNotifyPrint
from maigret.result import MaigretCheckStatus, MaigretCheckResult from maigret.result import QueryStatus, QueryResult
def test_notify_illegal(): def test_notify_illegal():
@@ -8,9 +8,9 @@ def test_notify_illegal():
assert ( assert (
n.update( n.update(
MaigretCheckResult( QueryResult(
username="test", username="test",
status=MaigretCheckStatus.ILLEGAL, status=QueryStatus.ILLEGAL,
site_name="TEST_SITE", site_name="TEST_SITE",
site_url_user="http://example.com/test", site_url_user="http://example.com/test",
) )
@@ -24,9 +24,9 @@ def test_notify_claimed():
assert ( assert (
n.update( n.update(
MaigretCheckResult( QueryResult(
username="test", username="test",
status=MaigretCheckStatus.CLAIMED, status=QueryStatus.CLAIMED,
site_name="TEST_SITE", site_name="TEST_SITE",
site_url_user="http://example.com/test", site_url_user="http://example.com/test",
) )
@@ -40,9 +40,9 @@ def test_notify_available():
assert ( assert (
n.update( n.update(
MaigretCheckResult( QueryResult(
username="test", username="test",
status=MaigretCheckStatus.AVAILABLE, status=QueryStatus.AVAILABLE,
site_name="TEST_SITE", site_name="TEST_SITE",
site_url_user="http://example.com/test", site_url_user="http://example.com/test",
) )
@@ -53,9 +53,9 @@ def test_notify_available():
def test_notify_unknown(): def test_notify_unknown():
n = QueryNotifyPrint(color=False) n = QueryNotifyPrint(color=False)
result = MaigretCheckResult( result = QueryResult(
username="test", username="test",
status=MaigretCheckStatus.UNKNOWN, status=QueryStatus.UNKNOWN,
site_name="TEST_SITE", site_name="TEST_SITE",
site_url_user="http://example.com/test", site_url_user="http://example.com/test",
) )
-50
View File
@@ -1,50 +0,0 @@
import pytest
from maigret.permutator import Permute
def test_gather_strict():
elements = {'a': 1, 'b': 2}
permute = Permute(elements)
result = permute.gather(method="strict")
expected = {
'a_b': 1,
'b_a': 2,
'a-b': 1,
'b-a': 2,
'a.b': 1,
'b.a': 2,
'ab': 1,
'ba': 2,
'_ab': 1,
'ab_': 1,
'_ba': 2,
'ba_': 2,
}
assert result == expected
def test_gather_all():
elements = {'a': 1, 'b': 2}
permute = Permute(elements)
result = permute.gather(method="all")
expected = {
'a': 1,
'_a': 1,
'a_': 1,
'b': 2,
'_b': 2,
'b_': 2,
'a_b': 1,
'b_a': 2,
'a-b': 1,
'b-a': 2,
'a.b': 1,
'b.a': 2,
'ab': 1,
'ba': 2,
'_ab': 1,
'ab_': 1,
'_ba': 2,
'ba_': 2,
}
assert result == expected
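Since the removed tests fully specify the strict method's output, here is a minimal reconstruction sketch (hypothetical; the real `Permute` class lives in `maigret.permutator` and is not shown in this compare):

```python
from itertools import permutations


def gather_strict(elements):
    """Hypothetical reconstruction of Permute.gather(method="strict"),
    inferred only from the expected dictionaries above."""
    result = {}
    for x, y in permutations(elements, 2):
        for sep in ('_', '-', '.', ''):
            # joined variants take the value of the leading element
            result[f'{x}{sep}{y}'] = elements[x]
        result[f'_{x}{y}'] = elements[x]  # leading-underscore variant
        result[f'{x}{y}_'] = elements[x]  # trailing-underscore variant
    return result


assert gather_strict({'a': 1, 'b': 2}) == {
    'a_b': 1, 'b_a': 2, 'a-b': 1, 'b-a': 2, 'a.b': 1, 'b.a': 2,
    'ab': 1, 'ba': 2, '_ab': 1, 'ab_': 1, '_ba': 2, 'ba_': 2,
}
```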
+5 -6
View File
@@ -1,5 +1,4 @@
"""Maigret reports test functions""" """Maigret reports test functions"""
import copy import copy
import json import json
import os import os
@@ -20,12 +19,12 @@ from maigret.report import (
generate_json_report, generate_json_report,
get_plaintext_report, get_plaintext_report,
) )
from maigret.result import MaigretCheckResult, MaigretCheckStatus from maigret.result import QueryResult, QueryStatus
from maigret.sites import MaigretSite from maigret.sites import MaigretSite
GOOD_RESULT = MaigretCheckResult('', '', '', MaigretCheckStatus.CLAIMED) GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
BAD_RESULT = MaigretCheckResult('', '', '', MaigretCheckStatus.AVAILABLE) BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
EXAMPLE_RESULTS = { EXAMPLE_RESULTS = {
'GitHub': { 'GitHub': {
@@ -33,11 +32,11 @@ EXAMPLE_RESULTS = {
'parsing_enabled': True, 'parsing_enabled': True,
'url_main': 'https://www.github.com/', 'url_main': 'https://www.github.com/',
'url_user': 'https://www.github.com/test', 'url_user': 'https://www.github.com/test',
'status': MaigretCheckResult( 'status': QueryResult(
'test', 'test',
'GitHub', 'GitHub',
'https://www.github.com/test', 'https://www.github.com/test',
MaigretCheckStatus.CLAIMED, QueryStatus.CLAIMED,
tags=['test_tag'], tags=['test_tag'],
), ),
'http_status': 200, 'http_status': 200,
-1
View File
@@ -1,5 +1,4 @@
"""Maigret Database test functions""" """Maigret Database test functions"""
from maigret.sites import MaigretDatabase, MaigretSite from maigret.sites import MaigretDatabase, MaigretSite
EXAMPLE_DB = { EXAMPLE_DB = {
-277
View File
@@ -1,277 +0,0 @@
import pytest
from unittest.mock import MagicMock, patch
from maigret.submit import Submitter
from aiohttp import ClientSession
from maigret.sites import MaigretDatabase
import logging
@pytest.mark.slow
@pytest.mark.asyncio
async def test_detect_known_engine(test_db, local_test_db):
# Use the database fixture instead of mocking
mock_db = test_db
mock_settings = MagicMock()
mock_logger = MagicMock()
mock_args = MagicMock()
mock_args.cookie_file = ""
mock_args.proxy = ""
# Mock the supposed usernames
mock_settings.supposed_usernames = ["adam"]
# Create the Submitter instance
submitter = Submitter(test_db, mock_settings, mock_logger, mock_args)
# Call the method with test URLs
url_exists = "https://devforum.zoom.us/u/adam"
url_mainpage = "https://devforum.zoom.us/"
# Mock extract_username_dialog to return "adam"
submitter.extract_username_dialog = MagicMock(return_value="adam")
sites, resp_text = await submitter.detect_known_engine(
url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
)
# Assertions
assert len(sites) == 2
assert sites[0].name == "devforum.zoom.us"
assert sites[0].url_main == "https://devforum.zoom.us/"
assert sites[0].engine == "Discourse"
assert sites[0].username_claimed == "adam"
assert sites[0].username_unclaimed == "noonewouldeverusethis7"
assert resp_text != ""
await submitter.close()
# Create the Submitter instance without engines
submitter = Submitter(local_test_db, mock_settings, mock_logger, mock_args)
sites, resp_text = await submitter.detect_known_engine(
url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
)
assert len(sites) == 0
await submitter.close()
@pytest.mark.slow
@pytest.mark.asyncio
async def test_check_features_manually_success(settings):
# Setup
db = MaigretDatabase()
logger = logging.getLogger("test_logger")
args = type(
'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
)()
submitter = Submitter(db, settings, logger, args)
username = "KONAMI"
url_exists = "https://play.google.com/store/apps/developer?id=KONAMI"
# Execute
presence_list, absence_list, status, random_username = (
await submitter.check_features_manually(
username=username,
url_exists=url_exists,
session=ClientSession(),
follow_redirects=False,
headers=None,
)
)
await submitter.close()
# Assert
assert status == "Found", "Expected status to be 'Found'"
assert isinstance(presence_list, list), "Presence list should be a list"
assert isinstance(absence_list, list), "Absence list should be a list"
assert isinstance(random_username, str), "Random username should be a string"
assert (
random_username != username
), "Random username should not be the same as the input username"
assert sorted(presence_list) == sorted(
[
' title=',
'og:title',
'display: none;',
'4;0',
'main-title',
]
)
assert sorted(absence_list) == sorted(
[
' body {',
' </style>',
'><title>Not Found</title>',
' <style nonce=',
' .rounded {',
]
)
@pytest.mark.slow
@pytest.mark.asyncio
async def test_check_features_manually_cloudflare(settings):
# Setup
db = MaigretDatabase()
logger = logging.getLogger("test_logger")
args = type(
'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
)()
submitter = Submitter(db, settings, logger, args)
username = "abel"
url_exists = "https://community.cloudflare.com/badges/1/basic?username=abel"
# Execute
presence_list, absence_list, status, random_username = (
await submitter.check_features_manually(
username=username,
url_exists=url_exists,
session=ClientSession(),
follow_redirects=False,
headers=None,
)
)
await submitter.close()
# Assert
assert status == "Cloudflare detected, skipping"
assert presence_list is None
assert absence_list is None
assert random_username != username
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_adds_site_positive(settings):
# Initialize necessary objects
db = MaigretDatabase()
logger = logging.getLogger("test_logger")
logger.setLevel(logging.INFO)
args = type(
'Args',
(object,),
{
'proxy': None,
'cookie_file': None,
'verbose': False,
'db_file': 'test_db.json',
'db': 'test_db.json',
},
)()
submitter = Submitter(db, settings, logger, args)
# Mock user inputs
user_inputs = [
'KONAMI', # Manually input username
'y', # Save the site in the Maigret DB
'GooglePlayStore', # Custom site name
'', # no custom tags
]
with patch('builtins.input', side_effect=user_inputs):
result = await submitter.dialog(
"https://play.google.com/store/apps/developer?id=KONAMI", None
)
await submitter.close()
assert result is True
assert len(db.sites) == 1
site = db.sites[0]
assert site.url_main == "https://play.google.com"
assert site.name == "GooglePlayStore"
assert site.tags == []
assert site.presense_strs != []
assert site.absence_strs != []
assert site.username_claimed == "KONAMI"
assert site.check_type == "message"
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_replace_site(settings, test_db):
# Initialize necessary objects
db = test_db
logger = logging.getLogger("test_logger")
logger.setLevel(logging.DEBUG)
args = type(
'Args',
(object,),
{
'proxy': None,
'cookie_file': None,
'verbose': False,
'db_file': 'test_db.json',
'db': 'test_db.json',
},
)()
assert len(db.sites) == 4
submitter = Submitter(db, settings, logger, args)
# Mock user inputs
user_inputs = [
'y', # Similar sites found, continue
'InvalidActive', # Choose site to replace
'', # Custom headers
'y', # Should we do redirects automatically?
'KONAMI', # Manually input username
'y', # Save the site in the Maigret DB
'', # Custom site name
'', # no custom tags
]
with patch('builtins.input', side_effect=user_inputs):
result = await submitter.dialog(
"https://play.google.com/store/apps/developer?id=KONAMI", None
)
await submitter.close()
assert result is True
assert len(db.sites) == 4
site = db.sites_dict["InvalidActive"]
assert site.name == "InvalidActive"
assert site.url_main == "https://play.google.com"
assert site.tags == ['global', 'us']
assert site.presense_strs != []
assert site.absence_strs != []
assert site.username_claimed == "KONAMI"
assert site.check_type == "message"
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_adds_site_negative(settings):
# Initialize necessary objects
db = MaigretDatabase()
logger = logging.getLogger("test_logger")
logger.setLevel(logging.INFO)
args = type(
'Args',
(object,),
{
'proxy': None,
'cookie_file': None,
'verbose': False,
'db_file': 'test_db.json',
'db': 'test_db.json',
},
)()
submitter = Submitter(db, settings, logger, args)
# Mock user inputs
user_inputs = [
'sokrat', # Manually input username
'y', # Save the site in the Maigret DB
]
with patch('builtins.input', side_effect=user_inputs):
result = await submitter.dialog("https://icq.com/sokrat", None)
await submitter.close()
assert result is False
-1
View File
@@ -1,5 +1,4 @@
"""Maigret utils test functions""" """Maigret utils test functions"""
import itertools import itertools
import re import re
-205
View File
@@ -1,205 +0,0 @@
#!/usr/bin/env python3
"""
Utility script to close pull requests with titles matching "Invalid result https://t.me/..."
This script identifies and closes PRs that follow the pattern of invalid telegram results,
which are typically auto-generated or spam PRs that should not be processed.
"""
import argparse
import os
import re
import sys
from typing import List, Optional
try:
import requests
except ImportError:
print("Error: requests library is required. Install with: pip install requests")
sys.exit(1)
class GitHubAPI:
"""Simple GitHub API wrapper for managing pull requests."""
def __init__(self, token: str, owner: str, repo: str):
self.token = token
self.owner = owner
self.repo = repo
self.base_url = "https://api.github.com"
self.headers = {
"Authorization": f"token {token}",
"Accept": "application/vnd.github.v3+json"
}
def get_open_prs(self) -> List[dict]:
"""Get all open pull requests."""
url = f"{self.base_url}/repos/{self.owner}/{self.repo}/pulls"
params = {"state": "open", "per_page": 100}
all_prs = []
page = 1
while True:
params["page"] = page
response = requests.get(url, headers=self.headers, params=params)
response.raise_for_status()
prs = response.json()
if not prs:
break
all_prs.extend(prs)
page += 1
return all_prs
def close_pr(self, pr_number: int, comment: Optional[str] = None) -> bool:
"""Close a pull request with an optional comment."""
try:
# Add comment if provided
if comment:
comment_url = f"{self.base_url}/repos/{self.owner}/{self.repo}/issues/{pr_number}/comments"
comment_data = {"body": comment}
response = requests.post(comment_url, headers=self.headers, json=comment_data)
response.raise_for_status()
# Close the PR
close_url = f"{self.base_url}/repos/{self.owner}/{self.repo}/pulls/{pr_number}"
close_data = {"state": "closed"}
response = requests.patch(close_url, headers=self.headers, json=close_data)
response.raise_for_status()
return True
except requests.RequestException as e:
print(f"Error closing PR #{pr_number}: {e}")
return False
def is_invalid_telegram_pr(title: str) -> bool:
"""
Check if a PR title matches the pattern "Invalid result https://t.me/..."
Args:
title: The PR title to check
Returns:
True if the title matches the pattern, False otherwise
"""
# Pattern: "Invalid result https://t.me/..." (case insensitive)
pattern = r"^invalid\s+result\s+https://t\.me/.*"
return bool(re.match(pattern, title.strip(), re.IGNORECASE))
def find_invalid_telegram_prs(github_api: GitHubAPI) -> List[dict]:
"""
Find all open PRs that match the invalid telegram pattern.
Args:
github_api: GitHub API wrapper instance
Returns:
List of PR dictionaries that match the pattern
"""
all_prs = github_api.get_open_prs()
matching_prs = []
for pr in all_prs:
if is_invalid_telegram_pr(pr["title"]):
matching_prs.append(pr)
return matching_prs
def main():
"""Main function to find and close invalid telegram PRs."""
parser = argparse.ArgumentParser(
description="Close pull requests with titles matching 'Invalid result https://t.me/...'"
)
parser.add_argument(
"--token",
required=False,
help="GitHub personal access token (or set GITHUB_TOKEN env var)"
)
parser.add_argument(
"--owner",
default="soxoj",
help="Repository owner (default: soxoj)"
)
parser.add_argument(
"--repo",
default="maigret",
help="Repository name (default: maigret)"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Show what would be closed without actually closing PRs"
)
parser.add_argument(
"--comment",
default="Automatically closing this PR as it appears to be an invalid result for a Telegram URL. "
"If this is a legitimate PR, please reopen it with a more descriptive title.",
help="Comment to add when closing PRs"
)
args = parser.parse_args()
# Get GitHub token
token = args.token or os.getenv("GITHUB_TOKEN")
if not token:
print("Error: GitHub token is required. Provide via --token or GITHUB_TOKEN env var")
sys.exit(1)
# Initialize GitHub API
try:
github_api = GitHubAPI(token, args.owner, args.repo)
except Exception as e:
print(f"Error initializing GitHub API: {e}")
sys.exit(1)
# Find matching PRs
print(f"Searching for PRs matching pattern in {args.owner}/{args.repo}...")
try:
matching_prs = find_invalid_telegram_prs(github_api)
except Exception as e:
print(f"Error fetching PRs: {e}")
sys.exit(1)
if not matching_prs:
print("No PRs found matching the pattern 'Invalid result https://t.me/...'")
return
print(f"Found {len(matching_prs)} PR(s) matching the pattern:")
for pr in matching_prs:
print(f" - PR #{pr['number']}: {pr['title']}")
print(f" Created by: {pr['user']['login']}")
print(f" URL: {pr['html_url']}")
print()
if args.dry_run:
print("Dry run mode: No PRs were actually closed.")
return
# Confirm before closing
response = input(f"Close {len(matching_prs)} PR(s)? [y/N]: ")
if response.lower() != 'y':
print("Cancelled.")
return
# Close PRs
closed_count = 0
for pr in matching_prs:
print(f"Closing PR #{pr['number']}: {pr['title']}")
if github_api.close_pr(pr['number'], args.comment):
closed_count += 1
print(f" ✓ Closed successfully")
else:
print(f" ✗ Failed to close")
print(f"\nClosed {closed_count} out of {len(matching_prs)} PRs.")
if __name__ == "__main__":
main()
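Going by the script's own argparse defaults (`--owner soxoj --repo maigret`), a cautious first run would be `python utils/close_invalid_telegram_prs.py --dry-run` with `GITHUB_TOKEN` set in the environment; nothing is closed until the flag is dropped and the interactive confirmation is answered.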
+5 -5
View File
@@ -8,7 +8,7 @@ from mock import Mock
import requests import requests
from maigret.maigret import * from maigret.maigret import *
from maigret.result import MaigretCheckStatus from maigret.result import QueryStatus
from maigret.sites import MaigretSite from maigret.sites import MaigretSite
URL_RE = re.compile(r"https?://(www\.)?") URL_RE = re.compile(r"https?://(www\.)?")
@@ -31,7 +31,7 @@ async def maigret_check(site, site_data, username, status, logger):
) )
if results[site]['status'].status != status: if results[site]['status'].status != status:
if results[site]['status'].status == MaigretCheckStatus.UNKNOWN: if results[site]['status'].status == QueryStatus.UNKNOWN:
msg = site_data.absence_strs msg = site_data.absence_strs
etype = site_data.check_type etype = site_data.check_type
context = results[site]['status'].context context = results[site]['status'].context
@@ -41,7 +41,7 @@ async def maigret_check(site, site_data, username, status, logger):
# continue # continue
return False return False
if status == MaigretCheckStatus.CLAIMED: if status == QueryStatus.CLAIMED:
logger.debug(f'Not found {username} in {site}, must be claimed') logger.debug(f'Not found {username} in {site}, must be claimed')
logger.debug(results[site]) logger.debug(results[site])
pass pass
@@ -62,7 +62,7 @@ async def check_and_add_maigret_site(site_data, semaphore, logger, ok_usernames,
for ok_username in ok_usernames: for ok_username in ok_usernames:
site_data.username_claimed = ok_username site_data.username_claimed = ok_username
status = MaigretCheckStatus.CLAIMED status = QueryStatus.CLAIMED
if await maigret_check(sitename, site_data, ok_username, status, logger): if await maigret_check(sitename, site_data, ok_username, status, logger):
# print(f'{sitename} positive case is okay') # print(f'{sitename} positive case is okay')
positive = True positive = True
@@ -70,7 +70,7 @@ async def check_and_add_maigret_site(site_data, semaphore, logger, ok_usernames,
for bad_username in bad_usernames: for bad_username in bad_usernames:
site_data.username_unclaimed = bad_username site_data.username_unclaimed = bad_username
status = MaigretCheckStatus.AVAILABLE status = QueryStatus.AVAILABLE
if await maigret_check(sitename, site_data, bad_username, status, logger): if await maigret_check(sitename, site_data, bad_username, status, logger):
# print(f'{sitename} negative case is okay') # print(f'{sitename} negative case is okay')
negative = True negative = True
+2 -8
View File
@@ -67,7 +67,7 @@ def get_step_rank(rank):
return get_readable_rank(list(filter(lambda x: x >= rank, valid_step_ranks))[0]) return get_readable_rank(list(filter(lambda x: x >= rank, valid_step_ranks))[0])
def main(): if __name__ == '__main__':
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
) )
parser.add_argument("--base","-b", metavar="BASE_FILE", parser.add_argument("--base","-b", metavar="BASE_FILE",
@@ -86,8 +86,6 @@ def main():
db = MaigretDatabase() db = MaigretDatabase()
sites_subset = db.load_from_file(args.base_file).sites sites_subset = db.load_from_file(args.base_file).sites
print(f"\nUpdating supported sites list (don't worry, it's needed)...")
with open("sites.md", "w") as site_file: with open("sites.md", "w") as site_file:
site_file.write(f""" site_file.write(f"""
## List of supported sites (search methods): total {len(sites_subset)}\n ## List of supported sites (search methods): total {len(sites_subset)}\n
@@ -146,8 +144,4 @@ Rank data fetched from Alexa by domains.
site_file.write('## Statistics\n\n') site_file.write('## Statistics\n\n')
site_file.write(statistics_text) site_file.write(statistics_text)
print("Finished updating supported site listing!") print("\nFinished updating supported site listing!")
if __name__ == '__main__':
main()
+9 -20
View File
@@ -16,29 +16,18 @@ def main():
db = maigret.MaigretDatabase().load_from_file('./maigret/resources/data.json') db = maigret.MaigretDatabase().load_from_file('./maigret/resources/data.json')
username = input('Enter username to search: ') username = input('Enter username to search: ')
sites_count = ( sites_count = int(input(
int( f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): '
input( )) or TOP_SITES_COUNT
f'Select the number of sites to search ({TOP_SITES_COUNT} for default, {len(db.sites_dict)} max): '
)
)
or TOP_SITES_COUNT
)
sites = db.ranked_sites_dict(top=sites_count) sites = db.ranked_sites_dict(top=sites_count)
show_progressbar = input('Do you want to show a progressbar? [Yn] ').lower() != 'n' show_progressbar = input('Do you want to show a progressbar? [Yn] ').lower() != 'n'
extract_info = ( extract_info = input(
input( 'Do you want to extract additional info from accounts\' pages? [Yn] '
'Do you want to extract additional info from accounts\' pages? [Yn] ' ).lower() != 'n'
).lower() use_notifier = input(
!= 'n' 'Do you want to use notifier for displaying results while searching? [Yn] '
) ).lower() != 'n'
use_notifier = (
input(
'Do you want to use notifier for displaying results while searching? [Yn] '
).lower()
!= 'n'
)
notifier = None notifier = None
if use_notifier: if use_notifier: