Compare commits

..

11 Commits

Author SHA1 Message Date
Soxoj 86ea0b9212 CLI test fixes 2024-12-15 12:57:01 +01:00
overcuriousity f8f7c996ca fix poetry 2024-12-14 12:20:51 +01:00
overcuriousity c7639b9eec fix to make pull request 2024-12-14 01:19:20 +01:00
overcuriousity 5b7d8de9d1 Merge branch 'soxoj-main' 2024-12-14 01:16:15 +01:00
overcuriousity 1e74b09f78 Merge branch 'main' of https://github.com/soxoj/maigret into soxoj-main 2024-12-14 01:14:43 +01:00
overcuriousity dac9abeb79 webinterface: minor changes 2024-12-14 01:01:40 +01:00
overcuriousity a03b36fb5a updates to webinterface 2024-12-14 00:58:51 +01:00
overcuriousity a862309682 update 2024-12-13 14:51:05 +01:00
overcuriousity f43ebbb6fa update webinterface 2024-12-13 10:59:01 +01:00
overcuriousity fb70bc6ffb Merge pull request #1 from soxoj/main
merge upstream
2024-12-13 09:44:05 +01:00
overcuriousity c0cefac546 create flask frontend 2024-12-12 23:27:31 +01:00
37 changed files with 1154 additions and 2897 deletions
@@ -1,61 +0,0 @@
---
# Runs utils/close_invalid_telegram_prs.py once a day and on manual dispatch.
# Manual runs can choose between a dry run and a live run via the `dry_run`
# input; what exactly the script treats as "invalid" is defined in the script
# itself, not here.
name: Close Invalid Telegram PRs

on:
  schedule:
    # Run daily at 2 AM UTC
    - cron: '0 2 * * *'
  workflow_dispatch:
    # Allow manual triggering
    inputs:
      dry_run:
        description: 'Run in dry-run mode (show what would be closed without closing)'
        required: false
        # Real boolean to match `type: boolean` (was the string 'false').
        default: false
        type: boolean

jobs:
  close-invalid-prs:
    runs-on: ubuntu-latest
    permissions:
      # Need write permissions for pull requests and issues
      pull-requests: write
      issues: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      - name: Make script executable
        run: chmod +x utils/close_invalid_telegram_prs.py

      # Exactly one of the three steps below runs, selected by the trigger.
      # `github.event.inputs.*` values arrive as strings, which is why the
      # conditions compare against 'true' / 'false' rather than booleans.
      - name: Run PR closer script (dry-run for manual trigger)
        if: github.event_name == 'workflow_dispatch' && github.event.inputs.dry_run == 'true'
        run: |
          python utils/close_invalid_telegram_prs.py --dry-run
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Run PR closer script (live for manual trigger)
        if: github.event_name == 'workflow_dispatch' && github.event.inputs.dry_run == 'false'
        run: |
          python utils/close_invalid_telegram_prs.py
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # NOTE(review): the scheduled run passes --dry-run, so the daily job
      # only reports and never closes anything. Presumably a deliberate
      # safety default; confirm before relying on automated closing.
      - name: Run PR closer script (automated daily)
        if: github.event_name == 'schedule'
        run: |
          python utils/close_invalid_telegram_prs.py --dry-run
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+3 -3
View File
@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"] python-version: ["3.10", "3.11", "3.12"]
steps: steps:
- name: Checkout - name: Checkout
@@ -33,7 +33,7 @@ jobs:
poetry run coverage report --fail-under=60 poetry run coverage report --fail-under=60
poetry run coverage html poetry run coverage html
- name: Upload coverage report - name: Upload coverage report
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v3
with: with:
name: htmlcov-${{ strategy.job-index }} name: htmlcov
path: htmlcov path: htmlcov
+21 -12
View File
@@ -1,21 +1,30 @@
name: Upload Python Package to PyPI when a Release is Created name: Upload Python Package to PyPI when a Release is Created
on: on:
release: release:
types: [created] types: [created]
push:
tags:
- "v*"
permissions:
id-token: write
contents: read
jobs: jobs:
build-and-publish: pypi-publish:
name: Publish release to PyPI
runs-on: ubuntu-latest runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/maigret
permissions:
id-token: write
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v3 - name: Set up Python
- run: uv build uses: actions/setup-python@v4
- name: Publish to PyPI (Trusted Publishing)
uses: pypa/gh-action-pypi-publish@release/v1
with: with:
packages-dir: dist python-version: "3.x"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel
- name: Build package
run: |
python setup.py sdist bdist_wheel # Could also be python -m build
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
+1 -249
View File
@@ -1,254 +1,6 @@
# Changelog # Changelog
## [0.5.0] - 2025-08-10 ## [Unreleased]
* Site Suppression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/627
* Bump yarl from 1.7.2 to 1.8.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/626
* Streaming sites by @soxoj in https://github.com/soxoj/maigret/pull/628
* Mirrors by @fen0s in https://github.com/soxoj/maigret/pull/630
* Added Instagram scrapers by @soxoj in https://github.com/soxoj/maigret/pull/633
* Bump psutil from 5.9.1 to 5.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/624
* Bump pypdf2 from 2.10.4 to 2.10.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/625
* Invalid results fixes by @soxoj in https://github.com/soxoj/maigret/pull/634
* Bump pytest-httpserver from 1.0.5 to 1.0.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/638
* Bump pypdf2 from 2.10.5 to 2.10.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/641
* Bump certifi from 2022.6.15 to 2022.9.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/644
* Bump idna from 3.3 to 3.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/640
* fix false positives from bot by @fen0s in https://github.com/soxoj/maigret/pull/663
* Add pre commit hook by @fen0s in https://github.com/soxoj/maigret/pull/664
* site deletion by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/648
* Changed docker run to interactive and remove on exit by @dr-BEat in https://github.com/soxoj/maigret/pull/675
* Corrected grammar in README.md by @Trkzi-Omar in https://github.com/soxoj/maigret/pull/674
* fix sites from issues by @fen0s in https://github.com/soxoj/maigret/pull/680
* correct username in usage examples by @LeonGr in https://github.com/soxoj/maigret/pull/673
* Update README.md by @johanburati in https://github.com/soxoj/maigret/pull/669
* Fix typos by @LorenzoSapora in https://github.com/soxoj/maigret/pull/681
* Build docker images for arm64 and amd64 by @krydos in https://github.com/soxoj/maigret/pull/687
* Bump certifi from 2022.9.14 to 2022.9.24 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/652
* Bump aiohttp from 3.8.1 to 3.8.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/651
* Bump arabic-reshaper from 2.1.3 to 2.1.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/650
* Update README.md, Repl.it -> Replit with new badge by @PeterDaveHello in https://github.com/soxoj/maigret/pull/692
* Refactor Dockerfile with best practices by @PeterDaveHello in https://github.com/soxoj/maigret/pull/691
* Improve README.md Installation section by @PeterDaveHello in https://github.com/soxoj/maigret/pull/690
* Bump pytest-cov from 3.0.0 to 4.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/688
* Bump stem from 1.8.0 to 1.8.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/689
* Bump typing-extensions from 4.3.0 to 4.4.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/698
* Typo fixes in error.py by @Ben-Chapman in https://github.com/soxoj/maigret/pull/711
* Fixed docs about tags by @soxoj in https://github.com/soxoj/maigret/pull/715
* Fixed lightstalking.com by @soxoj in https://github.com/soxoj/maigret/pull/716
* Fixed YouTube by @soxoj in https://github.com/soxoj/maigret/pull/717
* Bump pytest-asyncio from 0.19.0 to 0.20.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/732
* Updated snapcraft yaml by @kz6fittycent in https://github.com/soxoj/maigret/pull/720
* Bump colorama from 0.4.5 to 0.4.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/733
* Bump pytest from 7.1.3 to 7.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/734
* disable not working sites by @fen0s in https://github.com/soxoj/maigret/pull/739
* disable broken sites by @fen0s in https://github.com/soxoj/maigret/pull/756
* Bump cloudscraper from 1.2.64 to 1.2.66 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/769
* fix opensea and shutterstock, disable a few dead sites by @fen0s in https://github.com/soxoj/maigret/pull/798
* Fixed documentation URL by @soxoj in https://github.com/soxoj/maigret/pull/799
* Small readme fix by @soxoj in https://github.com/soxoj/maigret/pull/857
* docs spelling error by @Nadeem-05 in https://github.com/soxoj/maigret/pull/866
* Fix Pinterest false positive by @therealchiendat in https://github.com/soxoj/maigret/pull/862
* Added new Websites by @codyMar30 in https://github.com/soxoj/maigret/pull/838
* Update "future" package to v0.18.3 by @PeterDaveHello in https://github.com/soxoj/maigret/pull/834
* Bump certifi from 2022.9.24 to 2022.12.7 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/793
* Update dependency - networkx from v2.5.1 to v2.6 by @PeterDaveHello in https://github.com/soxoj/maigret/pull/738
* Bump reportlab from 3.6.11 to 3.6.12 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/735
* Bump typing-extensions from 4.4.0 to 4.5.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/888
* Bump psutil from 5.9.2 to 5.9.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/741
* Bump attrs from 22.1.0 to 22.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/892
* Bump multidict from 6.0.2 to 6.0.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/891
* Fixed false positives, updated networkx dep, some lint fixes by @soxoj in https://github.com/soxoj/maigret/pull/894
* Bump lxml from 4.9.1 to 4.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/900
* Bump yarl from 1.8.1 to 1.8.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/899
* Fixed false positives on Mastodon sites by @soxoj in https://github.com/soxoj/maigret/pull/901
* Added valid regex for Mastodon instances (#848) by @soxoj in https://github.com/soxoj/maigret/pull/906
* Fix missing Mastodon Regex on #906 by @therealchiendat in https://github.com/soxoj/maigret/pull/908
* Bump tqdm from 4.64.1 to 4.65.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/905
* Bump requests from 2.28.1 to 2.28.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/904
* Bump psutil from 5.9.4 to 5.9.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/910
* fix deployment of tests by @noraj in https://github.com/soxoj/maigret/pull/933
* Added 26 ENS and similar domains with tag `crypto` by @soxoj in https://github.com/soxoj/maigret/pull/942
* Bump requests from 2.28.2 to 2.31.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/957
* Update wizard.py by @engNoori in https://github.com/soxoj/maigret/pull/1016
* Improved search through UnstoppableDomains by @soxoj in https://github.com/soxoj/maigret/pull/1040
* Added memory.lol (Twitter usernames archive) by @soxoj in https://github.com/soxoj/maigret/pull/1067
* Disabled and fixed several sites by @soxoj in https://github.com/soxoj/maigret/pull/1132
* Fixed some sites (again) by @soxoj in https://github.com/soxoj/maigret/pull/1133
* fix(sec): upgrade reportlab to 3.6.13 by @realize096 in https://github.com/soxoj/maigret/pull/1051
* Add compatibility with pytest >= 7.3.0 by @tjni in https://github.com/soxoj/maigret/pull/1117
* Additionally fixed sites, win32 build fix by @soxoj in https://github.com/soxoj/maigret/pull/1148
* Sites fixes 250823 by @soxoj in https://github.com/soxoj/maigret/pull/1149
* Bump reportlab from 3.6.12 to 4.0.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1160
* Bump certifi from 2022.12.7 to 2023.7.22 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1070
* fix(sec): upgrade certifi to 2022.12.07 by @realize096 in https://github.com/soxoj/maigret/pull/1173
* Bump cloudscraper from 1.2.66 to 1.2.71 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/914
* Some sites fixed & cloudflare detection by @soxoj in https://github.com/soxoj/maigret/pull/1178
* EasyInstaller because everyone likes saving time :) by @CatchySmile in https://github.com/soxoj/maigret/pull/1212
* Tests fixes + last updates by @soxoj in https://github.com/soxoj/maigret/pull/1228
* Bump pypdf2 from 2.10.8 to 3.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/815
* Bump pyvis from 0.2.1 to 0.3.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/861
* Bump xhtml2pdf from 0.2.8 to 0.2.11 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/935
* Bump flake8 from 5.0.4 to 6.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1091
* Bump aiohttp from 3.8.3 to 3.8.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1222
* Specified pyinstaller version by @soxoj in https://github.com/soxoj/maigret/pull/1230
* Pyinstaller fix by @soxoj in https://github.com/soxoj/maigret/pull/1231
* Test pyinstaller on dev branch by @soxoj in https://github.com/soxoj/maigret/pull/1233
* Update main from dev again by @soxoj in https://github.com/soxoj/maigret/pull/1234
* Bump typing-extensions from 4.5.0 to 4.8.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1239
* Bump pytest-rerunfailures from 10.2 to 12.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1237
* Bump async-timeout from 4.0.2 to 4.0.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1238
* Changed pyinstaller dir by @soxoj in https://github.com/soxoj/maigret/pull/1245
* Bump tqdm from 4.65.0 to 4.66.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1235
* Updating site checkers, disabling suspended sites by @MeowyPouncer in https://github.com/soxoj/maigret/pull/1266
* Updated site statistics by @soxoj in https://github.com/soxoj/maigret/pull/1273
* Compat RegataOS (Opensuse) by @Jeiel0rbit in https://github.com/soxoj/maigret/pull/1308
* fix reddit by @hhhtylerw in https://github.com/soxoj/maigret/pull/1296
* Added Telegram bot link by @soxoj in https://github.com/soxoj/maigret/pull/1321
* Added SOWEL classification by @soxoj in https://github.com/soxoj/maigret/pull/1453
* Bump jinja2 from 3.1.2 to 3.1.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1358
* Fixed/Disabled sites. Update requirements.txt by @rly0nheart in https://github.com/soxoj/maigret/pull/1517
* Fixed 4 sites, added 6 sites, disabled 27 sites by @rly0nheart in https://github.com/soxoj/maigret/pull/1536
* Fixed 3 sites, disabled 3, added by @rly0nheart in https://github.com/soxoj/maigret/pull/1539
* Bump socid-extractor from 0.0.24 to 0.0.26 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1546
* Added code conventions to CONTRIBUTING.md by @Lord-Topa in https://github.com/soxoj/maigret/pull/1589
* Readme by @Lord-Topa in https://github.com/soxoj/maigret/pull/1588
* Update data.json by @ranlo in https://github.com/soxoj/maigret/pull/1559
* Adding permutator feature for usernames by @balestek in https://github.com/soxoj/maigret/pull/1575
* Alik.cz indirectly requests removal by @ppfeister in https://github.com/soxoj/maigret/pull/1671
* Fixed 1 site, PyInstaller workflow, Google Colab example by @Ixve in https://github.com/soxoj/maigret/pull/1558
* Bump soupsieve from 2.5 to 2.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1708
* Added dev documentation, fixed some sites, removed GitHub issue links… by @soxoj in https://github.com/soxoj/maigret/pull/1869
* Bump cryptography from 42.0.7 to 43.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1870
* Bump requests-futures from 1.0.1 to 1.0.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1868
* Bump werkzeug from 3.0.3 to 3.0.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1846
* Added .readthedocs.yaml, fixed Pyinstaller and Docker workflows by @soxoj in https://github.com/soxoj/maigret/pull/1874
* Added GitHub and BuyMeACoffee sponsorships by @soxoj in https://github.com/soxoj/maigret/pull/1875
* Bump psutil from 5.9.5 to 6.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1839
* Bump flake8 from 6.1.0 to 7.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1692
* Bump future from 0.18.3 to 1.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1545
* Bump urllib3 from 2.2.1 to 2.2.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1600
* Bump certifi from 2023.11.17 to 2024.8.30 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1840
* Fixed test for aiohttp 3.10 by @soxoj in https://github.com/soxoj/maigret/pull/1876
* Bump aiohttp from 3.9.5 to 3.10.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1721
* Added new badges to README by @soxoj in https://github.com/soxoj/maigret/pull/1877
* Show detailed error statistics for `-v` by @soxoj in https://github.com/soxoj/maigret/pull/1879
* Disabled unavailable sites by @soxoj in https://github.com/soxoj/maigret/pull/1880
* Added 7 sites, implemented integration with Marple, docs update by @soxoj in https://github.com/soxoj/maigret/pull/1881
* Bump pefile from 2022.5.30 to 2024.8.26 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1883
* Bump lxml from 4.9.4 to 5.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1884
* New sites added by @soxoj in https://github.com/soxoj/maigret/pull/1888
* Improved self-check mode, added 15 sites by @soxoj in https://github.com/soxoj/maigret/pull/1887
* Bump pyinstaller from 6.1 to 6.11.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1882
* Bump pytest-asyncio from 0.23.7 to 0.23.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1885
* Pyinstaller bump & pefile fix by @soxoj in https://github.com/soxoj/maigret/pull/1890
* Bump python-bidi from 0.4.2 to 0.6.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1886
* Sites checks fixes by @soxoj in https://github.com/soxoj/maigret/pull/1896
* Parallel execution optimization by @soxoj in https://github.com/soxoj/maigret/pull/1897
* Maigret bot support (custom progress function fixed) by @soxoj in https://github.com/soxoj/maigret/pull/1898
* Bump markupsafe from 2.1.5 to 3.0.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1895
* Retries set to 0 by default, refactored code of executor with progress by @soxoj in https://github.com/soxoj/maigret/pull/1899
* Bump aiohttp-socks from 0.7.1 to 0.9.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1900
* Bump pycountry from 23.12.11 to 24.6.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1903
* Bump pytest-cov from 4.1.0 to 6.0.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1902
* Bump pyvis from 0.2.1 to 0.3.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1893
* Close http connections (#1595) by @soxoj in https://github.com/soxoj/maigret/pull/1905
* New logo by @soxoj in https://github.com/soxoj/maigret/pull/1906
* Fixed dateutil parsing error for CDT timezone by @soxoj in https://github.com/soxoj/maigret/pull/1907
* Bump alive-progress from 2.4.1 to 3.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1910
* Permutator output and documentation updates by @soxoj in https://github.com/soxoj/maigret/pull/1914
* Bump aiohttp from 3.11.7 to 3.11.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1912
* Bump async-timeout from 4.0.3 to 5.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1909
* A recursive search animation in README has been updated by @soxoj in https://github.com/soxoj/maigret/pull/1915
* Bump pytest-rerunfailures from 12.0 to 15.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1911
* Bump attrs from 22.2.0 to 24.2.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1913
* Sites fixes by @soxoj in https://github.com/soxoj/maigret/pull/1917
* Update README.md by @soxoj in https://github.com/soxoj/maigret/pull/1919
* Refactored sites module, updated documentation by @soxoj in https://github.com/soxoj/maigret/pull/1918
* Bump aiohttp from 3.11.8 to 3.11.9 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1920
* Bump pytest from 7.4.4 to 8.3.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1923
* Bump yarl from 1.18.0 to 1.18.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1922
* Bump pytest-asyncio from 0.23.8 to 0.24.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1925
* Documentation update by @soxoj in https://github.com/soxoj/maigret/pull/1926
* Bump mock from 4.0.3 to 5.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1921
* Bump pywin32-ctypes from 0.2.1 to 0.2.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1924
* Installation docs update by @soxoj in https://github.com/soxoj/maigret/pull/1927
* Disabled Figma check by @soxoj in https://github.com/soxoj/maigret/pull/1928
* Put Windows executable in Releases for each dev and main commit by @soxoj in https://github.com/soxoj/maigret/pull/1929
* Updated PyInstaller workflow by @soxoj in https://github.com/soxoj/maigret/pull/1930
* Documentation update by @soxoj in https://github.com/soxoj/maigret/pull/1931
* Fixed Figma check and some bugs by @soxoj in https://github.com/soxoj/maigret/pull/1932
* Bump six from 1.16.0 to 1.17.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1933
* Activation mechanism documentation added by @soxoj in https://github.com/soxoj/maigret/pull/1935
* Readme/docs update based on GH discussions by @soxoj in https://github.com/soxoj/maigret/pull/1936
* Bump aiohttp from 3.11.9 to 3.11.10 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1937
* Weibo site check fix, activation mechanism added by @soxoj in https://github.com/soxoj/maigret/pull/1938
* Fixed Ebay and BongaCams checks by @soxoj in https://github.com/soxoj/maigret/pull/1939
* Sites fixes by @soxoj in https://github.com/soxoj/maigret/pull/1940
* Fixed Linktr and discourse.mozilla.org by @soxoj in https://github.com/soxoj/maigret/pull/1941
* Refactored self-check method, code formatting, small lint fixes by @soxoj in https://github.com/soxoj/maigret/pull/1942
* Refactoring, test coverage increased to 60% by @soxoj in https://github.com/soxoj/maigret/pull/1943
* Added a test for submitter by @soxoj in https://github.com/soxoj/maigret/pull/1944
* Update README.md by @soxoj in https://github.com/soxoj/maigret/pull/1949
* Updated OP.GG checks by @soxoj in https://github.com/soxoj/maigret/pull/1950
* Fixed ProductHunt check by @soxoj in https://github.com/soxoj/maigret/pull/1951
* Improved check feature extraction function, added tests by @soxoj in https://github.com/soxoj/maigret/pull/1952
* Submit improvements and site check fixes by @soxoj in https://github.com/soxoj/maigret/pull/1956
* chore: update submit.py by @eltociear in https://github.com/soxoj/maigret/pull/1957
* Fixed Gravatar parsing (socid_extractor) by @soxoj in https://github.com/soxoj/maigret/pull/1958
* Site check fixes by @soxoj in https://github.com/soxoj/maigret/pull/1962
* fix bad linux filename generation by @overcuriousity in https://github.com/soxoj/maigret/pull/1961
* Bump pytest-asyncio from 0.24.0 to 0.25.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1963
* Fixed flaky tests to check cookies by @soxoj in https://github.com/soxoj/maigret/pull/1965
* Preparation of 0.5.0 alpha version by @soxoj in https://github.com/soxoj/maigret/pull/1966
* Created web frontend launched via --web flag by @overcuriousity in https://github.com/soxoj/maigret/pull/1967
* Bump certifi from 2024.8.30 to 2024.12.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1969
* Bump attrs from 24.2.0 to 24.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1970
* Added web interface docs by @soxoj in https://github.com/soxoj/maigret/pull/1972
* Small docs and parameters fixes for web interface mode by @soxoj in https://github.com/soxoj/maigret/pull/1973
* [ImgBot] Optimize images by @imgbot[bot] in https://github.com/soxoj/maigret/pull/1974
* Improving the web interface by @overcuriousity in https://github.com/soxoj/maigret/pull/1975
* make graph more meaningful by @overcuriousity in https://github.com/soxoj/maigret/pull/1977
* Async generator-executor for site checks by @soxoj in https://github.com/soxoj/maigret/pull/1978
* Bump aiohttp from 3.11.10 to 3.11.11 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1979
* Bump psutil from 6.1.0 to 6.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1980
* Bump aiohttp-socks from 0.9.1 to 0.10.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1985
* Bump mypy from 1.13.0 to 1.14.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1983
* Bump aiohttp-socks from 0.10.0 to 0.10.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1987
* Bump jinja2 from 3.1.4 to 3.1.5 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1982
* Bump coverage from 7.6.9 to 7.6.10 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1986
* Bump pytest-asyncio from 0.25.0 to 0.25.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1989
* Bump mypy from 1.14.0 to 1.14.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1988
* Bump pytest-asyncio from 0.25.1 to 0.25.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/1990
* docs: update usage-examples.rst by @eltociear in https://github.com/soxoj/maigret/pull/1996
* upload-artifact action in python test workflow updated to v4 by @soxoj in https://github.com/soxoj/maigret/pull/2024
* Pass db_file configuration to web interface by @pykereaper in https://github.com/soxoj/maigret/pull/2019
* Fix usage of data.json files from web by @pykereaper in https://github.com/soxoj/maigret/pull/2020
* Bump black from 24.10.0 to 25.1.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2001
* Important Update Installer.bat by @CatchySmile in https://github.com/soxoj/maigret/pull/1994
* Bump cryptography from 44.0.0 to 44.0.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2005
* Bump jinja2 from 3.1.5 to 3.1.6 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2011
* [#2010] Add 6 more websites to manage by @pylapp in https://github.com/soxoj/maigret/pull/2009
* Bump flask from 3.1.0 to 3.1.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2028
* Bump requests from 2.32.3 to 2.32.4 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2026
* Bump pycares from 4.5.0 to 4.9.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2025
* Bump pytest-asyncio from 0.25.2 to 0.26.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2016
* Bump urllib3 from 2.2.3 to 2.5.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2027
* Disable ICQ site by @Echo-Darlyson in https://github.com/soxoj/maigret/pull/1993
* Bump attrs from 24.3.0 to 25.3.0 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2014
* Bump certifi from 2024.12.14 to 2025.1.31 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2004
* Bump typing-extensions from 4.12.2 to 4.14.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2038
* Disable AskFM by @MR-VL in https://github.com/soxoj/maigret/pull/2037
* Bump platformdirs from 4.3.6 to 4.3.8 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2033
* Bump coverage from 7.6.10 to 7.9.2 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2039
* Bump aiohttp from 3.11.11 to 3.12.14 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2041
* Bump yarl from 1.18.3 to 1.20.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2032
* Fixed test dialog_adds_site_negative by @soxoj in https://github.com/soxoj/maigret/pull/2107
* Bump reportlab from 4.2.5 to 4.4.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2063
* Bump asgiref from 3.8.1 to 3.9.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2040
* Bump multidict from 6.1.0 to 6.6.3 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2034
* Bump pytest-rerunfailures from 15.0 to 15.1 by @dependabot[bot] in https://github.com/soxoj/maigret/pull/2030
**Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.4...v0.5.0
## [0.4.4] - 2022-09-03 ## [0.4.4] - 2022-09-03
* Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433 * Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433
+78 -68
View File
@@ -1,61 +1,85 @@
@echo off @echo off
REM check if running as admin
goto check_Permissions goto check_Permissions
:check_Permissions :check_Permissions
echo Administrative permissions required. Detecting permissions...
net session >nul 2>&1 net session >nul 2>&1
if %errorLevel% == 0 ( if %errorLevel% == 0 (
echo Success: Elevated permissions granted. goto 1
) else ( ) else (
echo Failure: Requires elevated permissions. cls
pause >nul echo Failure: You MUST run this as administator, otherwise commands will fail.
) )
cls pause >nul
echo --------------------------------------------------------
echo Python 3.8 or higher and pip3 required.
echo --------------------------------------------------------
echo Press [I] to begin installation.
echo Press [R] If already installed.
echo --------------------------------------------------------
choice /c IR
if %errorlevel%==1 goto check_python
if %errorlevel%==2 goto after
:check_python
cls
for /f "tokens=2 delims= " %%i in ('python --version 2^>nul') do (
for /f "tokens=1,2 delims=." %%j in ("%%i") do (
if %%j GEQ 3 (
if %%k GEQ 8 (
goto check_pip
)
)
)
)
echo Python 3.8 or higher is required. Please install it first.
pause
exit /b
:check_pip
pip --version 2>nul | findstr /r /c:"pip" >nul REM Step 2: Check if Python and pip3 are installed
python --version >nul 2>&1
if %errorlevel% neq 0 ( if %errorlevel% neq 0 (
echo pip is required. Please install it first. echo Python is not installed. Please install Python 3.8 or higher.
pause pause
exit /b exit /b
) )
goto install1
pip3 --version >nul 2>&1
if %errorlevel% neq 0 (
echo pip3 is not installed. Please install pip3.
pause
exit /b
)
REM Step 3: Check Python version
python -c "import sys; exit(0) if sys.version_info >= (3,8) else exit(1)"
if %errorlevel% neq 0 (
echo Python version 3.8 or higher is required.
pause
exit /b
)
:1
cls
:::===============================================================
::: ______ __ __ _ _
::: | ____| | \/ | (_) | |
::: | |__ __ _ ___ _ _ | \ / | __ _ _ __ _ _ __ ___| |_
::: | __| / _` / __| | | | | |\/| |/ _` | |/ _` | '__/ _ \ __|
::: | |___| (_| \__ \ |_| | | | | | (_| | | (_| | | | __/ |_
::: |______\__,_|___/\__, | |_| |_|\__,_|_|\__, |_| \___|\__|
::: __/ | __/ |
::: |___/ |___/
:::
:::===============================================================
echo.
for /f "delims=: tokens=*" %%A in ('findstr /b ::: "%~f0"') do @echo(%%A
echo.
echo ----------------------------------------------------------------
echo Python 3.8 or higher and pip3 required.
echo ----------------------------------------------------------------
echo Press [I] to begin installation.
echo Press [R] If already installed.
echo ----------------------------------------------------------------
choice /c IR
if %errorlevel%==1 goto install1
if %errorlevel%==2 goto after
:install1 :install1
cls cls
echo ======================================================== echo ========================================================
echo Maigret Installation echo Maigret Installation Script
echo ======================================================== echo ========================================================
echo. echo.
echo -------------------------------------------------------- echo --------------------------------------------------------
echo If your pip installation is outdated, it could cause echo If your pip installation is outdated, it could cause
echo cryptography to fail on installation. echo cryptography to fail on installation.
echo -------------------------------------------------------- echo --------------------------------------------------------
echo Check for and install pip 23.3.2 now? echo check for and install pip updates now?
echo -------------------------------------------------------- echo --------------------------------------------------------
choice /c YN choice /c YN
if %errorlevel%==1 goto install2 if %errorlevel%==1 goto install2
@@ -63,56 +87,42 @@ if %errorlevel%==2 goto install3
:install2 :install2
cls cls
python -m pip install --upgrade pip==23.3.2 python -m pip install --upgrade pip
if %errorlevel% neq 0 ( goto:install3
echo Failed to update pip to version 23.3.2. Please check your installation.
pause
exit /b
)
goto install3
:install3 :install3
cls cls
echo ======================================================== echo ========================================================
echo Maigret Installation echo Maigret Installation Script
echo ======================================================== echo ========================================================
echo. echo.
echo -------------------------------------------------------- echo --------------------------------------------------------
echo Installing Maigret... echo Install requirements and maigret?
python -m pip install maigret echo --------------------------------------------------------
if %errorlevel% neq 0 ( choice /c YN
echo Failed to install Maigret. Please check your installation. if %errorlevel%==1 goto install4
pause if %errorlevel%==2 goto 1
exit /b
) :install4
echo. cls
echo +------------------------------------------------------+ pip install .
echo Maigret installed successfully. pip install maigret
echo +------------------------------------------------------+ goto:after
pause
goto after
:after :after
cls cls
echo ======================================================== echo ========================================================
echo Maigret Usage echo Maigret Background Search
echo ======================================================== echo ========================================================
echo. echo.
echo +--------------------------------------------------------+ echo --------------------------------------------------------
echo To use Maigret, you can run the following command: echo Please Enter Username / Email
echo --------------------------------------------------------
set /p input=
maigret %input%
echo. echo.
echo maigret [options] [username]
echo. echo.
echo For example, to search for a username:
echo. echo.
echo maigret example_username
echo. echo.
echo For more options and usage details, refer to the Maigret documentation.
echo.
echo https://github.com/soxoj/maigret/blob/5b3b81b4822f6deb2e9c31eb95039907f25beb5e/README.md
echo +--------------------------------------------------------+
echo.
cmd
pause pause
exit /b goto:after
exit /b
-25
View File
@@ -75,7 +75,6 @@ You can launch Maigret using cloud shells and Jupyter notebooks. Press one of th
Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo. Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
**NOTE**: Python 3.10 or higher and pip are required, **Python 3.11 is recommended.** **NOTE**: Python 3.10 or higher and pip are required, **Python 3.11 is recommended.**
```bash ```bash
@@ -132,30 +131,6 @@ maigret user1 user2 user3 -a
Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html). Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
### Web interface
You can run Maigret with a web interface, where you can view the graph with results and download reports of all formats on a single page.
<details>
<summary>Web Interface Screenshots</summary>
![Web interface: how to start](https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot_start.png)
![Web interface: results](https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot.png)
</details>
Instructions:
1. Run Maigret with the ``--web`` flag and specify the port number.
```console
maigret --web 5000
```
2. Open http://127.0.0.1:5000 in your browser and enter one or more usernames to make a search.
3. Wait a bit for the search to complete and view the graph with results, the table with all accounts found, and download reports of all formats.
## Contributing ## Contributing
Maigret has open-source code, so you may contribute your own sites by adding them to the `data.json` file, or bring changes to its code! Maigret has open-source code, so you may contribute your own sites by adding them to the `data.json` file, or bring changes to its code!
-121
View File
@@ -1,121 +0,0 @@
# Invalid Telegram PR Auto-Closer
This repository includes an automated solution to identify and close pull requests with titles matching the pattern "Invalid result https://t.me/...". These PRs are typically auto-generated or spam submissions that should not be processed.
## Components
### 1. Python Script (`utils/close_invalid_telegram_prs.py`)
A utility script that:
- Searches for open PRs matching the pattern "Invalid result https://t.me/..."
- Optionally closes them with a descriptive comment
- Supports dry-run mode for testing
- Uses the GitHub API to interact with the repository
#### Usage
```bash
# Dry run (show what would be closed without closing)
python utils/close_invalid_telegram_prs.py --dry-run
# Close matching PRs interactively
python utils/close_invalid_telegram_prs.py
# Close PRs with custom comment
python utils/close_invalid_telegram_prs.py --comment "Custom closure message"
# Use with different repository
python utils/close_invalid_telegram_prs.py --owner username --repo repository
```
#### Requirements
- Python 3.6+
- `requests` library: `pip install requests`
- GitHub personal access token with repository access
#### Authentication
Set your GitHub token via:
- Command line: `--token YOUR_TOKEN`
- Environment variable: `export GITHUB_TOKEN=YOUR_TOKEN`
### 2. GitHub Actions Workflow (`.github/workflows/close-invalid-telegram-prs.yml`)
An automated workflow that:
- Runs daily at 2 AM UTC (in dry-run mode by default)
- Can be manually triggered with an option to actually close PRs
- Uses the repository's `GITHUB_TOKEN` for authentication
#### Manual Trigger
1. Go to the Actions tab in your GitHub repository
2. Select "Close Invalid Telegram PRs" workflow
3. Click "Run workflow"
4. Choose whether to run in dry-run mode or actually close PRs
### 3. Tests (`tests/test_close_invalid_telegram_prs.py`)
Unit tests that verify:
- Correct identification of matching PR titles
- Proper rejection of non-matching titles
- Case-insensitive pattern matching
- Whitespace handling
Run tests with:
```bash
python tests/test_close_invalid_telegram_prs.py
```
## Pattern Detection
The script identifies PRs with titles matching:
- `Invalid result https://t.me/...` (case insensitive)
- Various whitespace and formatting variations
- Any Telegram URL after the pattern
### Examples of Matching Titles
- "Invalid result https://t.me/someuser"
- "INVALID RESULT https://t.me/channel123"
- "Invalid Result https://t.me/bot_name"
- " Invalid result https://t.me/user/123 " (with whitespace)
### Examples of Non-Matching Titles
- "Valid result https://t.me/someuser" (not "Invalid")
- "Invalid results https://t.me/someuser" (plural "results")
- "Fix invalid result https://t.me/someuser" (extra words)
- "Invalid result http://t.me/someuser" (http instead of https)
## Security
- The GitHub Actions workflow only has the minimum required permissions
- The script requires explicit confirmation before closing PRs (except in automated mode)
- All actions are logged and can be audited
- Dry-run mode is available for testing
## Customization
You can customize the behavior by:
- Modifying the regex pattern in `is_invalid_telegram_pr()` function
- Changing the default comment message
- Adjusting the GitHub Actions schedule
- Adding additional validation logic
## Troubleshooting
### Common Issues
1. **Permission Denied**: Ensure your GitHub token has the required permissions
2. **No PRs Found**: This is normal if there are no matching PRs
3. **Rate Limiting**: The script handles GitHub API rate limits automatically
### Debug Mode
Run in dry-run mode to preview which PRs would be closed:
```bash
python utils/close_invalid_telegram_prs.py --dry-run
```
This will show exactly which PRs match the pattern without closing them.
+2 -2
View File
@@ -3,10 +3,10 @@
# -- Project information # -- Project information
project = 'Maigret' project = 'Maigret'
copyright = '2025, soxoj' copyright = '2024, soxoj'
author = 'soxoj' author = 'soxoj'
release = '0.5.0' release = '0.5.0a1'
version = '0.5' version = '0.5'
# -- General configuration # -- General configuration
+2 -3
View File
@@ -194,10 +194,9 @@ PyPi package.
2. Update Maigret version in three files manually: 2. Update Maigret version in three files manually:
- pyproject.toml - setup.py
- maigret/__version__.py - maigret/__version__.py
- docs/source/conf.py - docs/source/conf.py
- snapcraft.yaml
3. Create a new empty text section in the beginning of the file `CHANGELOG.md` with a current date: 3. Create a new empty text section in the beginning of the file `CHANGELOG.md` with a current date:
-28
View File
@@ -5,34 +5,6 @@ Features
This is the list of Maigret features. This is the list of Maigret features.
.. _web-interface:
Web Interface
-------------
You can run Maigret with a web interface, where you can view the graph with results and download reports of all formats on a single page.
.. image:: https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot_start.png
:alt: Web interface: how to start
.. image:: https://raw.githubusercontent.com/soxoj/maigret/main/static/web_interface_screenshot.png
:alt: Web interface: results
Instructions:
1. Run Maigret with the ``--web`` flag and specify the port number.
.. code-block:: console
maigret --web 5000
2. Open http://127.0.0.1:5000 in your browser and enter one or more usernames to make a search.
3. Wait a bit for the search to complete and view the graph with results, the table with all accounts found, and download reports of all formats.
Personal info gathering Personal info gathering
----------------------- -----------------------
Binary file not shown.

Before

Width:  |  Height:  |  Size: 234 KiB

After

Width:  |  Height:  |  Size: 375 KiB

-10
View File
@@ -3,16 +3,6 @@
Usage examples Usage examples
============== ==============
You can use Maigret as:
- a command line tool: initial and a default mode
- a `web interface <#web-interface>`_: view the graph with results and download all report formats on a single page
- a library: integrate Maigret into your own project
Use Cases
---------
1. Search for accounts with username ``machine42`` on top 500 sites (by default, according to Alexa rank) from the Maigret DB. 1. Search for accounts with username ``machine42`` on top 500 sites (by default, according to Alexa rank) from the Maigret DB.
.. code-block:: console .. code-block:: console
+1 -1
View File
@@ -1,3 +1,3 @@
"""Maigret version file""" """Maigret version file"""
__version__ = '0.5.0' __version__ = '0.5.0a1'
+21 -16
View File
@@ -26,7 +26,11 @@ except ImportError:
from . import errors from . import errors
from .activation import ParsingActivator, import_aiohttp_cookies from .activation import ParsingActivator, import_aiohttp_cookies
from .errors import CheckError from .errors import CheckError
from .executors import AsyncioQueueGeneratorExecutor from .executors import (
AsyncExecutor,
AsyncioSimpleExecutor,
AsyncioProgressbarQueueExecutor,
)
from .result import MaigretCheckResult, MaigretCheckStatus from .result import MaigretCheckResult, MaigretCheckStatus
from .sites import MaigretDatabase, MaigretSite from .sites import MaigretDatabase, MaigretSite
from .types import QueryOptions, QueryResultWrapper from .types import QueryOptions, QueryResultWrapper
@@ -666,13 +670,18 @@ async def maigret(
await debug_ip_request(clearweb_checker, logger) await debug_ip_request(clearweb_checker, logger)
# setup parallel executor # setup parallel executor
executor = AsyncioQueueGeneratorExecutor( executor: Optional[AsyncExecutor] = None
logger=logger, if no_progressbar:
in_parallel=max_connections, # TODO: switch to AsyncioProgressbarQueueExecutor with progress object mock
timeout=timeout + 0.5, executor = AsyncioSimpleExecutor(logger=logger)
*args, else:
**kwargs, executor = AsyncioProgressbarQueueExecutor(
) logger=logger,
in_parallel=max_connections,
timeout=timeout + 0.5,
*args,
**kwargs,
)
# make options objects for all the requests # make options objects for all the requests
options: QueryOptions = {} options: QueryOptions = {}
@@ -719,17 +728,13 @@ async def maigret(
}, },
) )
cur_results = [] cur_results = await executor.run(tasks_dict.values())
with alive_bar(
len(tasks_dict), title="Searching", force_tty=True, disable=no_progressbar # wait for executor timeout errors
) as progress: await asyncio.sleep(1)
async for result in executor.run(tasks_dict.values()):
cur_results.append(result)
progress()
all_results.update(cur_results) all_results.update(cur_results)
# rerun for failed sites
sites = get_failed_sites(dict(cur_results)) sites = get_failed_sites(dict(cur_results))
attempts -= 1 attempts -= 1
+1 -69
View File
@@ -1,7 +1,7 @@
import asyncio import asyncio
import sys import sys
import time import time
from typing import Any, Iterable, List, Callable from typing import Any, Iterable, List
import alive_progress import alive_progress
from alive_progress import alive_bar from alive_progress import alive_bar
@@ -19,7 +19,6 @@ def create_task_func():
class AsyncExecutor: class AsyncExecutor:
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.logger = kwargs['logger'] self.logger = kwargs['logger']
@@ -35,7 +34,6 @@ class AsyncExecutor:
class AsyncioSimpleExecutor(AsyncExecutor): class AsyncioSimpleExecutor(AsyncExecutor):
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 100)) self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 100))
@@ -50,7 +48,6 @@ class AsyncioSimpleExecutor(AsyncExecutor):
class AsyncioProgressbarExecutor(AsyncExecutor): class AsyncioProgressbarExecutor(AsyncExecutor):
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
@@ -74,7 +71,6 @@ class AsyncioProgressbarExecutor(AsyncExecutor):
class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor): class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor):
# Deprecated: will be removed soon, don't use it
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1)) self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1))
@@ -178,67 +174,3 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
w.cancel() w.cancel()
return self.results return self.results
class AsyncioQueueGeneratorExecutor:
    """Run queued calls on a pool of worker tasks and stream their results.

    ``run`` is an async generator: every result a worker produces is yielded
    to the caller as soon as it is available.
    """

    # Deprecated: will be removed soon, don't use it
    def __init__(self, *args, **kwargs):
        """Configure the executor from keyword arguments.

        Keyword Args:
            in_parallel: number of worker tasks to start (default 10).
            timeout: per-call timeout in seconds handed to
                ``asyncio.wait_for``; ``None`` (the default) means no limit.
            logger: logger used for worker errors and timing; required.

        Raises:
            KeyError: if ``logger`` is not supplied.
        """
        self.workers_count = kwargs.get('in_parallel', 10)
        self.queue = asyncio.Queue()
        self.timeout = kwargs.get('timeout')
        self.logger = kwargs['logger']
        self._results = asyncio.Queue()
        # Unique sentinel: a worker that dequeues it stops its loop.
        self._stop_signal = object()

    async def worker(self):
        """Process tasks from the queue and put results into the results queue.

        Each queue item is unpacked as ``(func, args, kwargs)``. A call that
        exceeds ``self.timeout`` contributes ``kwargs.get('default')`` instead
        of a real result. Any other exception is logged and produces no
        result for that item.
        """
        while True:
            item = await self.queue.get()
            if item is self._stop_signal:
                self.queue.task_done()
                break

            try:
                func, call_args, call_kwargs = item
                pending = create_task_func()(func(*call_args, **call_kwargs))
                try:
                    outcome = await asyncio.wait_for(pending, timeout=self.timeout)
                except asyncio.TimeoutError:
                    outcome = call_kwargs.get('default')
                await self._results.put(outcome)
            except Exception as exc:
                self.logger.error(f"Error in worker: {exc}")
            finally:
                self.queue.task_done()

    async def run(self, queries: Iterable[Callable[..., Any]]):
        """Run workers to process queries in parallel.

        Args:
            queries: items to execute. Despite the annotation, ``worker``
                unpacks each item as a ``(func, args, kwargs)`` triple.

        Yields:
            Results in the order workers finish them.

        Side effects:
            Stores the elapsed seconds in ``self.execution_time`` and logs
            it at debug level once the generator finishes or is closed.
        """
        # Monotonic clock: the elapsed time must not be distorted by
        # wall-clock adjustments (NTP sync, DST, manual changes).
        started_at = time.monotonic()

        # Add tasks to the queue
        for query in queries:
            await self.queue.put(query)

        # Create workers
        workers = [
            asyncio.create_task(self.worker()) for _ in range(self.workers_count)
        ]

        # Add stop signals: one per worker, queued after all real tasks
        for _ in range(self.workers_count):
            await self.queue.put(self._stop_signal)

        try:
            # Keep draining until every worker has exited and no result is
            # left; the 1-second timeout lets the condition be re-checked
            # while workers are still busy.
            while any(w.done() is False for w in workers) or not self._results.empty():
                try:
                    result = await asyncio.wait_for(self._results.get(), timeout=1)
                    yield result
                except asyncio.TimeoutError:
                    pass
        finally:
            # Ensure all workers are awaited
            await asyncio.gather(*workers)
            self.execution_time = time.monotonic() - started_at
            self.logger.debug(f"Spent time: {self.execution_time}")
+13 -19
View File
@@ -328,10 +328,10 @@ def setup_arguments_parser(settings: Settings):
"--web", "--web",
metavar='PORT', metavar='PORT',
type=int, type=int,
nargs='?', # Optional PORT value nargs='?',
const=5000, # Default PORT if `--web` is provided without a value const=5000, # default if --web is provided without a port
default=None, # Explicitly set default to None default=settings.web_interface_port,
help="Launch the web interface on the specified port (default: 5000 if no PORT is provided).", help="Launches the web interface on the specified port (default: 5000 if no PORT is provided).",
) )
output_group = parser.add_argument_group( output_group = parser.add_argument_group(
'Output options', 'Options to change verbosity and view of the console output' 'Output options', 'Options to change verbosity and view of the console output'
@@ -493,6 +493,14 @@ async def main():
log_level = logging.WARNING log_level = logging.WARNING
logger.setLevel(log_level) logger.setLevel(log_level)
if args.web is not None:
from maigret.web.app import app
port = (
args.web if args.web else 5000
) # args.web is either the specified port or 5000 by const
app.run(port=port)
# Usernames initial list # Usernames initial list
usernames = { usernames = {
u: args.id_type u: args.id_type
@@ -520,9 +528,7 @@ async def main():
if args.tags: if args.tags:
args.tags = list(set(str(args.tags).split(','))) args.tags = list(set(str(args.tags).split(',')))
db_file = args.db_file \ db_file = path.join(path.dirname(path.realpath(__file__)), args.db_file)
if (args.db_file.startswith("http://") or args.db_file.startswith("https://")) \
else path.join(path.dirname(path.realpath(__file__)), args.db_file)
if args.top_sites == 0 or args.all_sites: if args.top_sites == 0 or args.all_sites:
args.top_sites = sys.maxsize args.top_sites = sys.maxsize
@@ -602,18 +608,6 @@ async def main():
# Define one report filename template # Define one report filename template
report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}') report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
# Web interface
if args.web is not None:
from maigret.web.app import app
app.config["MAIGRET_DB_FILE"] = db_file
port = (
args.web if args.web else 5000
) # args.web is either the specified port or 5000 by default
app.run(port=port)
return
if usernames == {}: if usernames == {}:
# magic params to exit after init # magic params to exit after init
query_notify.warning('No usernames to check, exiting.') query_notify.warning('No usernames to check, exiting.')
+68 -99
View File
@@ -98,20 +98,21 @@ class MaigretGraph:
def __init__(self, graph): def __init__(self, graph):
self.G = graph self.G = graph
def add_node(self, key, value, color=None): def add_node(self, key, value):
node_name = f'{key}: {value}' node_name = f'{key}: {value}'
params = dict(self.other_params) params = self.other_params
if key in SUPPORTED_IDS: if key in SUPPORTED_IDS:
params = dict(self.username_params) params = self.username_params
elif value.startswith('http'): elif value.startswith('http'):
params = dict(self.site_params) params = self.site_params
params['title'] = node_name self.G.add_node(node_name, title=node_name, **params)
if color:
params['color'] = color if value != value.lower():
normalized_node_name = self.add_node(key, value.lower())
self.link(node_name, normalized_node_name)
self.G.add_node(node_name, **params)
return node_name return node_name
def link(self, node1_name, node2_name): def link(self, node1_name, node2_name):
@@ -119,126 +120,94 @@ class MaigretGraph:
def save_graph_report(filename: str, username_results: list, db: MaigretDatabase): def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
# moved here to speed up the launch of Maigret
import networkx as nx import networkx as nx
G = nx.Graph() G = nx.Graph()
graph = MaigretGraph(G) graph = MaigretGraph(G)
base_site_nodes = {}
site_account_nodes = {}
processed_values = {} # Track processed values to avoid duplicates
for username, id_type, results in username_results: for username, id_type, results in username_results:
# Add username node, using normalized version directly if different username_node_name = graph.add_node(id_type, username)
norm_username = username.lower()
username_node_name = graph.add_node(id_type, norm_username)
for website_name, dictionary in results.items(): for website_name in results:
if not dictionary or dictionary.get("is_similar"): dictionary = results[website_name]
# TODO: fix no site data issue
if not dictionary:
continue
if dictionary.get("is_similar"):
continue continue
status = dictionary.get("status") status = dictionary.get("status")
if not status or status.status != MaigretCheckStatus.CLAIMED: if not status: # FIXME: currently in case of timeout
continue continue
# base site node if dictionary["status"].status != MaigretCheckStatus.CLAIMED:
site_base_url = website_name continue
if site_base_url not in base_site_nodes:
base_site_nodes[site_base_url] = graph.add_node(
'site', site_base_url, color='#28a745'
) # Green color
site_base_node_name = base_site_nodes[site_base_url] site_fallback_name = dictionary.get(
'url_user', f'{website_name}: {username.lower()}'
# account node )
account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}') # site_node_name = dictionary.get('url_user', f'{website_name}: {username.lower()}')
account_node_id = f"{site_base_url}: {account_url}" site_node_name = graph.add_node('site', site_fallback_name)
if account_node_id not in site_account_nodes: graph.link(username_node_name, site_node_name)
site_account_nodes[account_node_id] = graph.add_node(
'account', account_url
)
account_node_name = site_account_nodes[account_node_id]
# link username → account → site
graph.link(username_node_name, account_node_name)
graph.link(account_node_name, site_base_node_name)
def process_ids(parent_node, ids): def process_ids(parent_node, ids):
for k, v in ids.items(): for k, v in ids.items():
if ( if k.endswith('_count') or k.startswith('is_') or k.endswith('_at'):
k.endswith('_count') continue
or k.startswith('is_') if k in 'image':
or k.endswith('_at')
or k in 'image'
):
continue continue
# Normalize value if string v_data = v
norm_v = v.lower() if isinstance(v, str) else v if v.startswith('['):
value_key = f"{k}:{norm_v}" try:
v_data = ast.literal_eval(v)
except Exception as e:
logging.error(e)
if value_key in processed_values: # value is a list
ids_data_name = processed_values[value_key] if isinstance(v_data, list):
else: list_node_name = graph.add_node(k, site_fallback_name)
v_data = v for vv in v_data:
if isinstance(v, str) and v.startswith('['): data_node_name = graph.add_node(vv, site_fallback_name)
try: graph.link(list_node_name, data_node_name)
v_data = ast.literal_eval(v)
except Exception as e:
logging.error(e)
continue
if isinstance(v_data, list):
list_node_name = graph.add_node(k, site_base_url)
processed_values[value_key] = list_node_name
for vv in v_data:
data_node_name = graph.add_node(vv, site_base_url)
graph.link(list_node_name, data_node_name)
add_ids = {
a: b for b, a in db.extract_ids_from_url(vv).items()
}
if add_ids:
process_ids(data_node_name, add_ids)
ids_data_name = list_node_name
else:
ids_data_name = graph.add_node(k, norm_v)
processed_values[value_key] = ids_data_name
if 'username' in k or k in SUPPORTED_IDS:
new_username_key = f"username:{norm_v}"
if new_username_key not in processed_values:
new_username_node_name = graph.add_node(
'username', norm_v
)
processed_values[new_username_key] = (
new_username_node_name
)
graph.link(ids_data_name, new_username_node_name)
add_ids = { add_ids = {
k: v for v, k in db.extract_ids_from_url(v).items() a: b for b, a in db.extract_ids_from_url(vv).items()
} }
if add_ids: if add_ids:
process_ids(ids_data_name, add_ids) process_ids(data_node_name, add_ids)
else:
# value is just a string
# ids_data_name = f'{k}: {v}'
# if ids_data_name == parent_node:
# continue
graph.link(parent_node, ids_data_name) ids_data_name = graph.add_node(k, v)
# G.add_node(ids_data_name, size=10, title=ids_data_name, group=3)
graph.link(parent_node, ids_data_name)
# check for username
if 'username' in k or k in SUPPORTED_IDS:
new_username_node_name = graph.add_node('username', v)
graph.link(ids_data_name, new_username_node_name)
add_ids = {k: v for v, k in db.extract_ids_from_url(v).items()}
if add_ids:
process_ids(ids_data_name, add_ids)
if status.ids_data: if status.ids_data:
process_ids(account_node_name, status.ids_data) process_ids(site_node_name, status.ids_data)
# Remove overly long nodes nodes_to_remove = []
nodes_to_remove = [node for node in G.nodes if len(str(node)) > 100] for node in G.nodes:
G.remove_nodes_from(nodes_to_remove) if len(str(node)) > 100:
nodes_to_remove.append(node)
# Remove site nodes with only one connection [G.remove_node(node) for node in nodes_to_remove]
single_degree_sites = [
n for n, deg in G.degree() if n.startswith("site:") and deg <= 1
]
G.remove_nodes_from(single_degree_sites)
# Generate interactive visualization # moved here to speed up the launch of Maigret
from pyvis.network import Network from pyvis.network import Network
nt = Network(notebook=True, height="750px", width="100%") nt = Network(notebook=True, height="750px", width="100%")
+3 -74
View File
@@ -1451,7 +1451,6 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"AskFM": { "AskFM": {
"disabled": true,
"tags": [ "tags": [
"eg", "eg",
"in", "in",
@@ -6164,16 +6163,6 @@
"usernameClaimed": "adam", "usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Framapiaf": {
"tags": [
"mastodon"
],
"checkType": "status_code",
"urlMain": "https://framapiaf.org",
"url": "https://framapiaf.org/@{username}",
"usernameClaimed": "pylapp",
"usernameUnclaimed": "noonewouldeverusethis42"
},
"Free-lance.ua": { "Free-lance.ua": {
"tags": [ "tags": [
"freelance", "freelance",
@@ -7229,8 +7218,7 @@
"url": "https://gramho.com/explore-hashtag/{username}", "url": "https://gramho.com/explore-hashtag/{username}",
"source": "Instagram", "source": "Instagram",
"usernameClaimed": "adam", "usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7"
"disabled": true
}, },
"Gravatar": { "Gravatar": {
"tags": [ "tags": [
@@ -7896,7 +7884,6 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"ICQ": { "ICQ": {
"disabled": true,
"tags": [ "tags": [
"ch", "ch",
"ru", "ru",
@@ -9792,16 +9779,6 @@
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
"alexaRank": 4938389 "alexaRank": 4938389
}, },
"Mamot": {
"tags": [
"mastodon"
],
"checkType": "status_code",
"urlMain": "https://mamot.fr",
"url": "https://mamot.fr/@{username}",
"usernameClaimed": "pylapp",
"usernameUnclaimed": "noonewouldeverusethis42"
},
"Mamuli": { "Mamuli": {
"tags": [ "tags": [
"ru", "ru",
@@ -11807,16 +11784,6 @@
"usernameClaimed": "uehkon89", "usernameClaimed": "uehkon89",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Outgress": {
"checkType": "message",
"absenceStrs": [
"Outgress - Error"
],
"url": "https://outgress.com/agents/{username}",
"urlMain": "https://outgress.com/",
"usernameClaimed": "pylapp",
"usernameUnclaimed": "noonewouldeverusethis42"
},
"Overclockers": { "Overclockers": {
"tags": [ "tags": [
"ru" "ru"
@@ -12436,17 +12403,6 @@
"usernameClaimed": "adam", "usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"pixelfed.social": {
"tags": [
"art",
"pixelfed"
],
"checkType": "status_code",
"usernameClaimed": "pylapp",
"usernameUnclaimed": "noonewouldeverusethis42",
"urlMain": "https://pixelfed.social/",
"url": "https://pixelfed.social/{username}/"
},
"PlanetMinecraft": { "PlanetMinecraft": {
"tags": [ "tags": [
"us" "us"
@@ -13119,19 +13075,6 @@
"usernameClaimed": "Sinkler", "usernameClaimed": "Sinkler",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"programming.dev": {
"tags": [
"lemmy"
],
"checkType": "message",
"absenceStrs": [
"Error!"
],
"url": "https://programming.dev/u/{username}",
"urlMain": "https://programming.dev",
"usernameClaimed": "pylapp",
"usernameUnclaimed": "noonewouldeverusethis42"
},
"Qbn": { "Qbn": {
"tags": [ "tags": [
"in", "in",
@@ -17533,7 +17476,7 @@
"method": "vimeo" "method": "vimeo"
}, },
"headers": { "headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzQxMTc1NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNDc4Y2ZhZGUtZjI0Yy00MDVkLTliYWItN2RlNGEzNGM4MzI5In0.guN7Fg8dqq7EYdckrJ-6Rdkj_5MOl6FaC4YUSOceDpU" "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM5Njc3MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNGJkNDE4NzktM2VhOS00ZWRiLWIzZDUtNjAyNjQ3YjMyNTVhIn0.kPbKREujSfYsisyF0pS_HskTapRlHBfVLRw4cis1ezk"
}, },
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1", "urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
"checkType": "status_code", "checkType": "status_code",
@@ -35600,16 +35543,6 @@
"tags": [ "tags": [
"tr" "tr"
] ]
},
"write.as": {
"tags": [
"writefreely"
],
"checkType": "status_code",
"url": "https://write.as/{username}",
"urlMain": "https://write.as",
"usernameClaimed": "pylapp",
"usernameUnclaimed": "noonewouldeverusethis42"
} }
}, },
"engines": { "engines": {
@@ -35891,10 +35824,6 @@
"i2p", "i2p",
"q&a", "q&a",
"crypto", "crypto",
"ai", "ai"
"mastodon",
"writefreely",
"lemmy",
"pixelfed"
] ]
} }
-1
View File
@@ -188,7 +188,6 @@ class Submitter:
) )
return entered_username if entered_username else supposed_username return entered_username if entered_username else supposed_username
# TODO: replace with checking.py/SimpleAiohttpChecker call
@staticmethod @staticmethod
async def get_html_response_to_compare( async def get_html_response_to_compare(
url: str, session: ClientSession = None, redirects=False, headers: Dict = None url: str, session: ClientSession = None, redirects=False, headers: Dict = None
+41 -102
View File
@@ -1,3 +1,4 @@
# app.py
from flask import ( from flask import (
Flask, Flask,
render_template, render_template,
@@ -21,15 +22,18 @@ from maigret.report import generate_report_context
app = Flask(__name__) app = Flask(__name__)
app.secret_key = 'your-secret-key-here' app.secret_key = 'your-secret-key-here'
# add background job tracking # Add background job tracking
background_jobs = {} background_jobs = {}
job_results = {} job_results = {}
# Configuration # Configuration
app.config["MAIGRET_DB_FILE"] = os.path.join('maigret', 'resources', 'data.json') MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json')
app.config["COOKIES_FILE"] = "cookies.txt" COOKIES_FILE = "cookies.txt"
app.config["UPLOAD_FOLDER"] = 'uploads' UPLOAD_FOLDER = 'uploads'
app.config["REPORTS_FOLDER"] = os.path.abspath('/tmp/maigret_reports') REPORTS_FOLDER = os.path.abspath('/tmp/maigret_reports')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(REPORTS_FOLDER, exist_ok=True)
def setup_logger(log_level, name): def setup_logger(log_level, name):
@@ -41,41 +45,16 @@ def setup_logger(log_level, name):
async def maigret_search(username, options): async def maigret_search(username, options):
logger = setup_logger(logging.WARNING, 'maigret') logger = setup_logger(logging.WARNING, 'maigret')
try: try:
db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]) db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
sites = db.ranked_sites_dict(top=int(options.get('top_sites', 500)))
top_sites = int(options.get('top_sites') or 500)
if options.get('all_sites'):
top_sites = 999999999 # effectively all
tags = options.get('tags', [])
site_list = options.get('site_list', [])
logger.info(f"Filtering sites by tags: {tags}")
sites = db.ranked_sites_dict(
top=top_sites,
tags=tags,
names=site_list,
disabled=False,
id_type='username',
)
logger.info(f"Found {len(sites)} sites matching the tag criteria")
results = await maigret.search( results = await maigret.search(
username=username, username=username,
site_dict=sites, site_dict=sites,
timeout=int(options.get('timeout', 30)), timeout=int(options.get('timeout', 30)),
logger=logger, logger=logger,
id_type='username', id_type=options.get('id_type', 'username'),
cookies=app.config["COOKIES_FILE"] if options.get('use_cookies') else None, cookies=COOKIES_FILE if options.get('use_cookies') else None,
is_parsing_enabled=(not options.get('disable_extracting', False)),
recursive_search_enabled=(
not options.get('disable_recursive_search', False)
),
check_domains=options.get('with_domains', False),
proxy=options.get('proxy', None),
tor_proxy=options.get('tor_proxy', None),
i2p_proxy=options.get('i2p_proxy', None),
) )
return results return results
except Exception as e: except Exception as e:
@@ -88,7 +67,7 @@ async def search_multiple_usernames(usernames, options):
for username in usernames: for username in usernames:
try: try:
search_results = await maigret_search(username.strip(), options) search_results = await maigret_search(username.strip(), options)
results.append((username.strip(), 'username', search_results)) results.append((username.strip(), options['id_type'], search_results))
except Exception as e: except Exception as e:
logging.error(f"Error searching username {username}: {str(e)}") logging.error(f"Error searching username {username}: {str(e)}")
return results return results
@@ -96,26 +75,28 @@ async def search_multiple_usernames(usernames, options):
def process_search_task(usernames, options, timestamp): def process_search_task(usernames, options, timestamp):
try: try:
# Setup event loop for async operations
loop = asyncio.new_event_loop() loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop) asyncio.set_event_loop(loop)
# Run the search
general_results = loop.run_until_complete( general_results = loop.run_until_complete(
search_multiple_usernames(usernames, options) search_multiple_usernames(usernames, options)
) )
os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True) # Create session folder
session_folder = os.path.join( session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}")
app.config["REPORTS_FOLDER"], f"search_{timestamp}"
)
os.makedirs(session_folder, exist_ok=True) os.makedirs(session_folder, exist_ok=True)
# Save the combined graph
graph_path = os.path.join(session_folder, "combined_graph.html") graph_path = os.path.join(session_folder, "combined_graph.html")
maigret.report.save_graph_report( maigret.report.save_graph_report(
graph_path, graph_path,
general_results, general_results,
MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"]), MaigretDatabase().load_from_path(MAIGRET_DB_FILE),
) )
# Save individual reports
individual_reports = [] individual_reports = []
for username, id_type, results in general_results: for username, id_type, results in general_results:
report_base = os.path.join(session_folder, f"report_{username}") report_base = os.path.join(session_folder, f"report_{username}")
@@ -172,7 +153,7 @@ def process_search_task(usernames, options, timestamp):
} }
) )
# save results and mark job as complete using timestamp as key # Save results and mark job as complete
job_results[timestamp] = { job_results[timestamp] = {
'status': 'completed', 'status': 'completed',
'session_folder': f"search_{timestamp}", 'session_folder': f"search_{timestamp}",
@@ -180,9 +161,7 @@ def process_search_task(usernames, options, timestamp):
'usernames': usernames, 'usernames': usernames,
'individual_reports': individual_reports, 'individual_reports': individual_reports,
} }
except Exception as e: except Exception as e:
logging.error(f"Error in search task for timestamp {timestamp}: {str(e)}")
job_results[timestamp] = {'status': 'failed', 'error': str(e)} job_results[timestamp] = {'status': 'failed', 'error': str(e)}
finally: finally:
background_jobs[timestamp]['completed'] = True background_jobs[timestamp]['completed'] = True
@@ -190,24 +169,9 @@ def process_search_task(usernames, options, timestamp):
@app.route('/') @app.route('/')
def index(): def index():
# load site data for autocomplete return render_template('index.html')
db = MaigretDatabase().load_from_path(app.config["MAIGRET_DB_FILE"])
site_options = []
for site in db.sites:
# add main site name
site_options.append(site.name)
# add URL if different from name
if site.url_main and site.url_main not in site_options:
site_options.append(site.url_main)
# sort and deduplicate
site_options = sorted(set(site_options))
return render_template('index.html', site_options=site_options)
# Modified search route
@app.route('/search', methods=['POST']) @app.route('/search', methods=['POST'])
def search(): def search():
usernames_input = request.form.get('usernames', '').strip() usernames_input = request.form.get('usernames', '').strip()
@@ -222,32 +186,15 @@ def search():
# Create timestamp for this search session # Create timestamp for this search session
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Get selected tags - ensure it's a list logging.info(f"Starting search for usernames: {usernames}")
selected_tags = request.form.getlist('tags')
logging.info(f"Selected tags: {selected_tags}")
options = { options = {
'top_sites': request.form.get('top_sites') or '500', 'top_sites': request.form.get('top_sites', '500'),
'timeout': request.form.get('timeout') or '30', 'timeout': request.form.get('timeout', '30'),
'id_type': 'username', # fixed as username
'use_cookies': 'use_cookies' in request.form, 'use_cookies': 'use_cookies' in request.form,
'all_sites': 'all_sites' in request.form,
'disable_recursive_search': 'disable_recursive_search' in request.form,
'disable_extracting': 'disable_extracting' in request.form,
'with_domains': 'with_domains' in request.form,
'proxy': request.form.get('proxy', None) or None,
'tor_proxy': request.form.get('tor_proxy', None) or None,
'i2p_proxy': request.form.get('i2p_proxy', None) or None,
'permute': 'permute' in request.form,
'tags': selected_tags, # Pass selected tags as a list
'site_list': [
s.strip() for s in request.form.get('site', '').split(',') if s.strip()
],
} }
logging.info(
f"Starting search for usernames: {usernames} with tags: {selected_tags}"
)
# Start background job # Start background job
background_jobs[timestamp] = { background_jobs[timestamp] = {
'completed': False, 'completed': False,
@@ -257,6 +204,9 @@ def search():
} }
background_jobs[timestamp]['thread'].start() background_jobs[timestamp]['thread'].start()
logging.info(f"Search job started with timestamp: {timestamp}")
# Redirect to status page
return redirect(url_for('status', timestamp=timestamp)) return redirect(url_for('status', timestamp=timestamp))
@@ -266,34 +216,34 @@ def status(timestamp):
# Validate timestamp # Validate timestamp
if timestamp not in background_jobs: if timestamp not in background_jobs:
flash('Invalid search session.', 'danger') flash('Invalid search session', 'danger')
logging.error(f"Invalid search session: {timestamp}")
return redirect(url_for('index')) return redirect(url_for('index'))
# Check if job is completed # Check if job is completed
if background_jobs[timestamp]['completed']: if background_jobs[timestamp]['completed']:
result = job_results.get(timestamp) result = job_results.get(timestamp)
if not result: if not result:
flash('No results found for this search session.', 'warning') flash('No results found for this search session', 'warning')
logging.error(f"No results found for completed session: {timestamp}")
return redirect(url_for('index')) return redirect(url_for('index'))
if result['status'] == 'completed': if result['status'] == 'completed':
# Note: use the session_folder from the results to redirect # Redirect to results page once done
return redirect(url_for('results', session_id=result['session_folder'])) return redirect(url_for('results', session_id=result['session_folder']))
else: else:
error_msg = result.get('error', 'Unknown error occurred.') error_msg = result.get('error', 'Unknown error occurred')
flash(f'Search failed: {error_msg}', 'danger') flash(f'Search failed: {error_msg}', 'danger')
logging.error(f"Search failed for session {timestamp}: {error_msg}")
return redirect(url_for('index')) return redirect(url_for('index'))
# If job is still running, show a status page # If job is still running, show status page with a simple spinner
return render_template('status.html', timestamp=timestamp) return render_template('status.html', timestamp=timestamp)
@app.route('/results/<session_id>') @app.route('/results/<session_id>')
def results(session_id): def results(session_id):
# Find completed results that match this session_folder if not session_id.startswith('search_'):
flash('Invalid results session format', 'danger')
return redirect(url_for('index'))
result_data = next( result_data = next(
( (
r r
@@ -303,11 +253,6 @@ def results(session_id):
None, None,
) )
if not result_data:
flash('No results found for this session ID.', 'danger')
logging.error(f"Results for session {session_id} not found in job_results.")
return redirect(url_for('index'))
return render_template( return render_template(
'results.html', 'results.html',
usernames=result_data['usernames'], usernames=result_data['usernames'],
@@ -320,12 +265,7 @@ def results(session_id):
@app.route('/reports/<path:filename>') @app.route('/reports/<path:filename>')
def download_report(filename): def download_report(filename):
try: try:
os.makedirs(app.config["REPORTS_FOLDER"], exist_ok=True) file_path = os.path.join(REPORTS_FOLDER, filename)
file_path = os.path.normpath(
os.path.join(app.config["REPORTS_FOLDER"], filename)
)
if not file_path.startswith(app.config["REPORTS_FOLDER"]):
raise Exception("Invalid file path")
return send_file(file_path) return send_file(file_path)
except Exception as e: except Exception as e:
logging.error(f"Error serving file {filename}: {str(e)}") logging.error(f"Error serving file {filename}: {str(e)}")
@@ -337,5 +277,4 @@ if __name__ == '__main__':
level=logging.INFO, level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
) )
debug_mode = os.getenv('FLASK_DEBUG', 'False').lower() in ['true', '1', 't'] app.run(debug=True)
app.run(debug=debug_mode)
Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

+37 -111
View File
@@ -1,118 +1,44 @@
<!-- templates/base.html -->
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en" data-bs-theme="dark"> <html lang="en" data-bs-theme="dark">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Maigret Web Interface</title> <title>Maigret Web Interface</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet"> <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<style> <style>
body { body {
min-height: 100vh; padding-top: 2rem;
display: flex; }
flex-direction: column; .form-container {
} max-width: auto;
margin: auto;
.main-container { }
flex: 1; [data-bs-theme="dark"] {
padding-top: 2rem; --bs-body-bg: #212529;
} --bs-body-color: #dee2e6;
}
.form-container { </style>
max-width: auto;
margin: auto;
padding-bottom: 2rem;
}
[data-bs-theme="dark"] {
--bs-body-bg: #212529;
--bs-body-color: #dee2e6;
}
.header {
padding: 1rem 0;
margin-bottom: 2rem;
border-bottom: 1px solid var(--bs-border-color);
}
.header-content {
display: flex;
align-items: center;
justify-content: space-between;
}
.logo-container {
display: flex;
align-items: center;
gap: 1rem;
}
.logo {
height: 40px;
width: auto;
}
.footer {
margin-top: auto;
padding: 1rem 0;
text-align: center;
border-top: 1px solid var(--bs-border-color);
font-size: 0.9rem;
}
.footer a {
color: inherit;
text-decoration: none;
}
.footer a:hover {
text-decoration: underline;
}
</style>
</head> </head>
<body> <body>
<div class="header"> <div class="container">
<div class="container"> <div class="mb-3">
<div class="header-content"> <button class="btn btn-outline-secondary" id="theme-toggle">
<div class="logo-container"> Toggle Dark/Light Mode
<img src="{{ url_for('static', filename='maigret.png') }}" alt="Maigret Logo" class="logo"> </button>
<h1 class="h4 mb-0">Maigret Web Interface</h1>
</div>
<button class="btn btn-outline-secondary" id="theme-toggle">
Toggle Dark/Light Mode
</button>
</div>
</div>
</div> </div>
{% block content %}{% endblock %}
<div class="main-container"> </div>
<div class="container"> <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
{% block content %}{% endblock %} <script>
</div> document.getElementById('theme-toggle').addEventListener('click', function() {
</div> const html = document.documentElement;
if (html.getAttribute('data-bs-theme') === 'dark') {
<footer class="footer"> html.setAttribute('data-bs-theme', 'light');
<div class="container"> } else {
<p class="mb-0"> html.setAttribute('data-bs-theme', 'dark');
Powered by <a href="https://github.com/soxoj/maigret" target="_blank">Maigret</a> | }
Licensed under <a href="https://github.com/soxoj/maigret/blob/main/LICENSE" target="_blank">MIT });
License</a> </script>
</p>
</div>
</footer>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
<script>
document.getElementById('theme-toggle').addEventListener('click', function () {
const html = document.documentElement;
if (html.getAttribute('data-bs-theme') === 'dark') {
html.setAttribute('data-bs-theme', 'light');
} else {
html.setAttribute('data-bs-theme', 'dark');
}
});
</script>
</body> </body>
</html>
</html>
+22 -370
View File
@@ -1,383 +1,35 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block content %} {% block content %}
<style>
.tag-cloud {
display: flex;
flex-wrap: wrap;
gap: 8px;
padding: 15px;
border-radius: 8px;
background: rgba(0, 0, 0, 0.05);
margin-bottom: 20px;
}
.tag {
display: inline-block;
padding: 5px 10px;
border-radius: 15px;
background-color: #dc3545;
color: white;
cursor: pointer;
font-size: 14px;
transition: all 0.3s ease;
user-select: none;
}
.tag.selected {
background-color: #28a745;
}
.tag:hover {
transform: translateY(-2px);
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
}
.hidden-select {
display: none !important;
}
.site-input-container {
position: relative;
}
.site-input {
width: 100%;
}
.selected-sites {
display: flex;
flex-wrap: wrap;
gap: 8px;
padding: 10px 0;
}
.selected-site {
background-color: #214e7b;
padding: 2px 8px;
border-radius: 12px;
font-size: 14px;
display: inline-flex;
align-items: center;
gap: 5px;
}
.remove-site {
cursor: pointer;
color: #dc3545;
font-weight: bold;
}
.section-header {
cursor: pointer;
padding: 1rem;
background: rgba(255, 255, 255, 0.05);
border-radius: 4px;
margin-bottom: 0.5rem;
display: flex;
justify-content: space-between;
align-items: center;
}
.section-content {
padding: 1rem;
display: none;
}
.section-content.show {
display: block;
}
.chevron::after {
content: '▼';
transition: transform 0.2s;
}
.chevron.collapsed::after {
transform: rotate(-90deg);
}
.main-search-section {
background: rgba(255, 255, 255, 0.03);
padding: 2rem;
border-radius: 8px;
margin-bottom: 2rem;
}
.search-button {
width: 100%;
padding: 1rem;
font-size: 1.2rem;
margin-top: 2rem;
}
</style>
<div class="form-container"> <div class="form-container">
<h1 class="mb-4">Maigret Web Interface</h1>
{% if error %} {% if error %}
<div class="alert alert-danger">{{ error }}</div> <div class="alert alert-danger">{{ error }}</div>
{% endif %} {% endif %}
<form method="POST" action="{{ url_for('search') }}" class="mb-4"> <form method="POST" action="{{ url_for('search') }}" class="mb-4">
<!-- Main Search Section --> <div class="mb-3">
<div class="main-search-section"> <label for="usernames" class="form-label">Usernames to Search</label>
<div class="mb-4"> <textarea class="form-control" id="usernames" name="usernames" rows="3" required
<label for="usernames" class="form-label h5">Usernames to Search</label> placeholder="Enter one or more usernames (separated by spaces or commas)"></textarea>
<textarea class="form-control" id="usernames" name="usernames" rows="3" required
placeholder="Enter one or more usernames (separated by spaces or commas)..."></textarea>
</div>
<div class="row align-items-center">
<div class="col-md-6">
<label for="top_sites" class="form-label">Number of Sites</label>
<input type="number" class="form-control" id="top_sites" name="top_sites" min="1" max="10000"
placeholder="Default: 500">
</div>
<div class="col-md-6">
<label for="timeout" class="form-label">Timeout (seconds)</label>
<input type="number" class="form-control" id="timeout" name="timeout" min="1"
placeholder="Default: 30">
</div>
<div class="col-12 mt-3">
<div class="form-check">
<input type="checkbox" class="form-check-input" id="all_sites" name="all_sites"
onchange="document.getElementById('top_sites').disabled = this.checked;">
<label class="form-check-label" for="all_sites">Search All Sites</label>
</div>
</div>
</div>
</div> </div>
<!-- Filters Section --> <div class="mb-3">
<div class="mb-4"> <label for="top_sites" class="form-label">Number of Top Sites to Check</label>
<div class="section-header" onclick="toggleSection('filters')"> <input type="number" class="form-control" id="top_sites" name="top_sites" value="500" min="1" max="10000">
<h5 class="mb-0">Filters</h5>
<span class="chevron"></span>
</div>
<div id="filters" class="section-content">
<div class="mb-3 site-input-container">
<label for="site" class="form-label">Specify Sites (Optional)</label>
<input type="text" class="form-control site-input" id="siteInput"
placeholder="Type to search for sites..." list="siteOptions">
<input type="hidden" id="site" name="site">
<datalist id="siteOptions">
{% for site in site_options %}
<option value="{{ site }}">
{% endfor %}
</datalist>
<div class="selected-sites" id="selectedSites"></div>
</div>
<div class="mb-3">
<label class="form-label">Tags (click to select)</label>
<div class="tag-cloud" id="tagCloud"></div>
<select multiple class="hidden-select" id="tags" name="tags">
<option value="gaming">Gaming</option>
<option value="coding">Coding</option>
<option value="photo">Photo</option>
<option value="music">Music</option>
<option value="blog">Blog</option>
<option value="finance">Finance</option>
<option value="freelance">Freelance</option>
<option value="dating">Dating</option>
<option value="tech">Tech</option>
<option value="forum">Forum</option>
<option value="porn">Porn</option>
<option value="erotic">Erotic</option>
<option value="webcam">Webcam</option>
<option value="video">Video</option>
<option value="movies">Movies</option>
<option value="hacking">Hacking</option>
<option value="art">Art</option>
<option value="discussion">Discussion</option>
<option value="sharing">Sharing</option>
<option value="writing">Writing</option>
<option value="wiki">Wiki</option>
<option value="business">Business</option>
<option value="shopping">Shopping</option>
<option value="sport">Sport</option>
<option value="books">Books</option>
<option value="news">News</option>
<option value="documents">Documents</option>
<option value="travel">Travel</option>
<option value="maps">Maps</option>
<option value="hobby">Hobby</option>
<option value="apps">Apps</option>
<option value="classified">Classified</option>
<option value="career">Career</option>
<option value="geosocial">Geosocial</option>
<option value="streaming">Streaming</option>
<option value="education">Education</option>
<option value="networking">Networking</option>
<option value="torrent">Torrent</option>
<option value="science">Science</option>
<option value="medicine">Medicine</option>
<option value="reading">Reading</option>
<option value="stock">Stock</option>
<option value="messaging">Messaging</option>
<option value="trading">Trading</option>
<option value="links">Links</option>
<option value="fashion">Fashion</option>
<option value="tasks">Tasks</option>
<option value="military">Military</option>
<option value="auto">Auto</option>
<option value="gambling">Gambling</option>
<option value="cybercriminal">Cybercriminal</option>
<option value="review">Review</option>
<option value="bookmarks">Bookmarks</option>
<option value="design">Design</option>
<option value="tor">Tor</option>
<option value="i2p">I2P</option>
<option value="q&a">Q&A</option>
<option value="crypto">Crypto</option>
<option value="ai">AI</option>
</select>
</div>
</div>
</div> </div>
<!-- Advanced Options Section --> <div class="mb-3">
<div class="mb-4"> <label for="timeout" class="form-label">Timeout (seconds)</label>
<div class="section-header" onclick="toggleSection('advanced')"> <input type="number" class="form-control" id="timeout" name="timeout" value="30" min="1" max="120">
<h5 class="mb-0">Advanced Options</h5>
<span class="chevron"></span>
</div>
<div id="advanced" class="section-content">
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="permute" name="permute">
<label class="form-check-label" for="permute">Enable Username Permutations</label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="disable_recursive_search"
name="disable_recursive_search">
<label class="form-check-label" for="disable_recursive_search">Disable Recursive Search</label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="disable_extracting" name="disable_extracting">
<label class="form-check-label" for="disable_extracting">Disable Information Extraction</label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" class="form-check-input" id="with_domains" name="with_domains">
<label class="form-check-label" for="with_domains">Check Domains</label>
</div>
<div class="mb-3">
<label for="proxy" class="form-label">Proxy URL</label>
<input type="text" class="form-control" id="proxy" name="proxy"
placeholder="e.g., 127.0.0.1:1080">
</div>
<div class="mb-3">
<label for="tor_proxy" class="form-label">TOR Proxy URL</label>
<input type="text" class="form-control" id="tor_proxy" name="tor_proxy"
placeholder="Default: 127.0.0.1:9050">
</div>
<div class="mb-3">
<label for="i2p_proxy" class="form-label">I2P Proxy URL</label>
<input type="text" class="form-control" id="i2p_proxy" name="i2p_proxy"
placeholder="Default: 127.0.0.1:4444">
</div>
</div>
</div> </div>
<button type="submit" class="btn search-button" style="background-color: rgb(249, 207, 0); color: black;"> <div class="mb-3 form-check">
Start Search <input type="checkbox" class="form-check-input" id="use_cookies" name="use_cookies">
</button> <label class="form-check-label" for="use_cookies">Use Cookies File</label>
</div>
<button type="submit" class="btn btn-primary">Search</button>
</form> </form>
</div> </div>
<script>
function toggleSection(sectionId) {
const content = document.getElementById(sectionId);
const header = content.previousElementSibling;
content.classList.toggle('show');
header.querySelector('.chevron').classList.toggle('collapsed');
}
document.addEventListener('DOMContentLoaded', function () {
// Tag cloud functionality
const tagCloud = document.getElementById('tagCloud');
const hiddenSelect = document.getElementById('tags');
const allTags = Array.from(hiddenSelect.options).map(opt => ({
value: opt.value,
label: opt.text
}));
allTags.forEach(tag => {
const tagElement = document.createElement('span');
tagElement.className = 'tag';
tagElement.textContent = tag.label;
tagElement.dataset.value = tag.value;
tagElement.addEventListener('click', function () {
const isSelected = this.classList.toggle('selected');
const option = Array.from(hiddenSelect.options).find(opt => opt.value === tag.value);
if (option) {
option.selected = isSelected;
}
});
tagCloud.appendChild(tagElement);
});
// Site selection functionality
const siteInput = document.getElementById('siteInput');
const hiddenInput = document.getElementById('site');
const selectedSitesContainer = document.getElementById('selectedSites');
let selectedSites = new Set();
function updateHiddenInput() {
hiddenInput.value = Array.from(selectedSites).join(',');
}
function addSite(site) {
if (site && !selectedSites.has(site)) {
selectedSites.add(site);
updateHiddenInput();
const siteElement = document.createElement('span');
siteElement.className = 'selected-site';
siteElement.innerHTML = `${site}<span class="remove-site" data-site="${site}">&times;</span>`;
selectedSitesContainer.appendChild(siteElement);
}
}
function removeSite(site) {
selectedSites.delete(site);
updateHiddenInput();
const siteElements = selectedSitesContainer.querySelectorAll('.selected-site');
siteElements.forEach(el => {
if (el.querySelector('.remove-site').dataset.site === site) {
el.remove();
}
});
}
siteInput.addEventListener('change', function (e) {
const value = this.value.trim();
if (value) {
addSite(value);
this.value = '';
}
});
selectedSitesContainer.addEventListener('click', function (e) {
if (e.target.classList.contains('remove-site')) {
removeSite(e.target.dataset.site);
}
});
siteInput.addEventListener('paste', function (e) {
e.preventDefault();
const paste = (e.clipboardData || window.clipboardData).getData('text');
const sites = paste.split(',').map(site => site.trim()).filter(site => site);
sites.forEach(addSite);
});
const form = document.querySelector('form');
form.addEventListener('submit', function (e) {
const selectedTags = Array.from(tagCloud.querySelectorAll('.tag.selected'));
Array.from(hiddenSelect.options).forEach(opt => {
opt.selected = selectedTags.some(tag => tag.dataset.value === opt.value);
});
updateHiddenInput();
});
});
</script>
{% endblock %} {% endblock %}
+50 -150
View File
@@ -1,156 +1,56 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block content %} {% block content %}
<style> <div class="form-container">
.tag-badge { <h1 class="mb-4">Search Results</h1>
background-color: #214e7b;
padding: 2px 8px;
border-radius: 12px;
font-size: 14px;
display: inline-flex;
align-items: center;
gap: 5px;
margin: 2px;
color: white;
}
.profile-list { {% with messages = get_flashed_messages() %}
list-style: none; {% if messages %}
padding: 0; {% for message in messages %}
}
.profile-item {
margin-bottom: 10px;
padding: 10px;
display: flex;
justify-content: space-between;
align-items: center;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
.profile-link {
display: flex;
align-items: center;
gap: 8px;
}
.favicon {
width: 16px;
height: 16px;
}
.tag-container {
display: flex;
flex-wrap: wrap;
gap: 5px;
justify-content: flex-end;
}
.report-container {
margin-bottom: 1rem;
}
.report-header {
cursor: pointer;
padding: 1rem;
background: rgba(255, 255, 255, 0.05);
border-radius: 4px;
margin-bottom: 0.5rem;
}
.report-content {
display: none;
}
.report-content.show {
display: block;
}
.chevron::after {
content: '▼';
margin-left: 8px;
transition: transform 0.2s;
}
.chevron.collapsed::after {
transform: rotate(-90deg);
}
</style>
<div class="form-container">
<h1 class="mb-4">Search Results</h1>
<!-- Flash messages -->
{% with messages = get_flashed_messages() %}
{% if messages %}
{% for message in messages %}
<div class="alert alert-info">{{ message }}</div> <div class="alert alert-info">{{ message }}</div>
{% endfor %}
{% endif %}
{% endwith %}
<p>The search has completed. <a href="{{ url_for('index')}}">Back to start.</a></p>
{% if graph_file %}
<h3>Combined Graph</h3>
<iframe src="{{ url_for('download_report', filename=graph_file) }}" style="width:100%; height:600px; border:none;"></iframe>
{% endif %}
<hr>
{% if individual_reports %}
<h3>Individual Reports</h3>
<div class="reports-list">
{% for report in individual_reports %}
<div class="report-container">
<div class="report-header" onclick="toggleReport(this)" data-target="report-{{ loop.index }}">
<h5 class="mb-0 d-flex align-items-center">
<span>{{ report.username }}</span>
<span class="chevron"></span>
</h5>
</div>
<div id="report-{{ loop.index }}" class="report-content">
<p>
<a href="{{ url_for('download_report', filename=report.csv_file) }}">CSV Report</a> |
<a href="{{ url_for('download_report', filename=report.json_file) }}">JSON Report</a> |
<a href="{{ url_for('download_report', filename=report.pdf_file) }}">PDF Report</a> |
<a href="{{ url_for('download_report', filename=report.html_file) }}">HTML Report</a>
</p>
{% if report.claimed_profiles %}
<strong>Claimed Profiles:</strong>
<ul class="profile-list">
{% for profile in report.claimed_profiles %}
<li class="profile-item">
<div class="profile-link">
<img class="favicon" src="https://www.google.com/s2/favicons?domain={{ profile.url }}" onerror="this.style.display='none'" alt="">
<a href="{{ profile.url }}" target="_blank">{{ profile.site_name }}</a>
</div>
{% if profile.tags %}
<div class="tag-container">
{% for tag in profile.tags %}
<span class="tag-badge">{{ tag }}</span>
{% endfor %}
</div>
{% endif %}
</li>
{% endfor %}
</ul>
{% else %}
<p>No claimed profiles found.</p>
{% endif %}
</div>
</div>
{% endfor %} {% endfor %}
</div>
{% else %}
<p>No individual reports available.</p>
{% endif %} {% endif %}
</div> {% endwith %}
<script> <p>The search has completed. Below are the results:</p>
function toggleReport(header) {
const reportId = header.getAttribute('data-target'); <!-- Display the combined graph if available -->
const content = document.getElementById(reportId); {% if graph_file %}
content.classList.toggle('show'); <h3>Combined Graph</h3>
header.querySelector('.chevron').classList.toggle('collapsed'); <iframe src="{{ url_for('download_report', filename=graph_file) }}" style="width:100%; height:600px; border:none;"></iframe>
} {% endif %}
</script>
{% endblock %} <hr>
<!-- Display individual reports -->
{% if individual_reports %}
<h3>Individual Reports</h3>
<ul class="list-group">
{% for report in individual_reports %}
<li class="list-group-item">
<h5>{{ report.username }}</h5>
<p>
<a href="{{ url_for('download_report', filename=report.csv_file) }}">CSV Report</a> |
<a href="{{ url_for('download_report', filename=report.json_file) }}">JSON Report</a> |
<a href="{{ url_for('download_report', filename=report.pdf_file) }}">PDF Report</a> |
<a href="{{ url_for('download_report', filename=report.html_file) }}">HTML Report</a>
</p>
{% if report.claimed_profiles %}
<strong>Claimed Profiles:</strong>
<ul>
{% for profile in report.claimed_profiles %}
<li>
<a href="{{ profile.url }}" target="_blank">{{ profile.site_name }}</a> (Tags: {{ profile.tags|join(', ') }})
</li>
{% endfor %}
</ul>
{% else %}
<p>No claimed profiles found.</p>
{% endif %}
</li>
{% endfor %}
</ul>
{% else %}
<p>No individual reports available.</p>
{% endif %}
</div>
{% endblock %}
Generated
+747 -933
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -1,5 +1,5 @@
maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
pefile==2023.2.7 # do not bump while pyinstaller is 6.11.1, there is a conflict pefile==2023.2.7 # do not bump while pyinstaller is 6.11.1, there is a conflict
psutil==6.1.1 psutil==6.1.0
pyinstaller==6.11.1 pyinstaller==6.11.1
pywin32-ctypes==0.2.3 pywin32-ctypes==0.2.3
+21 -21
View File
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry] [tool.poetry]
name = "maigret" name = "maigret"
version = "0.5.0" version = "0.5.0a1"
description = "🕵️‍♂️ Collect a dossier on a person by username from thousands of sites." description = "🕵️‍♂️ Collect a dossier on a person by username from thousands of sites."
authors = ["Soxoj <soxoj@protonmail.com>"] authors = ["Soxoj <soxoj@protonmail.com>"]
readme = "README.md" readme = "README.md"
@@ -32,28 +32,28 @@ classifiers = [
# poetry install --with dev # poetry install --with dev
python = "^3.10" python = "^3.10"
aiodns = "^3.0.0" aiodns = "^3.0.0"
aiohttp = "^3.12.14" aiohttp = "^3.11.10"
aiohttp-socks = "^0.10.1" aiohttp-socks = "^0.9.1"
arabic-reshaper = "^3.0.0" arabic-reshaper = "^3.0.0"
async-timeout = "^5.0.1" async-timeout = "^5.0.1"
attrs = "^25.3.0" attrs = "^24.2.0"
certifi = "^2025.6.15" certifi = "^2024.8.30"
chardet = "^5.0.0" chardet = "^5.0.0"
colorama = "^0.4.6" colorama = "^0.4.6"
future = "^1.0.0" future = "^1.0.0"
future-annotations= "^1.0.0" future-annotations= "^1.0.0"
html5lib = "^1.1" html5lib = "^1.1"
idna = "^3.4" idna = "^3.4"
Jinja2 = "^3.1.6" Jinja2 = "^3.1.3"
lxml = ">=5.3,<7.0" lxml = "^5.3.0"
MarkupSafe = "^3.0.2" MarkupSafe = "^3.0.2"
mock = "^5.1.0" mock = "^5.1.0"
multidict = "^6.6.3" multidict = "^6.0.4"
pycountry = "^24.6.1" pycountry = "^24.6.1"
PyPDF2 = "^3.0.1" PyPDF2 = "^3.0.1"
PySocks = "^1.7.1" PySocks = "^1.7.1"
python-bidi = "^0.6.3" python-bidi = "^0.6.3"
requests = "^2.32.4" requests = "^2.31.0"
requests-futures = "^1.0.2" requests-futures = "^1.0.2"
six = "^1.17.0" six = "^1.17.0"
socid-extractor = "^0.0.27" socid-extractor = "^0.0.27"
@@ -61,18 +61,18 @@ soupsieve = "^2.6"
stem = "^1.8.1" stem = "^1.8.1"
torrequest = "^0.1.0" torrequest = "^0.1.0"
alive_progress = "^3.2.0" alive_progress = "^3.2.0"
typing-extensions = "^4.14.1" typing-extensions = "^4.8.0"
webencodings = "^0.5.1" webencodings = "^0.5.1"
xhtml2pdf = "^0.2.11" xhtml2pdf = "^0.2.11"
XMind = "^1.2.0" XMind = "^1.2.0"
yarl = "^1.20.1" yarl = "^1.18.3"
networkx = "^2.6.3" networkx = "^2.6.3"
pyvis = "^0.3.2" pyvis = "^0.3.2"
reportlab = "^4.4.3" reportlab = "^4.2.0"
cloudscraper = "^1.2.71" cloudscraper = "^1.2.71"
flask = {extras = ["async"], version = "^3.1.1"} flask = {extras = ["async"], version = "^3.1.0"}
asgiref = "^3.9.1" asgiref = "^3.8.1"
platformdirs = "^4.3.8" platformdirs = "^4.3.6"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
@@ -80,15 +80,15 @@ platformdirs = "^4.3.8"
# Install dev dependencies with: poetry install --with dev # Install dev dependencies with: poetry install --with dev
flake8 = "^7.1.1" flake8 = "^7.1.1"
pytest = "^8.3.4" pytest = "^8.3.4"
pytest-asyncio = "^1.0.0" pytest-asyncio = "^0.25.0"
pytest-cov = "^6.0.0" pytest-cov = "^6.0.0"
pytest-httpserver = "^1.0.0" pytest-httpserver = "^1.0.0"
pytest-rerunfailures = "^15.1" pytest-rerunfailures = "^15.0"
reportlab = "^4.4.3" reportlab = "^4.2.0"
mypy = "^1.14.1" mypy = "^1.13.0"
tuna = "^0.5.11" tuna = "^0.5.11"
coverage = "^7.9.2" coverage = "^7.6.9"
black = "^25.1.0" black = "^24.10.0"
[tool.poetry.scripts] [tool.poetry.scripts]
# Run with: poetry run maigret <username> # Run with: poetry run maigret <username>
+14 -20
View File
@@ -1,5 +1,5 @@
## List of supported sites (search methods): total 3143 ## List of supported sites (search methods): total 3137
Rank data fetched from Alexa by domains. Rank data fetched from Alexa by domains.
@@ -336,11 +336,11 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.jigsawplanet.com) [Jigsawplanet (https://www.jigsawplanet.com)](https://www.jigsawplanet.com)*: top 5K, fr, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.jigsawplanet.com) [Jigsawplanet (https://www.jigsawplanet.com)](https://www.jigsawplanet.com)*: top 5K, fr, us*
1. ![](https://www.google.com/s2/favicons?domain=https://hackernoon.com) [hackernoon.com (https://hackernoon.com)](https://hackernoon.com)*: top 5K, news, us* 1. ![](https://www.google.com/s2/favicons?domain=https://hackernoon.com) [hackernoon.com (https://hackernoon.com)](https://hackernoon.com)*: top 5K, news, us*
1. ![](https://www.google.com/s2/favicons?domain=https://pcpartpicker.com) [PCPartPicker (https://pcpartpicker.com)](https://pcpartpicker.com)*: top 5K, us*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://pcpartpicker.com) [PCPartPicker (https://pcpartpicker.com)](https://pcpartpicker.com)*: top 5K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://ask.fm/) [AskFM (https://ask.fm/)](https://ask.fm/)*: top 5K, eg, in, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://ask.fm/) [AskFM (https://ask.fm/)](https://ask.fm/)*: top 5K, eg, in, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://gitlab.com/) [GitLab (https://gitlab.com/)](https://gitlab.com/)*: top 5K, coding* 1. ![](https://www.google.com/s2/favicons?domain=https://gitlab.com/) [GitLab (https://gitlab.com/)](https://gitlab.com/)*: top 5K, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://dev.to/) [DEV Community (https://dev.to/)](https://dev.to/)*: top 5K, coding* 1. ![](https://www.google.com/s2/favicons?domain=https://dev.to/) [DEV Community (https://dev.to/)](https://dev.to/)*: top 5K, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://www.gumroad.com/) [Gumroad (https://www.gumroad.com/)](https://www.gumroad.com/)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://www.gumroad.com/) [Gumroad (https://www.gumroad.com/)](https://www.gumroad.com/)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://gramho.com/) [Gramho (https://gramho.com/)](https://gramho.com/)*: top 5K, photo*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://gramho.com/) [Gramho (https://gramho.com/)](https://gramho.com/)*: top 5K, photo*
1. ![](https://www.google.com/s2/favicons?domain=https://taplink.cc/) [Taplink (https://taplink.cc/)](https://taplink.cc/)*: top 5K, links, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://taplink.cc/) [Taplink (https://taplink.cc/)](https://taplink.cc/)*: top 5K, links, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.buymeacoffee.com/) [BuyMeACoffee (https://www.buymeacoffee.com/)](https://www.buymeacoffee.com/)*: top 5K, in* 1. ![](https://www.google.com/s2/favicons?domain=https://www.buymeacoffee.com/) [BuyMeACoffee (https://www.buymeacoffee.com/)](https://www.buymeacoffee.com/)*: top 5K, in*
1. ![](https://www.google.com/s2/favicons?domain=https://muckrack.com) [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://muckrack.com) [Muckrack (https://muckrack.com)](https://muckrack.com)*: top 5K, us*
@@ -750,7 +750,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=http://forum.eksmo.ru) [forum.eksmo.ru (http://forum.eksmo.ru)](http://forum.eksmo.ru)*: top 100K, forum, ru* 1. ![](https://www.google.com/s2/favicons?domain=http://forum.eksmo.ru) [forum.eksmo.ru (http://forum.eksmo.ru)](http://forum.eksmo.ru)*: top 100K, forum, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://davesgarden.com) [Davesgarden (https://davesgarden.com)](https://davesgarden.com)*: top 100K, us* 1. ![](https://www.google.com/s2/favicons?domain=https://davesgarden.com) [Davesgarden (https://davesgarden.com)](https://davesgarden.com)*: top 100K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.cxem.net/) [forum.cxem.net (https://forum.cxem.net/)](https://forum.cxem.net/)*: top 100K, forum, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://forum.cxem.net/) [forum.cxem.net (https://forum.cxem.net/)](https://forum.cxem.net/)*: top 100K, forum, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://icq.com) [ICQ (https://icq.com)](https://icq.com)*: top 100K, ch, ru, tr*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://icq.com) [ICQ (https://icq.com)](https://icq.com)*: top 100K, ch, ru, tr*
1. ![](https://www.google.com/s2/favicons?domain=https://d3.ru/) [d3 (https://d3.ru/)](https://d3.ru/)*: top 100K, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://d3.ru/) [d3 (https://d3.ru/)](https://d3.ru/)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.dwg.ru/) [dwg (https://forum.dwg.ru/)](https://forum.dwg.ru/)*: top 100K, forum, ru* 1. ![](https://www.google.com/s2/favicons?domain=https://forum.dwg.ru/) [dwg (https://forum.dwg.ru/)](https://forum.dwg.ru/)*: top 100K, forum, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://fotki.com) [Fotki (https://fotki.com)](https://fotki.com)*: top 100K, photo* 1. ![](https://www.google.com/s2/favicons?domain=https://fotki.com) [Fotki (https://fotki.com)](https://fotki.com)*: top 100K, photo*
@@ -2161,7 +2161,6 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=) [Finanzfrage ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Finanzfrage ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=http://forum.quake2.com.ru/) [Forum.quake2.com.ru (http://forum.quake2.com.ru/)](http://forum.quake2.com.ru/)*: top 100M, forum, ru* 1. ![](https://www.google.com/s2/favicons?domain=http://forum.quake2.com.ru/) [Forum.quake2.com.ru (http://forum.quake2.com.ru/)](http://forum.quake2.com.ru/)*: top 100M, forum, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forums.tauck.com) [ForumTauck (https://forums.tauck.com)](https://forums.tauck.com)*: top 100M, forum, us* 1. ![](https://www.google.com/s2/favicons?domain=https://forums.tauck.com) [ForumTauck (https://forums.tauck.com)](https://forums.tauck.com)*: top 100M, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://framapiaf.org) [Framapiaf (https://framapiaf.org)](https://framapiaf.org)*: top 100M, mastodon*
1. ![](https://www.google.com/s2/favicons?domain=) [G2g.com ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [G2g.com ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://gam1ng.com.br) [Gam1ng (https://gam1ng.com.br)](https://gam1ng.com.br)*: top 100M, br, webcam*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://gam1ng.com.br) [Gam1ng (https://gam1ng.com.br)](https://gam1ng.com.br)*: top 100M, br, webcam*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=) [GeniusArtists ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [GeniusArtists ()]()*: top 100M*
@@ -2203,7 +2202,6 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://macqa.ru) [Macqa (https://macqa.ru)](https://macqa.ru)*: top 100M, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://macqa.ru) [Macqa (https://macqa.ru)](https://macqa.ru)*: top 100M, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=) [Maga-Chat ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Maga-Chat ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Magabook ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Magabook ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://mamot.fr) [Mamot (https://mamot.fr)](https://mamot.fr)*: top 100M, mastodon*
1. ![](https://www.google.com/s2/favicons?domain=) [Mapify.travel ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Mapify.travel ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [MapMyTracks ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [MapMyTracks ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Marshmallow ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Marshmallow ()]()*: top 100M*
@@ -2226,12 +2224,10 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=) [Oglaszamy24h ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Oglaszamy24h ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Olx.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Olx.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Ourfreedombook ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Ourfreedombook ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://outgress.com/) [Outgress (https://outgress.com/)](https://outgress.com/)*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Ow.ly ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Ow.ly ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Patronite ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Patronite ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Pewex.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Pewex.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Piekielni ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Piekielni ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://pixelfed.social/) [pixelfed.social (https://pixelfed.social/)](https://pixelfed.social/)*: top 100M, art, pixelfed*
1. ![](https://www.google.com/s2/favicons?domain=) [Pol.social ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Pol.social ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Polczat.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Polczat.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Policja2009 ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Policja2009 ()]()*: top 100M*
@@ -2242,7 +2238,6 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://palexaRankru.net/) [PalexaRankru (https://palexaRankru.net/)](https://palexaRankru.net/)*: top 100M, forum, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://palexaRankru.net/) [PalexaRankru (https://palexaRankru.net/)](https://palexaRankru.net/)*: top 100M, forum, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.programmersforum) [ProgrammersForum (https://www.programmersforum)](https://www.programmersforum)*: top 100M, forum, ru*, search is disabled 1. ![](https://www.google.com/s2/favicons?domain=https://www.programmersforum) [ProgrammersForum (https://www.programmersforum)](https://www.programmersforum)*: top 100M, forum, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=) [Prv.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Prv.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=https://programming.dev) [programming.dev (https://programming.dev)](https://programming.dev)*: top 100M, lemmy*
1. ![](https://www.google.com/s2/favicons?domain=) [Quitter.pl ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Quitter.pl ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=) [Quizlet ()]()*: top 100M* 1. ![](https://www.google.com/s2/favicons?domain=) [Quizlet ()]()*: top 100M*
1. ![](https://www.google.com/s2/favicons?domain=http://www.rammclan.ru) [Rammclan (http://www.rammclan.ru)](http://www.rammclan.ru)*: top 100M, ru* 1. ![](https://www.google.com/s2/favicons?domain=http://www.rammclan.ru) [Rammclan (http://www.rammclan.ru)](http://www.rammclan.ru)*: top 100M, ru*
@@ -3145,18 +3140,17 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://substack.com) [Substack (https://substack.com)](https://substack.com)*: top 100M, blog* 1. ![](https://www.google.com/s2/favicons?domain=https://substack.com) [Substack (https://substack.com)](https://substack.com)*: top 100M, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://pubg.op.gg) [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming* 1. ![](https://www.google.com/s2/favicons?domain=https://pubg.op.gg) [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://valorant.op.gg) [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming* 1. ![](https://www.google.com/s2/favicons?domain=https://valorant.op.gg) [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://write.as) [write.as (https://write.as)](https://write.as)*: top 100M, writefreely*
The list was updated at (2025-08-10) The list was updated at (2024-12-13)
## Statistics ## Statistics
Enabled/total sites: 2687/3143 = 85.49% Enabled/total sites: 2684/3137 = 85.56%
Incomplete message checks: 394/2687 = 14.66% (false positive risks) Incomplete message checks: 394/2684 = 14.68% (false positive risks)
Status code checks: 618/2687 = 23.0% (false positive risks) Status code checks: 615/2684 = 22.91% (false positive risks)
False positive risk (total): 37.66% False positive risk (total): 37.59%
Sites with probing: 500px, Aparat, BinarySearch (disabled), BongaCams, BuyMeACoffee, Cent, Disqus, Docker Hub, Duolingo, Gab, GitHub, GitLab, Google Plus (archived), Gravatar, Imgur, Issuu, Keybase, Livejasmin, LocalCryptos (disabled), MixCloud, Niftygateway, Reddit Search (Pushshift) (disabled), SportsTracker, Spotify (disabled), TAP'D, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Weibo, Yapisal (disabled), YouNow, nightbot, notabug.org, polarsteps, qiwi.me (disabled) Sites with probing: 500px, Aparat, BinarySearch (disabled), BongaCams, BuyMeACoffee, Cent, Disqus, Docker Hub, Duolingo, Gab, GitHub, GitLab, Google Plus (archived), Gravatar, Imgur, Issuu, Keybase, Livejasmin, LocalCryptos (disabled), MixCloud, Niftygateway, Reddit Search (Pushshift) (disabled), SportsTracker, Spotify (disabled), TAP'D, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Weibo, Yapisal (disabled), YouNow, nightbot, notabug.org, polarsteps, qiwi.me (disabled)
@@ -3164,17 +3158,17 @@ Sites with activation: Spotify (disabled), Twitter, Vimeo, Weibo
Top 20 profile URLs: Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)` - (796) `{urlMain}/index/8-0-{username} (uCoz)`
- (303) `/{username}` - (301) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)` - (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (161) `/user/{username}` - (161) `/user/{username}`
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)` - (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
- (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)` - (127) `{urlMain}{urlSubpath}/search.php?author={username} (phpBB/Search)`
- (118) `/profile/{username}` - (118) `/profile/{username}`
- (112) `/u/{username}` - (111) `/u/{username}`
- (88) `/users/{username}` - (88) `/users/{username}`
- (87) `{urlMain}/u/{username}/summary (Discourse)` - (87) `{urlMain}/u/{username}/summary (Discourse)`
- (54) `/@{username}`
- (54) `/wiki/User:{username}` - (54) `/wiki/User:{username}`
- (52) `/@{username}`
- (41) `/members/?username={username}` - (41) `/members/?username={username}`
- (41) `SUBDOMAIN` - (41) `SUBDOMAIN`
- (32) `/members/{username}` - (32) `/members/{username}`
@@ -3186,7 +3180,7 @@ Top 20 profile URLs:
Top 20 tags: Top 20 tags:
- (1106) `NO_TAGS` (non-standard) - (1105) `NO_TAGS` (non-standard)
- (735) `forum` - (735) `forum`
- (92) `gaming` - (92) `gaming`
- (48) `photo` - (48) `photo`
@@ -3198,8 +3192,8 @@ Top 20 tags:
- (19) `finance` - (19) `finance`
- (18) `crypto` - (18) `crypto`
- (16) `sharing` - (16) `sharing`
- (16) `art`
- (16) `freelance` - (16) `freelance`
- (15) `art`
- (15) `shopping` - (15) `shopping`
- (13) `sport` - (13) `sport`
- (13) `business` - (13) `business`
+1 -1
View File
@@ -7,7 +7,7 @@ description: |
More than 3000 sites are currently supported; by default the search is launched against the 500 most popular sites in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolution) is also supported. More than 3000 sites are currently supported; by default the search is launched against the 500 most popular sites in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolution) is also supported.
version: 0.5.0 version: 0.5.0a1
license: MIT license: MIT
base: core22 base: core22
confinement: strict confinement: strict
File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 1.6 MiB

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 501 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 312 KiB

+1 -1
View File
@@ -42,7 +42,7 @@ DEFAULT_ARGS: Dict[str, Any] = {
'use_disabled_sites': False, 'use_disabled_sites': False,
'username': [], 'username': [],
'verbose': False, 'verbose': False,
'web': None, 'web': 5000,
'with_domains': False, 'with_domains': False,
'xmind': False, 'xmind': False,
} }
-84
View File
@@ -1,84 +0,0 @@
"""Tests for the close_invalid_telegram_prs utility."""
import unittest
import sys
import os
# Add the utils directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'utils'))
from close_invalid_telegram_prs import is_invalid_telegram_pr
class TestCloseInvalidTelegramPRs(unittest.TestCase):
"""Test cases for the invalid Telegram PR detection."""
def test_valid_invalid_telegram_pr_titles(self):
"""Test that valid invalid Telegram PR titles are correctly identified."""
valid_titles = [
"Invalid result https://t.me/someuser",
"invalid result https://t.me/channel123",
"Invalid Result https://t.me/bot_name",
"INVALID RESULT https://t.me/test",
"Invalid result https://t.me/user/123",
"Invalid result https://t.me/s/channel_name",
]
for title in valid_titles:
with self.subTest(title=title):
self.assertTrue(is_invalid_telegram_pr(title),
f"Title should be identified as invalid: {title}")
def test_invalid_telegram_pr_titles_not_matching(self):
"""Test that non-matching titles are correctly rejected."""
invalid_titles = [
"Valid result https://t.me/someuser", # "Valid" instead of "Invalid"
"Invalid results https://t.me/someuser", # "results" instead of "result"
"Invalid result http://t.me/someuser", # "http" instead of "https"
"Invalid result https://telegram.me/someuser", # Wrong domain
"Fix invalid result https://t.me/someuser", # Extra words before
"Invalid result for https://t.me/someuser", # Extra words in between
"Added telegram site", # Completely different
"Fix false positives", # Unrelated
"", # Empty title
"Invalid result", # Missing URL
"https://t.me/someuser", # Missing "Invalid result"
]
for title in invalid_titles:
with self.subTest(title=title):
self.assertFalse(is_invalid_telegram_pr(title),
f"Title should NOT be identified as invalid: {title}")
def test_whitespace_handling(self):
"""Test that whitespace is handled correctly."""
titles_with_whitespace = [
" Invalid result https://t.me/someuser ", # Leading/trailing spaces
"\tInvalid result https://t.me/someuser\t", # Tabs
"Invalid\tresult\thttps://t.me/someuser", # Tabs between words
"Invalid result https://t.me/someuser", # Multiple spaces
]
for title in titles_with_whitespace:
with self.subTest(title=title):
self.assertTrue(is_invalid_telegram_pr(title),
f"Title with whitespace should be identified: {title}")
def test_case_insensitive(self):
"""Test that the pattern matching is case insensitive."""
case_variations = [
"invalid result https://t.me/someuser",
"Invalid Result https://t.me/someuser",
"INVALID RESULT https://t.me/someuser",
"Invalid result https://T.ME/someuser",
"iNvAlId ReSuLt https://t.me/someuser",
]
for title in case_variations:
with self.subTest(title=title):
self.assertTrue(is_invalid_telegram_pr(title),
f"Case variation should be identified: {title}")
# Run the suite with unittest's own runner when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
-33
View File
@@ -8,7 +8,6 @@ from maigret.executors import (
AsyncioProgressbarExecutor, AsyncioProgressbarExecutor,
AsyncioProgressbarSemaphoreExecutor, AsyncioProgressbarSemaphoreExecutor,
AsyncioProgressbarQueueExecutor, AsyncioProgressbarQueueExecutor,
AsyncioQueueGeneratorExecutor,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -77,35 +76,3 @@ async def test_asyncio_progressbar_queue_executor():
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8] assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
assert executor.execution_time > 0.2 assert executor.execution_time > 0.2
assert executor.execution_time < 0.4 assert executor.execution_time < 0.4
@pytest.mark.asyncio
async def test_asyncio_queue_generator_executor():
    """Check result order and wall-clock time of the queue generator executor.

    The same ten tasks are streamed through ``AsyncioQueueGeneratorExecutor``
    at several ``in_parallel`` settings; for each setting the yielded order
    and ``execution_time`` bounds are asserted.
    """
    tasks = [(func, [n], {}) for n in range(10)]

    # One row per run: (in_parallel, accepted result orders, min time, max time).
    # NOTE(review): the expected orders presumably follow from per-task delays
    # inside `func`, which is defined elsewhere in the test module - confirm.
    scenarios = (
        (2, ([0, 1, 3, 2, 4, 6, 7, 5, 9, 8],), 0.5, 0.6),
        (3, ([0, 3, 1, 4, 6, 2, 7, 9, 5, 8],), 0.4, 0.5),
        (
            5,
            (
                [0, 3, 6, 1, 4, 7, 9, 2, 5, 8],
                [0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
            ),
            0.3,
            0.4,
        ),
        (10, ([0, 3, 6, 9, 1, 4, 7, 2, 5, 8],), 0.2, 0.3),
    )

    for workers, accepted_orders, min_time, max_time in scenarios:
        executor = AsyncioQueueGeneratorExecutor(logger=logger, in_parallel=workers)

        results = []
        async for item in executor.run(tasks):
            results.append(item)

        assert results in accepted_orders
        assert executor.execution_time > min_time
        assert executor.execution_time < max_time
+4 -3
View File
@@ -1,8 +1,9 @@
import pytest import pytest
from unittest.mock import MagicMock, patch from unittest.mock import AsyncMock, MagicMock, patch
from maigret.submit import Submitter from maigret.submit import Submitter, MaigretSite, MaigretEngine
from aiohttp import ClientSession from aiohttp import ClientSession
from maigret.sites import MaigretDatabase from maigret.sites import MaigretDatabase
from maigret.settings import Settings
import logging import logging
@@ -271,7 +272,7 @@ async def test_dialog_adds_site_negative(settings):
] ]
with patch('builtins.input', side_effect=user_inputs): with patch('builtins.input', side_effect=user_inputs):
result = await submitter.dialog("https://icq.com/sokrat", None) result = await submitter.dialog("https://icq.im/sokrat", None)
await submitter.close() await submitter.close()
assert result is False assert result is False
-205
View File
@@ -1,205 +0,0 @@
#!/usr/bin/env python3
"""
Utility script to close pull requests with titles matching "Invalid result https://t.me/..."
This script identifies and closes PRs that follow the pattern of invalid telegram results,
which are typically auto-generated or spam PRs that should not be processed.
"""
import argparse
import os
import re
import sys
from typing import List, Optional
try:
import requests
except ImportError:
print("Error: requests library is required. Install with: pip install requests")
sys.exit(1)
class GitHubAPI:
    """Simple GitHub API wrapper for managing pull requests.

    Args:
        token: GitHub token sent in the ``Authorization`` header.
        owner: Owner of the repository.
        repo: Name of the repository.
        timeout: Timeout in seconds applied to every HTTP request, so that a
            stalled connection cannot block the caller forever.
    """

    # Page size requested from the pull request listing endpoint.
    PER_PAGE = 100

    def __init__(self, token: str, owner: str, repo: str, timeout: float = 30):
        self.token = token
        self.owner = owner
        self.repo = repo
        self.timeout = timeout
        self.base_url = "https://api.github.com"
        self.headers = {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json"
        }

    def get_open_prs(self) -> List[dict]:
        """Get all open pull requests.

        Returns:
            The decoded JSON objects of every open pull request, collected
            across all result pages.

        Raises:
            requests.RequestException: If a request fails, times out, or
                returns an error status.
        """
        url = f"{self.base_url}/repos/{self.owner}/{self.repo}/pulls"
        params = {"state": "open", "per_page": self.PER_PAGE}

        all_prs: List[dict] = []
        page = 1

        while True:
            params["page"] = page
            response = requests.get(
                url, headers=self.headers, params=params, timeout=self.timeout
            )
            response.raise_for_status()

            prs = response.json()
            if not prs:
                break

            all_prs.extend(prs)
            # A page shorter than the requested size is the last one; stopping
            # here avoids an extra request that would only return an empty list.
            if len(prs) < self.PER_PAGE:
                break
            page += 1

        return all_prs

    def close_pr(self, pr_number: int, comment: Optional[str] = None) -> bool:
        """Close a pull request with an optional comment.

        Args:
            pr_number: Number of the pull request to close.
            comment: Text posted on the pull request before closing it;
                nothing is posted when empty or ``None``.

        Returns:
            True if the pull request was closed, False if any request failed
            (the error is printed rather than raised).
        """
        try:
            # Add comment if provided
            if comment:
                comment_url = f"{self.base_url}/repos/{self.owner}/{self.repo}/issues/{pr_number}/comments"
                comment_data = {"body": comment}
                response = requests.post(
                    comment_url,
                    headers=self.headers,
                    json=comment_data,
                    timeout=self.timeout,
                )
                response.raise_for_status()

            # Close the PR
            close_url = f"{self.base_url}/repos/{self.owner}/{self.repo}/pulls/{pr_number}"
            close_data = {"state": "closed"}
            response = requests.patch(
                close_url,
                headers=self.headers,
                json=close_data,
                timeout=self.timeout,
            )
            response.raise_for_status()

            return True
        except requests.RequestException as e:
            print(f"Error closing PR #{pr_number}: {e}")
            return False
def is_invalid_telegram_pr(title: str) -> bool:
    """
    Tell whether a PR title has the form "Invalid result https://t.me/...".

    Surrounding whitespace is stripped first; the comparison ignores letter
    case and accepts any run of whitespace between the words.

    Args:
        title: The PR title to check

    Returns:
        True if the title matches the pattern, False otherwise
    """
    normalized_title = title.strip()
    # Anchored at the start: "Invalid result", then a t.me URL, then anything.
    title_matcher = re.compile(r"^invalid\s+result\s+https://t\.me/.*", re.IGNORECASE)
    return title_matcher.match(normalized_title) is not None
def find_invalid_telegram_prs(github_api: GitHubAPI) -> List[dict]:
    """
    Collect the open PRs whose title matches the invalid telegram pattern.

    Args:
        github_api: GitHub API wrapper instance

    Returns:
        The PR dictionaries returned by ``github_api.get_open_prs()`` whose
        ``"title"`` is accepted by ``is_invalid_telegram_pr``, in the order
        they were returned
    """
    return [
        candidate
        for candidate in github_api.get_open_prs()
        if is_invalid_telegram_pr(candidate["title"])
    ]
def main():
    """Main function to find and close invalid telegram PRs.

    Parses the command line, lists the open PRs whose title matches
    'Invalid result https://t.me/...', and closes them after an interactive
    confirmation. With ``--dry-run`` the matches are only listed.

    Exits with status 1 when no token is available, when the API wrapper
    cannot be created, when the PR listing fails, or when the confirmation
    cannot be read because stdin is closed.
    """
    parser = argparse.ArgumentParser(
        description="Close pull requests with titles matching 'Invalid result https://t.me/...'"
    )
    parser.add_argument(
        "--token",
        required=False,
        help="GitHub personal access token (or set GITHUB_TOKEN env var)"
    )
    parser.add_argument(
        "--owner",
        default="soxoj",
        help="Repository owner (default: soxoj)"
    )
    parser.add_argument(
        "--repo",
        default="maigret",
        help="Repository name (default: maigret)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be closed without actually closing PRs"
    )
    parser.add_argument(
        "--comment",
        default="Automatically closing this PR as it appears to be an invalid result for a Telegram URL. "
                "If this is a legitimate PR, please reopen it with a more descriptive title.",
        help="Comment to add when closing PRs"
    )
    args = parser.parse_args()

    # Get GitHub token; the command-line flag takes precedence over the env var
    token = args.token or os.getenv("GITHUB_TOKEN")
    if not token:
        print("Error: GitHub token is required. Provide via --token or GITHUB_TOKEN env var")
        sys.exit(1)

    # Initialize GitHub API
    try:
        github_api = GitHubAPI(token, args.owner, args.repo)
    except Exception as e:
        print(f"Error initializing GitHub API: {e}")
        sys.exit(1)

    # Find matching PRs
    print(f"Searching for PRs matching pattern in {args.owner}/{args.repo}...")
    try:
        matching_prs = find_invalid_telegram_prs(github_api)
    except Exception as e:
        print(f"Error fetching PRs: {e}")
        sys.exit(1)

    if not matching_prs:
        print("No PRs found matching the pattern 'Invalid result https://t.me/...'")
        return

    print(f"Found {len(matching_prs)} PR(s) matching the pattern:")
    for pr in matching_prs:
        print(f" - PR #{pr['number']}: {pr['title']}")
        print(f" Created by: {pr['user']['login']}")
        print(f" URL: {pr['html_url']}")
        print()

    if args.dry_run:
        print("Dry run mode: No PRs were actually closed.")
        return

    # Confirm before closing
    try:
        answer = input(f"Close {len(matching_prs)} PR(s)? [y/N]: ")
    except EOFError:
        # stdin is closed or non-interactive: keep the non-zero exit status
        # an uncaught EOFError would produce, but replace the traceback
        # with an explanation.
        print("\nError: no confirmation could be read from stdin; no PRs were closed.")
        sys.exit(1)
    # Tolerate stray whitespace around the answer.
    if answer.strip().lower() != 'y':
        print("Cancelled.")
        return

    # Close PRs
    closed_count = 0
    for pr in matching_prs:
        print(f"Closing PR #{pr['number']}: {pr['title']}")
        if github_api.close_pr(pr['number'], args.comment):
            closed_count += 1
            print(" ✓ Closed successfully")
        else:
            print(" ✗ Failed to close")
    print(f"\nClosed {closed_count} out of {len(matching_prs)} PRs.")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()