disable-sites

disable-donationalerts
Fixed documentation URL (#799 )
2026-05-07 14:34:33 +00:00 · 2023-01-31 13:18:56 -05:00 · 2023-01-27 14:30:40 -05:00 · 2022-12-18 12:26:19 +03:00 · 2022-12-18 12:22:24 +03:00 · 2022-12-04 12:48:41 +03:00
84 changed files with 10662 additions and 2004 deletions
@@ -0,0 +1,2 @@
 #!/bin/sh
 python3 ./utils/update_site_data.py
@@ -0,0 +1,3 @@
 # These are supported funding model platforms
 patreon: soxoj
@@ -0,0 +1,13 @@
 ---
 name: Add a site
 about: I want to add a new site for Maigret checks
 title: New site
 labels: new-site
 assignees: soxoj
 ---
 Link to the site main page: https://example.com
 Link to an existing account: https://example.com/users/john
 Link to a nonexistent account: https://example.com/users/noonewouldeverusethis7
 Tags: photo, us, ...
@@ -0,0 +1,28 @@
 ---
 name: Maigret bug report
 about: I want to report a bug in Maigret functionality
 title: ''
 labels: bug
 assignees: soxoj
 ---
 ## Checklist
 - [ ] I'm reporting a bug in Maigret functionality
 - [ ] I've checked for similar bug reports including closed ones
 - [ ] I've checked for pull requests that attempt to fix this bug
 ## Description
 Info about the Maigret version you are running and your environment (`--version`, operating system, ISP):
 <INSERT VERSION INFO HERE>
 How to reproduce this bug (commandline options / conditions):
 <INSERT EXAMPLE OF CLI COMMAND HERE>
 <DESCRIPTION>
 <PASTE SCREENSHOT>
 <ATTACH LOG FILE>
@@ -0,0 +1,20 @@
 ---
 name: Report invalid result
 about: I want to report invalid result of Maigret search
 title: Invalid result
 labels: false-result
 assignees: soxoj
 ---
 Invalid link: <INSERT LINK HERE>
 <!--
 Put x into the box
 [ ] ==> [x]
 -->
 - [ ] I'm sure that the link leads to "not found" page
@@ -0,0 +1,6 @@
 version: 2
 updates:
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "daily"
@@ -27,6 +27,7 @@ jobs:
        with:
          push: true
          tags: ${{ secrets.DOCKER_HUB_USERNAME }}/maigret:latest
          platforms: linux/amd64,linux/arm64
      -
        name: Image digest
        run: echo ${{ steps.docker_build.outputs.digest }}
@@ -0,0 +1,67 @@
 # For most projects, this workflow file will not need changing; you simply need
 # to commit it to your repository.
 #
 # You may wish to alter this file to override the set of languages analyzed,
 # or to provide custom queries or build logic.
 #
 # ******** NOTE ********
 # We have attempted to detect the languages in your repository. Please check
 # the `language` matrix defined below to confirm you have the correct set of
 # supported CodeQL languages.
 #
 name: "CodeQL"
 on:
  push:
    branches: [ main ]
  schedule:
    - cron: '23 6 * * 6'
 jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write
    strategy:
      fail-fast: false
      matrix:
        language: [ 'python' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
        # Learn more about CodeQL language support at https://git.io/codeql-language-support
    steps:
    - name: Checkout repository
      uses: actions/checkout@v2
    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v1
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.
        # queries: ./path/to/local/query, your-org/your-repo/queries@main
    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
      uses: github/codeql-action/autobuild@v1
    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl
    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
    #    and modify them (or add more) to build your code if your project
    #    uses a compiled language
    #- run: |
    #   make bootstrap
    #   make release
    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v1
@@ -0,0 +1,22 @@
 name: Package exe with PyInstaller - Windows
 on:
  push:
    branches: [ main ]
 jobs:
  build:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: PyInstaller Windows
      uses: JackMcKew/pyinstaller-action-windows@main
      with:
        path: pyinstaller
    - uses: actions/upload-artifact@v2
      with:
        name: maigret_standalone_win32
        path: pyinstaller/dist/windows # or path/to/artifact
@@ -1,13 +1,11 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+name: Linting and testing
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 name: Python package
 on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    types: [opened, synchronize, reopened]
 jobs:
  build:
@@ -15,7 +13,7 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        python-version: [3.6.9, 3.7, 3.8, 3.9]
+        python-version: [3.7, 3.8, 3.9]
    steps:
    - uses: actions/checkout@v2
@@ -1,6 +1,3 @@
 # This workflow will upload a Python Package using Twine when a release is created
 # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 name: Upload Python Package
 on:
@@ -0,0 +1,34 @@
 name: Update sites rating and statistics
 on:
  pull_request:
    branches: [ dev ]
    types: [opened, synchronize]
 jobs:
  build:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout repository
      uses: actions/checkout@v2.3.2
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository.
    - name: build application
      run: |
        pip3 install .
        python3 ./utils/update_site_data.py --empty-only
    - name: Commit and push changes
      run: |
        git config --global user.name "Maigret autoupdate"
        git config --global user.email "soxoj@protonmail.com"
        echo `git name-rev ${{ github.event.pull_request.head.sha }} --name-only`
        export BRANCH=`git name-rev ${{ github.event.pull_request.head.sha }} --name-only | sed 's/remotes\/origin\///'`
        echo $BRANCH
        git remote -v
        git checkout $BRANCH
        git add sites.md
        git commit -m "Updated site list and statistics"
        git push origin $BRANCH
@@ -15,6 +15,10 @@ src/
 .ipynb_checkpoints
 *.ipynb
 # Logs and backups
 *.log
 *.bak
 # Output files, except requirements.txt
 *.txt
 !requirements.txt
@@ -30,4 +34,7 @@ src/
 .coverage
 dist/
 htmlcov/
-/test_*
+/test_*
 # Maigret files
 settings.json
@@ -2,6 +2,227 @@
 ## [Unreleased]
 ## [0.4.4] - 2022-09-03
 * Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/433
 * Drop Python 3.6 support by @soxoj in https://github.com/soxoj/maigret/pull/434
 * Bump xhtml2pdf from 0.2.5 to 0.2.7 by @dependabot in https://github.com/soxoj/maigret/pull/409
 * Bump reportlab from 3.6.6 to 3.6.9 by @dependabot in https://github.com/soxoj/maigret/pull/403
 * Bump markupsafe from 2.0.1 to 2.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/389
 * Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/384
 * Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/438
 * Update GH actions by @soxoj in https://github.com/soxoj/maigret/pull/439
 * Bump tqdm from 4.63.0 to 4.64.0 by @dependabot in https://github.com/soxoj/maigret/pull/440
 * Bump jinja2 from 3.0.3 to 3.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/441
 * Bump soupsieve from 2.3.1 to 2.3.2 by @dependabot in https://github.com/soxoj/maigret/pull/436
 * Bump pypdf2 from 1.26.0 to 1.27.4 by @dependabot in https://github.com/soxoj/maigret/pull/442
 * Bump pyvis from 0.1.9 to 0.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/443
 * Bump pypdf2 from 1.27.4 to 1.27.6 by @dependabot in https://github.com/soxoj/maigret/pull/448
 * Bump typing-extensions from 4.1.1 to 4.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/447
 * Bump soupsieve from 2.3.2 to 2.3.2.post1 by @dependabot in https://github.com/soxoj/maigret/pull/444
 * Bump pypdf2 from 1.27.6 to 1.27.7 by @dependabot in https://github.com/soxoj/maigret/pull/449
 * Bump pypdf2 from 1.27.7 to 1.27.8 by @dependabot in https://github.com/soxoj/maigret/pull/450
 * XMind 8 report warning and some docs update by @soxoj in https://github.com/soxoj/maigret/pull/452
 * False positive fixes 24.04.22 by @soxoj in https://github.com/soxoj/maigret/pull/455
 * Bump pypdf2 from 1.27.8 to 1.27.9 by @dependabot in https://github.com/soxoj/maigret/pull/456
 * Bump pytest from 7.0.1 to 7.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/457
 * Bump jinja2 from 3.1.1 to 3.1.2 by @dependabot in https://github.com/soxoj/maigret/pull/460
 * Ubisoft forums addition by @fen0s in https://github.com/soxoj/maigret/pull/461
 * Add BYOND, Figma, BeatStars by @fen0s in https://github.com/soxoj/maigret/pull/462
 * fix Figma username definition, add a bunch of sites by @fen0s in https://github.com/soxoj/maigret/pull/464
 * Bump pypdf2 from 1.27.9 to 1.27.10 by @dependabot in https://github.com/soxoj/maigret/pull/465
 * Bump pypdf2 from 1.27.10 to 1.27.12 by @dependabot in https://github.com/soxoj/maigret/pull/466
 * Sites fixes 05 05 22 by @soxoj in https://github.com/soxoj/maigret/pull/469
 * Bump pyvis from 0.2.0 to 0.2.1 by @dependabot in https://github.com/soxoj/maigret/pull/472
 * Social analyzer websites, also fixing presence strs by @fen0s in https://github.com/soxoj/maigret/pull/471
 * Updated logic of false positive risk estimating by @soxoj in https://github.com/soxoj/maigret/pull/475
 * Improved usability of external progressbar func by @soxoj in https://github.com/soxoj/maigret/pull/476
 * New sites added, some tags/rank update by @soxoj in https://github.com/soxoj/maigret/pull/477
 * Added new sites by @soxoj in https://github.com/soxoj/maigret/pull/480
 * Added new forums, updated ranks, some utils improvements by @soxoj in https://github.com/soxoj/maigret/pull/481
 * Disabled sites with false positives results by @soxoj in https://github.com/soxoj/maigret/pull/482
 * Bump certifi from 2021.10.8 to 2022.5.18.1 by @dependabot in https://github.com/soxoj/maigret/pull/488
 * Bump psutil from 5.9.0 to 5.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/490
 * Bump pypdf2 from 1.27.12 to 1.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/491
 * Bump pypdf2 from 1.28.1 to 1.28.2 by @dependabot in https://github.com/soxoj/maigret/pull/493
 * added and fixed some websites in data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/494
 * Bump pypdf2 from 1.28.2 to 2.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/504
 * Bump pefile from 2021.9.3 to 2022.5.30 by @dependabot in https://github.com/soxoj/maigret/pull/499
 * Updated sites list, added disabled Anilist by @soxoj in https://github.com/soxoj/maigret/pull/502
 * Bump lxml from 4.8.0 to 4.9.0 by @dependabot in https://github.com/soxoj/maigret/pull/503
 * Compatibility with Python 3.10 by @soxoj in https://github.com/soxoj/maigret/pull/509
 * feat: add .log & .bak files to gitignore in https://github.com/soxoj/maigret/pull/511
 * fix some sites and delete abandoned by @fen0s in https://github.com/soxoj/maigret/pull/526
 * Fixesjulyfirst by @fen0s in https://github.com/soxoj/maigret/pull/533
 * yazbel, aboutcar, zhihu by @fen0s in https://github.com/soxoj/maigret/pull/531
 * Fixes july third by @fen0s in https://github.com/soxoj/maigret/pull/535
 * Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/539
 * Update data.json by @fen0s in https://github.com/soxoj/maigret/pull/540
 * Bump reportlab from 3.6.9 to 3.6.11 by @dependabot in https://github.com/soxoj/maigret/pull/543
 * Bump requests from 2.27.1 to 2.28.1 by @dependabot in https://github.com/soxoj/maigret/pull/530
 * Bump pypdf2 from 2.0.0 to 2.5.0 by @dependabot in https://github.com/soxoj/maigret/pull/542
 * Bump xhtml2pdf from 0.2.7 to 0.2.8 by @dependabot in https://github.com/soxoj/maigret/pull/522
 * Bump lxml from 4.9.0 to 4.9.1 by @dependabot in https://github.com/soxoj/maigret/pull/538
 * disable yandex music + set utf8 encoding by @fen0s in https://github.com/soxoj/maigret/pull/562
 * fix false positives by @fen0s in https://github.com/soxoj/maigret/pull/577
 * disable Instagram, fix two false positives by @fen0s in https://github.com/soxoj/maigret/pull/578
 * Bump certifi from 2022.5.18.1 to 2022.6.15 by @dependabot in https://github.com/soxoj/maigret/pull/551
 * August15 by @fen0s in https://github.com/soxoj/maigret/pull/591
 * Bump pytest-httpserver from 1.0.4 to 1.0.5 by @dependabot in https://github.com/soxoj/maigret/pull/583
 * Bump typing-extensions from 4.2.0 to 4.3.0 by @dependabot in https://github.com/soxoj/maigret/pull/549
 * Bump colorama from 0.4.4 to 0.4.5 by @dependabot in https://github.com/soxoj/maigret/pull/548
 * Bump chardet from 4.0.0 to 5.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/550
 * Bump cloudscraper from 1.2.60 to 1.2.63 by @dependabot in https://github.com/soxoj/maigret/pull/600
 * Bump flake8 from 4.0.1 to 5.0.4 by @dependabot in https://github.com/soxoj/maigret/pull/598
 * Bump attrs from 21.4.0 to 22.1.0 by @dependabot in https://github.com/soxoj/maigret/pull/597
 * Bump pytest-asyncio from 0.18.2 to 0.19.0 by @dependabot in https://github.com/soxoj/maigret/pull/601
 * Bump pypdf2 from 2.5.0 to 2.10.4 by @dependabot in https://github.com/soxoj/maigret/pull/606
 * Bump pytest from 7.1.2 to 7.1.3 by @dependabot in https://github.com/soxoj/maigret/pull/613
 * Update sites.md -Gitmemory.com suppression by @C3n7ral051nt4g3ncy in https://github.com/soxoj/maigret/pull/610
 * Bump cloudscraper from 1.2.63 to 1.2.64 by @dependabot in https://github.com/soxoj/maigret/pull/614
 * Bump pycountry from 22.1.10 to 22.3.5 by @dependabot in https://github.com/soxoj/maigret/pull/607
 * add ProtonMail, disable 3 broken sites by @fen0s in https://github.com/soxoj/maigret/pull/619
 * Bump tqdm from 4.64.0 to 4.64.1 by @dependabot in https://github.com/soxoj/maigret/pull/618
 **Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.3...v0.4.4
 ## [0.4.3] - 2022-04-13
 * Added Sites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/386
 * added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/390
 * Skipped broken tests by @soxoj in https://github.com/soxoj/maigret/pull/397
 * Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/401
 * Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/404
 * Updated statistics by @soxoj in https://github.com/soxoj/maigret/pull/406
 * Added new Websites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/413
 * Disabled houzz.com, updated sites statistics by @soxoj in https://github.com/soxoj/maigret/pull/422
 * Fixed last false positives by @soxoj in https://github.com/soxoj/maigret/pull/424
 * Fixed actual false positives by @soxoj in https://github.com/soxoj/maigret/pull/431
 **Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.2...v0.4.3
 ## [0.4.2] - 2022-03-07
 * [ImgBot] Optimize images by @imgbot in https://github.com/soxoj/maigret/pull/319
 * Bump pytest-asyncio from 0.17.0 to 0.17.1 by @dependabot in https://github.com/soxoj/maigret/pull/321
 * Bump pytest-asyncio from 0.17.1 to 0.17.2 by @dependabot in https://github.com/soxoj/maigret/pull/323
 * Disabled Ruboard by @soxoj in https://github.com/soxoj/maigret/pull/327
 * Disable kinooh, sites list update workflow added by @soxoj in https://github.com/soxoj/maigret/pull/329
 * Bump multidict from 5.2.0 to 6.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/332
 * Bump multidict from 6.0.1 to 6.0.2 by @dependabot in https://github.com/soxoj/maigret/pull/333
 * Bump pytest-httpserver from 1.0.3 to 1.0.4 by @dependabot in https://github.com/soxoj/maigret/pull/334
 * Bump pytest from 6.2.5 to 7.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/339
 * Bump pytest-asyncio from 0.17.2 to 0.18.0 by @dependabot in https://github.com/soxoj/maigret/pull/340
 * Bump pytest-asyncio from 0.18.0 to 0.18.1 by @dependabot in https://github.com/soxoj/maigret/pull/343
 * Bump pytest from 7.0.0 to 7.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/345
 * Bump typing-extensions from 4.0.1 to 4.1.1 by @dependabot in https://github.com/soxoj/maigret/pull/346
 * Bump lxml from 4.7.1 to 4.8.0 by @dependabot in https://github.com/soxoj/maigret/pull/350
 * Pin reportlab version by @cyb3rk0tik in https://github.com/soxoj/maigret/pull/351
 * Fix reportlab not only for testing by @cyb3rk0tik in https://github.com/soxoj/maigret/pull/352
 * Added some scripts by @soxoj in https://github.com/soxoj/maigret/pull/355
 * Added package publishing instruction by @soxoj in https://github.com/soxoj/maigret/pull/356
 * Added DB statistics autoupdate and write to sites.md by @soxoj in https://github.com/soxoj/maigret/pull/357
 * CI autoupdate by @soxoj in https://github.com/soxoj/maigret/pull/359
 * Op.gg fixes by @soxoj in https://github.com/soxoj/maigret/pull/363
 * Wikipedia fix by @soxoj in https://github.com/soxoj/maigret/pull/365
 * Disabled Netvibes and LeetCode by @soxoj in https://github.com/soxoj/maigret/pull/366
 * Fixed several false positives, improved statistics info by @soxoj in https://github.com/soxoj/maigret/pull/368
 * Fix false positives  by @soxoj in https://github.com/soxoj/maigret/pull/370
 * Fixed the rest of false positives for now by @soxoj in https://github.com/soxoj/maigret/pull/371
 * Fix false positive and CI by @soxoj in https://github.com/soxoj/maigret/pull/372
 * Added new sites to data.json by @kustermariocoding in https://github.com/soxoj/maigret/pull/375
 * Fixed issue with str alexaRank by @soxoj in https://github.com/soxoj/maigret/pull/382
 * Bump tqdm from 4.62.3 to 4.63.0 by @dependabot in https://github.com/soxoj/maigret/pull/374
 * Bump pytest-asyncio from 0.18.1 to 0.18.2 by @dependabot in https://github.com/soxoj/maigret/pull/380
 * @imgbot made their first contribution in https://github.com/soxoj/maigret/pull/319
 * @kustermariocoding made their first contribution in https://github.com/soxoj/maigret/pull/375
 **Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.1...v0.4.2
 ## [0.4.1] - 2022-01-15
 * Added dozen of sites, improved submit mode by @soxoj in https://github.com/soxoj/maigret/pull/288
 * Bump requests from 2.26.0 to 2.27.0 by @dependabot in https://github.com/soxoj/maigret/pull/292
 * changed Bayoushooter to use XenForo and foursquare to use correct checkType by @antomarsi in https://github.com/soxoj/maigret/pull/289
 * Bump requests from 2.27.0 to 2.27.1 by @dependabot in https://github.com/soxoj/maigret/pull/293
 * Added aparat.com by @soxoj in https://github.com/soxoj/maigret/pull/294
 * Fixed BongaCams, links parsing improved by @soxoj in https://github.com/soxoj/maigret/pull/297
 * Temporary fix for Twitter (#299) by @soxoj in https://github.com/soxoj/maigret/pull/300
 * Fixed TikTok checks (#303) by @soxoj in https://github.com/soxoj/maigret/pull/306
 * Bump pycountry from 20.7.3 to 22.1.10 by @dependabot in https://github.com/soxoj/maigret/pull/313
 * Pornhub search improved by @soxoj in https://github.com/soxoj/maigret/pull/315
 * Codacademy fixed by @soxoj in https://github.com/soxoj/maigret/pull/316
 * Bump pytest-asyncio from 0.16.0 to 0.17.0 by @dependabot in https://github.com/soxoj/maigret/pull/314
 **Full Changelog**: https://github.com/soxoj/maigret/compare/v0.4.0...v0.4.1
 ## [0.4.0] - 2022-01-03
 * Delayed import of requests module, speed check command, reqs updated by @soxoj in https://github.com/soxoj/maigret/pull/189
 * Snapcraft yaml added by @soxoj in https://github.com/soxoj/maigret/pull/190
 * Create codeql-analysis.yml by @soxoj in https://github.com/soxoj/maigret/pull/191
 * Move wiki pages to ReadTheDocs by @egornagornov in https://github.com/soxoj/maigret/pull/194
 * Created ReadTheDocs requirements file by @soxoj in https://github.com/soxoj/maigret/pull/195
 * Fix incompatible version requirements by @JasperJuergensen in https://github.com/soxoj/maigret/pull/196
 * Added link to documentation by @soxoj in https://github.com/soxoj/maigret/pull/198
 * Upgraded base docker image by @soxoj in https://github.com/soxoj/maigret/pull/199
 * Run CodeQL only after merge and each Saturday by @soxoj in https://github.com/soxoj/maigret/pull/201
 * Added cascade settings loading from /.maigret/settings.json and ./settings.json by @soxoj in https://github.com/soxoj/maigret/pull/200
 * Documentation and settings improved by @soxoj in https://github.com/soxoj/maigret/pull/203
 * New config options added by @soxoj in https://github.com/soxoj/maigret/pull/204
 * Added export of cli entrypoint by @soxoj in https://github.com/soxoj/maigret/pull/207
 * Removed redundant logging by @soxoj in https://github.com/soxoj/maigret/pull/210
 * PyInstaller workflow by @soxoj in https://github.com/soxoj/maigret/pull/206
 * Create bug.md by @soxoj in https://github.com/soxoj/maigret/pull/213
 * Fixed path and names of report files by @soxoj in https://github.com/soxoj/maigret/pull/216
 * Box drawing logic improved, added new settings by @soxoj in https://github.com/soxoj/maigret/pull/217
 * Fixes for win32 release by @soxoj in https://github.com/soxoj/maigret/pull/218
 * Bump six from 1.15.0 to 1.16.0 by @dependabot in https://github.com/soxoj/maigret/pull/221
 * Bump flake8 from 3.8.4 to 4.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/219
 * Bump aiohttp from 3.7.4 to 3.8.0 by @dependabot in https://github.com/soxoj/maigret/pull/220
 * Bump aiohttp-socks from 0.5.5 to 0.6.0 by @dependabot in https://github.com/soxoj/maigret/pull/222
 * Bump typing-extensions from 3.7.4.3 to 3.10.0.2 by @dependabot in https://github.com/soxoj/maigret/pull/224
 * Bump multidict from 5.1.0 to 5.2.0 by @dependabot in https://github.com/soxoj/maigret/pull/225
 * Bump idna from 2.10 to 3.3 by @dependabot in https://github.com/soxoj/maigret/pull/228
 * Bump pytest-cov from 2.10.1 to 3.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/227
 * Bump mock from 4.0.2 to 4.0.3 by @dependabot in https://github.com/soxoj/maigret/pull/226
 * Bump certifi from 2020.12.5 to 2021.10.8 by @dependabot in https://github.com/soxoj/maigret/pull/233
 * Bump pytest-httpserver from 1.0.0 to 1.0.2 by @dependabot in https://github.com/soxoj/maigret/pull/232
 * Bump lxml from 4.6.3 to 4.6.4 by @dependabot in https://github.com/soxoj/maigret/pull/231
 * Bump pefile from 2019.4.18 to 2021.9.3 by @dependabot in https://github.com/soxoj/maigret/pull/229
 * Bump pytest-rerunfailures from 9.1.1 to 10.2 by @dependabot in https://github.com/soxoj/maigret/pull/230
 * Bump yarl from 1.6.3 to 1.7.2 by @dependabot in https://github.com/soxoj/maigret/pull/237
 * Bump async-timeout from 4.0.0 to 4.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/236
 * Bump psutil from 5.7.0 to 5.8.0 by @dependabot in https://github.com/soxoj/maigret/pull/234
 * Bump jinja2 from 3.0.2 to 3.0.3 by @dependabot in https://github.com/soxoj/maigret/pull/235
 * Bump pytest from 6.2.4 to 6.2.5 by @dependabot in https://github.com/soxoj/maigret/pull/238
 * Bump tqdm from 4.55.0 to 4.62.3 by @dependabot in https://github.com/soxoj/maigret/pull/242
 * Bump arabic-reshaper from 2.1.1 to 2.1.3 by @dependabot in https://github.com/soxoj/maigret/pull/243
 * Bump pytest-asyncio from 0.14.0 to 0.16.0 by @dependabot in https://github.com/soxoj/maigret/pull/240
 * Bump chardet from 3.0.4 to 4.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/241
 * Bump soupsieve from 2.1 to 2.3.1 by @dependabot in https://github.com/soxoj/maigret/pull/239
 * Bump aiohttp from 3.8.0 to 3.8.1 by @dependabot in https://github.com/soxoj/maigret/pull/246
 * Bump typing-extensions from 3.10.0.2 to 4.0.0 by @dependabot in https://github.com/soxoj/maigret/pull/245
 * Bump aiohttp-socks from 0.6.0 to 0.6.1 by @dependabot in https://github.com/soxoj/maigret/pull/249
 * Bump aiohttp-socks from 0.6.1 to 0.7.1 by @dependabot in https://github.com/soxoj/maigret/pull/250
 * Bump typing-extensions from 4.0.0 to 4.0.1 by @dependabot in https://github.com/soxoj/maigret/pull/253
 * Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/254
 * Disabled non-working sites by @soxoj in https://github.com/soxoj/maigret/pull/255
 * Added false results buttons to reports, fixed some falses by @soxoj in https://github.com/soxoj/maigret/pull/256
 * Fixed xHamster, added support of proxies to self-check mode by @soxoj in https://github.com/soxoj/maigret/pull/259
 * Disabled non-working sites, updated public sites list by @soxoj in https://github.com/soxoj/maigret/pull/263
 * Bump lxml from 4.6.4 to 4.6.5 by @dependabot in https://github.com/soxoj/maigret/pull/266
 * Bump lxml from 4.6.5 to 4.7.1 by @dependabot in https://github.com/soxoj/maigret/pull/269
 * Bump pytest-httpserver from 1.0.2 to 1.0.3 by @dependabot in https://github.com/soxoj/maigret/pull/270
 * Fixed failed tests (thx to Meta aka Facebook) by @soxoj in https://github.com/soxoj/maigret/pull/273
 * Fixed votetags, updated issue template by @soxoj in https://github.com/soxoj/maigret/pull/278
 * Bump async-timeout from 4.0.1 to 4.0.2 by @dependabot in https://github.com/soxoj/maigret/pull/275
 * Fixed some false positives by @soxoj in https://github.com/soxoj/maigret/pull/280
 * Bump attrs from 21.2.0 to 21.3.0 by @dependabot in https://github.com/soxoj/maigret/pull/281
 * Bump psutil from 5.8.0 to 5.9.0 by @dependabot in https://github.com/soxoj/maigret/pull/282
 * Bump attrs from 21.3.0 to 21.4.0 by @dependabot in https://github.com/soxoj/maigret/pull/283
 **Full Changelog**: https://github.com/soxoj/maigret/compare/v0.3.1...v0.4.0
 ## [0.3.1] - 2021-10-31
 * fixed false positives
 * accelerated maigret start time by 3 times
 ## [0.3.0] - 2021-06-02
 * added support of Tor and I2P sites
 * added experimental DNS checking feature
@@ -0,0 +1,128 @@
 # Contributor Covenant Code of Conduct
 ## Our Pledge
 We as members, contributors, and leaders pledge to make participation in our
 community a harassment-free experience for everyone, regardless of age, body
 size, visible or invisible disability, ethnicity, sex characteristics, gender
 identity and expression, level of experience, education, socio-economic status,
 nationality, personal appearance, race, religion, or sexual identity
 and orientation.
 We pledge to act and interact in ways that contribute to an open, welcoming,
 diverse, inclusive, and healthy community.
 ## Our Standards
 Examples of behavior that contributes to a positive environment for our
 community include:
 * Demonstrating empathy and kindness toward other people
 * Being respectful of differing opinions, viewpoints, and experiences
 * Giving and gracefully accepting constructive feedback
 * Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
 * Focusing on what is best not just for us as individuals, but for the
  overall community
 Examples of unacceptable behavior include:
 * The use of sexualized language or imagery, and sexual attention or
  advances of any kind
 * Trolling, insulting or derogatory comments, and personal or political attacks
 * Public or private harassment
 * Publishing others' private information, such as a physical or email
  address, without their explicit permission
 * Other conduct which could reasonably be considered inappropriate in a
  professional setting
 ## Enforcement Responsibilities
 Community leaders are responsible for clarifying and enforcing our standards of
 acceptable behavior and will take appropriate and fair corrective action in
 response to any behavior that they deem inappropriate, threatening, offensive,
 or harmful.
 Community leaders have the right and responsibility to remove, edit, or reject
 comments, commits, code, wiki edits, issues, and other contributions that are
 not aligned to this Code of Conduct, and will communicate reasons for moderation
 decisions when appropriate.
 ## Scope
 This Code of Conduct applies within all community spaces, and also applies when
 an individual is officially representing the community in public spaces.
 Examples of representing our community include using an official e-mail address,
 posting via an official social media account, or acting as an appointed
 representative at an online or offline event.
 ## Enforcement
 Instances of abusive, harassing, or otherwise unacceptable behavior may be
 reported to the community leaders responsible for enforcement at
 https://t.me/soxoj.
 All complaints will be reviewed and investigated promptly and fairly.
 All community leaders are obligated to respect the privacy and security of the
 reporter of any incident.
 ## Enforcement Guidelines
 Community leaders will follow these Community Impact Guidelines in determining
 the consequences for any action they deem in violation of this Code of Conduct:
 ### 1. Correction
 **Community Impact**: Use of inappropriate language or other behavior deemed
 unprofessional or unwelcome in the community.
 **Consequence**: A private, written warning from community leaders, providing
 clarity around the nature of the violation and an explanation of why the
 behavior was inappropriate. A public apology may be requested.
 ### 2. Warning
 **Community Impact**: A violation through a single incident or series
 of actions.
 **Consequence**: A warning with consequences for continued behavior. No
 interaction with the people involved, including unsolicited interaction with
 those enforcing the Code of Conduct, for a specified period of time. This
 includes avoiding interactions in community spaces as well as external channels
 like social media. Violating these terms may lead to a temporary or
 permanent ban.
 ### 3. Temporary Ban
 **Community Impact**: A serious violation of community standards, including
 sustained inappropriate behavior.
 **Consequence**: A temporary ban from any sort of interaction or public
 communication with the community for a specified period of time. No public or
 private interaction with the people involved, including unsolicited interaction
 with those enforcing the Code of Conduct, is allowed during this period.
 Violating these terms may lead to a permanent ban.
 ### 4. Permanent Ban
 **Community Impact**: Demonstrating a pattern of violation of community
 standards, including sustained inappropriate behavior,  harassment of an
 individual, or aggression toward or disparagement of classes of individuals.
 **Consequence**: A permanent ban from any sort of public interaction within
 the community.
 ## Attribution
 This Code of Conduct is adapted from the [Contributor Covenant][homepage],
 version 2.0, available at
 https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
 Community Impact Guidelines were inspired by [Mozilla's code of conduct
 enforcement ladder](https://github.com/mozilla/diversity).
 [homepage]: https://www.contributor-covenant.org
 For answers to common questions about this code of conduct, see the FAQ at
 https://www.contributor-covenant.org/faq. Translations are available at
 https://www.contributor-covenant.org/translations.
@@ -0,0 +1,30 @@
 # How to contribute
 Hey! I'm really glad you're reading this. Maigret contains a lot of sites, and it is very hard to keep all the sites operational. That's why any fix is important. 
 ## How to add a new site
 #### Beginner level
 You can use Maigret **submit mode** (`maigret --submit URL`) to add a new site or update an existing site. In this mode Maigret does an automatic analysis of the given account URL or site main page URL to determine the site engine and methods to check account presence. After checking, Maigret asks if you want to add the site; answering y/Y will rewrite the local database.
 #### Advanced level
 You can edit [the database JSON file](https://github.com/soxoj/maigret/blob/main/maigret/resources/data.json) (`./maigret/resources/data.json`) manually.
 ## Testing
 There are CI checks for every PR to the Maigret repository. Still, it is better to run `make format`, `make lint` and `make test` locally to ensure you've made correct changes.
 ## Submitting changes
 To submit your changes you must [send a GitHub PR](https://github.com/soxoj/maigret/pulls) to the Maigret project.
 Always write a clear log message for your commits. One-line messages are fine for small changes, but bigger changes should look like this:
    $ git commit -m "A brief summary of the commit
    > 
    > A paragraph describing what changed and its impact."
 ## Coding conventions
 Start reading the code and you'll get the hang of it. ;)
@@ -1,25 +1,16 @@
-FROM python:3.7
+FROM python:3.9-slim
 LABEL maintainer="Soxoj <soxoj@protonmail.com>"
 WORKDIR /app
-
+RUN pip install --no-cache-dir --upgrade pip
-ADD requirements.txt .
+RUN apt-get update && \
-
+    apt-get install --no-install-recommends -y \
 RUN pip install --upgrade pip
 RUN apt update -y
 RUN apt install -y\
      gcc \
      musl-dev \
      libxml2 \
      libxml2-dev \
      libxslt-dev \
-&&  YARL_NO_EXTENSIONS=1 python3 -m pip install maigret \
+    && \
-&&  rm -rf /var/cache/apk/* \
+    rm -rf /var/lib/apt/lists/* /tmp/*
-           /tmp/* \
+COPY . .
-           /var/tmp/*
+RUN YARL_NO_EXTENSIONS=1 python3 -m pip install --no-cache-dir .
 ADD . .
 ENTRYPOINT ["maigret"]
@@ -0,0 +1,41 @@
 LINT_FILES=maigret wizard.py tests
 test:
 	coverage run --source=./maigret -m pytest tests
 	coverage report -m
 	coverage html
 rerun-tests:
 	pytest --lf -vv
 lint:
 	@echo 'syntax errors or undefined names'
 	flake8 --count --select=E9,F63,F7,F82 --show-source --statistics ${LINT_FILES} maigret.py
 	@echo 'warning'
 	flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503,E501 ${LINT_FILES} maigret.py
 	@echo 'mypy'
 	mypy ${LINT_FILES}
 speed:
 	time python3 ./maigret.py --version
 	python3 -c "import timeit; t = timeit.Timer('import maigret'); print(t.timeit(number = 1000000))"
 	python3 -X importtime -c "import maigret" 2> maigret-import.log
 	python3 -m tuna maigret-import.log
 format:
 	@echo 'black'
 	black --skip-string-normalization ${LINT_FILES}
 pull:
 	git stash
 	git checkout main
 	git pull origin main
 	git stash pop
 # Remove generated artifacts; `htmlcov` is the directory written by
 # `coverage html` in the `test` target.
 clean:
 	rm -rf reports htmlcov dist
 install:
 	pip3 install .
@@ -8,9 +8,12 @@
    <a href="https://pypi.org/project/maigret/">
      <img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dw/maigret?style=flat-square">
    </a>
    <a href="https://pypi.org/project/maigret/">
      <img alt="Views" src="https://komarev.com/ghpvc/?username=maigret&color=brightgreen&label=views&style=flat-square">
    </a>
  </p>
  <p align="center">
-    <img src="./static/maigret.png" height="200"/>
+    <img src="https://raw.githubusercontent.com/soxoj/maigret/main/static/maigret.png" height="200"/>
  </p>
 </p>
@@ -18,9 +21,9 @@
 ## About
-**Maigret** collect a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
+**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
-Currently supported more than 2000 sites ([full list](./sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).  
+Currently supported more than 2500 sites ([full list](https://github.com/soxoj/maigret/blob/main/sites.md)), search is launched against 500 popular sites in descending order of popularity by default. Also supported checking of Tor sites, I2P sites, and domains (via DNS resolving).
 ## Main features
@@ -30,28 +33,30 @@ Currently supported more than 2000 sites ([full list](./sites.md)), search is la
 * Censorship and captcha detection
 * Requests retries
-See full description of Maigret features [in the Wiki](https://github.com/soxoj/maigret/wiki/Features).
+See full description of Maigret features [in the documentation](https://maigret.readthedocs.io/en/latest/features.html).
 ## Installation
 Maigret can be installed using pip, Docker, or simply can be launched from the cloned repo.
 Also you can run Maigret using cloud shells (see buttons below). 
-[![Open in Cloud Shell](https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md) [![Run on Repl.it](https://user-images.githubusercontent.com/27065646/92304596-bf719b00-ef7f-11ea-987f-2c1f3c323088.png)](https://repl.it/github/soxoj/maigret)
+Standalone EXE-binaries for Windows are located in [Releases section](https://github.com/soxoj/maigret/releases) of GitHub repository.
-<a href="https://colab.research.google.com/gist//soxoj/879b51bc3b2f8b695abb054090645000/maigret.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="40"></a>
+
 Also you can run Maigret using cloud shells and Jupyter notebooks (see buttons below). 
 [![Open in Cloud Shell](https://user-images.githubusercontent.com/27065646/92304704-8d146d80-ef80-11ea-8c29-0deaabb1c702.png)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/soxoj/maigret&tutorial=README.md)
 <a href="https://repl.it/github/soxoj/maigret"><img src="https://replit.com/badge/github/soxoj/maigret" alt="Run on Replit" height="50"></a>
 <a href="https://colab.research.google.com/gist/soxoj/879b51bc3b2f8b695abb054090645000/maigret-collab.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" height="45"></a>
 <a href="https://mybinder.org/v2/gist/soxoj/9d65c2f4d3bec5dd25949197ea73cf3a/HEAD"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder" height="45"></a>
 ### Package installing
-**NOTE**: Python 3.6 or higher and pip is required, **Python 3.8 is recommended.**
+**NOTE**: Python 3.7 or higher and pip is required, **Python 3.8 is recommended.**
 ```bash
 # install from pypi
 pip3 install maigret
 # or clone and install manually
 git clone https://github.com/soxoj/maigret && cd maigret
 pip3 install .
 # usage
 maigret username
 ```
@@ -59,6 +64,7 @@ maigret username
 ### Cloning a repository
 ```bash
 # or clone and install manually
 git clone https://github.com/soxoj/maigret && cd maigret
 pip3 install -r requirements.txt
@@ -73,7 +79,7 @@ pip3 install -r requirements.txt
 docker pull soxoj/maigret
 # usage
-docker run soxoj/maigret:latest username
+docker run -v /mydir:/app/reports soxoj/maigret:latest username --html
 # manual build
 docker build -t maigret .
@@ -92,21 +98,26 @@ maigret user --tags photo,dating
 maigret user1 user2 user3 -a
 ```
-Use `maigret --help` to get full options description. Also options are documented in [the Maigret Wiki](https://github.com/soxoj/maigret/wiki/Command-line-options).
+Use `maigret --help` to get full options description. Also options [are documented](https://maigret.readthedocs.io/en/latest/command-line-options.html).
 ## Contributing
 Maigret has open-source code, so you may contribute your own sites by adding them to `data.json` file, or bring changes to its code!
 If you want to contribute, don't forget to activate statistics update hook, command for it would look like this: `git config --local core.hooksPath .githooks/`
 You should make your git commits from your maigret git repo folder, or else the hook wouldn't find the statistics update script.
 ## Demo with page parsing and recursive username search
-[PDF report](./static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
+[PDF report](https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
-![animation of recursive search](./static/recursive_search.svg)
+![animation of recursive search](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.svg)
-![HTML report screenshot](./static/report_alexaimephotography_html_screenshot.png)
+![HTML report screenshot](https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotography_html_screenshot.png)
-![XMind report screenshot](./static/report_alexaimephotography_xmind_screenshot.png)
+![XMind 8 report screenshot](https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotography_xmind_screenshot.png)
-[Full console output](./static/recursive_search.md)
+[Full console output](https://raw.githubusercontent.com/soxoj/maigret/main/static/recursive_search.md)
 ## License
@@ -0,0 +1,18 @@
 #!/usr/bin/env python3
 import asyncio
 import sys
 from maigret.maigret import main
 def run():
    """Run Maigret's asynchronous ``main()`` entry point to completion.

    Exits the process with status 1 if the run is interrupted by the user
    (``KeyboardInterrupt``).
    """
    try:
        if sys.version_info >= (3, 10):
            # asyncio.get_event_loop() is deprecated when no loop is running
            # on newer Pythons, so let asyncio.run() create and close the loop.
            asyncio.run(main())
        else:
            # Older interpreters keep the original event-loop handling.
            loop = asyncio.get_event_loop()
            loop.run_until_complete(main())
    except KeyboardInterrupt:
        print('Maigret is interrupted.')
        sys.exit(1)
 # Script entry point: start Maigret only when this file is executed directly,
 # not when it is imported as a module.
 if __name__ == "__main__":
    run()
@@ -0,0 +1,20 @@
 # Minimal makefile for Sphinx documentation
 #
 # You can set these variables from the command line, and also
 # from the environment for the first two.
 SPHINXOPTS    ?=
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = source
 BUILDDIR      = build
 # Put it first so that "make" without argument is like "make help".
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 .PHONY: help Makefile
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -0,0 +1,35 @@
@ECHO OFF
 pushd %~dp0
 REM Command file for Sphinx documentation
 if "%SPHINXBUILD%" == "" (
 	set SPHINXBUILD=sphinx-build
 )
 set SOURCEDIR=source
 set BUILDDIR=build
 if "%1" == "" goto help
 %SPHINXBUILD% >NUL 2>NUL
 if errorlevel 9009 (
 	echo.
 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 	echo.installed, then set the SPHINXBUILD environment variable to point
 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 	echo.may add the Sphinx directory to PATH.
 	echo.
 	echo.If you don't have Sphinx installed, grab it from
 	echo.http://sphinx-doc.org/
 	exit /b 1
 )
 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 goto end
 :help
 %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 :end
 popd
@@ -0,0 +1 @@
 sphinx-copybutton
@@ -0,0 +1,127 @@
 .. _command-line-options:
 Command line options
 ====================
 Usernames
 ---------
 ``maigret username1 username2 ...``
 You can specify several usernames separated by space. Usernames are
 **not** mandatory as there are other operations modes (see below).
 Parsing of account pages and online documents
 ---------------------------------------------
 ``maigret --parse URL``
 Maigret will try to extract information about the document/account owner
 (including username and other ids) and will make a search by the
 extracted username and ids. :doc:`Examples <extracting-information-from-pages>`.
 Main options
 ------------
 Options are also configurable through settings files, see
 :doc:`settings section <settings>`.
 ``--tags`` - Filter sites for searching by tags: sites categories and
 two-letter country codes (**not a language!**). E.g. photo, dating, sport; jp, us, global.
 Multiple tags can be associated with one site. **Warning: tags markup is
 not stable now.**
 ``-n``, ``--max-connections`` - Allowed number of concurrent connections
 **(default: 100)**.
 ``-a``, ``--all-sites`` - Use all sites for scan **(default: top 500)**.
 ``--top-sites`` - Count of sites for scan ranked by Alexa Top
 **(default: top 500)**.
 ``--timeout`` - Time (in seconds) to wait for responses from sites
 **(default: 30)**. A longer timeout will be more likely to get results
 from slow sites. On the other hand, this may cause a long delay to
 gather all results. The choice of the right timeout should be carried
 out taking into account the bandwidth of the Internet connection.
 ``--cookies-jar-file`` - File with custom cookies in Netscape format
 (aka cookies.txt). You can install an extension to your browser to
 download own cookies (`Chrome <https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid>`_, `Firefox <https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/>`_).
 ``--no-recursion`` - Disable parsing pages for other usernames and
 recursive search by them.
 ``--use-disabled-sites`` - Use disabled sites to search (may cause many
 false positives).
 ``--id-type`` - Specify identifier(s) type (default: username).
 Supported types: gaia_id, vk_id, yandex_public_id, ok_id, wikimapia_uid.
 Currently, you must add ``-a`` flag to run a scan on sites with custom
 id types, sites will be filtered automatically.
 ``--ignore-ids`` - Do not make search by the specified username or other
 ids. Useful for repeated scanning with found known irrelevant usernames.
 ``--db`` - Load Maigret database from a JSON file or an online, valid,
 JSON file.
 ``--retries RETRIES`` - Count of attempts to restart temporarily failed
 requests.
 Reports
 -------
 ``-P``, ``--pdf`` - Generate a PDF report (general report on all
 usernames).
 ``-H``, ``--html`` - Generate an HTML report file (general report on all
 usernames).
 ``-X``, ``--xmind`` - Generate an XMind 8 mindmap (one report per
 username).
 ``-C``, ``--csv`` - Generate a CSV report (one report per username).
 ``-T``, ``--txt`` - Generate a TXT report (one report per username).
 ``-J``, ``--json`` - Generate a JSON report of specific type: simple,
 ndjson (one report per username). E.g. ``--json ndjson``
 ``-fo``, ``--folderoutput`` - Results will be saved to this folder,
 ``reports`` by default. Will be created if it doesn’t exist.
 Output options
 --------------
 ``-v``, ``--verbose`` - Display extra information and metrics.
 *(loglevel=WARNING)*
 ``-vv``, ``--info`` - Display service information. *(loglevel=INFO)*
 ``-vvv``, ``--debug``, ``-d`` - Display debugging information and site
 responses. *(loglevel=DEBUG)*
 ``--print-not-found`` - Print sites where the username was not found.
 ``--print-errors`` - Print errors messages: connection, captcha, site
 country ban, etc.
 Other operations modes
 ----------------------
 ``--version`` - Display version information and dependencies.
 ``--self-check`` - Do self-checking for sites and database and disable
 non-working ones **for current search session** by default. It’s useful
 for testing new internet connection (it depends on provider/hosting on
 which sites there will be censorship stub or captcha display). After
 checking Maigret asks if you want to save updates, answering y/Y will
 rewrite the local database.
 ``--submit URL`` - Do an automatic analysis of the given account URL or
 site main page URL to determine the site engine and methods to check
 account presence. After checking Maigret asks if you want to add the
 site, answering y/Y will rewrite the local database.
@@ -0,0 +1,36 @@
 # Configuration file for the Sphinx documentation builder.
 # -- Project information
 project = 'Maigret'
 copyright = '2021, soxoj'
 author = 'soxoj'
 # NOTE: `release` and `version` are set to the same string here; the
 # development guide lists docs/source/conf.py among the files whose version
 # must be bumped manually on each release.
 release = '0.4.4'
 version = '0.4.4'
 # -- General configuration
 # Extensions bundled with Sphinx (`sphinx.ext.*`) plus `sphinx_copybutton`,
 # which is a separate package and must be installed in the docs environment.
 extensions = [
    'sphinx.ext.duration',
    'sphinx.ext.doctest',
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.intersphinx',
    'sphinx_copybutton'
 ]
 # External documentation sets that intersphinx may link to.
 intersphinx_mapping = {
    'python': ('https://docs.python.org/3/', None),
    'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
 }
 # Excludes the 'std' domain from intersphinx lookups -- see the
 # sphinx.ext.intersphinx documentation for the exact effect.
 intersphinx_disabled_domains = ['std']
 templates_path = ['_templates']
 # -- Options for HTML output
 # NOTE(review): `sphinx_rtd_theme` is a separate package; confirm it is
 # available wherever the docs are built.
 html_theme = 'sphinx_rtd_theme'
 # -- Options for EPUB output
 epub_show_urls = 'footnote'
@@ -0,0 +1,101 @@
 .. _development:
 Development
 ==============
 Testing
 -------
 It is recommended to use Python 3.7/3.8 for testing due to some conflicts in 3.9.
 Install test requirements:
 .. code-block:: console
  pip install -r test-requirements.txt
 Use the following commands to check Maigret:
 .. code-block:: console
  # run linter and typing checks
  # order of checks:
  # - critical syntax errors or undefined names
  # - flake checks
  # - mypy checks
  make lint
  # run testing with coverage html report
  # current test coverage is 60%
  make test
  # open html report
  open htmlcov/index.html
 How to publish new version of Maigret
 -------------------------------------
 **Collaborator rights are required; write to Soxoj to get them**.
 To publish a new version you must first create a new branch in the repository
 with a bumped version number and an up-to-date changelog. After that you
 must create a release, and a GitHub action will automatically create a new
 PyPI package.
 - New branch example: https://github.com/soxoj/maigret/commit/e520418f6a25d7edacde2d73b41a8ae7c80ddf39
 - Release example: https://github.com/soxoj/maigret/releases/tag/v0.4.1
 1. Make a new branch locally with a new version name. Check the current version number here: https://pypi.org/project/maigret/.
 **Increase only patch version (third number)** if there are no breaking changes.
 .. code-block:: console
  git checkout -b 0.4.0
 2. Update Maigret version in three files manually:
 - setup.py
 - maigret/__version__.py 
 - docs/source/conf.py 
 3. Create a new empty text section in the beginning of the file `CHANGELOG.md` with a current date:
 .. code-block:: console
  ## [0.4.0] - 2022-01-03
 4. Get auto-generated release notes:
 - Open https://github.com/soxoj/maigret/releases/new
 - Click `Choose a tag`, enter `v0.4.0` (your version)
 - Click `Create new tag`
 - Press `+ Auto-generate release notes`
 - Copy all the text from description text field below
 - Paste it into the empty text section in `CHANGELOG.md`
 - Remove redundant lines `## What's Changed` and `## New Contributors` section if it exists
 - *Close the new release page*
 5. Commit all the changes, push, make pull request
 .. code-block:: console
  git add -p
  git commit -m 'Bump to YOUR VERSION'
  git push origin HEAD
 6. Merge pull request
 7. Create new release
 - Open https://github.com/soxoj/maigret/releases/new again
 - Click `Choose a tag`
 - Enter actual version in format `v0.4.0`
 - Also enter actual version in the field `Release title` 
 - Click `Create new tag`
 - Press `+ Auto-generate release notes`
 - **Press "Publish release" button**
 8. That's all, now you can simply wait for the push to PyPI. You can monitor it in Action page: https://github.com/soxoj/maigret/actions/workflows/python-publish.yml
@@ -0,0 +1,35 @@
 .. _extracting-information-from-pages:
 Extracting information from pages
 =================================
 Maigret can parse URLs and content of web pages by URLs to extract info about account owner and other meta information.
 You must specify the URL with the option ``--parse``; it can be a link to an account or an online document. For the list of supported sites, `see here <https://github.com/soxoj/socid-extractor#sites>`_.
 After the end of the parsing phase, Maigret will start the search phase by :doc:`supported identifiers <supported-identifier-types>` found (usernames, ids, etc.).
 Examples
 --------
 .. code-block:: console
  $ maigret --parse https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit\#gid\=0
  Scanning webpage by URL https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit#gid=0...
  ┣╸org_name: Gooten
  ┗╸mime_type: application/vnd.google-apps.ritz
  Scanning webpage by URL https://clients6.google.com/drive/v2beta/files/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw?fields=alternateLink%2CcopyRequiresWriterPermission%2CcreatedDate%2Cdescription%2CdriveId%2CfileSize%2CiconLink%2Cid%2Clabels(starred%2C%20trashed)%2ClastViewedByMeDate%2CmodifiedDate%2Cshared%2CteamDriveId%2CuserPermission(id%2Cname%2CemailAddress%2Cdomain%2Crole%2CadditionalRoles%2CphotoLink%2Ctype%2CwithLink)%2Cpermissions(id%2Cname%2CemailAddress%2Cdomain%2Crole%2CadditionalRoles%2CphotoLink%2Ctype%2CwithLink)%2Cparents(id)%2Ccapabilities(canMoveItemWithinDrive%2CcanMoveItemOutOfDrive%2CcanMoveItemOutOfTeamDrive%2CcanAddChildren%2CcanEdit%2CcanDownload%2CcanComment%2CcanMoveChildrenWithinDrive%2CcanRename%2CcanRemoveChildren%2CcanMoveItemIntoTeamDrive)%2Ckind&supportsTeamDrives=true&enforceSingleParent=true&key=AIzaSyC1eQ1xj69IdTMeii5r7brs3R90eck-m7k...
  ┣╸created_at: 2016-02-16T18:51:52.021Z
  ┣╸updated_at: 2019-10-23T17:15:47.157Z
  ┣╸gaia_id: 15696155517366416778
  ┣╸fullname: Nadia Burgess
  ┣╸email: nadia@gooten.com
  ┣╸image: https://lh3.googleusercontent.com/a-/AOh14GheZe1CyNa3NeJInWAl70qkip4oJ7qLsD8vDy6X=s64
  ┗╸email_username: nadia
 .. code-block:: console
  $ maigret.py --parse https://steamcommunity.com/profiles/76561199113454789
  Scanning webpage by URL https://steamcommunity.com/profiles/76561199113454789...
  ┣╸steam_id: 76561199113454789
  ┣╸nickname: Pok
  ┗╸username: Machine42
@@ -0,0 +1,78 @@
 .. _features:
 Features
 ========
 This is the list of Maigret features.
 Personal info gathering
 -----------------------
 Maigret does the `parsing of accounts webpages and extraction <https://github.com/soxoj/socid-extractor>`_ of personal info, links to other profiles, etc.
 Extracted info displayed as an additional result in CLI output and as tables in HTML and PDF reports.
 Also, Maigret uses found ids and usernames from links to start a recursive search.
 Enabled by default, can be disabled with ``--no-extracting``.
 Recursive search
 ----------------
 Maigret can extract some :ref:`common ids <supported-identifier-types>` and usernames from links on the account page (often people placed links to their other accounts) and immediately start new searches. All the gathered information will be displayed in CLI output and reports.
 Enabled by default, can be disabled with ``--no-recursion``.
 Reports
 -------
 Maigret currently supports HTML, PDF, TXT, XMind 8 mindmap, and JSON reports.
 HTML/PDF reports contain:
 - profile photo
 - all the gathered personal info
 - additional information about supposed personal data (full name, gender, location), resulting from statistics of all found accounts
 Also, there is a short text report in the CLI output after the end of a searching phase.
 **Warning**: XMind 8 mindmaps are incompatible with XMind 2022!
 Tags
 ----
 The Maigret sites database is very big (and will get bigger), and it may be overkill to run a search on all the sites.
 Also, it is often hard to understand which sites are more interesting for us in the case of a certain person.
 Tags markup allows selecting a subset of sites by interests (photo, messaging, finance, etc.) or by country. Tags of found accounts grouped and displayed in the reports.
 See full description :doc:`in the Tags Wiki page <tags>`.
 Censorship and captcha detection
 --------------------------------
 Maigret can detect common errors such as censorship stub pages, CloudFlare captcha pages, and others. 
 If you get more than 3% errors of a certain type in a session, you'll get a warning message in the CLI output with recommendations to improve performance and avoid problems.
 Retries
 -------
 Maigret will retry requests that failed with temporary errors (connection failures, proxy errors, etc.).
 One attempt by default, can be changed with option ``--retries N``.
 Archives and mirrors checking
 -----------------------------
 The Maigret database contains not only the original websites, but also mirrors, archives, and aggregators. For example:
 - `Reddit BigData search <https://camas.github.io/reddit-search/>`_
 - `Picuki <https://www.picuki.com/>`_, Instagram mirror
 - `Twitter shadowban <https://shadowban.eu/>`_ checker
 It allows getting additional info about the person and checking the existence of the account even if the main site is unavailable (bot protection, captcha, etc.)
 Simple API
 ----------
 Maigret can be easily integrated with the use of Python package `maigret <https://pypi.org/project/maigret/>`_.
 Example: the official `Telegram bot <https://github.com/soxoj/maigret-tg-bot>`_
@@ -0,0 +1,31 @@
 .. _index:
 Welcome to the Maigret docs!
 ============================
 **Maigret** is an easy-to-use and powerful OSINT tool for collecting a dossier on a person by username only.
 This is achieved by checking for accounts on a huge number of sites and gathering all the available information from web pages.
 The project's main goal is to give OSINT researchers and pentesters a **universal tool** to get maximum information about a subject and integrate it with other tools in automation pipelines.
 You may be interested in:
 -------------------------
 - :doc:`Command line options description <command-line-options>` and :doc:`usage examples <usage-examples>`
 - :doc:`Features list <features>`
 - :doc:`Project roadmap <roadmap>`
 .. toctree::
   :hidden:
   :caption: Sections
   command-line-options
   extracting-information-from-pages
   features
   philosophy
   roadmap
   supported-identifier-types
   tags
   usage-examples
   settings
   development
@@ -0,0 +1,17 @@
 .. _philosophy:
 Philosophy
 ==========
 TL;DR: Username => Dossier
 Maigret is designed to gather all the available information about a person by their username.
 What kind of information is this? First, links to person accounts. Secondly, all the machine-extractable
 pieces of info, such as: other usernames, full name, URLs to people's images, birthday, location (country,
 city, etc.), gender.
 All this information forms some dossier, but it is also useful for other tools and analytical purposes.
 Each collected piece of data has a label of a certain format (for example, ``follower_count`` for the number
 of subscribers or ``created_at`` for account creation time) so that it can be parsed and analyzed by various
 systems and stored in databases.
@@ -0,0 +1,18 @@
 .. _roadmap:
 Roadmap
 =======
 .. figure:: https://i.imgur.com/kk8cFdR.png   
   :target: https://i.imgur.com/kk8cFdR.png
   :align: center
 Current status
 --------------
 - Sites DB stats - ok
 - Scan sessions stats - ok
 - Site engine autodetect - ok
 - Engines for all the sites - WIP
 - Unified reporting flow - ok
 - Retries - ok
@@ -0,0 +1,26 @@
 .. _settings:
 Settings
 ==============
 Options are also configurable through settings files. See
 `settings JSON file <https://github.com/soxoj/maigret/blob/main/maigret/resources/settings.json>`_
 for the list of currently supported options.
 After start, Maigret tries to load configuration from the following sources, in exactly this order:
 .. code-block:: console
  # relative path, based on installed package path
  resources/settings.json
  # absolute path, configuration file in home directory
  ~/.maigret/settings.json
  # relative path, based on current working directory
  settings.json
 Missing any of these files is not an error.
 If a later settings file contains an already known option,
 that option will be overwritten. So it is possible to make
 custom configuration for different users and directories.
@@ -0,0 +1,15 @@
 .. _supported-identifier-types:
 Supported identifier types
 ==========================
 Maigret can search not only by ordinary usernames, but also by certain common identifiers. Below is a list of all currently supported identifiers.
 - **gaia_id** - Google inner numeric user identifier, in former times was placed in a Google Plus account URL. 
 - **steam_id** - Steam inner numeric user identifier.
 - **wikimapia_uid** - Wikimapia.org inner numeric user identifier.
 - **uidme_uguid** - uID.me inner numeric user identifier.
 - **yandex_public_id** - Yandex sites inner letter user identifier. See also: `YaSeeker <https://github.com/HowToFind-bot/YaSeeker>`_. 
 - **vk_id** - VK.com inner numeric user identifier.
 - **ok_id** - OK.ru inner numeric user identifier.
 - **yelp_userid** - Yelp inner user identifier.
@@ -0,0 +1,24 @@
 .. _tags:
 Tags
 ====
 The use of tags allows you to select a subset of the sites from big Maigret DB for search.
 **Warning: tags markup is not stable now.**
 There are several types of tags:
 1. **Country codes**: ``us``, ``jp``, ``br``... (`ISO 3166-1 alpha-2 <https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2>`_). These tags reflect the site language and regional origin of its users and are then used to locate the owner of a username. If the regional origin is difficult to establish or a site is positioned as worldwide, `no country code is given`. There could be multiple country code tags for one site.
 2. **Site engines**. Most of them are forum engines now: ``uCoz``, ``vBulletin``, ``XenForo`` et al. Full list of engines stored in the Maigret database.
 3. **Sites' subject/type and interests of its users**. The full list of "standard" tags is currently `present only in the source code <https://github.com/soxoj/maigret/blob/main/maigret/sites.py#L13>`_.
 Usage
 -----
 ``--tags us,jp`` -- search on US and Japanese sites (actually marked as such in the Maigret database)
 ``--tags coding`` -- search on sites related to software development.
 ``--tags ucoz`` -- search on uCoz sites only (mostly CIS countries)
@@ -0,0 +1,53 @@
 .. _usage-examples:
 Usage examples
 ==============
 Start a search for accounts with username ``machine42`` on top 500 sites from the Maigret DB.
 .. code-block:: console
  maigret machine42
 Start a search for accounts with username ``machine42`` on **all sites** from the Maigret DB.
 .. code-block:: console
  maigret machine42 -a
 Start a search [...] and generate HTML and PDF reports.
 .. code-block:: console
  maigret machine42 -a -HP
 Start a search for accounts with username ``machine42`` only on Facebook.
 .. code-block:: console
  maigret machine42 --site Facebook
 Extract information from the Steam page by URL and start a search for accounts with found username ``machine42``.
 .. code-block:: console
  maigret --parse https://steamcommunity.com/profiles/76561199113454789 
 Start a search for accounts with username ``machine42`` only on US and Japanese sites.
 .. code-block:: console
  maigret machine42 --tags us,jp
 Start a search for accounts with username ``machine42`` only on sites related to software development.
 .. code-block:: console
  maigret machine42 --tags coding
 Start a search for accounts with username ``machine42`` on uCoz sites only (mostly CIS countries).
 .. code-block:: console
  maigret machine42 --tags ucoz
@@ -0,0 +1,68 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8v6PEfyXb0Gx"
   },
   "outputs": [],
   "source": [
    "# clone the repo\n",
    "!git clone https://github.com/soxoj/maigret\n",
    "!pip3 install -r maigret/requirements.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "cXOQUAhDchkl"
   },
   "outputs": [],
   "source": [
    "# help\n",
    "!python3 maigret/maigret.py --help"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SjDmpN4QGnJu"
   },
   "outputs": [],
   "source": [
    "# search\n",
    "!python3 maigret/maigret.py user"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "include_colab_link": true,
   "name": "maigret.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
 }
@@ -1,5 +0,0 @@
 #!/bin/sh
 # Reformat the project's Python sources in place with black.
 # Space-separated list of paths handed to the formatter.
 FILES="maigret wizard.py maigret.py tests"
 echo 'black'
 # $FILES is intentionally unquoted so the shell splits it into one argument
 # per path; --skip-string-normalization leaves existing quote styles alone.
 black --skip-string-normalization $FILES
@@ -1,11 +0,0 @@
 #!/bin/sh
 # Lint the project: two flake8 passes followed by a mypy type check.
 # Space-separated list of paths handed to flake8.
 FILES="maigret wizard.py maigret.py tests"
 echo 'syntax errors or undefined names'
 # First pass: restricted to the E9, F63, F7 and F82 check classes.
 # $FILES is intentionally unquoted so it expands to one argument per path.
 flake8 --count --select=E9,F63,F7,F82 --show-source --statistics $FILES
 echo 'warning'
 # Second pass: --exit-zero makes flake8 return success whatever it reports,
 # so these findings are advisory only.
 flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503 $FILES
 echo 'mypy'
 # NOTE(review): this explicit path list omits maigret.py, unlike $FILES -- confirm that is intended.
 mypy ./maigret ./wizard.py ./tests
@@ -7,8 +7,11 @@ from maigret.maigret import main
 def run():
    """Console entry point: drive the async ``main()`` coroutine to completion.

    Prints a short notice and exits with status 1 when the run is
    interrupted from the keyboard (Ctrl+C).
    """
    try:
        # Compare the whole version tuple rather than ``minor`` alone: a bare
        # ``minor >= 10`` test would pick the wrong branch on any interpreter
        # whose major version is not 3.
        if sys.version_info >= (3, 10):
            asyncio.run(main())
        else:
            # Older interpreters keep the explicit event-loop code path.
            loop = asyncio.get_event_loop()
            loop.run_until_complete(main())
    except KeyboardInterrupt:
        print('Maigret is interrupted.')
        sys.exit(1)
@@ -8,5 +8,6 @@ __author_email__ = 'soxoj@protonmail.com'
 from .__version__ import __version__
 from .checking import maigret as search
 from .maigret import main as cli
 from .sites import MaigretEngine, MaigretSite, MaigretDatabase
 from .notify import QueryNotifyPrint as Notifier
@@ -1,3 +1,3 @@
 """Maigret version file"""
-__version__ = '0.3.0'
+__version__ = '0.4.4'
@@ -1,7 +1,6 @@
 from http.cookiejar import MozillaCookieJar
 from http.cookies import Morsel
 import requests
 from aiohttp import CookieJar
@@ -10,6 +9,8 @@ class ParsingActivator:
    def twitter(site, logger, cookies={}):
        headers = dict(site.headers)
        del headers["x-guest-token"]
        import requests
        r = requests.post(site.activation["url"], headers=headers)
        logger.info(r)
        j = r.json()
@@ -21,6 +22,8 @@ class ParsingActivator:
        headers = dict(site.headers)
        if "Authorization" in headers:
            del headers["Authorization"]
        import requests
        r = requests.get(site.activation["url"], headers=headers)
        jwt_token = r.json()["jwt"]
        site.headers["Authorization"] = "jwt " + jwt_token
@@ -30,6 +33,8 @@ class ParsingActivator:
        headers = dict(site.headers)
        if "Authorization" in headers:
            del headers["Authorization"]
        import requests
        r = requests.get(site.activation["url"])
        bearer_token = r.json()["accessToken"]
        site.headers["authorization"] = f"Bearer {bearer_token}"
@@ -1,19 +1,24 @@
 import asyncio
 import logging
-from mock import Mock
+
 try:
    from mock import Mock
 except ImportError:
    from unittest.mock import Mock
 import re
 import ssl
 import sys
 import tqdm
 import random
 from typing import Tuple, Optional, Dict, List
 from urllib.parse import quote
 import aiohttp
 import aiodns
 import tqdm.asyncio
 from aiohttp_socks import ProxyConnector
 from python_socks import _errors as proxy_errors
 from socid_extractor import extract
 from aiohttp import TCPConnector, ClientSession, http_exceptions
 from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
 from .activation import ParsingActivator, import_aiohttp_cookies
@@ -24,6 +29,7 @@ from .executors import (
    AsyncioSimpleExecutor,
    AsyncioProgressbarQueueExecutor,
 )
 from .result import QueryResult, QueryStatus
 from .sites import MaigretDatabase, MaigretSite
 from .types import QueryOptions, QueryResultWrapper
@@ -31,6 +37,7 @@ from .utils import get_random_user_agent, ascii_data_display
 SUPPORTED_IDS = (
    "username",
    "yandex_public_id",
    "gaia_id",
    "vk_id",
@@ -54,12 +61,13 @@ class SimpleAiohttpChecker(CheckerBase):
        cookie_jar = kwargs.get('cookie_jar')
        self.logger = kwargs.get('logger', Mock())
        # moved here to speed up the launch of Maigret
        from aiohttp_socks import ProxyConnector
        # make http client session
-        connector = (
+        connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
            ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
        )
        connector.verify_ssl = False
-        self.session = aiohttp.ClientSession(
+        self.session = ClientSession(
            connector=connector, trust_env=True, cookie_jar=cookie_jar
        )
@@ -107,7 +115,7 @@ class SimpleAiohttpChecker(CheckerBase):
            error = CheckError("Connecting failure", str(e))
        except ServerDisconnectedError as e:
            error = CheckError("Server disconnected", str(e))
-        except aiohttp.http_exceptions.BadHttpMessage as e:
+        except http_exceptions.BadHttpMessage as e:
            error = CheckError("HTTP", str(e))
        except proxy_errors.ProxyError as e:
            error = CheckError("Proxy", str(e))
@@ -124,6 +132,9 @@ class SimpleAiohttpChecker(CheckerBase):
                self.logger.debug(e, exc_info=True)
                error = CheckError("Unexpected", str(e))
        if error == "Invalid proxy response":
            self.logger.debug(error, exc_info=True)
        return str(html_text), status_code, error
@@ -133,9 +144,12 @@ class ProxiedAiohttpChecker(SimpleAiohttpChecker):
        cookie_jar = kwargs.get('cookie_jar')
        self.logger = kwargs.get('logger', Mock())
        # moved here to speed up the launch of Maigret
        from aiohttp_socks import ProxyConnector
        connector = ProxyConnector.from_url(proxy)
        connector.verify_ssl = False
-        self.session = aiohttp.ClientSession(
+        self.session = ClientSession(
            connector=connector, trust_env=True, cookie_jar=cookie_jar
        )
@@ -384,7 +398,7 @@ def process_site_result(
 def make_site_result(
-    site: MaigretSite, username: str, options: QueryOptions, logger
+    site: MaigretSite, username: str, options: QueryOptions, logger, *args, **kwargs
 ) -> QueryResultWrapper:
    results_site: QueryResultWrapper = {}
@@ -408,6 +422,10 @@ def make_site_result(
    if "url" not in site.__dict__:
        logger.error("No URL for site %s", site.name)
    if kwargs.get('retry') and hasattr(site, "mirrors"):
        site.url_main = random.choice(site.mirrors)
        logger.info(f"Use {site.url_main} as a main url of site {site}")
    # URL of user on site (if it exists)
    url = site.url.format(
        urlMain=site.url_main, urlSubpath=site.url_subpath, username=quote(username)
@@ -511,7 +529,7 @@ def make_site_result(
 async def check_site_for_username(
    site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
 ) -> Tuple[str, QueryResultWrapper]:
-    default_result = make_site_result(site, username, options, logger)
+    default_result = make_site_result(site, username, options, logger, retry=kwargs.get('retry'))
    future = default_result.get("future")
    if not future:
        return site.name, default_result
@@ -567,6 +585,8 @@ async def maigret(
    cookies=None,
    retries=0,
    check_domains=False,
    *args,
    **kwargs,
 ) -> QueryResultWrapper:
    """Main search func
@@ -584,7 +604,7 @@ async def maigret(
    is_parsing_enabled     -- Extract additional info from account pages.
    id_type                -- Type of username to search.
                              Default is 'username', see all supported here:
-                              https://github.com/soxoj/maigret/wiki/Supported-identifier-types
+                              https://maigret.readthedocs.io/en/latest/supported-identifier-types.html
    max_connections        -- Maximum number of concurrent connections allowed.
                              Default is 100.
    no_progressbar         -- Displaying of ASCII progressbar during scanner.
@@ -647,7 +667,8 @@ async def maigret(
        executor = AsyncioSimpleExecutor(logger=logger)
    else:
        executor = AsyncioProgressbarQueueExecutor(
-            logger=logger, in_parallel=max_connections, timeout=timeout + 0.5
+            logger=logger, in_parallel=max_connections, timeout=timeout + 0.5,
            *args, **kwargs
        )
    # make options objects for all the requests
@@ -689,7 +710,7 @@ async def maigret(
            tasks_dict[sitename] = (
                check_site_for_username,
                [site, username, options, logger, query_notify],
-                {'default': (sitename, default_result)},
+                {'default': (sitename, default_result), 'retry': retries-attempts+1},
            )
        cur_results = await executor.run(tasks_dict.values())
@@ -754,6 +775,7 @@ async def site_self_check(
    semaphore,
    db: MaigretDatabase,
    silent=False,
    proxy=None,
    tor_proxy=None,
    i2p_proxy=None,
 ):
@@ -779,6 +801,7 @@ async def site_self_check(
                forced=True,
                no_progressbar=True,
                retries=1,
                proxy=proxy,
                tor_proxy=tor_proxy,
                i2p_proxy=i2p_proxy,
            )
@@ -835,6 +858,7 @@ async def self_check(
    logger,
    silent=False,
    max_connections=10,
    proxy=None,
    tor_proxy=None,
    i2p_proxy=None,
 ) -> bool:
@@ -849,7 +873,7 @@ async def self_check(
    for _, site in all_sites.items():
        check_coro = site_self_check(
-            site, logger, sem, db, silent, tor_proxy, i2p_proxy
+            site, logger, sem, db, silent, proxy, tor_proxy, i2p_proxy
        )
        future = asyncio.ensure_future(check_coro)
        tasks.append(future)
@@ -63,8 +63,9 @@ COMMON_ERRORS = {
 ERRORS_TYPES = {
    'Captcha': 'Try to switch to another IP address or to use service cookies',
    'Bot protection': 'Try to switch to another IP address',
-    'Censorship': 'switch to another internet service provider',
+    'Censorship': 'Switch to another internet service provider',
    'Request timeout': 'Try to increase timeout or to switch to another internet service provider',
    'Connecting failure': 'Try to decrease number of parallel connections (e.g. -n 10)',
 }
 # TODO: checking for reason
@@ -81,6 +81,22 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
        self.queue = asyncio.Queue(self.workers_count)
        self.timeout = kwargs.get('timeout')
    async def increment_progress(self, count):
        """Advance the progress indicator by ``count``, then yield to the event loop.

        ``self.progress.update`` may be either a plain callable or a coroutine
        function; it is awaited only in the latter case.
        """
        advance = self.progress.update
        if not asyncio.iscoroutinefunction(advance):
            advance(count)
        else:
            await advance(count)
        # A zero-length sleep hands control back to the loop so other tasks can run.
        await asyncio.sleep(0)
    async def stop_progress(self):
        """Close the progress indicator, then yield to the event loop.

        ``self.progress.close`` may be either a plain callable or a coroutine
        function; it is awaited only in the latter case.
        """
        finish = self.progress.close
        if not asyncio.iscoroutinefunction(finish):
            finish()
        else:
            await finish()
        # A zero-length sleep hands control back to the loop so other tasks can run.
        await asyncio.sleep(0)
    async def worker(self):
        while True:
            try:
@@ -96,7 +112,7 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
                result = kwargs.get('default')
            self.results.append(result)
-            self.progress.update(1)
+            await self.increment_progress(1)
            self.queue.task_done()
    async def _run(self, queries: Iterable[QueryDraft]):
@@ -109,10 +125,14 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
        workers = [create_task_func()(self.worker()) for _ in range(min_workers)]
        self.progress = self.progress_func(total=len(queries_list))
        for t in queries_list:
            await self.queue.put(t)
        await self.queue.join()
        for w in workers:
            w.cancel()
-        self.progress.close()
+
        await self.stop_progress()
        return self.results
@@ -1,7 +1,6 @@
 """
 Maigret main module
 """
 import aiohttp
 import asyncio
 import logging
 import os
@@ -9,9 +8,9 @@ import sys
 import platform
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 from typing import List, Tuple
 import os.path as path
-import requests
+from socid_extractor import extract, parse
 from socid_extractor import extract, parse, __version__ as socid_version
 from .__version__ import __version__
 from .checking import (
@@ -34,11 +33,13 @@ from .report import (
    save_json_report,
    get_plaintext_report,
    sort_report_by_data_points,
    save_graph_report,
 )
 from .sites import MaigretDatabase
-from .submit import submit_dialog
+from .submit import Submitter
 from .types import QueryResultWrapper
 from .utils import get_dict_ascii_tree
 from .settings import Settings
 def notify_about_errors(search_results: QueryResultWrapper, query_notify):
@@ -47,7 +48,7 @@ def notify_about_errors(search_results: QueryResultWrapper, query_notify):
    for e in errs:
        if not errors.is_important(e):
            continue
-        text = f'Too many errors of type "{e["err"]}" ({e["perc"]}%)'
+        text = f'Too many errors of type "{e["err"]}" ({round(e["perc"],2)}%)'
        solution = errors.solution_of(e['err'])
        if solution:
            text = '. '.join([text, solution.capitalize()])
@@ -61,17 +62,6 @@ def notify_about_errors(search_results: QueryResultWrapper, query_notify):
        )
 def extract_ids_from_url(url: str, db: MaigretDatabase) -> dict:
    """Collect every identifier that the sites in ``db`` can pull out of ``url``.

    Each site in ``db.sites`` is asked to extract an ``(id, type)`` pair from
    the URL; falsy answers are ignored. Returns a dict mapping id to type,
    where a later site overwrites an earlier one for the same id.
    """
    found = {}
    for site in db.sites:
        extracted = site.extract_id_from_url(url)
        if extracted:
            identifier, id_type = extracted
            found[identifier] = id_type
    return found
 def extract_ids_from_page(url, logger, timeout=5) -> dict:
    results = {}
    # url, headers
@@ -117,25 +107,31 @@ def extract_ids_from_results(results: QueryResultWrapper, db: MaigretDatabase) -
                ids_results[u] = utype
        for url in dictionary.get('ids_links', []):
-            ids_results.update(extract_ids_from_url(url, db))
+            ids_results.update(db.extract_ids_from_url(url))
    return ids_results
-def setup_arguments_parser():
+def setup_arguments_parser(settings: Settings):
    from aiohttp import __version__ as aiohttp_version
    from requests import __version__ as requests_version
    from socid_extractor import __version__ as socid_version
    version_string = '\n'.join(
        [
            f'%(prog)s {__version__}',
            f'Socid-extractor:  {socid_version}',
-            f'Aiohttp:  {aiohttp.__version__}',
+            f'Aiohttp:  {aiohttp_version}',
-            f'Requests:  {requests.__version__}',
+            f'Requests:  {requests_version}',
            f'Python:  {platform.python_version()}',
        ]
    )
    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
-        description=f"Maigret v{__version__}",
+        description=f"Maigret v{__version__}\n"
        "Documentation: https://maigret.readthedocs.io/\n"
        "All settings are also configurable through files, see docs.",
    )
    parser.add_argument(
        "username",
@@ -155,9 +151,9 @@ def setup_arguments_parser():
        metavar='TIMEOUT',
        dest="timeout",
        type=timeout_check,
-        default=30,
+        default=settings.timeout,
-        help="Time in seconds to wait for response to requests. "
+        help="Time in seconds to wait for response to requests "
-        "Default timeout of 30.0s. "
+        f"(default {settings.timeout}s). "
        "A longer timeout will be more likely to get results from slow sites. "
        "On the other hand, this may cause a long delay to gather all results. ",
    )
@@ -166,7 +162,7 @@ def setup_arguments_parser():
        action="store",
        type=int,
        metavar='RETRIES',
-        default=1,
+        default=settings.retries_count,
        help="Attempts to restart temporarily failed requests.",
    )
    parser.add_argument(
@@ -175,21 +171,21 @@ def setup_arguments_parser():
        action="store",
        type=int,
        dest="connections",
-        default=100,
+        default=settings.max_connections,
-        help="Allowed number of concurrent connections.",
+        help=f"Allowed number of concurrent connections (default {settings.max_connections}).",
    )
    parser.add_argument(
        "--no-recursion",
        action="store_true",
        dest="disable_recursive_search",
-        default=False,
+        default=(not settings.recursive_search),
        help="Disable recursive search by additional data extracted from pages.",
    )
    parser.add_argument(
        "--no-extracting",
        action="store_true",
        dest="disable_extracting",
-        default=False,
+        default=(not settings.info_extracting),
        help="Disable parsing pages for additional data and other usernames.",
    )
    parser.add_argument(
@@ -203,14 +199,14 @@ def setup_arguments_parser():
        "--db",
        metavar="DB_FILE",
        dest="db_file",
-        default=None,
+        default=settings.sites_db_path,
-        help="Load Maigret database from a JSON file or an online, valid, JSON file.",
+        help="Load Maigret database from a JSON file or HTTP web resource.",
    )
    parser.add_argument(
        "--cookies-jar-file",
        metavar="COOKIE_FILE",
        dest="cookie_file",
-        default=None,
+        default=settings.cookie_jar_file,
        help="File with cookies.",
    )
    parser.add_argument(
@@ -218,7 +214,7 @@ def setup_arguments_parser():
        action="append",
        metavar='IGNORED_IDS',
        dest="ignore_ids_list",
-        default=[],
+        default=settings.ignore_ids_list,
        help="Do not make search by the specified username or other ids.",
    )
    # reports options
@@ -226,7 +222,7 @@ def setup_arguments_parser():
        "--folderoutput",
        "-fo",
        dest="folderoutput",
-        default="reports",
+        default=settings.reports_path,
        metavar="PATH",
        help="If using multiple usernames, the output of the results will be saved to this folder.",
    )
@@ -236,27 +232,27 @@ def setup_arguments_parser():
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
-        default=None,
+        default=settings.proxy_url,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
    )
    parser.add_argument(
        "--tor-proxy",
        metavar='TOR_PROXY_URL',
        action="store",
-        default='socks5://127.0.0.1:9050',
+        default=settings.tor_proxy_url,
        help="Specify URL of your Tor gateway. Default is socks5://127.0.0.1:9050",
    )
    parser.add_argument(
        "--i2p-proxy",
        metavar='I2P_PROXY_URL',
        action="store",
-        default='http://127.0.0.1:4444',
+        default=settings.i2p_proxy_url,
        help="Specify URL of your I2P gateway. Default is http://127.0.0.1:4444",
    )
    parser.add_argument(
        "--with-domains",
        action="store_true",
-        default=False,
+        default=settings.domain_search,
        help="Enable (experimental) feature of checking domains on usernames.",
    )
@@ -268,13 +264,13 @@ def setup_arguments_parser():
        "--all-sites",
        action="store_true",
        dest="all_sites",
-        default=False,
+        default=settings.scan_all_sites,
        help="Use all sites for scan.",
    )
    filter_group.add_argument(
        "--top-sites",
        action="store",
-        default=500,
+        default=settings.top_sites_count,
        metavar="N",
        type=int,
        help="Count of sites for scan ranked by Alexa Top (default: 500).",
@@ -287,13 +283,13 @@ def setup_arguments_parser():
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
-        default=[],
+        default=settings.scan_sites_list,
        help="Limit analysis to just the specified sites (multiple option).",
    )
    filter_group.add_argument(
        "--use-disabled-sites",
        action="store_true",
-        default=False,
+        default=settings.scan_disabled_sites,
        help="Use disabled sites to search (may cause many false positives).",
    )
@@ -320,7 +316,7 @@ def setup_arguments_parser():
    modes_group.add_argument(
        "--self-check",
        action="store_true",
-        default=False,
+        default=settings.self_check_enabled,
        help="Do self check for sites and database and disable non-working ones.",
    )
    modes_group.add_argument(
@@ -337,14 +333,14 @@ def setup_arguments_parser():
        "--print-not-found",
        action="store_true",
        dest="print_not_found",
-        default=False,
+        default=settings.print_not_found,
        help="Print sites where the username was not found.",
    )
    output_group.add_argument(
        "--print-errors",
        action="store_true",
        dest="print_check_errors",
-        default=False,
+        default=settings.print_check_errors,
        help="Print errors messages: connection, captcha, site country ban, etc.",
    )
    output_group.add_argument(
@@ -376,14 +372,14 @@ def setup_arguments_parser():
        "--no-color",
        action="store_true",
        dest="no_color",
-        default=False,
+        default=(not settings.colored_print),
        help="Don't color terminal output",
    )
    output_group.add_argument(
        "--no-progressbar",
        action="store_true",
        dest="no_progressbar",
-        default=False,
+        default=(not settings.show_progressbar),
        help="Don't show progressbar.",
    )
@@ -395,7 +391,7 @@ def setup_arguments_parser():
        "--txt",
        action="store_true",
        dest="txt",
-        default=False,
+        default=settings.txt_report,
        help="Create a TXT report (one report per username).",
    )
    report_group.add_argument(
@@ -403,7 +399,7 @@ def setup_arguments_parser():
        "--csv",
        action="store_true",
        dest="csv",
-        default=False,
+        default=settings.csv_report,
        help="Create a CSV report (one report per username).",
    )
    report_group.add_argument(
@@ -411,7 +407,7 @@ def setup_arguments_parser():
        "--html",
        action="store_true",
        dest="html",
-        default=False,
+        default=settings.html_report,
        help="Create an HTML report file (general report on all usernames).",
    )
    report_group.add_argument(
@@ -419,7 +415,7 @@ def setup_arguments_parser():
        "--xmind",
        action="store_true",
        dest="xmind",
-        default=False,
+        default=settings.xmind_report,
        help="Generate an XMind 8 mindmap report (one report per username).",
    )
    report_group.add_argument(
@@ -427,16 +423,24 @@ def setup_arguments_parser():
        "--pdf",
        action="store_true",
        dest="pdf",
-        default=False,
+        default=settings.pdf_report,
        help="Generate a PDF report (general report on all usernames).",
    )
    report_group.add_argument(
        "-G",
        "--graph",
        action="store_true",
        dest="graph",
        default=settings.graph_report,
        help="Generate a graph report (general report on all usernames).",
    )
    report_group.add_argument(
        "-J",
        "--json",
        action="store",
        metavar='TYPE',
        dest="json",
-        default='',
+        default=settings.json_report_type,
        choices=SUPPORTED_JSON_REPORT_FORMATS,
        help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
        " (one report per username).",
@@ -444,7 +448,7 @@ def setup_arguments_parser():
    parser.add_argument(
        "--reports-sorting",
-        default='default',
+        default=settings.report_sorting,
        choices=('default', 'data'),
        help="Method of results sorting in reports (default: in order of getting the result)",
    )
@@ -452,9 +456,6 @@ def setup_arguments_parser():
 async def main():
    arg_parser = setup_arguments_parser()
    args = arg_parser.parse_args()
    # Logging
    log_level = logging.ERROR
    logging.basicConfig(
@@ -462,15 +463,27 @@ async def main():
        datefmt='%H:%M:%S',
        level=log_level,
    )
    logger = logging.getLogger('maigret')
    logger.setLevel(log_level)
    # Load settings
    settings = Settings()
    settings_loaded, err = settings.load()
    if not settings_loaded:
        logger.error(err)
        sys.exit(3)
    arg_parser = setup_arguments_parser(settings)
    args = arg_parser.parse_args()
    # Re-set logging level based on args
    if args.debug:
        log_level = logging.DEBUG
    elif args.info:
        log_level = logging.INFO
    elif args.verbose:
        log_level = logging.WARNING
    logger = logging.getLogger('maigret')
    logger.setLevel(log_level)
    # Usernames initial list
@@ -496,10 +509,7 @@ async def main():
    if args.tags:
        args.tags = list(set(str(args.tags).split(',')))
-    if args.db_file is None:
+    db_file = path.join(path.dirname(path.realpath(__file__)), args.db_file)
        args.db_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "resources/data.json"
        )
    if args.top_sites == 0 or args.all_sites:
        args.top_sites = sys.maxsize
@@ -514,7 +524,7 @@ async def main():
    )
    # Create object with all information about sites we are aware of.
-    db = MaigretDatabase().load_from_file(args.db_file)
+    db = MaigretDatabase().load_from_path(db_file)
    get_top_sites_for_id = lambda x: db.ranked_sites_dict(
        top=args.top_sites,
        tags=args.tags,
@@ -526,11 +536,10 @@ async def main():
    site_data = get_top_sites_for_id(args.id_type)
    if args.new_site_to_submit:
-        is_submitted = await submit_dialog(
+        submitter = Submitter(db=db, logger=logger, settings=settings, args=args)
-            db, args.new_site_to_submit, args.cookie_file, logger
+        is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
        )
        if is_submitted:
-            db.save_to_file(args.db_file)
+            db.save_to_file(db_file)
    # Database self-checking
    if args.self_check:
@@ -539,6 +548,7 @@ async def main():
            db,
            site_data,
            logger,
            proxy=args.proxy,
            max_connections=args.connections,
            tor_proxy=args.tor_proxy,
            i2p_proxy=args.i2p_proxy,
@@ -548,7 +558,7 @@ async def main():
                'y',
                '',
            ):
-                db.save_to_file(args.db_file)
+                db.save_to_file(db_file)
                print('Database was successfully updated.')
            else:
                print('Updates will be applied only for current search session.')
@@ -556,13 +566,15 @@ async def main():
    # Database statistics
    if args.stats:
-        print(db.get_db_stats(db.sites_dict))
+        print(db.get_db_stats())
    report_dir = path.join(os.getcwd(), args.folderoutput)
    # Make reports folder is not exists
-    os.makedirs(args.folderoutput, exist_ok=True)
+    os.makedirs(report_dir, exist_ok=True)
    # Define one report filename template
-    report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
+    report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}')
    if usernames == {}:
        # magic params to exit after init
@@ -678,7 +690,9 @@ async def main():
        username = report_context['username']
        if args.html:
-            filename = report_filepath_tpl.format(username=username, postfix='.html')
+            filename = report_filepath_tpl.format(
                username=username, postfix='_plain.html'
            )
            save_html_report(filename, report_context)
            query_notify.warning(f'HTML report on all usernames saved in {filename}')
@@ -687,19 +701,29 @@ async def main():
            save_pdf_report(filename, report_context)
            query_notify.warning(f'PDF report on all usernames saved in {filename}')
        if args.graph:
            filename = report_filepath_tpl.format(
                username=username, postfix='_graph.html'
            )
            save_graph_report(filename, general_results, db)
            query_notify.warning(f'Graph report on all usernames saved in {filename}')
        text_report = get_plaintext_report(report_context)
        if text_report:
            query_notify.info('Short text report:')
            print(text_report)
    # update database
-    db.save_to_file(args.db_file)
+    db.save_to_file(db_file)
 def run():
    """Console entry point: drive the async ``main()`` coroutine to completion.

    Prints a short notice and exits with status 1 when the run is
    interrupted from the keyboard (Ctrl+C).
    """
    try:
        # Compare the whole version tuple rather than ``minor`` alone: a bare
        # ``minor >= 10`` test would pick the wrong branch on any interpreter
        # whose major version is not 3.
        if sys.version_info >= (3, 10):
            asyncio.run(main())
        else:
            # Older interpreters keep the explicit event-loop code path.
            loop = asyncio.get_event_loop()
            loop.run_until_complete(main())
    except KeyboardInterrupt:
        print('Maigret is interrupted.')
        sys.exit(1)
@@ -1,3 +1,4 @@
 import ast
 import csv
 import io
 import json
@@ -6,13 +7,13 @@ import os
 from datetime import datetime
 from typing import Dict, Any
 import pycountry
 import xmind
 from dateutil.parser import parse as parse_datetime_str
 from jinja2 import Template
 from xhtml2pdf import pisa
 from .checking import SUPPORTED_IDS
 from .result import QueryStatus
 from .sites import MaigretDatabase
 from .utils import is_country_tag, CaseConverter, enrich_link_str
 SUPPORTED_JSON_REPORT_FORMATS = [
@@ -66,13 +67,17 @@ def save_txt_report(filename: str, username: str, results: dict):
 def save_html_report(filename: str, context: dict):
    template, _ = generate_report_template(is_pdf=False)
    filled_template = template.render(**context)
-    with open(filename, "w") as f:
+    with open(filename, "w", encoding="utf-8") as f:
        f.write(filled_template)
 def save_pdf_report(filename: str, context: dict):
    """Render the PDF variant of the report template with ``context`` and write it to ``filename``."""
    pdf_template, stylesheet = generate_report_template(is_pdf=True)
    rendered_html = pdf_template.render(**context)

    # Deferred import: moved here to speed up the launch of Maigret.
    from xhtml2pdf import pisa

    html_stream = io.StringIO(rendered_html)
    with open(filename, "w+b") as pdf_file:
        pisa.pisaDocument(html_stream, dest=pdf_file, default_css=stylesheet)
@@ -82,6 +87,131 @@ def save_json_report(filename: str, username: str, results: dict, report_type: s
        generate_json_report(username, results, f, report_type=report_type)
 class MaigretGraph:
    """Small helper around a graph object that adds styled nodes and weighted edges."""

    # keyword arguments passed to ``add_node`` for each kind of node
    other_params = {'size': 10, 'group': 3}
    site_params = {'size': 15, 'group': 2}
    username_params = {'size': 20, 'group': 1}

    def __init__(self, graph):
        self.G = graph

    def add_node(self, key, value):
        """Add a ``"key: value"`` node to the graph and return its name.

        Keys listed in SUPPORTED_IDS get the username style, values starting
        with ``http`` get the site style, everything else the default style.
        When *value* contains upper-case characters, a node for the
        lower-cased value is added as well and linked to this one.
        """
        label = f'{key}: {value}'

        if key in SUPPORTED_IDS:
            style = self.username_params
        elif value.startswith('http'):
            style = self.site_params
        else:
            style = self.other_params

        self.G.add_node(label, title=label, **style)

        lowered = value.lower()
        if lowered != value:
            # connect the original spelling with its normalized form
            self.link(label, self.add_node(key, lowered))

        return label

    def link(self, node1_name, node2_name):
        """Connect two existing nodes with an edge of weight 2."""
        self.G.add_edge(node1_name, node2_name, weight=2)
 def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
    """Build a graph of usernames, claimed accounts and extracted ids, and save it.

    Args:
        filename: path of the file written by pyvis.
        username_results: iterable of ``(username, id_type, results)`` tuples,
            where ``results`` maps a site name to that site's result dictionary.
        db: database used to extract additional ids from the collected values.
    """
    # moved here to speed up the launch of Maigret
    import networkx as nx

    G = nx.Graph()
    graph = MaigretGraph(G)

    def process_ids(parent_node, ids, site_label):
        """Add nodes for every id in *ids* and recurse into ids extracted from them."""
        for k, v in ids.items():
            if k.endswith(('_count', '_at')) or k.startswith('is_'):
                continue
            # Exact comparison: the former substring test (``k in 'image'``)
            # also dropped unrelated keys such as 'age' or 'mag'.
            if k == 'image':
                continue

            v_data = v
            if v.startswith('['):
                # the value looks like a serialized list, try to decode it
                try:
                    v_data = ast.literal_eval(v)
                except Exception as e:
                    logging.error(e)

            if isinstance(v_data, list):
                # value is a list
                # NOTE(review): the list node is not linked to parent_node,
                # same as before this change - confirm whether that is intended.
                list_node_name = graph.add_node(k, site_label)
                for vv in v_data:
                    data_node_name = graph.add_node(vv, site_label)
                    graph.link(list_node_name, data_node_name)

                    add_ids = {
                        id_type: id_value
                        for id_value, id_type in db.extract_ids_from_url(vv).items()
                    }
                    if add_ids:
                        process_ids(data_node_name, add_ids, site_label)
            else:
                # value is just a string
                ids_data_name = graph.add_node(k, v)
                graph.link(parent_node, ids_data_name)

                # check for username
                if 'username' in k or k in SUPPORTED_IDS:
                    new_username_node_name = graph.add_node('username', v)
                    graph.link(ids_data_name, new_username_node_name)

                add_ids = {
                    id_type: id_value
                    for id_value, id_type in db.extract_ids_from_url(v).items()
                }
                if add_ids:
                    process_ids(ids_data_name, add_ids, site_label)

    for username, id_type, results in username_results:
        username_node_name = graph.add_node(id_type, username)

        for website_name, dictionary in results.items():
            # TODO: fix no site data issue
            if not dictionary or dictionary.get("is_similar"):
                continue

            status = dictionary.get("status")
            # FIXME: status is currently missing in case of timeout
            if not status or status.status != QueryStatus.CLAIMED:
                continue

            site_fallback_name = dictionary.get(
                'url_user', f'{website_name}: {username.lower()}'
            )
            site_node_name = graph.add_node('site', site_fallback_name)
            graph.link(username_node_name, site_node_name)

            if status.ids_data:
                process_ids(site_node_name, status.ids_data, site_fallback_name)

    # drop nodes whose name is longer than 100 characters
    G.remove_nodes_from([node for node in G.nodes if len(str(node)) > 100])

    # moved here to speed up the launch of Maigret
    from pyvis.network import Network

    nt = Network(notebook=True, height="750px", width="100%")
    nt.from_nx(G)
    nt.show(filename)
 def get_plaintext_report(context: dict) -> str:
    output = (context['brief'] + " ").replace('. ', '.\n')
    interests = list(map(lambda x: x[0], context.get('interests_tuple_list', [])))
@@ -130,6 +260,9 @@ def generate_report_context(username_results: list):
    first_seen = None
    # moved here to speed up the launch of Maigret
    import pycountry
    for username, id_type, results in username_results:
        found_accounts = 0
        new_ids = []
@@ -0,0 +1,48 @@
 {
    "presence_strings": [
        "username",
        "not found",
        "пользователь",
        "profile",
        "lastname",
        "firstname",
        "biography",
        "birthday",
        "репутация",
        "информация",
        "e-mail"
    ],
    "supposed_usernames": [
        "alex", "god", "admin", "red", "blue", "john"
    ],
    "retries_count": 1,
    "sites_db_path": "resources/data.json",
    "timeout": 30,
    "max_connections": 100,
    "recursive_search": true,
    "info_extracting": true,
    "cookie_jar_file": null,
    "ignore_ids_list": [],
    "reports_path": "reports",
    "proxy_url": null,
    "tor_proxy_url": "socks5://127.0.0.1:9050",
    "i2p_proxy_url": "http://127.0.0.1:4444",
    "domain_search": false,
    "scan_all_sites": false,
    "top_sites_count": 500,
    "scan_disabled_sites": false,
    "scan_sites_list": [],
    "self_check_enabled": false,
    "print_not_found": false,
    "print_check_errors": false,
    "colored_print": true,
    "show_progressbar": true,
    "report_sorting": "default",
    "json_report_type": "",
    "txt_report": false,
    "csv_report": false,
    "xmind_report": false,
    "graph_report": false,
    "pdf_report": false,
    "html_report": false
 }
@@ -68,6 +68,7 @@
        <div class="row-mb">
            <div class="col-md">
                <div class="card flex-md-row mb-4 box-shadow h-md-250">
                    <span style="position: absolute; right: 10px;"><a href="https://github.com/soxoj/maigret/issues/new?assignees=soxoj&amp;labels=bug&amp;template=report-false-result.md&amp;title=Invalid%20result%20{{ v.url_user }}">Invalid?</a></span>
                    <img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status and v.status.ids_data and v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
                    <div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
                    <h3 class="mb-0" style="padding-top: 1rem;">
@@ -38,4 +38,8 @@ div {
  border-bottom-color: #3e3e3e;
  border-bottom-width: 1px;
  border-bottom-style: solid;
 }
 .invalid-button {
  position: absolute;
  left: 10px;
 }
@@ -64,6 +64,7 @@
            <div class="sitebox" style="margin-top: 20px;" >
                <div>
                    <div>
                        <span class="invalid-button"><a href="https://github.com/soxoj/maigret/issues/new?assignees=soxoj&amp;labels=bug&amp;template=report-false-result.md&amp;title=Invalid%20result%20{{ v.url_user }}">Invalid?</a></span>
                        <table>
                            <tr>
                                <td valign="top">
@@ -0,0 +1,85 @@
 import os
 import os.path as path
 import json
 from typing import List
 # Default settings files, in the order they are read: the file bundled next to
 # this module, a per-user file and a file in the current working directory.
 # Settings.load() applies them one after another, so values from later files
 # override values from earlier ones.
 SETTINGS_FILES_PATHS = [
    path.join(path.dirname(path.realpath(__file__)), "resources/settings.json"),
    '~/.maigret/settings.json',
    path.join(os.getcwd(), 'settings.json'),
 ]
 class Settings:
    """Container for Maigret settings loaded from one or more JSON files.

    Loaded keys are stored directly as instance attributes; the annotations
    below list the expected settings.
    """

    # main maigret setting
    retries_count: int
    sites_db_path: str
    timeout: int
    max_connections: int
    recursive_search: bool
    info_extracting: bool
    cookie_jar_file: str
    ignore_ids_list: List
    reports_path: str
    proxy_url: str
    tor_proxy_url: str
    i2p_proxy_url: str
    domain_search: bool
    scan_all_sites: bool
    top_sites_count: int
    scan_disabled_sites: bool
    scan_sites_list: List
    self_check_enabled: bool
    print_not_found: bool
    print_check_errors: bool
    colored_print: bool
    show_progressbar: bool
    report_sorting: str
    json_report_type: str
    txt_report: bool
    csv_report: bool
    xmind_report: bool
    pdf_report: bool
    html_report: bool
    graph_report: bool

    # submit mode settings
    presence_strings: list
    supposed_usernames: list

    def __init__(self):
        pass

    def load(self, paths=None):
        """Load settings from JSON files, later files overriding earlier ones.

        Args:
            paths: list of settings file paths; SETTINGS_FILES_PATHS is used
                when it is empty or None. A leading ``~`` in a path is
                expanded to the user's home directory.

        Returns:
            A ``(was_inited, info)`` tuple. ``was_inited`` is True when at
            least one file provided non-empty settings. ``info`` is a
            "none of the files found" message, or a ValueError instance
            (with ``was_inited`` False) when a file could not be read or parsed.
        """
        was_inited = False

        if not paths:
            paths = SETTINGS_FILES_PATHS

        for filename in paths:
            data = {}

            try:
                # open() does not expand "~" itself, so without expanduser the
                # per-user settings file would never be found
                with open(path.expanduser(filename), "r", encoding="utf-8") as file:
                    data = json.load(file)
            except FileNotFoundError:
                # treat as a normal situation
                pass
            except Exception as error:
                return False, ValueError(
                    f"Problem with parsing json contents of "
                    f"settings file '{filename}':  {str(error)}."
                )

            if not isinstance(data, dict):
                # valid JSON, but not an object: it cannot be mapped to settings
                return False, ValueError(
                    f"Problem with parsing json contents of "
                    f"settings file '{filename}':  top-level value must be an object."
                )

            self.__dict__.update(data)
            if data:
                was_inited = True

        return (
            was_inited,
            f'None of the default settings files found: {", ".join(paths)}',
        )

    @property
    def json(self):
        """Return the loaded settings as a dictionary (the instance ``__dict__``)."""
        return self.__dict__
@@ -5,70 +5,8 @@ import json
 import sys
 from typing import Optional, List, Dict, Any, Tuple
 import requests
 from .utils import CaseConverter, URLMatcher, is_country_tag
 # TODO: move to data.json
 SUPPORTED_TAGS = [
    "gaming",
    "coding",
    "photo",
    "music",
    "blog",
    "finance",
    "freelance",
    "dating",
    "tech",
    "forum",
    "porn",
    "erotic",
    "webcam",
    "video",
    "movies",
    "hacking",
    "art",
    "discussion",
    "sharing",
    "writing",
    "wiki",
    "business",
    "shopping",
    "sport",
    "books",
    "news",
    "documents",
    "travel",
    "maps",
    "hobby",
    "apps",
    "classified",
    "career",
    "geosocial",
    "streaming",
    "education",
    "networking",
    "torrent",
    "science",
    "medicine",
    "reading",
    "stock",
    "messaging",
    "trading",
    "links",
    "fashion",
    "tasks",
    "military",
    "auto",
    "gambling",
    "cybercriminal",
    "review",
    "bookmarks",
    "design",
    "tor",
    "i2p",
 ]
 class MaigretEngine:
    site: Dict[str, Any] = {}
@@ -204,12 +142,12 @@ class MaigretSite:
        errors.update(self.errors)
        return errors
-    def get_url_type(self) -> str:
+    def get_url_template(self) -> str:
        url = URLMatcher.extract_main_part(self.url)
        if url.startswith("{username}"):
            url = "SUBDOMAIN"
        elif url == "":
-            url = f"{self.url} ({self.engine})"
+            url = f"{self.url} ({self.engine or 'no engine'})"
        else:
            parts = url.split("/")
            url = "/" + "/".join(parts[1:])
@@ -273,8 +211,9 @@ class MaigretSite:
 class MaigretDatabase:
    def __init__(self):
-        self._sites = []
+        self._tags: list = []
-        self._engines = []
+        self._sites: list = []
        self._engines: list = []
    @property
    def sites(self):
@@ -351,9 +290,13 @@ class MaigretDatabase:
        return self
    def save_to_file(self, filename: str) -> "MaigretDatabase":
        if '://' in filename:
            return self
        db_data = {
            "sites": {site.name: site.strip_engine_data().json for site in self._sites},
            "engines": {engine.name: engine.json for engine in self._engines},
            "tags": self._tags,
        }
        json_data = json.dumps(db_data, indent=4)
@@ -367,6 +310,9 @@ class MaigretDatabase:
        # Add all of site information from the json file to internal site list.
        site_data = json_data.get("sites", {})
        engines_data = json_data.get("engines", {})
        tags = json_data.get("tags", [])
        self._tags += tags
        for engine_name in engines_data:
            self._engines.append(MaigretEngine(engine_name, engines_data[engine_name]))
@@ -399,12 +345,20 @@ class MaigretDatabase:
        return self.load_from_json(data)
-    def load_from_url(self, url: str) -> "MaigretDatabase":
+    def load_from_path(self, path: str) -> "MaigretDatabase":
        if '://' in path:
            return self.load_from_http(path)
        else:
            return self.load_from_file(path)
    def load_from_http(self, url: str) -> "MaigretDatabase":
        is_url_valid = url.startswith("http://") or url.startswith("https://")
        if not is_url_valid:
            raise FileNotFoundError(f"Invalid data file URL '{url}'.")
        import requests
        try:
            response = requests.get(url=url)
        except Exception as error:
@@ -455,9 +409,18 @@ class MaigretDatabase:
        return found_flags
-    def get_db_stats(self, sites_dict):
+    def extract_ids_from_url(self, url: str) -> dict:
-        if not sites_dict:
+        results = {}
-            sites_dict = self.sites_dict()
+        for s in self._sites:
            result = s.extract_id_from_url(url)
            if not result:
                continue
            _id, _type = result
            results[_id] = _type
        return results
    def get_db_stats(self, is_markdown=False):
        sites_dict = self.sites_dict
        urls = {}
        tags = {}
@@ -465,31 +428,58 @@ class MaigretDatabase:
        disabled_count = 0
        total_count = len(sites_dict)
        message_checks = 0
        message_checks_one_factor = 0
        status_checks = 0
        for _, site in sites_dict.items():
            if site.disabled:
                disabled_count += 1
-            url_type = site.get_url_type()
+            url_type = site.get_url_template()
            urls[url_type] = urls.get(url_type, 0) + 1
            if site.check_type == 'message' and not site.disabled:
                message_checks += 1
                if site.absence_strs and site.presense_strs:
                    continue
                message_checks_one_factor += 1
            if site.check_type == 'status_code':
                status_checks += 1
            if not site.tags:
                tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
            for tag in filter(lambda x: not is_country_tag(x), site.tags):
                tags[tag] = tags.get(tag, 0) + 1
-        output += f"Enabled/total sites: {total_count - disabled_count}/{total_count}\n"
+        enabled_count = total_count-disabled_count
-        output += "Top profile URLs:\n"
+        enabled_perc = round(100*enabled_count/total_count, 2)
-        for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
+        output += f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n"
        checks_perc = round(100*message_checks_one_factor/enabled_count, 2)
        output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n"
        status_checks_perc = round(100*status_checks/enabled_count, 2)
        output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n"
        output += f"False positive risk (total): {checks_perc+status_checks_perc:.2f}%\n\n"
        top_urls_count = 20
        output += f"Top {top_urls_count} profile URLs:\n"
        for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:top_urls_count]:
            if count == 1:
                break
-            output += f"{count}\t{url}\n"
+            output += f"- ({count})\t`{url}`\n" if is_markdown else f"{count}\t{url}\n"
-        output += "Top tags:\n"
+        top_tags_count = 20
-        for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:200]:
+        output += f"\nTop {top_tags_count} tags:\n"
        for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:top_tags_count]:
            mark = ""
-            if tag not in SUPPORTED_TAGS:
+            if tag not in self._tags:
                mark = " (non-standard)"
-            output += f"{count}\t{tag}{mark}\n"
+            output += f"- ({count})\t`{tag}`{mark}\n" if is_markdown else f"{count}\t{tag}{mark}\n"
        return output
@@ -1,389 +1,436 @@
 import asyncio
-import difflib
+import json
 import re
-from typing import List
+from typing import List, Tuple
 import xml.etree.ElementTree as ET
 from aiohttp import TCPConnector, ClientSession
 import requests
 import cloudscraper
 from .activation import import_aiohttp_cookies
 from .checking import maigret
 from .result import QueryStatus
 from .settings import Settings
 from .sites import MaigretDatabase, MaigretSite, MaigretEngine
-from .utils import get_random_user_agent
+from .utils import get_random_user_agent, get_match_ratio
-DESIRED_STRINGS = [
+class CloudflareSession:
-    "username",
+    def __init__(self):
-    "not found",
+        self.scraper = cloudscraper.create_scraper()
    "пользователь",
    "profile",
    "lastname",
    "firstname",
    "biography",
    "birthday",
    "репутация",
    "информация",
    "e-mail",
 ]
-SUPPOSED_USERNAMES = ["alex", "god", "admin", "red", "blue", "john"]
+    async def get(self, *args, **kwargs):
        await asyncio.sleep(0)
        res = self.scraper.get(*args, **kwargs)
        self.last_text = res.text
        self.status = res.status_code
        return self
-HEADERS = {
+    def status_code(self):
-    "User-Agent": get_random_user_agent(),
+        return self.status
 }
-SEPARATORS = "\"'"
+    async def text(self):
        await asyncio.sleep(0)
        return self.last_text
-RATIO = 0.6
+    async def close(self):
 TOP_FEATURES = 5
 URL_RE = re.compile(r"https?://(www\.)?")
 def get_match_ratio(x):
    """Return the best similarity ratio between *x* and any of DESIRED_STRINGS.

    *x* is lower-cased before the comparison; the result is rounded to two
    decimal places.
    """
    candidate = x.lower()
    best_ratio = max(
        difflib.SequenceMatcher(a=candidate, b=desired).ratio()
        for desired in DESIRED_STRINGS
    )
    return round(best_ratio, 2)
 def get_alexa_rank(site_url_main):
    url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
    xml_data = requests.get(url).text
    root = ET.fromstring(xml_data)
    alexa_rank = 0
    try:
        alexa_rank = int(root.find('.//REACH').attrib['RANK'])
    except Exception:
        pass
-    return alexa_rank
+class Submitter:
-
+    HEADERS = {
-
+        "User-Agent": get_random_user_agent(),
 def extract_mainpage_url(url):
    """Return the part of *url* that precedes its third slash (``scheme://host``)."""
    leading_parts = url.split("/", 3)
    del leading_parts[3:]
    return "/".join(leading_parts)
 async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
    changes = {
        "disabled": False,
    }
-    check_data = [
+    SEPARATORS = "\"'"
        (site.username_claimed, QueryStatus.CLAIMED),
        (site.username_unclaimed, QueryStatus.AVAILABLE),
    ]
-    logger.info(f"Checking {site.name}...")
+    RATIO = 0.6
    TOP_FEATURES = 5
    URL_RE = re.compile(r"https?://(www\.)?")
-    for username, status in check_data:
+    def __init__(self, db: MaigretDatabase, settings: Settings, logger, args):
-        results_dict = await maigret(
+        self.settings = settings
-            username=username,
+        self.args = args
-            site_dict={site.name: site},
+        self.db = db
-            logger=logger,
+        self.logger = logger
-            timeout=30,
+
-            id_type=site.type,
+        from aiohttp_socks import ProxyConnector
-            forced=True,
+        proxy = self.args.proxy
-            no_progressbar=True,
+        cookie_jar = None
        if args.cookie_file:
            cookie_jar = import_aiohttp_cookies(args.cookie_file)
        connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
        connector.verify_ssl = False
        self.session = ClientSession(
            connector=connector, trust_env=True, cookie_jar=cookie_jar
        )
-        # don't disable entries with other ids types
+    @staticmethod
-        # TODO: make normal checking
+    def get_alexa_rank(site_url_main):
-        if site.name not in results_dict:
+        url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
-            logger.info(results_dict)
+        xml_data = requests.get(url).text
-            changes["disabled"] = True
+        root = ET.fromstring(xml_data)
-            continue
+        alexa_rank = 0
-        result = results_dict[site.name]["status"]
+        try:
            alexa_rank = int(root.find('.//REACH').attrib['RANK'])
        except Exception:
            pass
-        site_status = result.status
+        return alexa_rank
-        if site_status != status:
+    @staticmethod
-            if site_status == QueryStatus.UNKNOWN:
+    def extract_mainpage_url(url):
-                msgs = site.absence_strs
+        return "/".join(url.split("/", 3)[:3])
                etype = site.check_type
                logger.warning(
                    "Error while searching '%s' in %s: %s, %s, check type %s",
                    username,
                    site.name,
                    result.context,
                    msgs,
                    etype,
                )
                # don't disable in case of available username
                if status == QueryStatus.CLAIMED:
                    changes["disabled"] = True
            elif status == QueryStatus.CLAIMED:
                logger.warning(
                    f"Not found `{username}` in {site.name}, must be claimed"
                )
                logger.info(results_dict[site.name])
                changes["disabled"] = True
            else:
                logger.warning(f"Found `{username}` in {site.name}, must be available")
                logger.info(results_dict[site.name])
                changes["disabled"] = True
-    logger.info(f"Site {site.name} checking is finished")
+    async def site_self_check(self, site, semaphore, silent=False):
        changes = {
            "disabled": False,
        }
-    return changes
+        check_data = [
-
+            (site.username_claimed, QueryStatus.CLAIMED),
-
+            (site.username_unclaimed, QueryStatus.AVAILABLE),
 def generate_additional_fields_dialog(engine: MaigretEngine, dialog):
    """Ask the user for extra site fields required by the detected engine.

    Only engines whose URL template mentions ``urlSubpath`` trigger a prompt.
    Returns a dict with the ``urlSubpath`` field (prefixed with ``/``) when
    the user entered one, otherwise an empty dict. *dialog* is not used.
    """
    if 'urlSubpath' not in engine.site.get('url', ''):
        return {}

    prompt = (
        'Detected engine suppose additional URL subpath using (/forum/, /blog/, etc). '
        'Enter in manually if it exists: '
    )
    entered_subpath = input(prompt).strip('/')
    if not entered_subpath:
        return {}

    return {'urlSubpath': f'/{entered_subpath}'}
 async def detect_known_engine(
    db, url_exists, url_mainpage, logger
 ) -> List[MaigretSite]:
    """Try to recognize the engine of a site by the contents of its main page.

    The main page is downloaded and compared against the ``presenseStrs`` of
    every engine in *db*. For the first engine whose strings are all present,
    one candidate site per supposed username is built and the list is
    returned. An empty list is returned when the page cannot be fetched or no
    engine matches.
    """
    try:
        response = requests.get(url_mainpage)
        logger.debug(response.text)
    except Exception as error:
        logger.warning(error)
        print("Some error while checking main page")
        return []

    for engine in db.engines:
        markers = engine.__dict__.get("presenseStrs")
        if not (markers and response and response.text):
            continue

        # every marker is checked, no short-circuit on the first miss
        marker_hits = [marker in response.text for marker in markers]
        if not all(marker_hits):
            continue

        engine_name = engine.__dict__.get("name")
        print(f"Detected engine {engine_name} for site {url_mainpage}")

        usernames_to_check = SUPPOSED_USERNAMES
        supposed_username = extract_username_dialog(url_exists)
        if supposed_username:
            # the username taken from the URL is tried first
            usernames_to_check = [supposed_username] + usernames_to_check

        add_fields = generate_additional_fields_dialog(engine, url_exists)

        sites = []
        for candidate in usernames_to_check:
            site_data = {
                "urlMain": url_mainpage,
                "name": url_mainpage.split("//")[1],
                "engine": engine_name,
                "usernameClaimed": candidate,
                "usernameUnclaimed": "noonewouldeverusethis7",
                **add_fields,
            }
            logger.info(site_data)

            maigret_site = MaigretSite(url_mainpage.split("/")[-1], site_data)
            maigret_site.update_from_engine(db.engines_dict[engine_name])
            sites.append(maigret_site)

        return sites

    return []
 def extract_username_dialog(url):
    """Guess a username from the last path segment of *url* and let the user override it.

    Returns the text typed by the user, or the guessed username when the
    user just presses Enter.
    """
    last_segment = url.rstrip("/").split("/")[-1]
    guessed_username = last_segment.strip('@')
    answer = input(
        f'Is "{guessed_username}" a valid username? If not, write it manually: '
    )
    return answer or guessed_username
 async def check_features_manually(
    db, url_exists, url_mainpage, cookie_file, logger, redirects=False
 ):
    """Interactively build a "message" check for a site with no detected engine.

    The pages of an existing and of a non-existing account are downloaded and
    split into tokens; tokens found on only one of the pages are offered to
    the user as presence/absence features.

    Args:
        db: not used in this function body.
        url_exists: URL of an existing account page.
        url_mainpage: main page URL of the site; stored as ``urlMain`` and
            used to derive the site name.
        cookie_file: optional cookies file sent with both requests.
        logger: logger for debug output.
        redirects: whether the HTTP requests follow redirects.

    Returns:
        A MaigretSite built from the collected features.
    """
    # collect optional custom headers until an empty header name is entered
    custom_headers = {}
    while True:
        header_key = input(
            'Specify custom header if you need or just press Enter to skip. Header name: '
        )
        if not header_key:
            break
        header_value = input('Header value: ')
        custom_headers[header_key.strip()] = header_value.strip()
    supposed_username = extract_username_dialog(url_exists)
    non_exist_username = "noonewouldeverusethis7"
    # every occurrence of the username in the URL is substituted
    url_user = url_exists.replace(supposed_username, "{username}")
    url_not_exists = url_exists.replace(supposed_username, non_exist_username)
    # copy the defaults so that HEADERS itself is not modified
    headers = dict(HEADERS)
    headers.update(custom_headers)
    # cookies
    cookie_dict = None
    if cookie_file:
        logger.info(f'Use {cookie_file} for cookies')
        cookie_jar = import_aiohttp_cookies(cookie_file)
        cookie_dict = {c.key: c.value for c in cookie_jar}
    # fetch the page of the existing account
    exists_resp = requests.get(
        url_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
    )
    logger.debug(url_exists)
    logger.debug(exists_resp.status_code)
    logger.debug(exists_resp.text)
    # fetch the page of the account that should not exist
    non_exists_resp = requests.get(
        url_not_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
    )
    logger.debug(url_not_exists)
    logger.debug(non_exists_resp.status_code)
    logger.debug(non_exists_resp.text)
    a = exists_resp.text
    b = non_exists_resp.text
    # split both pages on the SEPARATORS characters and compare the token sets
    tokens_a = set(re.split(f'[{SEPARATORS}]', a))
    tokens_b = set(re.split(f'[{SEPARATORS}]', b))
    a_minus_b = tokens_a.difference(tokens_b)
    b_minus_a = tokens_b.difference(tokens_a)
    if len(a_minus_b) == len(b_minus_a) == 0:
        print("The pages for existing and non-existing account are the same!")
    # empty input falls back to the TOP_FEATURES default
    top_features_count = int(
        input(f"Specify count of features to extract [default {TOP_FEATURES}]: ")
        or TOP_FEATURES
    )
    # tokens present only on the existing account page, ranked by get_match_ratio
    presence_list = sorted(a_minus_b, key=get_match_ratio, reverse=True)[
        :top_features_count
    ]
    print("Detected text features of existing account: " + ", ".join(presence_list))
    features = input("If features was not detected correctly, write it manually: ")
    if features:
        # a manual comma-separated list replaces the detected features
        presence_list = list(map(str.strip, features.split(",")))
    # tokens present only on the non-existing account page, ranked the same way
    absence_list = sorted(b_minus_a, key=get_match_ratio, reverse=True)[
        :top_features_count
    ]
    print("Detected text features of non-existing account: " + ", ".join(absence_list))
    features = input("If features was not detected correctly, write it manually: ")
    if features:
        absence_list = list(map(str.strip, features.split(",")))
    site_data = {
        "absenceStrs": absence_list,
        "presenseStrs": presence_list,
        "url": url_user,
        "urlMain": url_mainpage,
        "usernameClaimed": supposed_username,
        "usernameUnclaimed": non_exist_username,
        "checkType": "message",
    }
    # store headers only when they differ from the defaults
    if headers != HEADERS:
        site_data['headers'] = headers
    # the site name is the last "/"-separated part of the main page URL
    site = MaigretSite(url_mainpage.split("/")[-1], site_data)
    return site
 async def submit_dialog(db, url_exists, cookie_file, logger):
    domain_raw = URL_RE.sub("", url_exists).strip().strip("/")
    domain_raw = domain_raw.split("/")[0]
    logger.info('Domain is %s', domain_raw)
    # check for existence
    matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
    if matched_sites:
        print(
            f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
        )
        status = lambda s: "(disabled)" if s.disabled else ""
        url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
        print(
            "\n".join(
                [
                    f"{site.name} {status(site)}{url_block(site)}"
                    for site in matched_sites
                ]
            )
        )
        if input("Do you want to continue? [yN] ").lower() in "n":
            return False
    url_mainpage = extract_mainpage_url(url_exists)
    print('Detecting site engine, please wait...')
    sites = []
    try:
        sites = await detect_known_engine(db, url_exists, url_mainpage, logger)
    except KeyboardInterrupt:
        print('Engine detect process is interrupted.')
    if not sites:
        print("Unable to detect site engine, lets generate checking features")
        sites = [
            await check_features_manually(
                db, url_exists, url_mainpage, cookie_file, logger
            )
        ]
-    logger.debug(sites[0].__dict__)
+        self.logger.info(f"Checking {site.name}...")
-    sem = asyncio.Semaphore(1)
+        for username, status in check_data:
-
+            results_dict = await maigret(
-    print("Checking, please wait...")
+                username=username,
-    found = False
+                site_dict={site.name: site},
-    chosen_site = None
+                proxy=self.args.proxy,
-    for s in sites:
+                logger=self.logger,
-        chosen_site = s
+                cookies=self.args.cookie_file,
-        result = await site_self_check(s, logger, sem, db)
+                timeout=30,
-        if not result["disabled"]:
+                id_type=site.type,
-            found = True
+                forced=True,
-            break
+                no_progressbar=True,
    if not found:
        print(
            f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}."
        )
        print(
            "Try to run this mode again and increase features count or choose others."
        )
        return False
    else:
        if (
            input(
                f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
            )
-            .lower()
+
-            .strip("y")
+            # don't disable entries with other ids types
-        ):
+            # TODO: make normal checking
            if site.name not in results_dict:
                self.logger.info(results_dict)
                changes["disabled"] = True
                continue
            result = results_dict[site.name]["status"]
            site_status = result.status
            if site_status != status:
                if site_status == QueryStatus.UNKNOWN:
                    msgs = site.absence_strs
                    etype = site.check_type
                    self.logger.warning(
                        "Error while searching '%s' in %s: %s, %s, check type %s",
                        username,
                        site.name,
                        result.context,
                        msgs,
                        etype,
                    )
                    # don't disable in case of available username
                    if status == QueryStatus.CLAIMED:
                        changes["disabled"] = True
                elif status == QueryStatus.CLAIMED:
                    self.logger.warning(
                        f"Not found `{username}` in {site.name}, must be claimed"
                    )
                    self.logger.info(results_dict[site.name])
                    changes["disabled"] = True
                else:
                    self.logger.warning(
                        f"Found `{username}` in {site.name}, must be available"
                    )
                    self.logger.info(results_dict[site.name])
                    changes["disabled"] = True
        self.logger.info(f"Site {site.name} checking is finished")
        return changes
    def generate_additional_fields_dialog(self, engine: MaigretEngine, dialog):
        """Ask the user for extra site fields required by the detected engine.

        Only ``urlSubpath`` is handled: the user is prompted for it when the
        engine's site ``url`` template mentions ``urlSubpath``.

        :param engine: detected engine; ``engine.site`` is read as a mapping.
        :param dialog: not used by this method.
        :return: ``{'urlSubpath': '/<value>'}`` when a subpath was entered,
            otherwise an empty dict.
        """
        url_template = engine.site.get('url', '')
        if 'urlSubpath' not in url_template:
            return {}

        prompt = (
            'Detected engine suppose additional URL subpath using (/forum/, /blog/, etc). '
            'Enter in manually if it exists: '
        )
        # Surrounding slashes are dropped so the stored value always gets
        # exactly one leading slash.
        entered = input(prompt).strip('/')
        if not entered:
            return {}
        return {'urlSubpath': f'/{entered}'}
    async def detect_known_engine(self, url_exists, url_mainpage) -> [List[MaigretSite], str]:
        """Download a page and match it against the engines known to the DB.

        The text behind ``url_mainpage`` is fetched and compared with each
        engine's ``presenseStrs``; the first engine whose strings are all
        contained in the page is used to build candidate sites.

        :param url_exists: URL of an existing account, used to guess a username.
        :param url_mainpage: URL that is downloaded and stored as ``urlMain``.
        :return: ``(sites, page_text)``. ``sites`` holds one candidate
            ``MaigretSite`` per username to try and is empty when the fetch
            failed or no engine matched; ``page_text`` is the decoded page
            (``''`` when the fetch failed).
        """
        # NOTE(review): the return annotation above is a list literal, not a
        # typing construct; it is kept as-is to leave the signature untouched.
        resp_text = ''
        try:
            response = await self.session.get(url_mainpage)
            raw_body = await response.content.read()
            # Undecodable bytes are ignored rather than failing the detection.
            resp_text = raw_body.decode(response.charset or "utf-8", "ignore")
            self.logger.debug(resp_text)
        except Exception as e:
            self.logger.warning(e)
            print("Some error while checking main page")
            return [], resp_text

        for engine in self.db.engines:
            markers = engine.__dict__.get("presenseStrs")
            if not (markers and resp_text):
                continue
            if not all(marker in resp_text for marker in markers):
                continue

            engine_name = engine.__dict__.get("name")
            print(f"Detected engine {engine_name} for site {url_mainpage}")

            candidates = self.settings.supposed_usernames
            guessed_username = self.extract_username_dialog(url_exists)
            if guessed_username:
                # The username taken from the URL is tried first.
                candidates = [guessed_username] + candidates

            add_fields = self.generate_additional_fields_dialog(engine, url_exists)

            sites = []
            for candidate in candidates:
                site_data = {
                    "urlMain": url_mainpage,
                    "name": url_mainpage.split("//")[1],
                    "engine": engine_name,
                    "usernameClaimed": candidate,
                    "usernameUnclaimed": "noonewouldeverusethis7",
                    **add_fields,
                }
                self.logger.info(site_data)

                maigret_site = MaigretSite(url_mainpage.split("/")[-1], site_data)
                maigret_site.update_from_engine(self.db.engines_dict[engine_name])
                sites.append(maigret_site)

            # Only the first matching engine is considered.
            return sites, resp_text

        return [], resp_text
    def extract_username_dialog(self, url):
        """Guess the username from a profile URL and let the user correct it.

        The guess is the last path segment of ``url`` (trailing slashes are
        ignored) with any leading/trailing ``@`` characters removed.

        :param url: profile URL of an existing account.
        :return: the manually entered username if one was typed, otherwise
            the guess taken from the URL.
        """
        supposed_username = url.rstrip("/").rsplit("/", 1)[-1].strip('@')
        # Surrounding whitespace is stripped so that a stray space neither
        # becomes part of the username nor counts as an answer on its own.
        entered_username = input(
            f'Is "{supposed_username}" a valid username? If not, write it manually: '
        ).strip()
        return entered_username or supposed_username
    async def check_features_manually(
        self, url_exists, url_mainpage, cookie_file, redirects=False
    ):
        """Interactively build a "message"-type site entry from two page fetches.

        The page of an existing account and the page of a surely absent one
        are downloaded, split into tokens and diffed; the user then confirms
        or overrides the strings that mark presence / absence of an account.

        :param url_exists: URL of an existing account page.
        :param url_mainpage: main page URL, stored as ``urlMain``.
        :param cookie_file: not used by this method.
        :param redirects: passed to the session as ``allow_redirects``.
        :return: a ``MaigretSite`` built from the collected data.
        """
        custom_headers = {}
        # Custom headers are only asked for in verbose mode; an empty header
        # name ends the loop.
        while self.args.verbose:
            header_key = input(
                'Specify custom header if you need or just press Enter to skip. Header name: '
            )
            if not header_key:
                break
            header_value = input('Header value: ')
            custom_headers[header_key.strip()] = header_value.strip()

        supposed_username = self.extract_username_dialog(url_exists)
        non_exist_username = "noonewouldeverusethis7"

        # Every occurrence of the username in the URL is substituted.
        url_user = url_exists.replace(supposed_username, "{username}")
        url_not_exists = url_exists.replace(supposed_username, non_exist_username)

        # Work on a copy so the class-level defaults are never mutated.
        headers = dict(self.HEADERS)
        headers.update(custom_headers)

        exists_resp = await self.session.get(
            url_exists,
            headers=headers,
            allow_redirects=redirects,
        )
        exists_resp_text = await exists_resp.text()
        self.logger.debug(url_exists)
        self.logger.debug(exists_resp.status)
        self.logger.debug(exists_resp_text)

        non_exists_resp = await self.session.get(
            url_not_exists,
            headers=headers,
            allow_redirects=redirects,
        )
        non_exists_resp_text = await non_exists_resp.text()
        self.logger.debug(url_not_exists)
        self.logger.debug(non_exists_resp.status)
        self.logger.debug(non_exists_resp_text)

        # Tokens unique to each page are the candidate detection features.
        tokens_a = set(re.split(f'[{self.SEPARATORS}]', exists_resp_text))
        tokens_b = set(re.split(f'[{self.SEPARATORS}]', non_exists_resp_text))

        a_minus_b = tokens_a.difference(tokens_b)
        b_minus_a = tokens_b.difference(tokens_a)

        if not a_minus_b and not b_minus_a:
            print("The pages for existing and non-existing account are the same!")

        count_answer = input(
            f"Specify count of features to extract [default {self.TOP_FEATURES}]: "
        )
        try:
            top_features_count = int(count_answer or self.TOP_FEATURES)
        except ValueError:
            # A non-numeric answer falls back to the default instead of
            # aborting the whole dialog with a traceback.
            top_features_count = int(self.TOP_FEATURES)

        match_fun = get_match_ratio(self.settings.presence_strings)

        presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[
            :top_features_count
        ]

        print("Detected text features of existing account: " + ", ".join(presence_list))
        features = input("If features was not detected correctly, write it manually: ")

        # Empty items (e.g. from a trailing comma) are dropped: an empty
        # string is contained in every page and would match unconditionally.
        manual_presence = [item.strip() for item in features.split(",") if item.strip()]
        if manual_presence:
            presence_list = manual_presence

        absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
            :top_features_count
        ]
        print(
            "Detected text features of non-existing account: " + ", ".join(absence_list)
        )
        features = input("If features was not detected correctly, write it manually: ")

        manual_absence = [item.strip() for item in features.split(",") if item.strip()]
        if manual_absence:
            absence_list = manual_absence

        site_data = {
            "absenceStrs": absence_list,
            "presenseStrs": presence_list,
            "url": url_user,
            "urlMain": url_mainpage,
            "usernameClaimed": supposed_username,
            "usernameUnclaimed": non_exist_username,
            "checkType": "message",
        }

        # Headers are stored only when they differ from the defaults.
        if headers != self.HEADERS:
            site_data['headers'] = headers

        site = MaigretSite(url_mainpage.split("/")[-1], site_data)
        return site
    async def dialog(self, url_exists, cookie_file):
        """Run the interactive "submit a new site" flow for one account URL.

        Steps: warn about sites already in the DB for the same domain, try to
        detect a known engine, otherwise build detection features manually,
        self-check the candidates and, on confirmation, store the first
        working one in the DB.

        :param url_exists: URL of an existing account on the site to add.
        :param cookie_file: forwarded to ``check_features_manually``.
        :return: ``True`` when the site was saved to the DB, ``False`` when
            the user declined or no working check parameters were found.
        """
        domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
        domain_raw = domain_raw.split("/")[0]
        self.logger.info('Domain is %s', domain_raw)

        # check for existence
        matched_sites = [
            site for site in self.db.sites if domain_raw in site.url_main + site.url
        ]

        if matched_sites:
            print(
                f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
            )
            listing = [
                f"{site.name} {'(disabled)' if site.disabled else ''}"
                f"\n\t{site.url_main}\n\t{site.url}"
                for site in matched_sites
            ]
            print("\n".join(listing))

            # Substring test: both an empty answer and "n" stop here.
            if input("Do you want to continue? [yN] ").lower() in "n":
                return False

        url_mainpage = self.extract_mainpage_url(url_exists)

        print('Detecting site engine, please wait...')
        sites = []
        # Initialised up front so the Cloudflare check below still works when
        # engine detection is interrupted before returning anything.
        text = ''
        try:
            sites, text = await self.detect_known_engine(url_exists, url_exists)
        except KeyboardInterrupt:
            print('Engine detect process is interrupted.')

        if 'cloudflare' in text.lower():
            print('Cloudflare protection detected. I will use cloudscraper for futher work')
            # self.session = CloudflareSession()

        if not sites:
            print("Unable to detect site engine, lets generate checking features")

            redirects = False
            if self.args.verbose:
                redirects = 'y' in input('Should we do redirects automatically? [yN] ').lower()

            sites = [
                await self.check_features_manually(
                    url_exists, url_mainpage, cookie_file, redirects,
                )
            ]

        self.logger.debug(sites[0].__dict__)

        sem = asyncio.Semaphore(1)

        print("Checking, please wait...")
        found = False
        chosen_site = None
        # The first candidate that survives the self-check is kept.
        for candidate in sites:
            chosen_site = candidate
            result = await self.site_self_check(candidate, sem)
            if not result["disabled"]:
                found = True
                break

        if not found:
            print(
                f"Sorry, we couldn't find params to detect account presence/absence in {chosen_site.name}."
            )
            print(
                "Try to run this mode again and increase features count or choose others."
            )
            self.logger.debug(json.dumps(chosen_site.json))
            return False

        # An empty answer or one made only of "y" characters confirms the
        # save; anything else declines.
        if (
            input(
                f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
            )
            .lower()
            .strip("y")
        ):
            return False

        if self.args.verbose:
            source = input("Name the source site if it is mirror: ")
            if source:
                chosen_site.source = source

        chosen_site.name = input("Change site name if you want: ") or chosen_site.name
        # Blank items are dropped so an empty answer yields no tags at all
        # instead of a single empty-string tag.
        chosen_site.tags = [
            tag.strip() for tag in input("Site tags: ").split(',') if tag.strip()
        ]
        rank = Submitter.get_alexa_rank(chosen_site.url_main)
        if rank:
            print(f'New alexa rank: {rank}')
            chosen_site.alexa_rank = rank

        self.logger.debug(chosen_site.json)
        site_data = chosen_site.strip_engine_data()
        self.logger.debug(site_data.json)
        self.db.update_site(site_data)
        return True
@@ -1,4 +1,6 @@
 # coding: utf8
 import ast
 import difflib
 import re
 import random
 from typing import Any
@@ -40,7 +42,7 @@ def enrich_link_str(link: str) -> str:
 class URLMatcher:
-    _HTTP_URL_RE_STR = "^https?://(www.)?(.+)$"
+    _HTTP_URL_RE_STR = "^https?://(www.|m.)?(.+)$"
    HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
    UNSAFE_SYMBOLS = ".?"
@@ -64,7 +66,7 @@ class URLMatcher:
        )
        regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
-        return re.compile(regexp_str)
+        return re.compile(regexp_str, re.IGNORECASE)
 def ascii_data_display(data: str) -> Any:
@@ -72,15 +74,22 @@ def ascii_data_display(data: str) -> Any:
 def get_dict_ascii_tree(items, prepend="", new_line=True):
    new_result = b'\xe2\x94\x9c'.decode()
    new_line = b'\xe2\x94\x80'.decode()
    last_result = b'\xe2\x94\x94'.decode()
    skip_result = b'\xe2\x94\x82'.decode()
    text = ""
    for num, item in enumerate(items):
-        box_symbol = "┣╸" if num != len(items) - 1 else "┗╸"
+        box_symbol = (
            new_result + new_line if num != len(items) - 1 else last_result + new_line
        )
        if type(item) == tuple:
            field_name, field_value = item
            if field_value.startswith("['"):
                is_last_item = num == len(items) - 1
-                prepend_symbols = " " * 3 if is_last_item else " ┃ "
+                prepend_symbols = " " * 3 if is_last_item else f" {skip_result} "
                data = ascii_data_display(field_value)
                field_value = get_dict_ascii_tree(data, prepend_symbols)
            text += f"\n{prepend}{box_symbol}{field_name}: {field_value}"
@@ -95,3 +104,18 @@ def get_dict_ascii_tree(items, prepend="", new_line=True):
 def get_random_user_agent():
    return random.choice(DEFAULT_USER_AGENTS)
 def get_match_ratio(base_strs: list):
    """Build a scoring function measuring similarity to reference strings.

    :param base_strs: reference strings to compare against.
    :return: a function ``f(s)`` returning the highest case-insensitive
        ``difflib.SequenceMatcher`` ratio between ``s`` and any reference
        string, rounded to two decimals (``0.0`` when ``base_strs`` is empty).
    """
    # Lower-case the references once instead of on every scored string.
    lowered_bases = [base.lower() for base in base_strs]

    def get_match_inner(s: str):
        needle = s.lower()
        best = max(
            (
                difflib.SequenceMatcher(a=needle, b=base).ratio()
                for base in lowered_bases
            ),
            # Without a default, an empty reference list raises ValueError.
            default=0.0,
        )
        return round(best, 2)

    return get_match_inner
@@ -0,0 +1,7 @@
 #!/usr/bin/env python3
 # Standalone launcher: hands the result of maigret.cli() to asyncio.run().
 import asyncio
 import maigret
 # Start the CLI only when executed as a script, not when imported.
 if __name__ == "__main__":
    asyncio.run(maigret.cli())
@@ -0,0 +1,55 @@
 # -*- mode: python ; coding: utf-8 -*-
 # PyInstaller spec that builds a console executable named
 # 'maigret_standalone' from the entry script 'maigret_standalone.py'.
 # Analysis, PYZ and EXE are not imported here; presumably PyInstaller
 # provides them when it executes the spec -- TODO confirm.
 from PyInstaller.utils.hooks import collect_all
 # Accumulators filled from collect_all() for each fully bundled package.
 datas = []
 binaries = []
 hiddenimports = []
 full_import_modules = ['maigret', 'socid_extractor', 'arabic_reshaper', 'pyvis', 'reportlab.graphics.barcode']
 for module in full_import_modules:
    # The three items of the result are appended to datas, binaries and
    # hiddenimports, in that order.
    tmp_ret = collect_all(module)
    datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 # NOTE(review): several entries below look like distribution names
 # (e.g. 'beautifulsoup4', 'python-dateutil', 'typing-extensions') rather
 # than importable module names -- confirm PyInstaller resolves them.
 hiddenimports += ['PySocks', 'beautifulsoup4', 'python-dateutil',
                  'future-annotations', 'six', 'python-bidi',
                  'typing-extensions', 'attrs', 'torrequest']
 block_cipher = None
 # Dependency analysis of the entry script, seeded with everything collected above.
 a = Analysis(['maigret_standalone.py'],
             pathex=[],
             binaries=binaries,
             datas=datas,
             hiddenimports=hiddenimports,
             hookspath=[],
             hooksconfig={},
             runtime_hooks=[],
             excludes=[],
             win_no_prefer_redirects=False,
             win_private_assemblies=False,
             cipher=block_cipher,
             noarchive=False)
 pyz = PYZ(a.pure, a.zipped_data,
             cipher=block_cipher)
 # Scripts, binaries, zipfiles and datas are all passed straight to EXE;
 # no separate collection step is defined in this spec.
 exe = EXE(pyz,
          a.scripts,
          a.binaries,
          a.zipfiles,
          a.datas,  
          [],
          name='maigret_standalone',
          debug=False,
          bootloader_ignore_signals=False,
          strip=False,
          upx=True,
          upx_exclude=[],
          runtime_tmpdir=None,
          console=True,
          disable_windowed_traceback=False,
          target_arch=None,
          codesign_identity=None,
          entitlements_file=None )
@@ -0,0 +1,5 @@
 maigret @ https://github.com/soxoj/maigret/archive/refs/heads/main.zip
 pefile==2022.5.30
 psutil==5.9.2
 pyinstaller @ https://github.com/pyinstaller/pyinstaller/archive/develop.zip
 pywin32-ctypes==0.2.0
@@ -3,3 +3,4 @@
 filterwarnings =
    error
    ignore::UserWarning
 asyncio_mode=auto
@@ -1,39 +1,39 @@
 aiodns==3.0.0
-aiohttp==3.7.4
+aiohttp==3.8.3
-aiohttp-socks==0.5.5
+aiohttp-socks==0.7.1
-arabic-reshaper==2.1.1
+arabic-reshaper==2.1.4
-async-timeout==3.0.1
+async-timeout==4.0.2
-attrs==20.3.0
+attrs==22.1.0
-beautifulsoup4==4.9.3
+certifi==2022.9.24
-bs4==0.0.1
+chardet==5.0.0
-certifi==2020.12.5
+colorama==0.4.6
 chardet==3.0.4
 colorama==0.4.4
 python-dateutil==2.8.1
 future==0.18.2
 future-annotations==1.0.0
 html5lib==1.1
-idna==2.10
+idna==3.4
-Jinja2==2.11.3
+Jinja2==3.1.2
-lxml==4.6.3
+lxml==4.9.1
-MarkupSafe==1.1.1
+MarkupSafe==2.1.1
-mock==4.0.2
+mock==4.0.3
-multidict==5.1.0
+multidict==6.0.2
-pycountry==20.7.3
+pycountry==22.3.5
-PyPDF2==1.26.0
+PyPDF2==2.10.8
 PySocks==1.7.1
 python-bidi==0.4.2
-python-socks==1.1.2
+requests==2.28.1
 requests>=2.24.0
 requests-futures==1.0.0
-six==1.15.0
+six==1.16.0
 socid-extractor>=0.0.21
-soupsieve==2.1
+soupsieve==2.3.2.post1
-stem==1.8.0
+stem==1.8.1
 torrequest==0.1.0
-tqdm==4.55.0
+tqdm==4.64.1
-typing-extensions==3.7.4.3
+typing-extensions==4.4.0
 webencodings==0.5.1
-xhtml2pdf==0.2.5
+xhtml2pdf==0.2.8
 XMind==1.2.0
-yarl==1.6.3
+yarl==1.8.1
 networkx==2.5.1
 pyvis==0.2.1
 reportlab==3.6.11
 cloudscraper==1.2.66
@@ -5,14 +5,13 @@ from setuptools import (
 with open('README.md') as fh:
-    readme = fh.read()
+    long_description = fh.read()
    long_description = readme.replace('./', 'https://raw.githubusercontent.com/soxoj/maigret/main/')
 with open('requirements.txt') as rf:
    requires = rf.read().splitlines()
 setup(name='maigret',
-      version='0.3.0',
+      version='0.4.4',
      description='Collect a dossier on a person by username from a huge number of sites',
      long_description=long_description,
      long_description_content_type="text/markdown",
@@ -0,0 +1,43 @@
 name: maigret2
 adopt-info: maigret2
 summary: SOCMINT / Instagram
 description: |
  Test Test Test
 license: MIT
 base: core20
 grade: stable
 confinement: strict
 compression: lzo
 architectures:
  - build-on: amd64
 apps:
  maigret2:
    command: bin/maigret
    environment:
      LC_ALL: C.UTF-8
    plugs:
      - home
      - network
 parts:
  maigret2:
    plugin: python
    source: https://github.com/soxoj/maigret
    source-type: git
    build-packages:
      - python3-pip
      - python3-six
      - python3
    stage-packages:
      - python3
      - python3-six
    override-pull: |
      snapcraftctl pull
      snapcraftctl set-version "$(git describe --tags | sed 's/^v//' | cut -d "-" -f1)"
@@ -1,6 +1,8 @@
-flake8==3.8.4
+reportlab==3.6.11
-pytest==6.2.4
+flake8==5.0.4
-pytest-asyncio==0.14.0
+pytest==7.2.0
-pytest-cov==2.10.1
+pytest-asyncio==0.16.0;python_version<"3.7"
-pytest-httpserver==1.0.0
+pytest-asyncio==0.20.1;python_version>="3.7"
-pytest-rerunfailures==9.1.1
+pytest-cov==4.0.0
 pytest-httpserver==1.0.6
 pytest-rerunfailures==10.2
@@ -1,4 +0,0 @@
 #!/bin/sh
 coverage run --source=./maigret -m pytest tests
 coverage report -m
 coverage html
@@ -7,10 +7,12 @@ from _pytest.mark import Mark
 from maigret.sites import MaigretDatabase
 from maigret.maigret import setup_arguments_parser
 from maigret.settings import Settings
 CUR_PATH = os.path.dirname(os.path.realpath(__file__))
 JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
 SETTINGS_FILE = os.path.join(CUR_PATH, '../maigret/resources/settings.json')
 TEST_JSON_FILE = os.path.join(CUR_PATH, 'db.json')
 LOCAL_TEST_JSON_FILE = os.path.join(CUR_PATH, 'local.json')
 empty_mark = Mark('', (), {})
@@ -59,7 +61,9 @@ def reports_autoclean():
@pytest.fixture(scope='session')
 def argparser():
-    return setup_arguments_parser()
+    settings = Settings()
    settings.load([SETTINGS_FILE])
    return setup_arguments_parser(settings)
@pytest.fixture(scope="session")
@@ -7,12 +7,13 @@ DEFAULT_ARGS: Dict[str, Any] = {
    'connections': 100,
    'cookie_file': None,
    'csv': False,
-    'db_file': None,
+    'db_file': 'resources/data.json',
    'debug': False,
    'disable_extracting': False,
    'disable_recursive_search': False,
    'folderoutput': 'reports',
    'html': False,
    'graph': False,
    'id_type': 'username',
    'ignore_ids_list': [],
    'info': False,
@@ -1,15 +1,16 @@
 """Maigret data test functions"""
 from maigret.utils import is_country_tag
 from maigret.sites import SUPPORTED_TAGS
 def test_tags_validity(default_db):
    unknown_tags = set()
    tags = default_db._tags
    for site in default_db.sites:
        for tag in filter(lambda x: not is_country_tag(x), site.tags):
-            if tag not in SUPPORTED_TAGS:
+            if tag not in tags:
                unknown_tags.add(tag)
    assert unknown_tags == set()
@@ -63,7 +63,10 @@ async def test_asyncio_progressbar_queue_executor():
    assert executor.execution_time < 0.5
    executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=5)
-    assert await executor.run(tasks) == [0, 3, 6, 1, 4, 7, 9, 2, 5, 8]
+    assert await executor.run(tasks) in (
        [0, 3, 6, 1, 4, 7, 9, 2, 5, 8],
        [0, 3, 6, 1, 4, 9, 7, 2, 5, 8],
    )
    assert executor.execution_time > 0.3
    assert executor.execution_time < 0.4
@@ -9,7 +9,6 @@ from maigret.maigret import self_check, maigret
 from maigret.maigret import (
    extract_ids_from_page,
    extract_ids_from_results,
    extract_ids_from_url,
 )
 from maigret.sites import MaigretSite
 from maigret.result import QueryResult, QueryStatus
@@ -20,7 +19,7 @@ RESULTS_EXAMPLE = {
        'cookies': None,
        'parsing_enabled': False,
        'url_main': 'https://www.reddit.com/',
-        'username': 'Facebook',
+        'username': 'Skyeng',
    },
    'GooglePlayStore': {
        'cookies': None,
@@ -29,8 +28,8 @@ RESULTS_EXAMPLE = {
        'parsing_enabled': False,
        'rank': 1,
        'url_main': 'https://play.google.com/store',
-        'url_user': 'https://play.google.com/store/apps/developer?id=Facebook',
+        'url_user': 'https://play.google.com/store/apps/developer?id=Skyeng',
-        'username': 'Facebook',
+        'username': 'Skyeng',
    },
 }
@@ -49,11 +48,12 @@ def test_self_check_db_positive_disable(test_db):
@pytest.mark.slow
@pytest.mark.skip(reason="broken, fixme")
 def test_self_check_db_positive_enable(test_db):
    logger = Mock()
    test_db.sites[0].disabled = True
-    test_db.sites[0].username_claimed = 'Facebook'
+    test_db.sites[0].username_claimed = 'Skyeng'
    assert test_db.sites[0].disabled is True
    loop = asyncio.get_event_loop()
@@ -79,12 +79,13 @@ def test_self_check_db_negative_disabled(test_db):
    assert test_db.sites[0].disabled is True
@pytest.mark.skip(reason='broken, fixme')
@pytest.mark.slow
 def test_self_check_db_negative_enabled(test_db):
    logger = Mock()
    test_db.sites[0].disabled = False
-    test_db.sites[0].username_claimed = 'Facebook'
+    test_db.sites[0].username_claimed = 'Skyeng'
    assert test_db.sites[0].disabled is False
    loop = asyncio.get_event_loop()
@@ -96,10 +97,11 @@ def test_self_check_db_negative_enabled(test_db):
@pytest.mark.slow
@pytest.mark.skip(reason="broken, fixme")
 def test_maigret_results(test_db):
    logger = Mock()
-    username = 'Facebook'
+    username = 'Skyeng'
    loop = asyncio.get_event_loop()
    results = loop.run_until_complete(
        maigret(username, site_dict=test_db.sites_dict, logger=logger, timeout=30)
@@ -144,18 +146,18 @@ def test_maigret_results(test_db):
 def test_extract_ids_from_url(default_db):
-    assert extract_ids_from_url('https://www.reddit.com/user/test', default_db) == {
+    assert default_db.extract_ids_from_url('https://www.reddit.com/user/test') == {
        'test': 'username'
    }
-    assert extract_ids_from_url('https://vk.com/id123', default_db) == {'123': 'vk_id'}
+    assert default_db.extract_ids_from_url('https://vk.com/id123') == {'123': 'vk_id'}
-    assert extract_ids_from_url('https://vk.com/ida123', default_db) == {
+    assert default_db.extract_ids_from_url('https://vk.com/ida123') == {
        'ida123': 'username'
    }
-    assert extract_ids_from_url(
+    assert default_db.extract_ids_from_url(
-        'https://my.mail.ru/yandex.ru/dipres8904/', default_db
+        'https://my.mail.ru/yandex.ru/dipres8904/'
    ) == {'dipres8904': 'username'}
-    assert extract_ids_from_url(
+    assert default_db.extract_ids_from_url(
-        'https://reviews.yandex.ru/user/adbced123', default_db
+        'https://reviews.yandex.ru/user/adbced123'
    ) == {'adbced123': 'yandex_public_id'}
@@ -2,6 +2,7 @@
 import copy
 import json
 import os
 import pytest
 from io import StringIO
 import xmind
@@ -424,6 +425,7 @@ def test_html_report_broken():
    assert SUPPOSED_BROKEN_INTERESTS in report_text
@pytest.mark.skip(reason='connection reset, fixme')
 def test_pdf_report():
    report_name = 'report_test.pdf'
    context = generate_report_context(TEST)
@@ -115,7 +115,7 @@ def test_site_url_detector():
    assert (
        db.sites[0].url_regexp.pattern
-        == r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
+        == r'^https?://(www.|m.)?forum\.amperka\.ru/members/\?username=(.+?)$'
    )
    assert (
        db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
@@ -179,3 +179,26 @@ def test_ranked_sites_dict_id_type():
    assert len(db.ranked_sites_dict()) == 2
    assert len(db.ranked_sites_dict(id_type='username')) == 2
    assert len(db.ranked_sites_dict(id_type='gaia_id')) == 1
 def test_get_url_template():
    """Check ``MaigretSite.get_url_template`` for a path-style and a subdomain-style URL."""
    # (site "url" value, expected template description)
    expectations = [
        (
            "{urlMain}{urlSubpath}/members/?username={username}",
            "{urlMain}{urlSubpath}/members/?username={username} (no engine)",
        ),
        ("https://{username}.ya.ru", "SUBDOMAIN"),
    ]

    for url, expected_template in expectations:
        site = MaigretSite("test", {"urlMain": "https://ya.ru/", "url": url})
        assert site.get_url_template() == expected_template
@@ -8,6 +8,7 @@ from maigret.utils import (
    enrich_link_str,
    URLMatcher,
    get_dict_ascii_tree,
    get_match_ratio,
 )
@@ -72,7 +73,7 @@ def test_url_extract_main_part():
        ['/', ''],
    ]
-    url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
+    url_regexp = re.compile(r'^https?://(www\.)?flickr.com/photos/(.+?)$')
    # combine parts variations
    for url_parts in itertools.product(*parts):
        url = ''.join(url_parts)
@@ -97,7 +98,7 @@ def test_url_make_profile_url_regexp():
        # ensure all combinations match pattern
        assert (
            URLMatcher.make_profile_url_regexp(url).pattern
-            == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
+            == r'^https?://(www.|m.)?flickr\.com/photos/(.+?)$'
        )
@@ -122,17 +123,23 @@ def test_get_dict_ascii_tree():
    assert (
        ascii_tree
        == """
- ┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
+ ├─uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
- ┣╸legacy_id: 26403415
+ ├─legacy_id: 26403415
- ┣╸username: alexaimephotographycars
+ ├─username: alexaimephotographycars
- ┣╸name: Alex Aimé
+ ├─name: Alex Aimé
- ┣╸links: 
+ ├─links: 
- ┃ ┗╸ www.instagram.com/street.reality.photography/
+ │ └─ www.instagram.com/street.reality.photography/
- ┣╸created_at: 2018-05-04T10:17:01.000+0000
+ ├─created_at: 2018-05-04T10:17:01.000+0000
- ┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
+ ├─image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b
- ┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
+ ├─image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201
- ┣╸website: www.instagram.com/street.reality.photography/
+ ├─website: www.instagram.com/street.reality.photography/
- ┣╸facebook_link:  www.instagram.com/street.reality.photography/
+ ├─facebook_link:  www.instagram.com/street.reality.photography/
- ┣╸instagram_username: Street.Reality.Photography
+ ├─instagram_username: Street.Reality.Photography
- ┗╸twitter_username: Alexaimephotogr"""
+ └─twitter_username: Alexaimephotogr"""
    )
 def test_get_match_ratio():
    fun = get_match_ratio(["test", "maigret", "username"])
    assert fun("test") == 1
@@ -3,7 +3,7 @@ import random
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 from maigret.maigret import MaigretDatabase
-from maigret.submit import get_alexa_rank
+from maigret.submit import Submitter
 def update_tags(site):
@@ -22,7 +22,7 @@ def update_tags(site):
        site.disabled = True
    print(f'Old alexa rank: {site.alexa_rank}')
-    rank = get_alexa_rank(site.url_main)
+    rank = Submitter.get_alexa_rank(site.url_main)
    if rank:
        print(f'New alexa rank: {rank}')
        site.alexa_rank = rank
@@ -36,6 +36,7 @@ if __name__ == '__main__':
    parser.add_argument("--base","-b", metavar="BASE_FILE",
                        dest="base_file", default="maigret/resources/data.json",
                        help="JSON file with sites data to update.")
    parser.add_argument("--name", help="Name of site to check")
    pool = list()
@@ -45,12 +46,17 @@ if __name__ == '__main__':
    db.load_from_file(args.base_file).sites
    while True:
-        site = random.choice(db.sites)
+        if args.name:
            sites = list(db.ranked_sites_dict(names=[args.name]).values())
            site = random.choice(sites)
        else:
            site = random.choice(db.sites)
        if site.engine == 'uCoz':
            continue
-        if not 'in' in site.tags:
+        # if not 'in' in site.tags:
-            continue
+        #     continue
        update_tags(site)
@@ -0,0 +1,152 @@
 #!/usr/bin/env python3
 """Maigret: engine detection for sites in the database
 This script looks for sites without an assigned engine whose main page
 mentions a known engine name, runs the Maigret self-check on them and
 saves the sites that pass back to the sites data file.
 """
 import aiohttp
 import asyncio
 import json
 import sys
 import requests
 import logging
 import threading
 import xml.etree.ElementTree as ET
 from datetime import datetime
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 import tqdm.asyncio
 from maigret.maigret import get_response, site_self_check
 from maigret.sites import MaigretSite, MaigretDatabase, MaigretEngine
 from maigret.utils import CaseConverter
 async def check_engine_of_site(site_name, sites_with_engines, future, engine_name, semaphore, logger):
    """Report whether the fetched page of a site mentions the engine name.

    The pending request ``future`` is resolved through ``get_response`` while
    holding ``semaphore``. When the returned page text contains
    ``engine_name``, ``site_name`` is appended to ``sites_with_engines``.

    Returns:
        True when the engine name was found in the page text, False otherwise.
    """
    async with semaphore:
        # get_response yields a 4-item result; only the page text is used here.
        page_text, _status_code, _error_text, _exception_text = await get_response(
            request_future=future,
            site_name=site_name,
            logger=logger,
        )

        found = bool(page_text) and engine_name in page_text
        if found:
            sites_with_engines.append(site_name)
        return found
 if __name__ == '__main__':
    # Script entry point: for every engine in the data file, find sites that
    # have no engine assigned but whose main page mentions the engine name,
    # self-check them and save the ones that pass back to the data file.
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
                            )
    parser.add_argument("--base","-b", metavar="BASE_FILE",
                        dest="base_file", default="maigret/resources/data.json",
                        help="JSON file with sites data to update.")
    parser.add_argument('--engine', '-e', help='check only selected engine', type=str)
    args = parser.parse_args()
    log_level = logging.INFO
    logging.basicConfig(
        format='[%(filename)s:%(lineno)d] %(levelname)-3s  %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=log_level
    )
    logger = logging.getLogger('engines-check')
    logger.setLevel(log_level)
    # Load the sites database and index the site objects by name.
    db = MaigretDatabase()
    sites_subset = db.load_from_file(args.base_file).sites
    sites = {site.name: site for site in sites_subset}
    # The raw engine definitions are read straight from the same JSON file.
    with open(args.base_file, "r", encoding="utf-8") as data_file:
        sites_info = json.load(data_file)
        engines = sites_info['engines']
    for engine_name, engine_data in engines.items():
        # Honour the --engine filter when it is given.
        if args.engine and args.engine != engine_name:
            continue
        # Engines without 'presenseStrs' are reported and skipped.
        if not 'presenseStrs' in engine_data:
            print(f'No features to automatically detect sites on engine {engine_name}')
            continue
        engine_obj = MaigretEngine(engine_name, engine_data)
        # setup connections for checking both engine and usernames
        connector = aiohttp.TCPConnector(ssl=False)
        # NOTE(review): the connector is already created with ssl=False; this
        # attribute assignment looks redundant - confirm before removing.
        connector.verify_ssl=False
        session = aiohttp.ClientSession(connector=connector)
        # At most 100 engine checks hold the semaphore at the same time.
        sem = asyncio.Semaphore(100)
        loop = asyncio.get_event_loop()
        tasks = []
        # check sites without engine if they look like sites on this engine
        new_engine_sites = []
        for site_name, site_data in sites.items():
            if site_data.engine:
                continue
            # The request object is not awaited here; it is handed to
            # check_engine_of_site, which resolves it via get_response.
            future = session.get(url=site_data.url_main,
                                 allow_redirects=True,
                                 timeout=10,
                                 )
            check_engine_coro = check_engine_of_site(site_name, new_engine_sites, future, engine_name, sem, logger)
            future = asyncio.ensure_future(check_engine_coro)
            tasks.append(future)
        # progress bar
        for f in tqdm.asyncio.tqdm.as_completed(tasks):
            loop.run_until_complete(f)
        print(f'Total detected {len(new_engine_sites)} sites on engine {engine_name}')
        # dict with new found engine sites
        new_sites = {site_name: sites[site_name] for site_name in new_engine_sites}
        # update sites obj from engine
        for site_name, site in new_sites.items():
            # NOTE(review): presumably clears a stored request object before
            # the site is re-checked - confirm against MaigretSite.
            site.request_future = None
            site.engine = engine_name
            site.update_from_engine(engine_obj)
        async def update_site_data(site_name, site_data, all_sites, logger, no_progressbar):
            # Run the self-check for one site and merge its result into all_sites.
            updates = await site_self_check(site_name, site_data, logger, no_progressbar)
            all_sites[site_name].update(updates)
        tasks = []
        # for new_site_name, new_site_data in new_sites.items():
            # coro = update_site_data(new_site_name, new_site_data, new_sites, logger)
            # future = asyncio.ensure_future(coro)
            # tasks.append(future)
        # asyncio.gather(*tasks)
        # Self-checks are run one at a time, each driven to completion on the loop.
        for new_site_name, new_site_data in new_sites.items():
            coro = update_site_data(new_site_name, new_site_data, new_sites, logger, no_progressbar=True)
            loop.run_until_complete(coro)
        updated_sites_count = 0
        for s in new_sites:
            site = new_sites[s]
            site.request_future = None
            # Sites left disabled after the self-check are reported, not saved.
            if site.disabled:
                print(f'{site.name} failed username checking of engine {engine_name}')
                continue
            site = site.strip_engine_data()
            db.update_site(site)
            updated_sites_count += 1
            # The data file is rewritten after every accepted site.
            db.save_to_file(args.base_file)
            print(f'Site "{s}": ' + json.dumps(site.json, indent=4))
        print(f'Updated total {updated_sites_count} sites!')
        # NOTE(review): no further check follows this message in the visible
        # code; the session is simply closed next.
        print(f'Checking all sites on engine {engine_name}')
        loop.run_until_complete(session.close())
    print("\nFinished updating supported site listing!")
@@ -0,0 +1,280 @@
 #!/usr/bin/env python3
 import json
 import random
 import re
 import tqdm.asyncio
 from mock import Mock
 import requests
 from maigret.maigret import *
 from maigret.result import QueryStatus
 from maigret.sites import MaigretSite
 # Matches the URL scheme and an optional "www." prefix so they can be stripped
 # when deriving a bare domain name.
 URL_RE = re.compile(r"https?://(www\.)?")
 # Timeout handed to maigret() and to the as_completed() loop that drives the
 # checks (presumably seconds - confirm against maigret()).
 TIMEOUT = 200
 async def maigret_check(site, site_data, username, status, logger):
    """Run a single Maigret check and compare the result to the expected status.

    Args:
        site: name of the site, used as the key of the checked-sites mapping.
        site_data: site object passed to ``maigret`` for that name.
        username: username to look up.
        status: the ``QueryStatus`` the lookup is expected to produce.
        logger: logger that receives debug diagnostics.

    Returns:
        ``site_data`` when the resulting status equals ``status``,
        ``False`` otherwise (including when the result is UNKNOWN).
    """
    # The notifier is replaced by a Mock object for this check.
    query_notify = Mock()
    logger.debug(f'Checking {site}...')

    results = await maigret(
        username,
        {site: site_data},
        logger,
        query_notify,
        timeout=TIMEOUT,
        forced=True,
        no_progressbar=True,
    )

    site_result = results[site]
    actual_status = site_result['status'].status

    if actual_status != status:
        if actual_status == QueryStatus.UNKNOWN:
            # The check itself failed; log the error context and give up.
            context = site_result['status'].context
            logger.debug(f'Error while searching {username} in {site}, must be claimed. Context: {context}')
            return False

        if status == QueryStatus.CLAIMED:
            logger.debug(f'Not found {username} in {site}, must be claimed')
        else:
            logger.debug(f'Found {username} in {site}, must be available')
        logger.debug(site_result)
        return False

    return site_data
 async def check_and_add_maigret_site(site_data, semaphore, logger, ok_usernames, bad_usernames):
    """Validate a candidate site and store it in the database when it passes.

    While holding ``semaphore``, the site is checked with each of
    ``ok_usernames`` until one is reported as claimed, and with each of
    ``bad_usernames`` until one is reported as available. Only when both
    checks succeed is the site stripped of engine data, added to the
    module-level ``db``, written to ``args.base_file`` and appended to
    ``ok_sites``.
    """
    async with semaphore:
        sitename = site_data.name

        # Positive case: some known username must be detected as claimed.
        claimed_ok = False
        for candidate in ok_usernames:
            site_data.username_claimed = candidate
            if await maigret_check(sitename, site_data, candidate, QueryStatus.CLAIMED, logger):
                claimed_ok = True
                break

        # Negative case: some unlikely username must be detected as available.
        available_ok = False
        for candidate in bad_usernames:
            site_data.username_unclaimed = candidate
            if await maigret_check(sitename, site_data, candidate, QueryStatus.AVAILABLE, logger):
                available_ok = True
                break

        if not (claimed_ok and available_ok):
            return

        site_data = site_data.strip_engine_data()
        db.update_site(site_data)
        print(site_data.json)
        try:
            db.save_to_file(args.base_file)
        except Exception as e:
            # A failed save is logged; the site is still reported below.
            logging.error(e, exc_info=True)
        print(f'Saved new site {sitename}...')
        ok_sites.append(site_data)
 if __name__ == '__main__':
    # Script entry point. Reads base URLs from URLS_FILE, guesses the engine of
    # every site that is not yet in the Maigret database, and saves the sites
    # that pass both the "claimed" and the "available" username checks.
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("--base", "-b", metavar="BASE_FILE",
                        dest="base_file", default="maigret/resources/data.json",
                        help="JSON file with sites data to update.")
    parser.add_argument("--add-engine", dest="add_engine", help="Additional engine to check")
    parser.add_argument("--only-engine", dest="only_engine", help="Use only this engine from detected to check")
    parser.add_argument('--check', help='only check sites in database', action='store_true')
    parser.add_argument('--random', help='shuffle list of urls', action='store_true', default=False)
    parser.add_argument('--top', help='top count of records in file', type=int, default=10000)
    parser.add_argument('--filter', help='substring to filter input urls', type=str, default='')
    parser.add_argument('--username', help='preferable username to check with', type=str)
    parser.add_argument(
        "--info",
        "-vv",
        action="store_true",
        dest="info",
        default=False,
        help="Display service information.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra information and metrics.",
    )
    parser.add_argument(
        "-d",
        "--debug",
        "-vvv",
        action="store_true",
        dest="debug",
        default=False,
        help="Saving debugging information and sites responses in debug.txt.",
    )
    parser.add_argument("urls_file",
                        metavar='URLS_FILE',
                        action="store",
                        help="File with base site URLs"
                        )
    args = parser.parse_args()

    # The most verbose flag given wins: --debug > --info > --verbose.
    log_level = logging.ERROR
    if args.debug:
        log_level = logging.DEBUG
    elif args.info:
        log_level = logging.INFO
    elif args.verbose:
        log_level = logging.WARNING
    logging.basicConfig(
        format='[%(filename)s:%(lineno)d] %(levelname)-3s  %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=log_level
    )
    logger = logging.getLogger('engines-check')
    logger.setLevel(log_level)

    db = MaigretDatabase()
    sites_subset = db.load_from_file(args.base_file).sites
    sites = {site.name: site for site in sites_subset}
    engines = db.engines

    # TODO: usernames extractors
    ok_usernames = ['alex', 'god', 'admin', 'red', 'blue', 'john']
    if args.username:
        # A user-supplied username is tried before the built-in ones.
        ok_usernames = [args.username] + ok_usernames
    bad_usernames = ['noonewouldeverusethis7']

    with open(args.urls_file, 'r') as urls_file:
        urls = urls_file.read().splitlines()
        if args.random:
            random.shuffle(urls)
        urls = urls[:args.top]

    # Serialized database used for a cheap "is this domain already known"
    # substring test below.
    raw_maigret_data = json.dumps({site.name: site.json for site in sites_subset})

    def create_site_from_engine(sitename, data, e):
        """Build a MaigretSite from raw data and apply the engine named ``e``."""
        site = MaigretSite(sitename, data)
        site.update_from_engine(db.engines_dict[e])
        site.engine = e
        return site

    new_sites = []
    for site in tqdm.asyncio.tqdm(urls):
        # Reduce the URL to a bare lowercase domain: drop scheme, "www." and path.
        site_lowercase = site.lower()
        domain_raw = URL_RE.sub('', site_lowercase).strip().strip('/')
        domain_raw = domain_raw.split('/')[0]

        if args.filter and args.filter not in domain_raw:
            logger.debug('Site %s skipped due to filtering by "%s"', domain_raw, args.filter)
            continue
        # NOTE: substring match against the JSON dump, not an exact name lookup.
        if domain_raw in raw_maigret_data:
            logger.debug(f'Site {domain_raw} already exists in the Maigret database!')
            continue
        if '"' in domain_raw:
            logger.debug(f'Invalid site {domain_raw}')
            continue

        # Keep only "scheme://host" of the original URL.
        main_page_url = '/'.join(site.split('/', 3)[:3])
        site_data = {
            'url': site,
            'urlMain': main_page_url,
            'name': domain_raw,
        }

        # Best-effort fetch: a failing site is simply treated as "no page".
        # Only Exception is caught so Ctrl+C / SystemExit still stop the scan.
        try:
            r = requests.get(main_page_url, timeout=5)
        except Exception as exc:
            logger.debug('Could not fetch %s: %s', main_page_url, exc)
            r = None

        # A requests.Response is falsy for error status codes, so such pages
        # are ignored exactly like failed requests.
        page_text = r.text if r else None

        detected_engines = []
        for engine in engines:
            strs_to_check = engine.__dict__.get('presenseStrs')
            if not (strs_to_check and page_text):
                continue
            # An engine is detected only when every presence string occurs.
            if all(s in page_text for s in strs_to_check):
                engine_name = engine.__dict__.get('name')
                detected_engines.append(engine_name)
                logger.info(f'Detected engine {engine_name} for site {main_page_url}')

        if args.only_engine and args.only_engine in detected_engines:
            detected_engines = [args.only_engine]
        elif not detected_engines and args.add_engine:
            logger.debug('Could not detect any engine, applying default engine %s...', args.add_engine)
            detected_engines = [args.add_engine]

        for engine_name in detected_engines:
            site = create_site_from_engine(domain_raw, site_data, engine_name)
            new_sites.append(site)
            logger.debug(site.json)
            # TODO: for phpBB, also try the same site with urlSubpath "/forum".

    print(f'Found {len(new_sites)}/{len(urls)} new sites')

    if args.check:
        # --check: only list the candidate sites, do not run username checks.
        for s in new_sites:
            print(s.url_main)
        sys.exit(0)

    sem = asyncio.Semaphore(20)
    loop = asyncio.get_event_loop()
    # Filled by check_and_add_maigret_site for every site that was saved.
    ok_sites = []
    tasks = []
    for site in new_sites:
        check_coro = check_and_add_maigret_site(site, sem, logger, ok_usernames, bad_usernames)
        future = asyncio.ensure_future(check_coro)
        tasks.append(future)

    for f in tqdm.asyncio.tqdm.as_completed(tasks, timeout=TIMEOUT):
        try:
            loop.run_until_complete(f)
        except asyncio.exceptions.TimeoutError:
            # Checks that exceed the overall timeout are silently dropped.
            pass

    print(f'Found and saved {len(ok_sites)} sites!')
@@ -0,0 +1,36 @@
 import sys
 import difflib
 import requests
 # Download the two pages whose URLs are given as the first and second
 # command-line arguments.
 a = requests.get(sys.argv[1]).text
 b = requests.get(sys.argv[2]).text
 # Split each page on double quotes and keep the unique fragments.
 tokens_a = set(a.split('"'))
 tokens_b = set(b.split('"'))
 # Fragments that occur in only one of the two pages.
 a_minus_b = tokens_a.difference(tokens_b)
 b_minus_a = tokens_b.difference(tokens_a)
 print(a_minus_b)
 print(b_minus_a)
 print(len(a_minus_b))
 print(len(b_minus_a))
 # Keywords that typically appear in profile / "not found" page markup.
 desired_strings = [
    "username",
    "not found",
    "пользователь",
    "profile",
    "lastname",
    "firstname",
    "biography",
    "birthday",
    "репутация",
    "информация",
    "e-mail",
 ]
 def get_match_ratio(x):
    """Return the best similarity of ``x`` to any desired string.

    ``x`` is lowercased and compared with every entry of ``desired_strings``
    using ``difflib.SequenceMatcher``; the highest ratio is returned,
    rounded to two decimal places.
    """
    needle = x.lower()
    best = max(
        difflib.SequenceMatcher(a=needle, b=candidate).ratio()
        for candidate in desired_strings
    )
    return round(best, 2)
 # NOTE(review): RATIO is not referenced by the lines visible here - confirm
 # whether it is still needed.
 RATIO = 0.6
 # Print, for each page, the 10 unique fragments that best match the desired strings.
 print(sorted(a_minus_b, key=get_match_ratio, reverse=True)[:10])
 print(sorted(b_minus_a, key=get_match_ratio, reverse=True)[:10])
@@ -25,7 +25,7 @@ RANKS.update({
    '100000000': '100M',
 })
-SEMAPHORE = threading.Semaphore(10)
+SEMAPHORE = threading.Semaphore(20)
 def get_rank(domain_to_query, site, print_errors=True):
    with SEMAPHORE:
@@ -114,7 +114,7 @@ Rank data fetched from Alexa by domains.
                sys.stdout.flush()
                index = index + 1
-        sites_full_list = [(s, s.alexa_rank) for s in sites_subset]
+        sites_full_list = [(s, int(s.alexa_rank)) for s in sites_subset]
        sites_full_list.sort(reverse=False, key=lambda x: x[1])
@@ -137,7 +137,11 @@ Rank data fetched from Alexa by domains.
            site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
            db.update_site(site)
-        site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
+        site_file.write(f'\nThe list was updated at ({datetime.utcnow()} UTC)\n')
        db.save_to_file(args.base_file)
        statistics_text = db.get_db_stats(is_markdown=True)
        site_file.write('## Statistics\n\n')
        site_file.write(statistics_text)
    print("\nFinished updating supported site listing!")
		`@@ -0,0 +1,2 @@`
							`#!/bin/sh`
							`python3 ./utils/update_site_data.py`
		`@@ -0,0 +1,3 @@`
							`# These are supported funding model platforms`

							`patreon: soxoj`
`@@ -1,3 +1,3 @@`
	`"""Maigret version file"""`	`"""Maigret version file"""`

	`__version__ = '0.3.0'`	`__version__ = '0.4.4'`