Compare commits

..

11 Commits

Author SHA1 Message Date
soxoj 53d698bb7b Merge pull request #95 from soxoj/socid-bump
Updated socid_extractor version
2021-03-30 00:37:02 +03:00
soxoj 23fff42ca7 Merge pull request #94 from soxoj/dependabot/pip/lxml-4.6.3
Bump lxml from 4.6.2 to 4.6.3
2021-03-30 00:34:13 +03:00
Soxoj 51d9e6f5f6 Bump to v0.1.17 2021-03-30 00:33:51 +03:00
Soxoj 640c04f20b Updated socid_extractor version 2021-03-30 00:31:40 +03:00
dependabot[bot] 69f78e331b Bump lxml from 4.6.2 to 4.6.3
Bumps [lxml](https://github.com/lxml/lxml) from 4.6.2 to 4.6.3.
- [Release notes](https://github.com/lxml/lxml/releases)
- [Changelog](https://github.com/lxml/lxml/blob/master/CHANGES.txt)
- [Commits](https://github.com/lxml/lxml/compare/lxml-4.6.2...lxml-4.6.3)

Signed-off-by: dependabot[bot] <support@github.com>
2021-03-29 21:25:19 +00:00
soxoj 69c315b00e Merge pull request #93 from soxoj/docs-requirements
Documentation and API improving
2021-03-30 00:24:49 +03:00
Soxoj b755628a1d Documentation and API improving 2021-03-30 00:19:17 +03:00
soxoj 7490a412db Merge pull request #92 from soxoj/ignore403-bugfix
Fixed bug with ignore403 for engine-based sites
2021-03-28 17:40:35 +03:00
Soxoj 2741680d4a Fixed bug with ignore403 for engine-based sites 2021-03-28 17:37:18 +03:00
soxoj e5fc221ce2 Merge pull request #91 from soxoj/async-3.6.9-fix
Fix of 3.6.9 asyncio create_task error
2021-03-24 21:43:11 +03:00
Soxoj a044e3dd79 Fix of 3.6.9 asyncio create_task error 2021-03-24 21:37:56 +03:00
11 changed files with 97 additions and 69 deletions
+1 -1
View File
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
python-version: [3.6.9, 3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
+5
View File
@@ -2,6 +2,11 @@
## [Unreleased]
## [0.1.17] - 2021-03-30
* simplified maigret search API
* improved documentation
* fixed a bug with ignoring the 403 response code
## [0.1.16] - 2021-03-21
* improved URL parsing mode
* improved sites submit mode
+2
View File
@@ -1 +1,3 @@
"""Maigret"""
from .checking import maigret as search
+29 -14
View File
@@ -1,5 +1,6 @@
import asyncio
import logging
from mock import Mock
import re
import ssl
import sys
@@ -119,7 +120,14 @@ class AsyncioProgressbarQueueExecutor(AsyncExecutor):
async def _run(self, tasks: QueriesDraft):
self.results = []
workers = [asyncio.create_task(self.worker())
if sys.version_info.minor > 6:
create_task = asyncio.create_task
else:
loop = asyncio.get_event_loop()
create_task = loop.create_task
workers = [create_task(self.worker())
for _ in range(self.workers_count)]
task_list = list(tasks)
self.progress = self.progress_func(total=len(task_list))
@@ -259,7 +267,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
if status_code and not error_text:
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
site.ignore_403)
site.ignore403)
if site.activation and html_text:
is_need_activation = any([s for s in site.activation['marks'] if s in html_text])
@@ -387,26 +395,32 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
return results_info
async def maigret(username, site_dict, query_notify, logger,
async def maigret(username, site_dict, logger, query_notify=None,
proxy=None, timeout=None, is_parsing_enabled=False,
id_type='username', debug=False, forced=False,
max_connections=100, no_progressbar=False,
cookies=None):
"""Main search func
Checks for existence of username on various social media sites.
Checks for existence of username on certain sites.
Keyword Arguments:
username -- String indicating username that report
should be created against.
site_dict -- Dictionary containing all of the site data.
username -- Username string to search for.
site_dict -- Dictionary containing sites data.
query_notify -- Object with base type of QueryNotify().
This will be used to notify the caller about
query results.
proxy -- String indicating the proxy URL
logger -- Standard Python logger object.
timeout -- Time in seconds to wait before timing out request.
Default is no timeout.
is_parsing_enabled -- Search for other usernames in website pages.
is_parsing_enabled -- Extract additional info from account pages.
id_type -- Type of username to search.
Default is 'username', see all supported here:
https://github.com/soxoj/maigret/wiki/Supported-identifier-types
max_connections -- Maximum number of concurrent connections allowed.
Default is 100.
no_progressbar -- Disable the ASCII progress bar during scanning.
cookies -- Filename of a cookie jar file to use for each request.
Return Value:
Dictionary containing results from report. Key of dictionary is the name
@@ -423,6 +437,9 @@ async def maigret(username, site_dict, query_notify, logger,
"""
# Notify caller that we are starting the query.
if not query_notify:
query_notify = Mock()
query_notify.start(username, id_type)
# TODO: connector
@@ -602,7 +619,6 @@ def timeout_check(value):
async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
query_notify = Mock()
changes = {
'disabled': False,
}
@@ -622,10 +638,9 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
for username, status in check_data:
async with semaphore:
results_dict = await maigret(
username,
{site.name: site},
query_notify,
logger,
username=username,
site_dict={site.name: site},
logger=logger,
timeout=30,
id_type=site.type,
forced=True,
+4 -4
View File
@@ -21,7 +21,7 @@ from .sites import MaigretDatabase
from .submit import submit_dialog
from .utils import get_dict_ascii_tree
__version__ = '0.1.16'
__version__ = '0.1.17'
async def main():
@@ -341,9 +341,9 @@ async def main():
sites_to_check = get_top_sites_for_id(id_type)
results = await maigret(username,
dict(sites_to_check),
query_notify,
results = await maigret(username=username,
site_dict=dict(sites_to_check),
query_notify=query_notify,
proxy=args.proxy,
timeout=args.timeout,
is_parsing_enabled=parsing_enabled,
+42 -39
View File
@@ -1413,22 +1413,22 @@
"usernameUnclaimed": "noonewouldeverusethis"
},
"Avto-forum.name": {
"ignore403": true,
"tags": [
"ru"
],
"engine": "XenForo",
"alexaRank": 716960,
"ignore403": true,
"urlMain": "https://avto-forum.name",
"usernameClaimed": "mariya",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Avtoforum": {
"ignore403": true,
"tags": [
"ru"
],
"engine": "XenForo",
"ignore403": true,
"urlMain": "https://avtoforum.org",
"usernameClaimed": "tim",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -1566,25 +1566,13 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"XSS.is": {
"ignore403": true,
"tags": [
"in",
"hacking",
"ru"
],
"activation": {
"method": "xssis",
"marks": [
"errorHtml"
],
"url": "https://xss.is/login/keep-alive",
"src": "csrf",
"dst": "x-guest-token"
},
"checkType": "status_code",
"getParams": {
"_xfToken": "1611179947,a2710362e45dad9aa1da381e21941a38"
},
"engine": "XenForo",
"alexaRank": 165220,
"url": "https://xss.is/index.php?members/find&q={username}&_xfRequestUri=%2Fmembers%2F%3Fkey%3Dmost_messages&_xfWithData=1&_xfResponseType=json",
"urlMain": "https://xss.is",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -1654,6 +1642,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"BeerMoneyForum": {
"ignore403": true,
"tags": [
"jp",
"ve"
@@ -1661,7 +1650,6 @@
"checkType": "message",
"absenceStrs": "The specified member cannot be found.",
"alexaRank": 11581,
"ignore403": true,
"url": "https://www.beermoneyforum.com/members/?username={username}",
"urlMain": "https://www.beermoneyforum.com",
"usernameClaimed": "Yugocean",
@@ -2031,6 +2019,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"BoomInfo": {
"ignore403": true,
"tags": [
"ru",
"ua"
@@ -2038,7 +2027,6 @@
"checkType": "message",
"absenceStrs": "\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u0432\u0432\u0435\u0434\u0438\u0442\u0435 \u0434\u0440\u0443\u0433\u043e\u0435 \u0438\u043c\u044f.",
"alexaRank": 1680672,
"ignore403": true,
"url": "https://boominfo.ru/members/?username={username}",
"urlMain": "https://boominfo.ru",
"usernameClaimed": "boominfo",
@@ -3591,12 +3579,12 @@
"usernameUnclaimed": "noonewouldeverusethis777"
},
"Dumpz": {
"ignore403": true,
"tags": [
"ru"
],
"engine": "XenForo",
"alexaRank": 1291982,
"ignore403": true,
"urlMain": "https://dumpz.ws",
"usernameClaimed": "emailx45",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -3848,13 +3836,13 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Erogen.club": {
"ignore403": true,
"tags": [
"ru",
"ua"
],
"engine": "XenForo",
"alexaRank": 479929,
"ignore403": true,
"urlMain": "https://erogen.club",
"usernameClaimed": "yanok",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -5814,18 +5802,19 @@
},
"Gunandgame": {
"disabled": true,
"ignore403": true,
"tags": [
"us"
],
"checkType": "message",
"absenceStrs": "The specified member cannot be found. Please enter a member's entire name.",
"ignore403": true,
"url": "https://www.gunandgame.com/members/?username={username}",
"urlMain": "https://www.gunandgame.co",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Gunboards": {
"ignore403": true,
"tags": [
"in",
"us"
@@ -5835,7 +5824,6 @@
],
"engine": "XenForo",
"alexaRank": 464194,
"ignore403": true,
"urlMain": "https://forums.gunboards.com",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -8503,12 +8491,12 @@
},
"Musiker-board": {
"disabled": true,
"ignore403": true,
"tags": [
"de"
],
"engine": "XenForo",
"alexaRank": 151707,
"ignore403": true,
"urlMain": "https://www.musiker-board.de",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -9053,13 +9041,13 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Niketalk": {
"ignore403": true,
"tags": [
"us"
],
"checkType": "message",
"absenceStrs": "The specified member cannot be found",
"alexaRank": 165332,
"ignore403": true,
"url": "https://niketalk.com/members/?username={username}",
"urlMain": "https://niketalk.com",
"usernameClaimed": "adam",
@@ -9636,6 +9624,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Pbnation": {
"ignore403": true,
"tags": [
"ca",
"us"
@@ -9643,7 +9632,6 @@
"checkType": "message",
"absenceStrs": "This user has not registered",
"alexaRank": 107502,
"ignore403": true,
"url": "https://www.pbnation.com/member.php?username={username}",
"urlMain": "https://www.pbnation.com/",
"usernameClaimed": "adam",
@@ -10314,6 +10302,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Prosvetlenie": {
"ignore403": true,
"tags": [
"kg",
"ru"
@@ -10321,7 +10310,6 @@
"checkType": "message",
"absenceStrs": "\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d",
"alexaRank": 2256482,
"ignore403": true,
"url": "http://www.prosvetlenie.org/forum/members/?username={username}",
"urlMain": "http://www.prosvetlenie.org",
"usernameClaimed": "odin",
@@ -10533,6 +10521,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"RPGGeek": {
"ignore403": true,
"tags": [
"gaming",
"us"
@@ -10540,7 +10529,6 @@
"checkType": "message",
"absenceStrs": "User does not exist",
"alexaRank": 177522,
"ignore403": true,
"url": "https://rpggeek.com/user/{username}",
"urlMain": "https://rpggeek.com",
"usernameClaimed": "adam",
@@ -10548,13 +10536,13 @@
},
"RPGRussia": {
"disabled": true,
"ignore403": true,
"tags": [
"ru",
"us"
],
"engine": "XenForo",
"alexaRank": 256354,
"ignore403": true,
"urlMain": "https://rpgrussia.com",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -11240,13 +11228,13 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Rusfishing": {
"ignore403": true,
"tags": [
"ru"
],
"checkType": "message",
"absenceStrs": "\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d",
"alexaRank": 77147,
"ignore403": true,
"url": "https://www.rusfishing.ru/forum/members/?username={username}",
"urlMain": "https://www.rusfishing.ru",
"usernameClaimed": "ale8443",
@@ -11545,12 +11533,12 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Sexforum.ws": {
"ignore403": true,
"tags": [
"ru"
],
"engine": "XenForo",
"alexaRank": 1815966,
"ignore403": true,
"urlMain": "http://sexforum.ws",
"usernameClaimed": "katrin1988",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -12129,7 +12117,7 @@
"us"
],
"headers": {
"authorization": "Bearer BQCyNy7oN-nOZ-rNK_jL0R4GHS7f3uWdK8AdxZ9m-pg9dbm7t-mLNaaHCG2BpDeV4AekGfl4AscbXLMqeB4"
"authorization": "Bearer BQBgSgdI2iyspCVcbJZjVlMt3-84iAe7qyMh_ZO095wRaD68tB-Zz-US5cXPD0DSVfluwjZWn1Bf2EVJIN8"
},
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -13095,13 +13083,13 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"TotalStavki": {
"ignore403": true,
"tags": [
"ru"
],
"checkType": "message",
"absenceStrs": "\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d",
"alexaRank": 5459551,
"ignore403": true,
"url": "https://totalstavki.ru/forum/members/?username={username}",
"urlMain": "https://totalstavki.ru",
"usernameClaimed": "turbo",
@@ -13431,7 +13419,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1373570939997335554"
"x-guest-token": "1376637415348113408"
},
"errors": {
"Bad guest token": "x-guest-token update required"
@@ -13778,6 +13766,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"VideogameGeek": {
"ignore403": true,
"tags": [
"gaming",
"us"
@@ -13785,7 +13774,6 @@
"checkType": "message",
"absenceStrs": "User does not exist",
"alexaRank": 719092,
"ignore403": true,
"url": "https://videogamegeek.com/user/{username}",
"urlMain": "https://videogamegeek.com",
"usernameClaimed": "adam",
@@ -13808,7 +13796,7 @@
"video"
],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTYzMjE3MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.yS9-vO94E_Ad_e2krnmOoqXWcaLxhZSrRvjUVHkgIPk"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTcwNTE4NDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.iZzO-_VDARa_honzp7KvRSaSK0qMQ7n8dp9k6gah_bE"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -13893,13 +13881,13 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Vlmi": {
"ignore403": true,
"tags": [
"ru",
"ua"
],
"engine": "XenForo",
"alexaRank": 725829,
"ignore403": true,
"urlMain": "https://vlmi.biz",
"usernameClaimed": "mixa",
"usernameUnclaimed": "noonewouldeverusethis7"
@@ -14516,6 +14504,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Wuz": {
"ignore403": true,
"tags": [
"by",
"ru"
@@ -14523,7 +14512,6 @@
"checkType": "message",
"absenceStrs": "\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d",
"alexaRank": 1927898,
"ignore403": true,
"url": "http://wuz.by/forum/members/?username={username}",
"urlMain": "http://wuz.by",
"usernameClaimed": "adam",
@@ -16069,6 +16057,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"forums.overclockers.co.uk": {
"ignore403": true,
"tags": [
"gb",
"uk"
@@ -16076,7 +16065,6 @@
"checkType": "message",
"absenceStrs": "The specified member cannot be found. Please enter a member's entire name.",
"alexaRank": 10013,
"ignore403": true,
"url": "https://forums.overclockers.co.uk/members/?username={username}",
"urlMain": "https://forums.overclockers.co.uk",
"usernameClaimed": "adam",
@@ -23481,6 +23469,21 @@
"urlMain": "https://skyblock.net",
"usernameClaimed": "alex",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"codeberg.org": {
"checkType": "message",
"presenseStrs": [
"user profile",
" username text center"
],
"absenceStrs": [
"og:description",
" ui centered image"
],
"url": "https://codeberg.org/{username}",
"urlMain": "https://codeberg.org",
"usernameClaimed": "pcastela",
"usernameUnclaimed": "noonewouldeverusethis7"
}
},
"engines": {
+1 -1
View File
@@ -46,7 +46,7 @@ class MaigretSite:
self.disabled = False
self.similar_search = False
self.ignore_403 = False
self.ignore403 = False
self.tags = []
self.type = 'username'
+3 -5
View File
@@ -27,7 +27,6 @@ def extract_mainpage_url(url):
async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=False):
query_notify = Mock()
changes = {
'disabled': False,
}
@@ -41,10 +40,9 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
for username, status in check_data:
results_dict = await maigret(
username,
{site.name: site},
query_notify,
logger,
username=username,
site_dict={site.name: site},
logger=logger,
timeout=30,
id_type=site.type,
forced=True,
+2 -4
View File
@@ -14,21 +14,19 @@ future-annotations==1.0.0
html5lib==1.1
idna==2.10
Jinja2==2.11.3
lxml==4.6.2
lxml==4.6.3
MarkupSafe==1.1.1
mock==4.0.2
multidict==5.1.0
Pillow==8.1.1
pycountry==20.7.3
PyPDF2==1.26.0
PySocks==1.7.1
python-bidi==0.4.2
python-socks==1.1.2
reportlab==3.5.59
requests>=2.24.0
requests-futures==1.0.0
six==1.15.0
socid-extractor>=0.0.15
socid-extractor>=0.0.16
soupsieve==2.1
stem==1.8.0
torrequest==0.1.0
+1 -1
View File
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
requires = rf.read().splitlines()
setup(name='maigret',
version='0.1.15',
version='0.1.17',
description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description,
long_description_content_type="text/markdown",
+7
View File
@@ -26,6 +26,13 @@ def test_case_convert_snake_to_title():
assert b == 'Camel cased string'
def test_case_convert_camel_with_digits_to_snake():
    """Check that camel_to_snake returns 'ignore403' unchanged.

    The input is all lowercase with trailing digits; the expected output
    is the same string.
    """
    source = 'ignore403'
    converted = CaseConverter.camel_to_snake(source)
    assert converted == 'ignore403'
def test_is_country_tag():
assert is_country_tag('ru') == True
assert is_country_tag('FR') == True