mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-09 16:14:32 +00:00
Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c6661e22ff | |||
| fdb68b5e80 | |||
| 9fe6b99239 | |||
| b9d303fde3 | |||
| d29e88d96f | |||
| 731a8e01f9 | |||
| cf7acfd8c8 | |||
| 9e6bd05acc | |||
| 6ea1dc33f7 | |||
| d5bc92d26a | |||
| f7263c9b3c | |||
| e6f82a8ba3 | |||
| ba7a38092c | |||
| 92a1677213 |
@@ -2,6 +2,11 @@
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.1.15] - 2021-03-14
|
||||
* improved HTML reports
|
||||
* fixed Python 3.6-specific error
|
||||
* false positives fixes
|
||||
|
||||
## [0.1.14] - 2021-02-25
|
||||
* added JSON export formats
|
||||
* improved tags markup
|
||||
|
||||
@@ -72,10 +72,6 @@ docker run maigret user
|
||||
|
||||
[PDF report](./static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
|
||||
|
||||
```bash
|
||||
maigret alexaimephotographycars
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
+16
-10
@@ -2,6 +2,7 @@ import asyncio
|
||||
import logging
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
import tqdm.asyncio
|
||||
@@ -61,9 +62,6 @@ async def get_response(request_future, site_name, logger):
|
||||
except asyncio.TimeoutError as errt:
|
||||
error_text = "Timeout Error"
|
||||
expection_text = str(errt)
|
||||
except (ssl.SSLCertVerificationError, ssl.SSLError) as err:
|
||||
error_text = "SSL Error"
|
||||
expection_text = str(err)
|
||||
except aiohttp.client_exceptions.ClientConnectorError as err:
|
||||
error_text = "Error Connecting"
|
||||
expection_text = str(err)
|
||||
@@ -74,10 +72,16 @@ async def get_response(request_future, site_name, logger):
|
||||
error_text = "Proxy Error"
|
||||
expection_text = str(err)
|
||||
except Exception as err:
|
||||
logger.warning(f'Unhandled error while requesting {site_name}: {err}')
|
||||
logger.debug(err, exc_info=True)
|
||||
error_text = "Some Error"
|
||||
expection_text = str(err)
|
||||
# python-specific exceptions
|
||||
if sys.version_info.minor > 6:
|
||||
if isinstance(err, ssl.SSLCertVerificationError) or isinstance(err, ssl.SSLError):
|
||||
error_text = "SSL Error"
|
||||
expection_text = str(err)
|
||||
else:
|
||||
logger.warning(f'Unhandled error while requesting {site_name}: {err}')
|
||||
logger.debug(err, exc_info=True)
|
||||
error_text = "Some Error"
|
||||
expection_text = str(err)
|
||||
|
||||
# TODO: return only needed information
|
||||
return html_text, status_code, error_text, expection_text
|
||||
@@ -174,6 +178,8 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
activate_fun(site, logger)
|
||||
except AttributeError:
|
||||
logger.warning(f'Activation method {method} for site {site.name} not found!')
|
||||
except Exception as e:
|
||||
logger.warning(f'Failed activation {method} for site {site.name}: {e}')
|
||||
|
||||
# presence flags
|
||||
# True by default
|
||||
@@ -289,7 +295,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
||||
|
||||
|
||||
async def maigret(username, site_dict, query_notify, logger,
|
||||
proxy=None, timeout=None, recursive_search=False,
|
||||
proxy=None, timeout=None, is_parsing_enabled=False,
|
||||
id_type='username', debug=False, forced=False,
|
||||
max_connections=100, no_progressbar=False,
|
||||
cookies=None):
|
||||
@@ -307,7 +313,7 @@ async def maigret(username, site_dict, query_notify, logger,
|
||||
proxy -- String indicating the proxy URL
|
||||
timeout -- Time in seconds to wait before timing out request.
|
||||
Default is no timeout.
|
||||
recursive_search -- Search for other usernames in website pages & recursive search by them.
|
||||
is_parsing_enabled -- Search for other usernames in website pages.
|
||||
|
||||
Return Value:
|
||||
Dictionary containing results from report. Key of dictionary is the name
|
||||
@@ -364,7 +370,7 @@ async def maigret(username, site_dict, query_notify, logger,
|
||||
|
||||
# Record URL of main site and username
|
||||
results_site['username'] = username
|
||||
results_site['parsing_enabled'] = recursive_search
|
||||
results_site['parsing_enabled'] = is_parsing_enabled
|
||||
results_site['url_main'] = site.url_main
|
||||
results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
|
||||
|
||||
|
||||
+8
-3
@@ -17,7 +17,7 @@ from .report import save_csv_report, save_xmind_report, save_html_report, save_p
|
||||
save_json_report
|
||||
from .submit import submit_dialog
|
||||
|
||||
__version__ = '0.1.14'
|
||||
__version__ = '0.1.15'
|
||||
|
||||
|
||||
async def main():
|
||||
@@ -106,7 +106,11 @@ async def main():
|
||||
)
|
||||
parser.add_argument("--no-recursion",
|
||||
action="store_true", dest="disable_recursive_search", default=False,
|
||||
help="Disable parsing pages for other usernames and recursive search by them."
|
||||
help="Disable recursive search by additional data extracted from pages."
|
||||
)
|
||||
parser.add_argument("--no-extracting",
|
||||
action="store_true", dest="disable_extracting", default=False,
|
||||
help="Disable parsing pages for additional data and other usernames."
|
||||
)
|
||||
parser.add_argument("--self-check",
|
||||
action="store_true", default=False,
|
||||
@@ -203,6 +207,7 @@ async def main():
|
||||
and u not in args.ignore_ids_list
|
||||
}
|
||||
|
||||
parsing_enabled = not args.disable_extracting
|
||||
recursive_search_enabled = not args.disable_recursive_search
|
||||
|
||||
# Make prompts
|
||||
@@ -324,7 +329,7 @@ async def main():
|
||||
query_notify,
|
||||
proxy=args.proxy,
|
||||
timeout=args.timeout,
|
||||
recursive_search=recursive_search_enabled,
|
||||
is_parsing_enabled=parsing_enabled,
|
||||
id_type=id_type,
|
||||
debug=args.verbose,
|
||||
logger=logger,
|
||||
|
||||
+30
-12
@@ -2546,7 +2546,7 @@
|
||||
"us"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": "Missing page... somebody made a wrong move.",
|
||||
"absenceStrs": "The page you are looking for doesn\u2019t exist. (404)",
|
||||
"alexaRank": 221,
|
||||
"url": "https://www.chess.com/ru/member/{username}",
|
||||
"urlMain": "https://www.chess.com/ru/",
|
||||
@@ -9597,6 +9597,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Partyflock": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"in",
|
||||
"nl"
|
||||
@@ -12229,7 +12230,7 @@
|
||||
"music"
|
||||
],
|
||||
"headers": {
|
||||
"authorization": "Bearer BQCPMLJ7MjV66wMUgXl0DUh5j_HVEPhwnncWZXaD1DgRJj_m4s3pO4MGXL5IosKKof1uQ0QwwI7cnGrhQs0"
|
||||
"authorization": "Bearer BQDb4T33eeZ7nHS1vr_igE5UM5m6l6gI60KcWjvhBmp-uiPDj-2m7IBRif4k1HfF-coCK4oaL0ZUsNIczTA"
|
||||
},
|
||||
"errors": {
|
||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||
@@ -13015,6 +13016,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"Thedaftclub": {
|
||||
"disabled": true,
|
||||
"tags": [
|
||||
"us"
|
||||
],
|
||||
@@ -13076,15 +13078,19 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"TikTok": {
|
||||
"tags": [
|
||||
"global",
|
||||
"us"
|
||||
],
|
||||
"headers": {
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
|
||||
},
|
||||
"errors": {
|
||||
"tiktok-verify-page": "Captcha detected"
|
||||
},
|
||||
"checkType": "message",
|
||||
"absenceStrs": "<title></title>",
|
||||
"presenseStrs": [
|
||||
"nickname"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"serverCode\":404"
|
||||
],
|
||||
"alexaRank": 132,
|
||||
"url": "https://www.tiktok.com/@{username}",
|
||||
"urlMain": "https://www.tiktok.com/",
|
||||
@@ -13240,7 +13246,12 @@
|
||||
"ru"
|
||||
],
|
||||
"checkType": "message",
|
||||
"absenceStrs": "<title>, \u0430\u0432\u0442\u043e\u0440 \u043d\u0430 Tproger</title>",
|
||||
"presenseStrs": [
|
||||
"<meta property=\"og:url\" content=\"https://tproger.ru/author/"
|
||||
],
|
||||
"absenceStrs": [
|
||||
"<title>404"
|
||||
],
|
||||
"alexaRank": 39077,
|
||||
"url": "https://tproger.ru/author/{username}/",
|
||||
"urlMain": "https://tproger.ru",
|
||||
@@ -13535,7 +13546,7 @@
|
||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||
"x-guest-token": "1363222310963077123"
|
||||
"x-guest-token": "1370458260256530436"
|
||||
},
|
||||
"errors": {
|
||||
"Bad guest token": "x-guest-token update required"
|
||||
@@ -13551,7 +13562,9 @@
|
||||
},
|
||||
"urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D",
|
||||
"checkType": "message",
|
||||
"absenceStrs": "Not found",
|
||||
"absenceStrs": [
|
||||
" not found"
|
||||
],
|
||||
"alexaRank": 56,
|
||||
"url": "https://twitter.com/{username}",
|
||||
"urlMain": "https://www.twitter.com/",
|
||||
@@ -13901,7 +13914,7 @@
|
||||
"video"
|
||||
],
|
||||
"headers": {
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTM4NTM0MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.Xd8moX-C5rpQPGmx3BKz2Y9MBHdBfKq2Qo6RK4l3IBc"
|
||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTU1Nzg2MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.znOMYMyDNqfX0D_olrRH7s8r8TP-75vDPPznai82ZKY"
|
||||
},
|
||||
"activation": {
|
||||
"url": "https://vimeo.com/_rv/viewer",
|
||||
@@ -13951,6 +13964,9 @@
|
||||
"x-tool": "vt-ui-main",
|
||||
"x-vt-anti-abuse-header": "MTM0NTMxNTA3MTItWkc5dWRDQmlaU0JsZG1scy0xNjA3NDMzMzM3LjI3MQ=="
|
||||
},
|
||||
"errors": {
|
||||
"RecaptchaRequiredError": "Captcha detected"
|
||||
},
|
||||
"checkType": "message",
|
||||
"absenceStrs": "not found",
|
||||
"alexaRank": 4932,
|
||||
@@ -23191,6 +23207,7 @@
|
||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||
},
|
||||
"brute.pw": {
|
||||
"disabled": true,
|
||||
"engine": "XenForo",
|
||||
"alexaRank": 984755,
|
||||
"urlMain": "https://brute.pw",
|
||||
@@ -23265,7 +23282,7 @@
|
||||
"site": {
|
||||
"absenceStrs": [
|
||||
"The requested page could not be found.",
|
||||
"The specified member cannot be found. Please enter a member's entire name.",
|
||||
"The specified member cannot be found. Please enter a member",
|
||||
"\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u0432\u0432\u0435\u0434\u0438\u0442\u0435 \u0434\u0440\u0443\u0433\u043e\u0435 \u0438\u043c\u044f.",
|
||||
"Le membre sp\u00e9cifi\u00e9 est introuvable. Veuillez saisir le nom complet d'un membre.",
|
||||
"Belirtilen \u00fcye bulunamad\u0131. L\u00fctfen bir \u00fcyenin tam ad\u0131n\u0131 giriniz."
|
||||
@@ -23327,6 +23344,7 @@
|
||||
],
|
||||
"checkType": "message",
|
||||
"errors": {
|
||||
"You have been banned": "IP ban",
|
||||
"The administrator has banned your IP address": "IP ban",
|
||||
"\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0441\u0435\u0440\u0432\u0435\u0440 \u043f\u0435\u0440\u0435\u0433\u0440\u0443\u0436\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u043f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0437\u0430\u0439\u0442\u0438 \u043f\u043e\u0437\u0436\u0435.": "Server is overloaded"
|
||||
},
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
<div class="row-mb">
|
||||
<div class="col-md">
|
||||
<div class="card flex-md-row mb-4 box-shadow h-md-250">
|
||||
<img class="card-img-right flex-auto d-none d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
|
||||
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
|
||||
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
|
||||
<h3 class="mb-0" style="padding-top: 1rem;">
|
||||
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
|
||||
|
||||
+2
-2
@@ -1,4 +1,4 @@
|
||||
aiohttp==3.7.3
|
||||
aiohttp==3.7.4
|
||||
aiohttp-socks==0.5.5
|
||||
arabic-reshaper==2.1.1
|
||||
async-timeout==3.0.1
|
||||
@@ -28,7 +28,7 @@ reportlab==3.5.59
|
||||
requests>=2.24.0
|
||||
requests-futures==1.0.0
|
||||
six==1.15.0
|
||||
socid-extractor>=0.0.12
|
||||
socid-extractor>=0.0.13
|
||||
soupsieve==2.1
|
||||
stem==1.8.0
|
||||
torrequest==0.1.0
|
||||
|
||||
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
|
||||
requires = rf.read().splitlines()
|
||||
|
||||
setup(name='maigret',
|
||||
version='0.1.14',
|
||||
version='0.1.15',
|
||||
description='Collect a dossier on a person by username from a huge number of sites',
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
|
||||
Reference in New Issue
Block a user