Compare commits

...

14 Commits

Author SHA1 Message Date
soxoj c6661e22ff Merge pull request #72 from soxoj/v0.1.15
Bump to 0.1.15
2021-03-14 20:15:12 +03:00
Soxoj fdb68b5e80 Bump to 0.1.15 2021-03-14 20:11:32 +03:00
soxoj 9fe6b99239 Merge pull request #71 from soxoj/html-report-img-fix
Fixed HTML report images hiding for small screens + some minor fixes
2021-03-14 17:31:12 +03:00
Soxoj b9d303fde3 Fixed HTML report images hiding for small screens + some minor fixes 2021-03-14 16:15:31 +03:00
soxoj d29e88d96f Merge pull request #70 from soxoj/extracting-flag
Added separate `no-extracting` flag to control page parsing
2021-03-14 13:22:29 +03:00
Soxoj 731a8e01f9 Added separate no-extracting flag to control page parsing 2021-03-14 13:03:29 +03:00
soxoj cf7acfd8c8 Merge pull request #69 from soxoj/tiktok-fix
TikTok fixes
2021-03-13 00:02:25 +03:00
soxoj 9e6bd05acc Merge pull request #68 from soxoj/ssl-error-catching
Fixed catching of python-specific exception
2021-03-13 00:00:45 +03:00
Soxoj 6ea1dc33f7 TikTok fixes 2021-03-12 23:58:46 +03:00
Soxoj d5bc92d26a Fixed catching of python-specific exception 2021-03-12 23:34:59 +03:00
soxoj f7263c9b3c Merge pull request #67 from soxoj/fp-fixes
Some false positives fixes
2021-03-12 23:31:54 +03:00
Soxoj e6f82a8ba3 Some false positives fixes 2021-03-12 22:53:53 +03:00
soxoj ba7a38092c Merge pull request #65 from soxoj/dependabot/pip/aiohttp-3.7.4
Bump aiohttp from 3.7.3 to 3.7.4
2021-02-26 22:06:04 +03:00
dependabot[bot] 92a1677213 Bump aiohttp from 3.7.3 to 3.7.4
Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.7.3 to 3.7.4.
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.7.3...v3.7.4)

Signed-off-by: dependabot[bot] <support@github.com>
2021-02-26 03:07:44 +00:00
8 changed files with 63 additions and 33 deletions
+5
View File
@@ -2,6 +2,11 @@
## [Unreleased] ## [Unreleased]
## [0.1.15] - 2021-03-14
* improved HTML reports
* fixed Python 3.6-specific error
* false positives fixes
## [0.1.14] - 2021-02-25 ## [0.1.14] - 2021-02-25
* added JSON export formats * added JSON export formats
* improved tags markup * improved tags markup
-4
View File
@@ -72,10 +72,6 @@ docker run maigret user
[PDF report](./static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html) [PDF report](./static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
```bash
maigret alexaimephotographycars
```
![animation of recursive search](./static/recursive_search.svg) ![animation of recursive search](./static/recursive_search.svg)
![HTML report screenshot](./static/report_alexaimephotography_html_screenshot.png) ![HTML report screenshot](./static/report_alexaimephotography_html_screenshot.png)
+12 -6
View File
@@ -2,6 +2,7 @@ import asyncio
import logging import logging
import re import re
import ssl import ssl
import sys
import aiohttp import aiohttp
import tqdm.asyncio import tqdm.asyncio
@@ -61,9 +62,6 @@ async def get_response(request_future, site_name, logger):
except asyncio.TimeoutError as errt: except asyncio.TimeoutError as errt:
error_text = "Timeout Error" error_text = "Timeout Error"
expection_text = str(errt) expection_text = str(errt)
except (ssl.SSLCertVerificationError, ssl.SSLError) as err:
error_text = "SSL Error"
expection_text = str(err)
except aiohttp.client_exceptions.ClientConnectorError as err: except aiohttp.client_exceptions.ClientConnectorError as err:
error_text = "Error Connecting" error_text = "Error Connecting"
expection_text = str(err) expection_text = str(err)
@@ -74,6 +72,12 @@ async def get_response(request_future, site_name, logger):
error_text = "Proxy Error" error_text = "Proxy Error"
expection_text = str(err) expection_text = str(err)
except Exception as err: except Exception as err:
# python-specific exceptions
if sys.version_info.minor > 6:
if isinstance(err, ssl.SSLCertVerificationError) or isinstance(err, ssl.SSLError):
error_text = "SSL Error"
expection_text = str(err)
else:
logger.warning(f'Unhandled error while requesting {site_name}: {err}') logger.warning(f'Unhandled error while requesting {site_name}: {err}')
logger.debug(err, exc_info=True) logger.debug(err, exc_info=True)
error_text = "Some Error" error_text = "Some Error"
@@ -174,6 +178,8 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
activate_fun(site, logger) activate_fun(site, logger)
except AttributeError: except AttributeError:
logger.warning(f'Activation method {method} for site {site.name} not found!') logger.warning(f'Activation method {method} for site {site.name} not found!')
except Exception as e:
logger.warning(f'Failed activation {method} for site {site.name}: {e}')
# presense flags # presense flags
# True by default # True by default
@@ -289,7 +295,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
async def maigret(username, site_dict, query_notify, logger, async def maigret(username, site_dict, query_notify, logger,
proxy=None, timeout=None, recursive_search=False, proxy=None, timeout=None, is_parsing_enabled=False,
id_type='username', debug=False, forced=False, id_type='username', debug=False, forced=False,
max_connections=100, no_progressbar=False, max_connections=100, no_progressbar=False,
cookies=None): cookies=None):
@@ -307,7 +313,7 @@ async def maigret(username, site_dict, query_notify, logger,
proxy -- String indicating the proxy URL proxy -- String indicating the proxy URL
timeout -- Time in seconds to wait before timing out request. timeout -- Time in seconds to wait before timing out request.
Default is no timeout. Default is no timeout.
recursive_search -- Search for other usernames in website pages & recursive search by them. is_parsing_enabled -- Search for other usernames in website pages.
Return Value: Return Value:
Dictionary containing results from report. Key of dictionary is the name Dictionary containing results from report. Key of dictionary is the name
@@ -364,7 +370,7 @@ async def maigret(username, site_dict, query_notify, logger,
# Record URL of main site and username # Record URL of main site and username
results_site['username'] = username results_site['username'] = username
results_site['parsing_enabled'] = recursive_search results_site['parsing_enabled'] = is_parsing_enabled
results_site['url_main'] = site.url_main results_site['url_main'] = site.url_main
results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
+8 -3
View File
@@ -17,7 +17,7 @@ from .report import save_csv_report, save_xmind_report, save_html_report, save_p
save_json_report save_json_report
from .submit import submit_dialog from .submit import submit_dialog
__version__ = '0.1.14' __version__ = '0.1.15'
async def main(): async def main():
@@ -106,7 +106,11 @@ async def main():
) )
parser.add_argument("--no-recursion", parser.add_argument("--no-recursion",
action="store_true", dest="disable_recursive_search", default=False, action="store_true", dest="disable_recursive_search", default=False,
help="Disable parsing pages for other usernames and recursive search by them." help="Disable recursive search by additional data extracted from pages."
)
parser.add_argument("--no-extracting",
action="store_true", dest="disable_extracting", default=False,
help="Disable parsing pages for additional data and other usernames."
) )
parser.add_argument("--self-check", parser.add_argument("--self-check",
action="store_true", default=False, action="store_true", default=False,
@@ -203,6 +207,7 @@ async def main():
and u not in args.ignore_ids_list and u not in args.ignore_ids_list
} }
parsing_enabled = not args.disable_extracting
recursive_search_enabled = not args.disable_recursive_search recursive_search_enabled = not args.disable_recursive_search
# Make prompts # Make prompts
@@ -324,7 +329,7 @@ async def main():
query_notify, query_notify,
proxy=args.proxy, proxy=args.proxy,
timeout=args.timeout, timeout=args.timeout,
recursive_search=recursive_search_enabled, is_parsing_enabled=parsing_enabled,
id_type=id_type, id_type=id_type,
debug=args.verbose, debug=args.verbose,
logger=logger, logger=logger,
+30 -12
View File
@@ -2546,7 +2546,7 @@
"us" "us"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": "Missing page... somebody made a wrong move.", "absenceStrs": "The page you are looking for doesn\u2019t exist. (404)",
"alexaRank": 221, "alexaRank": 221,
"url": "https://www.chess.com/ru/member/{username}", "url": "https://www.chess.com/ru/member/{username}",
"urlMain": "https://www.chess.com/ru/", "urlMain": "https://www.chess.com/ru/",
@@ -9597,6 +9597,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Partyflock": { "Partyflock": {
"disabled": true,
"tags": [ "tags": [
"in", "in",
"nl" "nl"
@@ -12229,7 +12230,7 @@
"music" "music"
], ],
"headers": { "headers": {
"authorization": "Bearer BQCPMLJ7MjV66wMUgXl0DUh5j_HVEPhwnncWZXaD1DgRJj_m4s3pO4MGXL5IosKKof1uQ0QwwI7cnGrhQs0" "authorization": "Bearer BQDb4T33eeZ7nHS1vr_igE5UM5m6l6gI60KcWjvhBmp-uiPDj-2m7IBRif4k1HfF-coCK4oaL0ZUsNIczTA"
}, },
"errors": { "errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -13015,6 +13016,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Thedaftclub": { "Thedaftclub": {
"disabled": true,
"tags": [ "tags": [
"us" "us"
], ],
@@ -13076,15 +13078,19 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"TikTok": { "TikTok": {
"tags": [
"global",
"us"
],
"headers": { "headers": {
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}, },
"errors": {
"tiktok-verify-page": "Captcha detected"
},
"checkType": "message", "checkType": "message",
"absenceStrs": "<title></title>", "presenseStrs": [
"nickname"
],
"absenceStrs": [
"serverCode\":404"
],
"alexaRank": 132, "alexaRank": 132,
"url": "https://www.tiktok.com/@{username}", "url": "https://www.tiktok.com/@{username}",
"urlMain": "https://www.tiktok.com/", "urlMain": "https://www.tiktok.com/",
@@ -13240,7 +13246,12 @@
"ru" "ru"
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": "<title>, \u0430\u0432\u0442\u043e\u0440 \u043d\u0430 Tproger</title>", "presenseStrs": [
"<meta property=\"og:url\" content=\"https://tproger.ru/author/"
],
"absenceStrs": [
"<title>404"
],
"alexaRank": 39077, "alexaRank": 39077,
"url": "https://tproger.ru/author/{username}/", "url": "https://tproger.ru/author/{username}/",
"urlMain": "https://tproger.ru", "urlMain": "https://tproger.ru",
@@ -13535,7 +13546,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"", "sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA", "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1363222310963077123" "x-guest-token": "1370458260256530436"
}, },
"errors": { "errors": {
"Bad guest token": "x-guest-token update required" "Bad guest token": "x-guest-token update required"
@@ -13551,7 +13562,9 @@
}, },
"urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D", "urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D",
"checkType": "message", "checkType": "message",
"absenceStrs": "Not found", "absenceStrs": [
" not found"
],
"alexaRank": 56, "alexaRank": 56,
"url": "https://twitter.com/{username}", "url": "https://twitter.com/{username}",
"urlMain": "https://www.twitter.com/", "urlMain": "https://www.twitter.com/",
@@ -13901,7 +13914,7 @@
"video" "video"
], ],
"headers": { "headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTM4NTM0MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.Xd8moX-C5rpQPGmx3BKz2Y9MBHdBfKq2Qo6RK4l3IBc" "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTU1Nzg2MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.znOMYMyDNqfX0D_olrRH7s8r8TP-75vDPPznai82ZKY"
}, },
"activation": { "activation": {
"url": "https://vimeo.com/_rv/viewer", "url": "https://vimeo.com/_rv/viewer",
@@ -13951,6 +13964,9 @@
"x-tool": "vt-ui-main", "x-tool": "vt-ui-main",
"x-vt-anti-abuse-header": "MTM0NTMxNTA3MTItWkc5dWRDQmlaU0JsZG1scy0xNjA3NDMzMzM3LjI3MQ==" "x-vt-anti-abuse-header": "MTM0NTMxNTA3MTItWkc5dWRDQmlaU0JsZG1scy0xNjA3NDMzMzM3LjI3MQ=="
}, },
"errors": {
"RecaptchaRequiredError": "Captcha detected"
},
"checkType": "message", "checkType": "message",
"absenceStrs": "not found", "absenceStrs": "not found",
"alexaRank": 4932, "alexaRank": 4932,
@@ -23191,6 +23207,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"brute.pw": { "brute.pw": {
"disabled": true,
"engine": "XenForo", "engine": "XenForo",
"alexaRank": 984755, "alexaRank": 984755,
"urlMain": "https://brute.pw", "urlMain": "https://brute.pw",
@@ -23265,7 +23282,7 @@
"site": { "site": {
"absenceStrs": [ "absenceStrs": [
"The requested page could not be found.", "The requested page could not be found.",
"The specified member cannot be found. Please enter a member's entire name.", "The specified member cannot be found. Please enter a member",
"\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u0432\u0432\u0435\u0434\u0438\u0442\u0435 \u0434\u0440\u0443\u0433\u043e\u0435 \u0438\u043c\u044f.", "\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u0432\u0432\u0435\u0434\u0438\u0442\u0435 \u0434\u0440\u0443\u0433\u043e\u0435 \u0438\u043c\u044f.",
"Le membre sp\u00e9cifi\u00e9 est introuvable. Veuillez saisir le nom complet d'un membre.", "Le membre sp\u00e9cifi\u00e9 est introuvable. Veuillez saisir le nom complet d'un membre.",
"Belirtilen \u00fcye bulunamad\u0131. L\u00fctfen bir \u00fcyenin tam ad\u0131n\u0131 giriniz." "Belirtilen \u00fcye bulunamad\u0131. L\u00fctfen bir \u00fcyenin tam ad\u0131n\u0131 giriniz."
@@ -23327,6 +23344,7 @@
], ],
"checkType": "message", "checkType": "message",
"errors": { "errors": {
"You have been banned": "IP ban",
"The administrator has banned your IP address": "IP ban", "The administrator has banned your IP address": "IP ban",
"\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0441\u0435\u0440\u0432\u0435\u0440 \u043f\u0435\u0440\u0435\u0433\u0440\u0443\u0436\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u043f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0437\u0430\u0439\u0442\u0438 \u043f\u043e\u0437\u0436\u0435.": "Server is overloaded" "\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0441\u0435\u0440\u0432\u0435\u0440 \u043f\u0435\u0440\u0435\u0433\u0440\u0443\u0436\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u043f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0437\u0430\u0439\u0442\u0438 \u043f\u043e\u0437\u0436\u0435.": "Server is overloaded"
}, },
+1 -1
View File
@@ -68,7 +68,7 @@
<div class="row-mb"> <div class="row-mb">
<div class="col-md"> <div class="col-md">
<div class="card flex-md-row mb-4 box-shadow h-md-250"> <div class="card flex-md-row mb-4 box-shadow h-md-250">
<img class="card-img-right flex-auto d-none d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true"> <img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;"> <div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
<h3 class="mb-0" style="padding-top: 1rem;"> <h3 class="mb-0" style="padding-top: 1rem;">
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a> <a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
+2 -2
View File
@@ -1,4 +1,4 @@
aiohttp==3.7.3 aiohttp==3.7.4
aiohttp-socks==0.5.5 aiohttp-socks==0.5.5
arabic-reshaper==2.1.1 arabic-reshaper==2.1.1
async-timeout==3.0.1 async-timeout==3.0.1
@@ -28,7 +28,7 @@ reportlab==3.5.59
requests>=2.24.0 requests>=2.24.0
requests-futures==1.0.0 requests-futures==1.0.0
six==1.15.0 six==1.15.0
socid-extractor>=0.0.12 socid-extractor>=0.0.13
soupsieve==2.1 soupsieve==2.1
stem==1.8.0 stem==1.8.0
torrequest==0.1.0 torrequest==0.1.0
+1 -1
View File
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
requires = rf.read().splitlines() requires = rf.read().splitlines()
setup(name='maigret', setup(name='maigret',
version='0.1.14', version='0.1.15',
description='Collect a dossier on a person by username from a huge number of sites', description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description, long_description=long_description,
long_description_content_type="text/markdown", long_description_content_type="text/markdown",