Compare commits

..

14 Commits

Author SHA1 Message Date
soxoj c6661e22ff Merge pull request #72 from soxoj/v0.1.15
Bump to 0.1.15
2021-03-14 20:15:12 +03:00
Soxoj fdb68b5e80 Bump to 0.1.15 2021-03-14 20:11:32 +03:00
soxoj 9fe6b99239 Merge pull request #71 from soxoj/html-report-img-fix
Fixed HTML report images hiding for small screens + some minor fixes
2021-03-14 17:31:12 +03:00
Soxoj b9d303fde3 Fixed HTML report images hiding for small screens + some minor fixes 2021-03-14 16:15:31 +03:00
soxoj d29e88d96f Merge pull request #70 from soxoj/extracting-flag
Added separate `no-extracting` flag to rule page parsing
2021-03-14 13:22:29 +03:00
Soxoj 731a8e01f9 Added separate no-extracting flag to rule page parsing 2021-03-14 13:03:29 +03:00
soxoj cf7acfd8c8 Merge pull request #69 from soxoj/tiktok-fix
TikTok fixes
2021-03-13 00:02:25 +03:00
soxoj 9e6bd05acc Merge pull request #68 from soxoj/ssl-error-catching
Fixed catching of python-specific exception
2021-03-13 00:00:45 +03:00
Soxoj 6ea1dc33f7 TikTok fixes 2021-03-12 23:58:46 +03:00
Soxoj d5bc92d26a Fixed catching of python-specific exception 2021-03-12 23:34:59 +03:00
soxoj f7263c9b3c Merge pull request #67 from soxoj/fp-fixes
Some false positives fixes
2021-03-12 23:31:54 +03:00
Soxoj e6f82a8ba3 Some false positives fixes 2021-03-12 22:53:53 +03:00
soxoj ba7a38092c Merge pull request #65 from soxoj/dependabot/pip/aiohttp-3.7.4
Bump aiohttp from 3.7.3 to 3.7.4
2021-02-26 22:06:04 +03:00
dependabot[bot] 92a1677213 Bump aiohttp from 3.7.3 to 3.7.4
Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.7.3 to 3.7.4.
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.7.3...v3.7.4)

Signed-off-by: dependabot[bot] <support@github.com>
2021-02-26 03:07:44 +00:00
8 changed files with 63 additions and 33 deletions
+5
View File
@@ -2,6 +2,11 @@
## [Unreleased]
## [0.1.15] - 2021-03-14
* improved HTML reports
* fixed Python 3.6-specific error
* false positives fixes
## [0.1.14] - 2021-02-25
* added JSON export formats
* improved tags markup
-4
View File
@@ -72,10 +72,6 @@ docker run maigret user
[PDF report](./static/report_alexaimephotographycars.pdf), [HTML report](https://htmlpreview.github.io/?https://raw.githubusercontent.com/soxoj/maigret/main/static/report_alexaimephotographycars.html)
```bash
maigret alexaimephotographycars
```
![animation of recursive search](./static/recursive_search.svg)
![HTML report screenshot](./static/report_alexaimephotography_html_screenshot.png)
+16 -10
View File
@@ -2,6 +2,7 @@ import asyncio
import logging
import re
import ssl
import sys
import aiohttp
import tqdm.asyncio
@@ -61,9 +62,6 @@ async def get_response(request_future, site_name, logger):
except asyncio.TimeoutError as errt:
error_text = "Timeout Error"
expection_text = str(errt)
except (ssl.SSLCertVerificationError, ssl.SSLError) as err:
error_text = "SSL Error"
expection_text = str(err)
except aiohttp.client_exceptions.ClientConnectorError as err:
error_text = "Error Connecting"
expection_text = str(err)
@@ -74,10 +72,16 @@ async def get_response(request_future, site_name, logger):
error_text = "Proxy Error"
expection_text = str(err)
except Exception as err:
logger.warning(f'Unhandled error while requesting {site_name}: {err}')
logger.debug(err, exc_info=True)
error_text = "Some Error"
expection_text = str(err)
# Python-version-specific exceptions (ssl.SSLCertVerificationError requires Python 3.7+)
if sys.version_info.minor > 6:
if isinstance(err, ssl.SSLCertVerificationError) or isinstance(err, ssl.SSLError):
error_text = "SSL Error"
expection_text = str(err)
else:
logger.warning(f'Unhandled error while requesting {site_name}: {err}')
logger.debug(err, exc_info=True)
error_text = "Some Error"
expection_text = str(err)
# TODO: return only needed information
return html_text, status_code, error_text, expection_text
@@ -174,6 +178,8 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
activate_fun(site, logger)
except AttributeError:
logger.warning(f'Activation method {method} for site {site.name} not found!')
except Exception as e:
logger.warning(f'Failed activation {method} for site {site.name}: {e}')
# presence flags
# True by default
@@ -289,7 +295,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
async def maigret(username, site_dict, query_notify, logger,
proxy=None, timeout=None, recursive_search=False,
proxy=None, timeout=None, is_parsing_enabled=False,
id_type='username', debug=False, forced=False,
max_connections=100, no_progressbar=False,
cookies=None):
@@ -307,7 +313,7 @@ async def maigret(username, site_dict, query_notify, logger,
proxy -- String indicating the proxy URL
timeout -- Time in seconds to wait before timing out request.
Default is no timeout.
recursive_search -- Search for other usernames in website pages & recursive search by them.
is_parsing_enabled -- Search for other usernames in website pages.
Return Value:
Dictionary containing results from report. Key of dictionary is the name
@@ -364,7 +370,7 @@ async def maigret(username, site_dict, query_notify, logger,
# Record URL of main site and username
results_site['username'] = username
results_site['parsing_enabled'] = recursive_search
results_site['parsing_enabled'] = is_parsing_enabled
results_site['url_main'] = site.url_main
results_site['cookies'] = cookie_jar and cookie_jar.filter_cookies(site.url_main) or None
+8 -3
View File
@@ -17,7 +17,7 @@ from .report import save_csv_report, save_xmind_report, save_html_report, save_p
save_json_report
from .submit import submit_dialog
__version__ = '0.1.14'
__version__ = '0.1.15'
async def main():
@@ -106,7 +106,11 @@ async def main():
)
parser.add_argument("--no-recursion",
action="store_true", dest="disable_recursive_search", default=False,
help="Disable parsing pages for other usernames and recursive search by them."
help="Disable recursive search by additional data extracted from pages."
)
parser.add_argument("--no-extracting",
action="store_true", dest="disable_extracting", default=False,
help="Disable parsing pages for additional data and other usernames."
)
parser.add_argument("--self-check",
action="store_true", default=False,
@@ -203,6 +207,7 @@ async def main():
and u not in args.ignore_ids_list
}
parsing_enabled = not args.disable_extracting
recursive_search_enabled = not args.disable_recursive_search
# Make prompts
@@ -324,7 +329,7 @@ async def main():
query_notify,
proxy=args.proxy,
timeout=args.timeout,
recursive_search=recursive_search_enabled,
is_parsing_enabled=parsing_enabled,
id_type=id_type,
debug=args.verbose,
logger=logger,
+30 -12
View File
@@ -2546,7 +2546,7 @@
"us"
],
"checkType": "message",
"absenceStrs": "Missing page... somebody made a wrong move.",
"absenceStrs": "The page you are looking for doesn\u2019t exist. (404)",
"alexaRank": 221,
"url": "https://www.chess.com/ru/member/{username}",
"urlMain": "https://www.chess.com/ru/",
@@ -9597,6 +9597,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Partyflock": {
"disabled": true,
"tags": [
"in",
"nl"
@@ -12229,7 +12230,7 @@
"music"
],
"headers": {
"authorization": "Bearer BQCPMLJ7MjV66wMUgXl0DUh5j_HVEPhwnncWZXaD1DgRJj_m4s3pO4MGXL5IosKKof1uQ0QwwI7cnGrhQs0"
"authorization": "Bearer BQDb4T33eeZ7nHS1vr_igE5UM5m6l6gI60KcWjvhBmp-uiPDj-2m7IBRif4k1HfF-coCK4oaL0ZUsNIczTA"
},
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -13015,6 +13016,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Thedaftclub": {
"disabled": true,
"tags": [
"us"
],
@@ -13076,15 +13078,19 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"TikTok": {
"tags": [
"global",
"us"
],
"headers": {
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
},
"errors": {
"tiktok-verify-page": "Captcha detected"
},
"checkType": "message",
"absenceStrs": "<title></title>",
"presenseStrs": [
"nickname"
],
"absenceStrs": [
"serverCode\":404"
],
"alexaRank": 132,
"url": "https://www.tiktok.com/@{username}",
"urlMain": "https://www.tiktok.com/",
@@ -13240,7 +13246,12 @@
"ru"
],
"checkType": "message",
"absenceStrs": "<title>, \u0430\u0432\u0442\u043e\u0440 \u043d\u0430 Tproger</title>",
"presenseStrs": [
"<meta property=\"og:url\" content=\"https://tproger.ru/author/"
],
"absenceStrs": [
"<title>404"
],
"alexaRank": 39077,
"url": "https://tproger.ru/author/{username}/",
"urlMain": "https://tproger.ru",
@@ -13535,7 +13546,7 @@
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
"x-guest-token": "1363222310963077123"
"x-guest-token": "1370458260256530436"
},
"errors": {
"Bad guest token": "x-guest-token update required"
@@ -13551,7 +13562,9 @@
},
"urlProbe": "https://twitter.com/i/api/graphql/ZRnOhhXPwue_JGILb9TNug/UserByScreenName?variables=%7B%22screen_name%22%3A%22{username}%22%2C%22withHighlightedLabel%22%3Atrue%7D",
"checkType": "message",
"absenceStrs": "Not found",
"absenceStrs": [
" not found"
],
"alexaRank": 56,
"url": "https://twitter.com/{username}",
"urlMain": "https://www.twitter.com/",
@@ -13901,7 +13914,7 @@
"video"
],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTM4NTM0MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.Xd8moX-C5rpQPGmx3BKz2Y9MBHdBfKq2Qo6RK4l3IBc"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTU1Nzg2MDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.znOMYMyDNqfX0D_olrRH7s8r8TP-75vDPPznai82ZKY"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -13951,6 +13964,9 @@
"x-tool": "vt-ui-main",
"x-vt-anti-abuse-header": "MTM0NTMxNTA3MTItWkc5dWRDQmlaU0JsZG1scy0xNjA3NDMzMzM3LjI3MQ=="
},
"errors": {
"RecaptchaRequiredError": "Captcha detected"
},
"checkType": "message",
"absenceStrs": "not found",
"alexaRank": 4932,
@@ -23191,6 +23207,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"brute.pw": {
"disabled": true,
"engine": "XenForo",
"alexaRank": 984755,
"urlMain": "https://brute.pw",
@@ -23265,7 +23282,7 @@
"site": {
"absenceStrs": [
"The requested page could not be found.",
"The specified member cannot be found. Please enter a member's entire name.",
"The specified member cannot be found. Please enter a member",
"\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u0432\u0432\u0435\u0434\u0438\u0442\u0435 \u0434\u0440\u0443\u0433\u043e\u0435 \u0438\u043c\u044f.",
"Le membre sp\u00e9cifi\u00e9 est introuvable. Veuillez saisir le nom complet d'un membre.",
"Belirtilen \u00fcye bulunamad\u0131. L\u00fctfen bir \u00fcyenin tam ad\u0131n\u0131 giriniz."
@@ -23327,6 +23344,7 @@
],
"checkType": "message",
"errors": {
"You have been banned": "IP ban",
"The administrator has banned your IP address": "IP ban",
"\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0441\u0435\u0440\u0432\u0435\u0440 \u043f\u0435\u0440\u0435\u0433\u0440\u0443\u0436\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u043f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0437\u0430\u0439\u0442\u0438 \u043f\u043e\u0437\u0436\u0435.": "Server is overloaded"
},
+1 -1
View File
@@ -68,7 +68,7 @@
<div class="row-mb">
<div class="col-md">
<div class="card flex-md-row mb-4 box-shadow h-md-250">
<img class="card-img-right flex-auto d-none d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
<h3 class="mb-0" style="padding-top: 1rem;">
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
+2 -2
View File
@@ -1,4 +1,4 @@
aiohttp==3.7.3
aiohttp==3.7.4
aiohttp-socks==0.5.5
arabic-reshaper==2.1.1
async-timeout==3.0.1
@@ -28,7 +28,7 @@ reportlab==3.5.59
requests>=2.24.0
requests-futures==1.0.0
six==1.15.0
socid-extractor>=0.0.12
socid-extractor>=0.0.13
soupsieve==2.1
stem==1.8.0
torrequest==0.1.0
+1 -1
View File
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
requires = rf.read().splitlines()
setup(name='maigret',
version='0.1.14',
version='0.1.15',
description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description,
long_description_content_type="text/markdown",