mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
Added checker of Tor sites
This commit is contained in:
@@ -35,7 +35,7 @@ class ParsingActivator:
|
|||||||
site.headers["authorization"] = f"Bearer {bearer_token}"
|
site.headers["authorization"] = f"Bearer {bearer_token}"
|
||||||
|
|
||||||
|
|
||||||
async def import_aiohttp_cookies(cookiestxt_filename):
|
def import_aiohttp_cookies(cookiestxt_filename):
|
||||||
cookies_obj = MozillaCookieJar(cookiestxt_filename)
|
cookies_obj = MozillaCookieJar(cookiestxt_filename)
|
||||||
cookies_obj.load(ignore_discard=True, ignore_expires=True)
|
cookies_obj.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
|
|||||||
+118
-59
@@ -43,49 +43,96 @@ SUPPORTED_IDS = (
|
|||||||
BAD_CHARS = "#"
|
BAD_CHARS = "#"
|
||||||
|
|
||||||
|
|
||||||
async def get_response(request_future, logger) -> Tuple[str, int, Optional[CheckError]]:
|
class SimpleAiohttpChecker:
|
||||||
html_text = None
|
def __init__(self, *args, **kwargs):
|
||||||
status_code = 0
|
proxy = kwargs.get('proxy')
|
||||||
error: Optional[CheckError] = CheckError("Unknown")
|
cookie_jar = kwargs.get('cookie_jar')
|
||||||
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
|
||||||
try:
|
# make http client session
|
||||||
response = await request_future
|
connector = (
|
||||||
|
ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
|
||||||
|
)
|
||||||
|
connector.verify_ssl = False
|
||||||
|
self.session = aiohttp.ClientSession(
|
||||||
|
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||||
|
)
|
||||||
|
|
||||||
status_code = response.status
|
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||||
response_content = await response.content.read()
|
if method == 'get':
|
||||||
charset = response.charset or "utf-8"
|
request_method = self.session.get
|
||||||
decoded_content = response_content.decode(charset, "ignore")
|
|
||||||
html_text = decoded_content
|
|
||||||
|
|
||||||
error = None
|
|
||||||
if status_code == 0:
|
|
||||||
error = CheckError("Connection lost")
|
|
||||||
|
|
||||||
logger.debug(html_text)
|
|
||||||
|
|
||||||
except asyncio.TimeoutError as e:
|
|
||||||
error = CheckError("Request timeout", str(e))
|
|
||||||
except ClientConnectorError as e:
|
|
||||||
error = CheckError("Connecting failure", str(e))
|
|
||||||
except ServerDisconnectedError as e:
|
|
||||||
error = CheckError("Server disconnected", str(e))
|
|
||||||
except aiohttp.http_exceptions.BadHttpMessage as e:
|
|
||||||
error = CheckError("HTTP", str(e))
|
|
||||||
except proxy_errors.ProxyError as e:
|
|
||||||
error = CheckError("Proxy", str(e))
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
error = CheckError("Interrupted")
|
|
||||||
except Exception as e:
|
|
||||||
# python-specific exceptions
|
|
||||||
if sys.version_info.minor > 6 and (
|
|
||||||
isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError)
|
|
||||||
):
|
|
||||||
error = CheckError("SSL", str(e))
|
|
||||||
else:
|
else:
|
||||||
logger.debug(e, exc_info=True)
|
request_method = self.session.head
|
||||||
error = CheckError("Unexpected", str(e))
|
|
||||||
|
|
||||||
return str(html_text), status_code, error
|
future = request_method(
|
||||||
|
url=url,
|
||||||
|
headers=headers,
|
||||||
|
allow_redirects=allow_redirects,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
return future
|
||||||
|
|
||||||
|
async def close(self):
|
||||||
|
await self.session.close()
|
||||||
|
|
||||||
|
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
|
html_text = None
|
||||||
|
status_code = 0
|
||||||
|
error: Optional[CheckError] = CheckError("Unknown")
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await future
|
||||||
|
|
||||||
|
status_code = response.status
|
||||||
|
response_content = await response.content.read()
|
||||||
|
charset = response.charset or "utf-8"
|
||||||
|
decoded_content = response_content.decode(charset, "ignore")
|
||||||
|
html_text = decoded_content
|
||||||
|
|
||||||
|
error = None
|
||||||
|
if status_code == 0:
|
||||||
|
error = CheckError("Connection lost")
|
||||||
|
|
||||||
|
self.logger.debug(html_text)
|
||||||
|
|
||||||
|
except asyncio.TimeoutError as e:
|
||||||
|
error = CheckError("Request timeout", str(e))
|
||||||
|
except ClientConnectorError as e:
|
||||||
|
error = CheckError("Connecting failure", str(e))
|
||||||
|
except ServerDisconnectedError as e:
|
||||||
|
error = CheckError("Server disconnected", str(e))
|
||||||
|
except aiohttp.http_exceptions.BadHttpMessage as e:
|
||||||
|
error = CheckError("HTTP", str(e))
|
||||||
|
except proxy_errors.ProxyError as e:
|
||||||
|
error = CheckError("Proxy", str(e))
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
error = CheckError("Interrupted")
|
||||||
|
except Exception as e:
|
||||||
|
# python-specific exceptions
|
||||||
|
if sys.version_info.minor > 6 and (
|
||||||
|
isinstance(e, ssl.SSLCertVerificationError)
|
||||||
|
or isinstance(e, ssl.SSLError)
|
||||||
|
):
|
||||||
|
error = CheckError("SSL", str(e))
|
||||||
|
else:
|
||||||
|
self.logger.debug(e, exc_info=True)
|
||||||
|
error = CheckError("Unexpected", str(e))
|
||||||
|
|
||||||
|
return str(html_text), status_code, error
|
||||||
|
|
||||||
|
|
||||||
|
class TorAiohttpChecker(SimpleAiohttpChecker):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
proxy = kwargs.get('proxy')
|
||||||
|
cookie_jar = kwargs.get('cookie_jar')
|
||||||
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
|
||||||
|
connector = ProxyConnector.from_url(proxy)
|
||||||
|
connector.verify_ssl = False
|
||||||
|
self.session = aiohttp.ClientSession(
|
||||||
|
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# TODO: move to separate class
|
# TODO: move to separate class
|
||||||
@@ -322,7 +369,8 @@ def make_site_result(
|
|||||||
# workaround to prevent slash errors
|
# workaround to prevent slash errors
|
||||||
url = re.sub("(?<!:)/+", "/", url)
|
url = re.sub("(?<!:)/+", "/", url)
|
||||||
|
|
||||||
session = options['session']
|
# always clearweb_checker for now
|
||||||
|
checker = options["checkers"][site.network]
|
||||||
|
|
||||||
# site check is disabled
|
# site check is disabled
|
||||||
if site.disabled and not options['forced']:
|
if site.disabled and not options['forced']:
|
||||||
@@ -381,12 +429,12 @@ def make_site_result(
|
|||||||
# In most cases when we are detecting by status code,
|
# In most cases when we are detecting by status code,
|
||||||
# it is not necessary to get the entire body: we can
|
# it is not necessary to get the entire body: we can
|
||||||
# detect fine with just the HEAD response.
|
# detect fine with just the HEAD response.
|
||||||
request_method = session.head
|
request_method = 'head'
|
||||||
else:
|
else:
|
||||||
# Either this detect method needs the content associated
|
# Either this detect method needs the content associated
|
||||||
# with the GET response, or this specific website will
|
# with the GET response, or this specific website will
|
||||||
# not respond properly unless we request the whole page.
|
# not respond properly unless we request the whole page.
|
||||||
request_method = session.get
|
request_method = 'get'
|
||||||
|
|
||||||
if site.check_type == "response_url":
|
if site.check_type == "response_url":
|
||||||
# Site forwards request to a different URL if username not
|
# Site forwards request to a different URL if username not
|
||||||
@@ -398,7 +446,8 @@ def make_site_result(
|
|||||||
# The final result of the request will be what is available.
|
# The final result of the request will be what is available.
|
||||||
allow_redirects = True
|
allow_redirects = True
|
||||||
|
|
||||||
future = request_method(
|
future = checker.prepare(
|
||||||
|
method=request_method,
|
||||||
url=url_probe,
|
url=url_probe,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
allow_redirects=allow_redirects,
|
allow_redirects=allow_redirects,
|
||||||
@@ -407,6 +456,7 @@ def make_site_result(
|
|||||||
|
|
||||||
# Store future request object in the results object
|
# Store future request object in the results object
|
||||||
results_site["future"] = future
|
results_site["future"] = future
|
||||||
|
results_site["checker"] = checker
|
||||||
|
|
||||||
return results_site
|
return results_site
|
||||||
|
|
||||||
@@ -419,7 +469,9 @@ async def check_site_for_username(
|
|||||||
if not future:
|
if not future:
|
||||||
return site.name, default_result
|
return site.name, default_result
|
||||||
|
|
||||||
response = await get_response(request_future=future, logger=logger)
|
checker = default_result["checker"]
|
||||||
|
|
||||||
|
response = await checker.check(future=future)
|
||||||
|
|
||||||
response_result = process_site_result(
|
response_result = process_site_result(
|
||||||
response, query_notify, logger, default_result, site
|
response, query_notify, logger, default_result, site
|
||||||
@@ -430,9 +482,9 @@ async def check_site_for_username(
|
|||||||
return site.name, response_result
|
return site.name, response_result
|
||||||
|
|
||||||
|
|
||||||
async def debug_ip_request(session, logger):
|
async def debug_ip_request(checker, logger):
|
||||||
future = session.get(url="https://icanhazip.com")
|
future = checker.prepare(url="https://icanhazip.com")
|
||||||
ip, status, check_error = await get_response(future, logger)
|
ip, status, check_error = await checker.check(future)
|
||||||
if ip:
|
if ip:
|
||||||
logger.debug(f"My IP is: {ip.strip()}")
|
logger.debug(f"My IP is: {ip.strip()}")
|
||||||
else:
|
else:
|
||||||
@@ -456,6 +508,7 @@ async def maigret(
|
|||||||
logger,
|
logger,
|
||||||
query_notify=None,
|
query_notify=None,
|
||||||
proxy=None,
|
proxy=None,
|
||||||
|
tor_proxy=None,
|
||||||
timeout=3,
|
timeout=3,
|
||||||
is_parsing_enabled=False,
|
is_parsing_enabled=False,
|
||||||
id_type="username",
|
id_type="username",
|
||||||
@@ -508,23 +561,24 @@ async def maigret(
|
|||||||
|
|
||||||
query_notify.start(username, id_type)
|
query_notify.start(username, id_type)
|
||||||
|
|
||||||
# make http client session
|
|
||||||
connector = (
|
|
||||||
ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
|
|
||||||
)
|
|
||||||
connector.verify_ssl = False
|
|
||||||
|
|
||||||
cookie_jar = None
|
cookie_jar = None
|
||||||
if cookies:
|
if cookies:
|
||||||
logger.debug(f"Using cookies jar file {cookies}")
|
logger.debug(f"Using cookies jar file {cookies}")
|
||||||
cookie_jar = await import_aiohttp_cookies(cookies)
|
cookie_jar = import_aiohttp_cookies(cookies)
|
||||||
|
|
||||||
session = aiohttp.ClientSession(
|
clearweb_checker = SimpleAiohttpChecker(
|
||||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
proxy=proxy, cookie_jar=cookie_jar, logger=logger
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
tor_checker = Mock()
|
||||||
|
if tor_proxy:
|
||||||
|
tor_checker = TorAiohttpChecker( # type: ignore
|
||||||
|
proxy=tor_proxy, cookie_jar=cookie_jar, logger=logger
|
||||||
|
)
|
||||||
|
|
||||||
if logger.level == logging.DEBUG:
|
if logger.level == logging.DEBUG:
|
||||||
await debug_ip_request(session, logger)
|
await debug_ip_request(clearweb_checker, logger)
|
||||||
|
|
||||||
# setup parallel executor
|
# setup parallel executor
|
||||||
executor: Optional[AsyncExecutor] = None
|
executor: Optional[AsyncExecutor] = None
|
||||||
@@ -538,7 +592,10 @@ async def maigret(
|
|||||||
# make options objects for all the requests
|
# make options objects for all the requests
|
||||||
options: QueryOptions = {}
|
options: QueryOptions = {}
|
||||||
options["cookies"] = cookie_jar
|
options["cookies"] = cookie_jar
|
||||||
options["session"] = session
|
options["checkers"] = {
|
||||||
|
'': clearweb_checker,
|
||||||
|
'tor': tor_checker,
|
||||||
|
}
|
||||||
options["parsing"] = is_parsing_enabled
|
options["parsing"] = is_parsing_enabled
|
||||||
options["timeout"] = timeout
|
options["timeout"] = timeout
|
||||||
options["id_type"] = id_type
|
options["id_type"] = id_type
|
||||||
@@ -591,7 +648,9 @@ async def maigret(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# closing http client session
|
# closing http client session
|
||||||
await session.close()
|
await clearweb_checker.close()
|
||||||
|
if tor_proxy:
|
||||||
|
await tor_checker.close()
|
||||||
|
|
||||||
# notify caller that all queries are finished
|
# notify caller that all queries are finished
|
||||||
query_notify.finish()
|
query_notify.finish()
|
||||||
|
|||||||
@@ -238,6 +238,13 @@ def setup_arguments_parser():
|
|||||||
default=None,
|
default=None,
|
||||||
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--tor-proxy",
|
||||||
|
metavar='TOR_PROXY_URL',
|
||||||
|
action="store",
|
||||||
|
default='socks5://127.0.0.1:9050',
|
||||||
|
help="Specify URL of your Tor gateway. Default is socks5://127.0.0.1:9050",
|
||||||
|
)
|
||||||
|
|
||||||
filter_group = parser.add_argument_group(
|
filter_group = parser.add_argument_group(
|
||||||
'Site filtering', 'Options to set site search scope'
|
'Site filtering', 'Options to set site search scope'
|
||||||
@@ -584,6 +591,7 @@ async def main():
|
|||||||
site_dict=dict(sites_to_check),
|
site_dict=dict(sites_to_check),
|
||||||
query_notify=query_notify,
|
query_notify=query_notify,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
|
tor_proxy=args.tor_proxy,
|
||||||
timeout=args.timeout,
|
timeout=args.timeout,
|
||||||
is_parsing_enabled=parsing_enabled,
|
is_parsing_enabled=parsing_enabled,
|
||||||
id_type=id_type,
|
id_type=id_type,
|
||||||
|
|||||||
+3
-2
@@ -281,8 +281,9 @@ def generate_json_report(username: str, results: dict, file, report_type):
|
|||||||
data = dict(site_result)
|
data = dict(site_result)
|
||||||
data["status"] = data["status"].json()
|
data["status"] = data["status"].json()
|
||||||
data["site"] = data["site"].json
|
data["site"] = data["site"].json
|
||||||
if "future" in data:
|
for field in ["future", "checker"]:
|
||||||
del data["future"]
|
if field in data:
|
||||||
|
del data[field]
|
||||||
|
|
||||||
if is_report_per_line:
|
if is_report_per_line:
|
||||||
data["sitename"] = sitename
|
data["sitename"] = sitename
|
||||||
|
|||||||
@@ -27744,6 +27744,23 @@
|
|||||||
"tags": [
|
"tags": [
|
||||||
"business"
|
"business"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"HiddenAnswers": {
|
||||||
|
"tags": [
|
||||||
|
"tor"
|
||||||
|
],
|
||||||
|
"network": "tor",
|
||||||
|
"url": "http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion/user/{username}",
|
||||||
|
"urlMain": "http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion",
|
||||||
|
"usernameClaimed": "theredqueen",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"absenceStrs": [
|
||||||
|
"Page not found"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"qa-part-form-profile"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|||||||
@@ -65,6 +65,7 @@ SUPPORTED_TAGS = [
|
|||||||
"review",
|
"review",
|
||||||
"bookmarks",
|
"bookmarks",
|
||||||
"design",
|
"design",
|
||||||
|
"tor",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -122,6 +123,8 @@ class MaigretSite:
|
|||||||
alexa_rank = None
|
alexa_rank = None
|
||||||
source = None
|
source = None
|
||||||
|
|
||||||
|
network = ''
|
||||||
|
|
||||||
def __init__(self, name, information):
|
def __init__(self, name, information):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.url_subpath = ""
|
self.url_subpath = ""
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ async def test_import_aiohttp_cookies():
|
|||||||
with open(cookies_filename, 'w') as f:
|
with open(cookies_filename, 'w') as f:
|
||||||
f.write(COOKIES_TXT)
|
f.write(COOKIES_TXT)
|
||||||
|
|
||||||
cookie_jar = await import_aiohttp_cookies(cookies_filename)
|
cookie_jar = import_aiohttp_cookies(cookies_filename)
|
||||||
assert list(cookie_jar._cookies.keys()) == ['xss.is', 'httpbin.org']
|
assert list(cookie_jar._cookies.keys()) == ['xss.is', 'httpbin.org']
|
||||||
|
|
||||||
url = 'https://httpbin.org/cookies'
|
url = 'https://httpbin.org/cookies'
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ def site_result_except(server, username, **kwargs):
|
|||||||
server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
|
server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_checking_by_status_code(httpserver, local_test_db):
|
async def test_checking_by_status_code(httpserver, local_test_db):
|
||||||
sites_dict = local_test_db.sites_dict
|
sites_dict = local_test_db.sites_dict
|
||||||
@@ -23,6 +24,7 @@ async def test_checking_by_status_code(httpserver, local_test_db):
|
|||||||
assert result['StatusCode']['status'].is_found() is False
|
assert result['StatusCode']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_checking_by_message_positive_full(httpserver, local_test_db):
|
async def test_checking_by_message_positive_full(httpserver, local_test_db):
|
||||||
sites_dict = local_test_db.sites_dict
|
sites_dict = local_test_db.sites_dict
|
||||||
@@ -37,6 +39,7 @@ async def test_checking_by_message_positive_full(httpserver, local_test_db):
|
|||||||
assert result['Message']['status'].is_found() is False
|
assert result['Message']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_checking_by_message_positive_part(httpserver, local_test_db):
|
async def test_checking_by_message_positive_part(httpserver, local_test_db):
|
||||||
sites_dict = local_test_db.sites_dict
|
sites_dict = local_test_db.sites_dict
|
||||||
@@ -51,6 +54,7 @@ async def test_checking_by_message_positive_part(httpserver, local_test_db):
|
|||||||
assert result['Message']['status'].is_found() is False
|
assert result['Message']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_checking_by_message_negative(httpserver, local_test_db):
|
async def test_checking_by_message_negative(httpserver, local_test_db):
|
||||||
sites_dict = local_test_db.sites_dict
|
sites_dict = local_test_db.sites_dict
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
|||||||
'stats': False,
|
'stats': False,
|
||||||
'tags': '',
|
'tags': '',
|
||||||
'timeout': 30,
|
'timeout': 30,
|
||||||
|
'tor_proxy': 'socks5://127.0.0.1:9050',
|
||||||
'top_sites': 500,
|
'top_sites': 500,
|
||||||
'txt': False,
|
'txt': False,
|
||||||
'use_disabled_sites': False,
|
'use_disabled_sites': False,
|
||||||
|
|||||||
@@ -138,6 +138,7 @@ def test_maigret_results(test_db):
|
|||||||
|
|
||||||
assert results['Reddit'].get('future') is None
|
assert results['Reddit'].get('future') is None
|
||||||
del results['GooglePlayStore']['future']
|
del results['GooglePlayStore']['future']
|
||||||
|
del results['GooglePlayStore']['checker']
|
||||||
|
|
||||||
assert results == RESULTS_EXAMPLE
|
assert results == RESULTS_EXAMPLE
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user