mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-17 03:45:36 +00:00
Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9b7f36dc24 | |||
| 05167ad30c | |||
| cee6f0aa43 | |||
| 02cf330e37 | |||
| 5c8f7a3af0 | |||
| 13e1b6f4d1 | |||
| 5179cb56eb | |||
| 1a2c7e944a | |||
| f7eae046a1 | |||
| bdff08cb70 | |||
| a468cb1cd3 | |||
| 0fe933e8a1 | |||
| 5c3de91181 | |||
| 3356463102 | |||
| 7ac03cf5ca | |||
| 4aeacef07d | |||
| 8de1830cf3 | |||
| ba6169659e | |||
| 4a5c5c3f07 | |||
| 4ba7fcb1ff | |||
| a76f95858f |
@@ -2,6 +2,12 @@
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.3.0] - 2021-06-02
|
||||||
|
* added support for Tor and I2P sites
|
||||||
|
* added experimental DNS checking feature
|
||||||
|
* implemented sorting by data points for reports
|
||||||
|
* reports fixes
|
||||||
|
|
||||||
## [0.2.4] - 2021-05-18
|
## [0.2.4] - 2021-05-18
|
||||||
* cli output report
|
* cli output report
|
||||||
* various improvements
|
* various improvements
|
||||||
|
|||||||
@@ -8,12 +8,6 @@
|
|||||||
<a href="https://pypi.org/project/maigret/">
|
<a href="https://pypi.org/project/maigret/">
|
||||||
<img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dw/maigret?style=flat-square">
|
<img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dw/maigret?style=flat-square">
|
||||||
</a>
|
</a>
|
||||||
<a href="https://gitter.im/maigret-osint/community">
|
|
||||||
<img alt="Chat - Gitter" src="./static/chat_gitter.svg" />
|
|
||||||
</a>
|
|
||||||
<a href="https://twitter.com/intent/follow?screen_name=sox0j">
|
|
||||||
<img src="https://img.shields.io/twitter/follow/sox0j?label=Follow%20sox0j&style=social&color=blue" alt="Follow @sox0j" />
|
|
||||||
</a>
|
|
||||||
</p>
|
</p>
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img src="./static/maigret.png" height="200"/>
|
<img src="./static/maigret.png" height="200"/>
|
||||||
@@ -24,9 +18,9 @@
|
|||||||
|
|
||||||
## About
|
## About
|
||||||
|
|
||||||
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
**Maigret** collects a dossier on a person **by username only**, checking for accounts on a huge number of sites and gathering all the available information from web pages. No API keys required. Maigret is an easy-to-use and powerful fork of [Sherlock](https://github.com/sherlock-project/sherlock).
|
||||||
|
|
||||||
Currently, more than 2000 sites are supported ([full list](./sites.md)); by default, the search is launched against the 500 most popular sites, in descending order of popularity.
|
Currently, more than 2000 sites are supported ([full list](./sites.md)); by default, the search is launched against the 500 most popular sites, in descending order of popularity. Checking of Tor sites, I2P sites, and domains (via DNS resolution) is also supported.
|
||||||
|
|
||||||
## Main features
|
## Main features
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
"""Maigret version file"""
|
"""Maigret version file"""
|
||||||
|
|
||||||
__version__ = '0.2.4'
|
__version__ = '0.3.0'
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ class ParsingActivator:
|
|||||||
site.headers["authorization"] = f"Bearer {bearer_token}"
|
site.headers["authorization"] = f"Bearer {bearer_token}"
|
||||||
|
|
||||||
|
|
||||||
async def import_aiohttp_cookies(cookiestxt_filename):
|
def import_aiohttp_cookies(cookiestxt_filename):
|
||||||
cookies_obj = MozillaCookieJar(cookiestxt_filename)
|
cookies_obj = MozillaCookieJar(cookiestxt_filename)
|
||||||
cookies_obj.load(ignore_discard=True, ignore_expires=True)
|
cookies_obj.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
|
|||||||
+170
-28
@@ -9,6 +9,7 @@ from typing import Tuple, Optional, Dict, List
|
|||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
import aiodns
|
||||||
import tqdm.asyncio
|
import tqdm.asyncio
|
||||||
from aiohttp_socks import ProxyConnector
|
from aiohttp_socks import ProxyConnector
|
||||||
from python_socks import _errors as proxy_errors
|
from python_socks import _errors as proxy_errors
|
||||||
@@ -43,13 +44,50 @@ SUPPORTED_IDS = (
|
|||||||
BAD_CHARS = "#"
|
BAD_CHARS = "#"
|
||||||
|
|
||||||
|
|
||||||
async def get_response(request_future, logger) -> Tuple[str, int, Optional[CheckError]]:
|
class CheckerBase:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleAiohttpChecker(CheckerBase):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
proxy = kwargs.get('proxy')
|
||||||
|
cookie_jar = kwargs.get('cookie_jar')
|
||||||
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
|
||||||
|
# make http client session
|
||||||
|
connector = (
|
||||||
|
ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
|
||||||
|
)
|
||||||
|
connector.verify_ssl = False
|
||||||
|
self.session = aiohttp.ClientSession(
|
||||||
|
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||||
|
)
|
||||||
|
|
||||||
|
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||||
|
if method == 'get':
|
||||||
|
request_method = self.session.get
|
||||||
|
else:
|
||||||
|
request_method = self.session.head
|
||||||
|
|
||||||
|
future = request_method(
|
||||||
|
url=url,
|
||||||
|
headers=headers,
|
||||||
|
allow_redirects=allow_redirects,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
return future
|
||||||
|
|
||||||
|
async def close(self):
|
||||||
|
await self.session.close()
|
||||||
|
|
||||||
|
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
html_text = None
|
html_text = None
|
||||||
status_code = 0
|
status_code = 0
|
||||||
error: Optional[CheckError] = CheckError("Unknown")
|
error: Optional[CheckError] = CheckError("Unknown")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await request_future
|
response = await future
|
||||||
|
|
||||||
status_code = response.status
|
status_code = response.status
|
||||||
response_content = await response.content.read()
|
response_content = await response.content.read()
|
||||||
@@ -61,7 +99,7 @@ async def get_response(request_future, logger) -> Tuple[str, int, Optional[Check
|
|||||||
if status_code == 0:
|
if status_code == 0:
|
||||||
error = CheckError("Connection lost")
|
error = CheckError("Connection lost")
|
||||||
|
|
||||||
logger.debug(html_text)
|
self.logger.debug(html_text)
|
||||||
|
|
||||||
except asyncio.TimeoutError as e:
|
except asyncio.TimeoutError as e:
|
||||||
error = CheckError("Request timeout", str(e))
|
error = CheckError("Request timeout", str(e))
|
||||||
@@ -78,16 +116,72 @@ async def get_response(request_future, logger) -> Tuple[str, int, Optional[Check
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
# python-specific exceptions
|
# python-specific exceptions
|
||||||
if sys.version_info.minor > 6 and (
|
if sys.version_info.minor > 6 and (
|
||||||
isinstance(e, ssl.SSLCertVerificationError) or isinstance(e, ssl.SSLError)
|
isinstance(e, ssl.SSLCertVerificationError)
|
||||||
|
or isinstance(e, ssl.SSLError)
|
||||||
):
|
):
|
||||||
error = CheckError("SSL", str(e))
|
error = CheckError("SSL", str(e))
|
||||||
else:
|
else:
|
||||||
logger.debug(e, exc_info=True)
|
self.logger.debug(e, exc_info=True)
|
||||||
error = CheckError("Unexpected", str(e))
|
error = CheckError("Unexpected", str(e))
|
||||||
|
|
||||||
return str(html_text), status_code, error
|
return str(html_text), status_code, error
|
||||||
|
|
||||||
|
|
||||||
|
class ProxiedAiohttpChecker(SimpleAiohttpChecker):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
proxy = kwargs.get('proxy')
|
||||||
|
cookie_jar = kwargs.get('cookie_jar')
|
||||||
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
|
||||||
|
connector = ProxyConnector.from_url(proxy)
|
||||||
|
connector.verify_ssl = False
|
||||||
|
self.session = aiohttp.ClientSession(
|
||||||
|
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AiodnsDomainResolver(CheckerBase):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
self.resolver = aiodns.DNSResolver(loop=loop)
|
||||||
|
|
||||||
|
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||||
|
return self.resolver.query(url, 'A')
|
||||||
|
|
||||||
|
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
|
status = 404
|
||||||
|
error = None
|
||||||
|
text = ''
|
||||||
|
|
||||||
|
try:
|
||||||
|
res = await future
|
||||||
|
text = str(res[0].host)
|
||||||
|
status = 200
|
||||||
|
except aiodns.error.DNSError:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(e, exc_info=True)
|
||||||
|
error = CheckError('DNS resolve error', str(e))
|
||||||
|
|
||||||
|
return text, status, error
|
||||||
|
|
||||||
|
|
||||||
|
class CheckerMock:
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def prepare(self, url, headers=None, allow_redirects=True, timeout=0, method='get'):
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def check(self, future) -> Tuple[str, int, Optional[CheckError]]:
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
return '', 0, None
|
||||||
|
|
||||||
|
async def close(self):
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
# TODO: move to separate class
|
# TODO: move to separate class
|
||||||
def detect_error_page(
|
def detect_error_page(
|
||||||
html_text, status_code, fail_flags, ignore_403
|
html_text, status_code, fail_flags, ignore_403
|
||||||
@@ -322,7 +416,8 @@ def make_site_result(
|
|||||||
# workaround to prevent slash errors
|
# workaround to prevent slash errors
|
||||||
url = re.sub("(?<!:)/+", "/", url)
|
url = re.sub("(?<!:)/+", "/", url)
|
||||||
|
|
||||||
session = options['session']
|
# always clearweb_checker for now
|
||||||
|
checker = options["checkers"][site.protocol]
|
||||||
|
|
||||||
# site check is disabled
|
# site check is disabled
|
||||||
if site.disabled and not options['forced']:
|
if site.disabled and not options['forced']:
|
||||||
@@ -381,12 +476,12 @@ def make_site_result(
|
|||||||
# In most cases when we are detecting by status code,
|
# In most cases when we are detecting by status code,
|
||||||
# it is not necessary to get the entire body: we can
|
# it is not necessary to get the entire body: we can
|
||||||
# detect fine with just the HEAD response.
|
# detect fine with just the HEAD response.
|
||||||
request_method = session.head
|
request_method = 'head'
|
||||||
else:
|
else:
|
||||||
# Either this detect method needs the content associated
|
# Either this detect method needs the content associated
|
||||||
# with the GET response, or this specific website will
|
# with the GET response, or this specific website will
|
||||||
# not respond properly unless we request the whole page.
|
# not respond properly unless we request the whole page.
|
||||||
request_method = session.get
|
request_method = 'get'
|
||||||
|
|
||||||
if site.check_type == "response_url":
|
if site.check_type == "response_url":
|
||||||
# Site forwards request to a different URL if username not
|
# Site forwards request to a different URL if username not
|
||||||
@@ -398,7 +493,8 @@ def make_site_result(
|
|||||||
# The final result of the request will be what is available.
|
# The final result of the request will be what is available.
|
||||||
allow_redirects = True
|
allow_redirects = True
|
||||||
|
|
||||||
future = request_method(
|
future = checker.prepare(
|
||||||
|
method=request_method,
|
||||||
url=url_probe,
|
url=url_probe,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
allow_redirects=allow_redirects,
|
allow_redirects=allow_redirects,
|
||||||
@@ -407,6 +503,7 @@ def make_site_result(
|
|||||||
|
|
||||||
# Store future request object in the results object
|
# Store future request object in the results object
|
||||||
results_site["future"] = future
|
results_site["future"] = future
|
||||||
|
results_site["checker"] = checker
|
||||||
|
|
||||||
return results_site
|
return results_site
|
||||||
|
|
||||||
@@ -419,7 +516,9 @@ async def check_site_for_username(
|
|||||||
if not future:
|
if not future:
|
||||||
return site.name, default_result
|
return site.name, default_result
|
||||||
|
|
||||||
response = await get_response(request_future=future, logger=logger)
|
checker = default_result["checker"]
|
||||||
|
|
||||||
|
response = await checker.check(future=future)
|
||||||
|
|
||||||
response_result = process_site_result(
|
response_result = process_site_result(
|
||||||
response, query_notify, logger, default_result, site
|
response, query_notify, logger, default_result, site
|
||||||
@@ -430,9 +529,9 @@ async def check_site_for_username(
|
|||||||
return site.name, response_result
|
return site.name, response_result
|
||||||
|
|
||||||
|
|
||||||
async def debug_ip_request(session, logger):
|
async def debug_ip_request(checker, logger):
|
||||||
future = session.get(url="https://icanhazip.com")
|
future = checker.prepare(url="https://icanhazip.com")
|
||||||
ip, status, check_error = await get_response(future, logger)
|
ip, status, check_error = await checker.check(future)
|
||||||
if ip:
|
if ip:
|
||||||
logger.debug(f"My IP is: {ip.strip()}")
|
logger.debug(f"My IP is: {ip.strip()}")
|
||||||
else:
|
else:
|
||||||
@@ -456,6 +555,8 @@ async def maigret(
|
|||||||
logger,
|
logger,
|
||||||
query_notify=None,
|
query_notify=None,
|
||||||
proxy=None,
|
proxy=None,
|
||||||
|
tor_proxy=None,
|
||||||
|
i2p_proxy=None,
|
||||||
timeout=3,
|
timeout=3,
|
||||||
is_parsing_enabled=False,
|
is_parsing_enabled=False,
|
||||||
id_type="username",
|
id_type="username",
|
||||||
@@ -465,6 +566,7 @@ async def maigret(
|
|||||||
no_progressbar=False,
|
no_progressbar=False,
|
||||||
cookies=None,
|
cookies=None,
|
||||||
retries=0,
|
retries=0,
|
||||||
|
check_domains=False,
|
||||||
) -> QueryResultWrapper:
|
) -> QueryResultWrapper:
|
||||||
"""Main search func
|
"""Main search func
|
||||||
|
|
||||||
@@ -508,23 +610,36 @@ async def maigret(
|
|||||||
|
|
||||||
query_notify.start(username, id_type)
|
query_notify.start(username, id_type)
|
||||||
|
|
||||||
# make http client session
|
|
||||||
connector = (
|
|
||||||
ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
|
|
||||||
)
|
|
||||||
connector.verify_ssl = False
|
|
||||||
|
|
||||||
cookie_jar = None
|
cookie_jar = None
|
||||||
if cookies:
|
if cookies:
|
||||||
logger.debug(f"Using cookies jar file {cookies}")
|
logger.debug(f"Using cookies jar file {cookies}")
|
||||||
cookie_jar = await import_aiohttp_cookies(cookies)
|
cookie_jar = import_aiohttp_cookies(cookies)
|
||||||
|
|
||||||
session = aiohttp.ClientSession(
|
clearweb_checker = SimpleAiohttpChecker(
|
||||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
proxy=proxy, cookie_jar=cookie_jar, logger=logger
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
tor_checker = CheckerMock()
|
||||||
|
if tor_proxy:
|
||||||
|
tor_checker = ProxiedAiohttpChecker( # type: ignore
|
||||||
|
proxy=tor_proxy, cookie_jar=cookie_jar, logger=logger
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
i2p_checker = CheckerMock()
|
||||||
|
if i2p_proxy:
|
||||||
|
i2p_checker = ProxiedAiohttpChecker( # type: ignore
|
||||||
|
proxy=i2p_proxy, cookie_jar=cookie_jar, logger=logger
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
dns_checker = CheckerMock()
|
||||||
|
if check_domains:
|
||||||
|
dns_checker = AiodnsDomainResolver(logger=logger) # type: ignore
|
||||||
|
|
||||||
if logger.level == logging.DEBUG:
|
if logger.level == logging.DEBUG:
|
||||||
await debug_ip_request(session, logger)
|
await debug_ip_request(clearweb_checker, logger)
|
||||||
|
|
||||||
# setup parallel executor
|
# setup parallel executor
|
||||||
executor: Optional[AsyncExecutor] = None
|
executor: Optional[AsyncExecutor] = None
|
||||||
@@ -538,7 +653,12 @@ async def maigret(
|
|||||||
# make options objects for all the requests
|
# make options objects for all the requests
|
||||||
options: QueryOptions = {}
|
options: QueryOptions = {}
|
||||||
options["cookies"] = cookie_jar
|
options["cookies"] = cookie_jar
|
||||||
options["session"] = session
|
options["checkers"] = {
|
||||||
|
'': clearweb_checker,
|
||||||
|
'tor': tor_checker,
|
||||||
|
'dns': dns_checker,
|
||||||
|
'i2p': i2p_checker,
|
||||||
|
}
|
||||||
options["parsing"] = is_parsing_enabled
|
options["parsing"] = is_parsing_enabled
|
||||||
options["timeout"] = timeout
|
options["timeout"] = timeout
|
||||||
options["id_type"] = id_type
|
options["id_type"] = id_type
|
||||||
@@ -591,7 +711,11 @@ async def maigret(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# closing http client session
|
# closing http client session
|
||||||
await session.close()
|
await clearweb_checker.close()
|
||||||
|
if tor_proxy:
|
||||||
|
await tor_checker.close()
|
||||||
|
if i2p_proxy:
|
||||||
|
await i2p_checker.close()
|
||||||
|
|
||||||
# notify caller that all queries are finished
|
# notify caller that all queries are finished
|
||||||
query_notify.finish()
|
query_notify.finish()
|
||||||
@@ -625,7 +749,13 @@ def timeout_check(value):
|
|||||||
|
|
||||||
|
|
||||||
async def site_self_check(
|
async def site_self_check(
|
||||||
site: MaigretSite, logger, semaphore, db: MaigretDatabase, silent=False
|
site: MaigretSite,
|
||||||
|
logger,
|
||||||
|
semaphore,
|
||||||
|
db: MaigretDatabase,
|
||||||
|
silent=False,
|
||||||
|
tor_proxy=None,
|
||||||
|
i2p_proxy=None,
|
||||||
):
|
):
|
||||||
changes = {
|
changes = {
|
||||||
"disabled": False,
|
"disabled": False,
|
||||||
@@ -649,6 +779,8 @@ async def site_self_check(
|
|||||||
forced=True,
|
forced=True,
|
||||||
no_progressbar=True,
|
no_progressbar=True,
|
||||||
retries=1,
|
retries=1,
|
||||||
|
tor_proxy=tor_proxy,
|
||||||
|
i2p_proxy=i2p_proxy,
|
||||||
)
|
)
|
||||||
|
|
||||||
# don't disable entries with other ids types
|
# don't disable entries with other ids types
|
||||||
@@ -658,6 +790,8 @@ async def site_self_check(
|
|||||||
changes["disabled"] = True
|
changes["disabled"] = True
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
logger.debug(results_dict)
|
||||||
|
|
||||||
result = results_dict[site.name]["status"]
|
result = results_dict[site.name]["status"]
|
||||||
|
|
||||||
site_status = result.status
|
site_status = result.status
|
||||||
@@ -696,7 +830,13 @@ async def site_self_check(
|
|||||||
|
|
||||||
|
|
||||||
async def self_check(
|
async def self_check(
|
||||||
db: MaigretDatabase, site_data: dict, logger, silent=False, max_connections=10
|
db: MaigretDatabase,
|
||||||
|
site_data: dict,
|
||||||
|
logger,
|
||||||
|
silent=False,
|
||||||
|
max_connections=10,
|
||||||
|
tor_proxy=None,
|
||||||
|
i2p_proxy=None,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
sem = asyncio.Semaphore(max_connections)
|
sem = asyncio.Semaphore(max_connections)
|
||||||
tasks = []
|
tasks = []
|
||||||
@@ -708,7 +848,9 @@ async def self_check(
|
|||||||
disabled_old_count = disabled_count(all_sites.values())
|
disabled_old_count = disabled_count(all_sites.values())
|
||||||
|
|
||||||
for _, site in all_sites.items():
|
for _, site in all_sites.items():
|
||||||
check_coro = site_self_check(site, logger, sem, db, silent)
|
check_coro = site_self_check(
|
||||||
|
site, logger, sem, db, silent, tor_proxy, i2p_proxy
|
||||||
|
)
|
||||||
future = asyncio.ensure_future(check_coro)
|
future = asyncio.ensure_future(check_coro)
|
||||||
tasks.append(future)
|
tasks.append(future)
|
||||||
|
|
||||||
|
|||||||
+40
-1
@@ -33,6 +33,7 @@ from .report import (
|
|||||||
SUPPORTED_JSON_REPORT_FORMATS,
|
SUPPORTED_JSON_REPORT_FORMATS,
|
||||||
save_json_report,
|
save_json_report,
|
||||||
get_plaintext_report,
|
get_plaintext_report,
|
||||||
|
sort_report_by_data_points,
|
||||||
)
|
)
|
||||||
from .sites import MaigretDatabase
|
from .sites import MaigretDatabase
|
||||||
from .submit import submit_dialog
|
from .submit import submit_dialog
|
||||||
@@ -238,6 +239,26 @@ def setup_arguments_parser():
|
|||||||
default=None,
|
default=None,
|
||||||
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--tor-proxy",
|
||||||
|
metavar='TOR_PROXY_URL',
|
||||||
|
action="store",
|
||||||
|
default='socks5://127.0.0.1:9050',
|
||||||
|
help="Specify URL of your Tor gateway. Default is socks5://127.0.0.1:9050",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--i2p-proxy",
|
||||||
|
metavar='I2P_PROXY_URL',
|
||||||
|
action="store",
|
||||||
|
default='http://127.0.0.1:4444',
|
||||||
|
help="Specify URL of your I2P gateway. Default is http://127.0.0.1:4444",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--with-domains",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="Enable (experimental) feature of checking domains on usernames.",
|
||||||
|
)
|
||||||
|
|
||||||
filter_group = parser.add_argument_group(
|
filter_group = parser.add_argument_group(
|
||||||
'Site filtering', 'Options to set site search scope'
|
'Site filtering', 'Options to set site search scope'
|
||||||
@@ -420,6 +441,13 @@ def setup_arguments_parser():
|
|||||||
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
|
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
|
||||||
" (one report per username).",
|
" (one report per username).",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--reports-sorting",
|
||||||
|
default='default',
|
||||||
|
choices=('default', 'data'),
|
||||||
|
help="Method of results sorting in reports (default: in order of getting the result)",
|
||||||
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
@@ -508,7 +536,12 @@ async def main():
|
|||||||
if args.self_check:
|
if args.self_check:
|
||||||
print('Maigret sites database self-checking...')
|
print('Maigret sites database self-checking...')
|
||||||
is_need_update = await self_check(
|
is_need_update = await self_check(
|
||||||
db, site_data, logger, max_connections=args.connections
|
db,
|
||||||
|
site_data,
|
||||||
|
logger,
|
||||||
|
max_connections=args.connections,
|
||||||
|
tor_proxy=args.tor_proxy,
|
||||||
|
i2p_proxy=args.i2p_proxy,
|
||||||
)
|
)
|
||||||
if is_need_update:
|
if is_need_update:
|
||||||
if input('Do you want to save changes permanently? [Yn]\n').lower() in (
|
if input('Do you want to save changes permanently? [Yn]\n').lower() in (
|
||||||
@@ -584,6 +617,8 @@ async def main():
|
|||||||
site_dict=dict(sites_to_check),
|
site_dict=dict(sites_to_check),
|
||||||
query_notify=query_notify,
|
query_notify=query_notify,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
|
tor_proxy=args.tor_proxy,
|
||||||
|
i2p_proxy=args.i2p_proxy,
|
||||||
timeout=args.timeout,
|
timeout=args.timeout,
|
||||||
is_parsing_enabled=parsing_enabled,
|
is_parsing_enabled=parsing_enabled,
|
||||||
id_type=id_type,
|
id_type=id_type,
|
||||||
@@ -594,10 +629,14 @@ async def main():
|
|||||||
max_connections=args.connections,
|
max_connections=args.connections,
|
||||||
no_progressbar=args.no_progressbar,
|
no_progressbar=args.no_progressbar,
|
||||||
retries=args.retries,
|
retries=args.retries,
|
||||||
|
check_domains=args.with_domains,
|
||||||
)
|
)
|
||||||
|
|
||||||
notify_about_errors(results, query_notify)
|
notify_about_errors(results, query_notify)
|
||||||
|
|
||||||
|
if args.reports_sorting == "data":
|
||||||
|
results = sort_report_by_data_points(results)
|
||||||
|
|
||||||
general_results.append((username, id_type, results))
|
general_results.append((username, id_type, results))
|
||||||
|
|
||||||
# TODO: tests
|
# TODO: tests
|
||||||
|
|||||||
+35
-9
@@ -36,6 +36,18 @@ def filter_supposed_data(data):
|
|||||||
return filtered_supposed_data
|
return filtered_supposed_data
|
||||||
|
|
||||||
|
|
||||||
|
def sort_report_by_data_points(results):
|
||||||
|
return dict(
|
||||||
|
sorted(
|
||||||
|
results.items(),
|
||||||
|
key=lambda x: len(
|
||||||
|
(x[1].get('status') and x[1]['status'].ids_data or {}).keys()
|
||||||
|
),
|
||||||
|
reverse=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
REPORTS SAVING
|
REPORTS SAVING
|
||||||
"""
|
"""
|
||||||
@@ -243,14 +255,18 @@ def generate_csv_report(username: str, results: dict, csvfile):
|
|||||||
["username", "name", "url_main", "url_user", "exists", "http_status"]
|
["username", "name", "url_main", "url_user", "exists", "http_status"]
|
||||||
)
|
)
|
||||||
for site in results:
|
for site in results:
|
||||||
|
# TODO: fix the reason
|
||||||
|
status = 'Unknown'
|
||||||
|
if "status" in results[site]:
|
||||||
|
status = str(results[site]["status"].status)
|
||||||
writer.writerow(
|
writer.writerow(
|
||||||
[
|
[
|
||||||
username,
|
username,
|
||||||
site,
|
site,
|
||||||
results[site]["url_main"],
|
results[site].get("url_main", ""),
|
||||||
results[site]["url_user"],
|
results[site].get("url_user", ""),
|
||||||
str(results[site]["status"].status),
|
status,
|
||||||
results[site]["http_status"],
|
results[site].get("http_status", 0),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -262,7 +278,10 @@ def generate_txt_report(username: str, results: dict, file):
|
|||||||
# TODO: fix no site data issue
|
# TODO: fix no site data issue
|
||||||
if not dictionary:
|
if not dictionary:
|
||||||
continue
|
continue
|
||||||
if dictionary.get("status").status == QueryStatus.CLAIMED:
|
if (
|
||||||
|
dictionary.get("status")
|
||||||
|
and dictionary["status"].status == QueryStatus.CLAIMED
|
||||||
|
):
|
||||||
exists_counter += 1
|
exists_counter += 1
|
||||||
file.write(dictionary["url_user"] + "\n")
|
file.write(dictionary["url_user"] + "\n")
|
||||||
file.write(f"Total Websites Username Detected On : {exists_counter}")
|
file.write(f"Total Websites Username Detected On : {exists_counter}")
|
||||||
@@ -275,14 +294,18 @@ def generate_json_report(username: str, results: dict, file, report_type):
|
|||||||
for sitename in results:
|
for sitename in results:
|
||||||
site_result = results[sitename]
|
site_result = results[sitename]
|
||||||
# TODO: fix no site data issue
|
# TODO: fix no site data issue
|
||||||
if not site_result or site_result.get("status").status != QueryStatus.CLAIMED:
|
if not site_result or not site_result.get("status"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if site_result["status"].status != QueryStatus.CLAIMED:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
data = dict(site_result)
|
data = dict(site_result)
|
||||||
data["status"] = data["status"].json()
|
data["status"] = data["status"].json()
|
||||||
data["site"] = data["site"].json
|
data["site"] = data["site"].json
|
||||||
if "future" in data:
|
for field in ["future", "checker"]:
|
||||||
del data["future"]
|
if field in data:
|
||||||
|
del data[field]
|
||||||
|
|
||||||
if is_report_per_line:
|
if is_report_per_line:
|
||||||
data["sitename"] = sitename
|
data["sitename"] = sitename
|
||||||
@@ -331,8 +354,11 @@ def design_xmind_sheet(sheet, username, results):
|
|||||||
|
|
||||||
for website_name in results:
|
for website_name in results:
|
||||||
dictionary = results[website_name]
|
dictionary = results[website_name]
|
||||||
|
if not dictionary:
|
||||||
|
continue
|
||||||
result_status = dictionary.get("status")
|
result_status = dictionary.get("status")
|
||||||
if result_status.status != QueryStatus.CLAIMED:
|
# TODO: fix the reason
|
||||||
|
if not result_status or result_status.status != QueryStatus.CLAIMED:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
stripped_tags = list(map(lambda x: x.strip(), result_status.tags))
|
stripped_tags = list(map(lambda x: x.strip(), result_status.tags))
|
||||||
|
|||||||
+519
-24
@@ -3438,6 +3438,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"Destructoid": {
|
"Destructoid": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
@@ -5795,19 +5796,6 @@
|
|||||||
"usernameClaimed": "adam",
|
"usernameClaimed": "adam",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"Giphy": {
|
|
||||||
"tags": [
|
|
||||||
"photo",
|
|
||||||
"us",
|
|
||||||
"video"
|
|
||||||
],
|
|
||||||
"checkType": "status_code",
|
|
||||||
"alexaRank": 653,
|
|
||||||
"urlMain": "https://giphy.com/",
|
|
||||||
"url": "https://giphy.com/{username}",
|
|
||||||
"usernameClaimed": "blue",
|
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
|
||||||
},
|
|
||||||
"GipsysTeam": {
|
"GipsysTeam": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"ru"
|
"ru"
|
||||||
@@ -8230,6 +8218,7 @@
|
|||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
|
"\u0417\u0430\u043f\u0440\u043e\u0448\u0435\u043d\u043d\u0430\u044f \u0432\u0430\u043c\u0438 \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0430 \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430.",
|
||||||
"\u0414\u0430\u043d\u043d\u044b\u0435 \u043e \u0432\u044b\u0431\u0440\u0430\u043d\u043d\u043e\u043c \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u0435 \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u044e\u0442",
|
"\u0414\u0430\u043d\u043d\u044b\u0435 \u043e \u0432\u044b\u0431\u0440\u0430\u043d\u043d\u043e\u043c \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u0435 \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u044e\u0442",
|
||||||
"Information on selected user does not exist"
|
"Information on selected user does not exist"
|
||||||
],
|
],
|
||||||
@@ -13035,7 +13024,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"authorization": "Bearer BQBFMMVu1dPwJPlnzUteNyF8xlZy7545QnhHizEHWEUQGQrRLznY5k9B9v7JdAsL-wU-Tcep51JTqBesKKY"
|
"authorization": "Bearer BQCypIuUtz7zDFov8xN86mj1BelLf7Apf9WBaC5yYfNkmGe4r7Hz4Awp6dqPuCAP9K9F5yYtjbyZX_vlr4I"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||||
@@ -13329,6 +13318,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"checkType": "response_url",
|
"checkType": "response_url",
|
||||||
|
"regexCheck": "^[^-]+$",
|
||||||
"alexaRank": 1499,
|
"alexaRank": 1499,
|
||||||
"urlMain": "https://studfile.net",
|
"urlMain": "https://studfile.net",
|
||||||
"url": "https://studfile.net/users/{username}/",
|
"url": "https://studfile.net/users/{username}/",
|
||||||
@@ -13811,16 +13801,13 @@
|
|||||||
},
|
},
|
||||||
"TheSimsResource": {
|
"TheSimsResource": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"de",
|
"gaming"
|
||||||
"gaming",
|
|
||||||
"it",
|
|
||||||
"us"
|
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"checkType": "response_url",
|
||||||
"alexaRank": 12278,
|
"alexaRank": 12278,
|
||||||
"urlMain": "https://www.thesimsresource.com/",
|
"urlMain": "https://www.thesimsresource.com/",
|
||||||
"url": "https://www.thesimsresource.com/members/{username}/",
|
"url": "https://www.thesimsresource.com/members/{username}/",
|
||||||
"usernameClaimed": "adam",
|
"usernameClaimed": "DanSimsFantasy",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"TheStudentRoom": {
|
"TheStudentRoom": {
|
||||||
@@ -14463,7 +14450,7 @@
|
|||||||
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
"sec-ch-ua": "Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
|
||||||
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
|
||||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
|
||||||
"x-guest-token": "1394397954526560260"
|
"x-guest-token": "1400174453577900043"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Bad guest token": "x-guest-token update required"
|
"Bad guest token": "x-guest-token update required"
|
||||||
@@ -14870,7 +14857,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjEyODYyODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.mxLdaOuP260WcxBvhadTTUQyn8t75pWNhTmtZLFS-W4"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjI2NjcxMjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.V4VVbLzNwPU21rNP5moSxrPcPw--C7_Qz9VHgcJc1CA"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
@@ -15834,6 +15821,32 @@
|
|||||||
"usernameClaimed": "yandex",
|
"usernameClaimed": "yandex",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
|
"YandexCollections API (by yandex_public_id)": {
|
||||||
|
"tags": [
|
||||||
|
"ru",
|
||||||
|
"sharing"
|
||||||
|
],
|
||||||
|
"type": "yandex_public_id",
|
||||||
|
"headers": {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
|
||||||
|
},
|
||||||
|
"errors": {
|
||||||
|
"action=\"/checkcaptcha\" onsubmit": "Captcha detected, use proxy/vpn"
|
||||||
|
},
|
||||||
|
"checkType": "message",
|
||||||
|
"presenseStrs": [
|
||||||
|
"public_id"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"cl-not-found-content__title"
|
||||||
|
],
|
||||||
|
"alexaRank": 50,
|
||||||
|
"urlMain": "https://yandex.ru/collections/",
|
||||||
|
"url": "https://yandex.ru/collections/api/users/{username}/",
|
||||||
|
"source": "Yandex",
|
||||||
|
"usernameClaimed": "hx0aur0arkyebkxztq8pr8b4dg",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
"YandexMarket": {
|
"YandexMarket": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"ru"
|
"ru"
|
||||||
@@ -16275,8 +16288,8 @@
|
|||||||
},
|
},
|
||||||
"author.today": {
|
"author.today": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"ru",
|
"reading",
|
||||||
"reading"
|
"ru"
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"checkType": "status_code",
|
||||||
"alexaRank": 12218,
|
"alexaRank": 12218,
|
||||||
@@ -27744,6 +27757,488 @@
|
|||||||
"tags": [
|
"tags": [
|
||||||
"business"
|
"business"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"HiddenAnswers": {
|
||||||
|
"tags": [
|
||||||
|
"tor"
|
||||||
|
],
|
||||||
|
"protocol": "tor",
|
||||||
|
"url": "http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion/user/{username}",
|
||||||
|
"urlMain": "http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion",
|
||||||
|
"usernameClaimed": "theredqueen",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"absenceStrs": [
|
||||||
|
"Page not found"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"qa-part-form-profile"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
".com": {
|
||||||
|
"protocol": "dns",
|
||||||
|
"url": "{username}.com",
|
||||||
|
"urlMain": "{username}.com",
|
||||||
|
"usernameClaimed": "soxoj",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "status_code"
|
||||||
|
},
|
||||||
|
".pro": {
|
||||||
|
"protocol": "dns",
|
||||||
|
"url": "{username}.pro",
|
||||||
|
"urlMain": "{username}.pro",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "status_code"
|
||||||
|
},
|
||||||
|
".me": {
|
||||||
|
"protocol": "dns",
|
||||||
|
"url": "{username}.me",
|
||||||
|
"urlMain": "{username}.me",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "status_code"
|
||||||
|
},
|
||||||
|
".biz": {
|
||||||
|
"protocol": "dns",
|
||||||
|
"url": "{username}.biz",
|
||||||
|
"urlMain": "{username}.biz",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "status_code"
|
||||||
|
},
|
||||||
|
".email": {
|
||||||
|
"protocol": "dns",
|
||||||
|
"url": "{username}.email",
|
||||||
|
"urlMain": "{username}.email",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "status_code"
|
||||||
|
},
|
||||||
|
".guru": {
|
||||||
|
"protocol": "dns",
|
||||||
|
"url": "{username}.guru",
|
||||||
|
"urlMain": "{username}.guru",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "status_code"
|
||||||
|
},
|
||||||
|
".ddns.net": {
|
||||||
|
"protocol": "dns",
|
||||||
|
"url": "{username}.ddns.net",
|
||||||
|
"urlMain": "{username}.ddns.net",
|
||||||
|
"usernameClaimed": "repack",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "status_code"
|
||||||
|
},
|
||||||
|
"Ameblo": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"THROW_NOT_FOUND_EXCEPTION"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile"
|
||||||
|
],
|
||||||
|
"url": "https://ameblo.jp/{username}",
|
||||||
|
"urlMain": "https://ameblo.jp",
|
||||||
|
"usernameClaimed": "senpai",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 374,
|
||||||
|
"tags": [
|
||||||
|
"blog",
|
||||||
|
"jp"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Observable": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"<title>Observable</title>"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile_email"
|
||||||
|
],
|
||||||
|
"url": "https://observablehq.com/@{username}",
|
||||||
|
"urlMain": "https://observablehq.com",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 25120,
|
||||||
|
"tags": [
|
||||||
|
"sharing"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"galactictalk.org": {
|
||||||
|
"urlMain": "https://galactictalk.org",
|
||||||
|
"engine": "Flarum",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"discuss.bootstrapped.fm": {
|
||||||
|
"urlMain": "https://discuss.bootstrapped.fm",
|
||||||
|
"engine": "Discourse",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"discourse.mozilla.org": {
|
||||||
|
"urlMain": "https://discourse.mozilla.org",
|
||||||
|
"engine": "Discourse",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"ipinit.in": {
|
||||||
|
"urlMain": "http://ipinit.in",
|
||||||
|
"engine": "Wordpress/Author",
|
||||||
|
"usernameClaimed": "god",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"donorbox": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"/orgs/new"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"donation_first_name"
|
||||||
|
],
|
||||||
|
"url": "https://donorbox.org/{username}",
|
||||||
|
"urlMain": "https://donorbox.org",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 19812,
|
||||||
|
"tags": [
|
||||||
|
"finance"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"telescope.ac": {
|
||||||
|
"absenceStrs": [
|
||||||
|
">Not found</h1>"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"og:site_name",
|
||||||
|
"alternate",
|
||||||
|
"article",
|
||||||
|
"project",
|
||||||
|
"og:title"
|
||||||
|
],
|
||||||
|
"url": "https://telescope.ac/{username}",
|
||||||
|
"urlMain": "https://telescope.ac",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 167480,
|
||||||
|
"tags": [
|
||||||
|
"blog"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"sessionize.com": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"Page Not Found</h3>"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"role=",
|
||||||
|
"filter"
|
||||||
|
],
|
||||||
|
"url": "https://sessionize.com/{username}/",
|
||||||
|
"urlMain": "https://sessionize.com",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 132025,
|
||||||
|
"tags": [
|
||||||
|
"business"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"getmakerlog.com": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"<title>Home | Makerlog</title>"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile",
|
||||||
|
"first_name",
|
||||||
|
"username\\"
|
||||||
|
],
|
||||||
|
"url": "https://getmakerlog.com/@{username}",
|
||||||
|
"urlMain": "https://getmakerlog.com",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 224990,
|
||||||
|
"tags": [
|
||||||
|
"business"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"giphy.com": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"404 Not Found"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"Giphy",
|
||||||
|
"al:ios:app_name"
|
||||||
|
],
|
||||||
|
"url": "https://giphy.com/channel/{username}",
|
||||||
|
"urlMain": "https://giphy.com",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 695,
|
||||||
|
"tags": [
|
||||||
|
"video"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"clarity.fm": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"On Demand Business Advice</title"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"user-profile-image"
|
||||||
|
],
|
||||||
|
"url": "https://clarity.fm/{username}",
|
||||||
|
"urlMain": "https://clarity.fm",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 31250,
|
||||||
|
"tags": [
|
||||||
|
"business"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"videohive.net": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"Page Not Found | VideoHive"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"user-info",
|
||||||
|
"user-info__badges"
|
||||||
|
],
|
||||||
|
"url": "https://videohive.net/user/{username}",
|
||||||
|
"urlMain": "https://videohive.net",
|
||||||
|
"usernameClaimed": "theabbie",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 4270,
|
||||||
|
"tags": [
|
||||||
|
"video"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"boards.theforce.net": {
|
||||||
|
"urlMain": "https://boards.theforce.net",
|
||||||
|
"engine": "XenForo",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"aussiehomebrewer.com": {
|
||||||
|
"urlMain": "https://aussiehomebrewer.com",
|
||||||
|
"engine": "XenForo",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum-ukraina.net": {
|
||||||
|
"urlMain": "https://forum-ukraina.net",
|
||||||
|
"engine": "XenForo",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum-history.ru": {
|
||||||
|
"urlMain": "http://forum-history.ru",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "red",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.vn.ua": {
|
||||||
|
"urlMain": "http://forum.vn.ua",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.bestflowers.ru": {
|
||||||
|
"urlMain": "https://forum.bestflowers.ru",
|
||||||
|
"engine": "XenForo",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.alconar.ru": {
|
||||||
|
"urlMain": "https://forum.alconar.ru",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"usernameClaimed": "admin",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.lancerx.ru": {
|
||||||
|
"urlMain": "https://forum.lancerx.ru",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"mfarmer.ru": {
|
||||||
|
"urlMain": "http://www.mfarmer.ru",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.league17.ru": {
|
||||||
|
"urlMain": "https://forum.league17.ru",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"krskforum.com": {
|
||||||
|
"urlMain": "https://krskforum.com",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"usernameClaimed": "admin",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.rarib.ag": {
|
||||||
|
"urlMain": "https://forum.rarib.ag",
|
||||||
|
"engine": "phpBB",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.oneclickchicks.com": {
|
||||||
|
"urlMain": "https://forum.oneclickchicks.com",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.trade-print.ru": {
|
||||||
|
"urlMain": "http://forum.trade-print.ru",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.setcombg.com": {
|
||||||
|
"urlMain": "https://forum.setcombg.com",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"vw-bus.ru": {
|
||||||
|
"urlMain": "https://vw-bus.ru",
|
||||||
|
"engine": "phpBB",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.ya1.ru": {
|
||||||
|
"urlMain": "https://forum.ya1.ru",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.wordreference.com": {
|
||||||
|
"urlMain": "https://forum.wordreference.com",
|
||||||
|
"engine": "XenForo",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forums.zooclub.ru": {
|
||||||
|
"urlMain": "https://forums.zooclub.ru",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"homebrewtalk.com": {
|
||||||
|
"urlMain": "https://www.homebrewtalk.com",
|
||||||
|
"engine": "XenForo",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"navimba.com": {
|
||||||
|
"urlMain": "https://navimba.com",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"usernameClaimed": "admin",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"niva-club.net": {
|
||||||
|
"urlMain": "https://www.niva-club.net",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"usernameClaimed": "red",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"red-forum.com": {
|
||||||
|
"urlMain": "https://red-forum.com",
|
||||||
|
"engine": "XenForo",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"scaleforum.ru": {
|
||||||
|
"urlMain": "http://www.scaleforum.ru",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"sign-forum.ru": {
|
||||||
|
"urlMain": "https://sign-forum.ru",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"rec.poker": {
|
||||||
|
"urlMain": "https://rec.poker",
|
||||||
|
"engine": "Wordpress/Author",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"uforum.uz": {
|
||||||
|
"urlMain": "https://uforum.uz",
|
||||||
|
"engine": "vBulletin",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"DarkNet Trust": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"notify notify-red mx-auto text-center"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profiles"
|
||||||
|
],
|
||||||
|
"protocol": "tor",
|
||||||
|
"url": "http://dntrustmucd4mwec.onion/u/{username}",
|
||||||
|
"urlMain": "http://dntrustmucd4mwec.onion",
|
||||||
|
"usernameClaimed": "cheshirecat82",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"headers": {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0",
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||||
|
"Accept-Language": "en-US,en;q=0.5"
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"tor"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"i2pforum": {
|
||||||
|
"protocol": "i2p",
|
||||||
|
"urlMain": "http://i2pforum.i2p",
|
||||||
|
"usernameClaimed": "zzz",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"engine": "phpBB/Search",
|
||||||
|
"tags": [
|
||||||
|
"i2p"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"forum.freeton.org": {
|
||||||
|
"urlMain": "https://forum.freeton.org",
|
||||||
|
"engine": "Discourse",
|
||||||
|
"usernameClaimed": "maximmuzychenka",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"alexaRank": 192123,
|
||||||
|
"tags": [
|
||||||
|
"finance",
|
||||||
|
"forum"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lyricstranslate.com": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"Page not found | Lyrics Translate"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profileid"
|
||||||
|
],
|
||||||
|
"url": "https://lyricstranslate.com/sco/translator/{username}",
|
||||||
|
"urlMain": "https://lyricstranslate.com",
|
||||||
|
"usernameClaimed": "charming43",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 3744,
|
||||||
|
"tags": [
|
||||||
|
"music"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|||||||
@@ -68,7 +68,7 @@
|
|||||||
<div class="row-mb">
|
<div class="row-mb">
|
||||||
<div class="col-md">
|
<div class="col-md">
|
||||||
<div class="card flex-md-row mb-4 box-shadow h-md-250">
|
<div class="card flex-md-row mb-4 box-shadow h-md-250">
|
||||||
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
|
<img class="card-img-right flex-auto d-md-block" alt="Photo" style="width: 200px; height: 200px; object-fit: scale-down;" src="{{ v.status and v.status.ids_data and v.status.ids_data.image or 'https://i.imgur.com/040fmbw.png' }}" data-holder-rendered="true">
|
||||||
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
|
<div class="card-body d-flex flex-column align-items-start" style="padding-top: 0;">
|
||||||
<h3 class="mb-0" style="padding-top: 1rem;">
|
<h3 class="mb-0" style="padding-top: 1rem;">
|
||||||
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
|
<a class="text-dark" href="{{ v.url_main }}" target="_blank">{{ k }}</a>
|
||||||
|
|||||||
+11
-1
@@ -65,6 +65,8 @@ SUPPORTED_TAGS = [
|
|||||||
"review",
|
"review",
|
||||||
"bookmarks",
|
"bookmarks",
|
||||||
"design",
|
"design",
|
||||||
|
"tor",
|
||||||
|
"i2p",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -122,6 +124,8 @@ class MaigretSite:
|
|||||||
alexa_rank = None
|
alexa_rank = None
|
||||||
source = None
|
source = None
|
||||||
|
|
||||||
|
protocol = ''
|
||||||
|
|
||||||
def __init__(self, name, information):
|
def __init__(self, name, information):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.url_subpath = ""
|
self.url_subpath = ""
|
||||||
@@ -301,12 +305,18 @@ class MaigretDatabase:
|
|||||||
lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
|
lambda x: isinstance(x.engine, str) and x.engine.lower() in normalized_tags
|
||||||
)
|
)
|
||||||
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
|
is_tags_ok = lambda x: set(x.tags).intersection(set(normalized_tags))
|
||||||
|
is_protocol_in_tags = lambda x: x.protocol and x.protocol in normalized_tags
|
||||||
is_disabled_needed = lambda x: not x.disabled or (
|
is_disabled_needed = lambda x: not x.disabled or (
|
||||||
"disabled" in tags or disabled
|
"disabled" in tags or disabled
|
||||||
)
|
)
|
||||||
is_id_type_ok = lambda x: x.type == id_type
|
is_id_type_ok = lambda x: x.type == id_type
|
||||||
|
|
||||||
filter_tags_engines_fun = lambda x: not tags or is_engine_ok(x) or is_tags_ok(x)
|
filter_tags_engines_fun = (
|
||||||
|
lambda x: not tags
|
||||||
|
or is_engine_ok(x)
|
||||||
|
or is_tags_ok(x)
|
||||||
|
or is_protocol_in_tags(x)
|
||||||
|
)
|
||||||
filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
|
filter_names_fun = lambda x: not names or is_name_ok(x) or is_source_ok(x)
|
||||||
|
|
||||||
filter_fun = (
|
filter_fun = (
|
||||||
|
|||||||
+28
-7
@@ -32,6 +32,8 @@ HEADERS = {
|
|||||||
"User-Agent": get_random_user_agent(),
|
"User-Agent": get_random_user_agent(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SEPARATORS = "\"'"
|
||||||
|
|
||||||
RATIO = 0.6
|
RATIO = 0.6
|
||||||
TOP_FEATURES = 5
|
TOP_FEATURES = 5
|
||||||
URL_RE = re.compile(r"https?://(www\.)?")
|
URL_RE = re.compile(r"https?://(www\.)?")
|
||||||
@@ -195,7 +197,7 @@ async def detect_known_engine(
|
|||||||
|
|
||||||
def extract_username_dialog(url):
|
def extract_username_dialog(url):
|
||||||
url_parts = url.rstrip("/").split("/")
|
url_parts = url.rstrip("/").split("/")
|
||||||
supposed_username = url_parts[-1]
|
supposed_username = url_parts[-1].strip('@')
|
||||||
entered_username = input(
|
entered_username = input(
|
||||||
f'Is "{supposed_username}" a valid username? If not, write it manually: '
|
f'Is "{supposed_username}" a valid username? If not, write it manually: '
|
||||||
)
|
)
|
||||||
@@ -203,38 +205,53 @@ def extract_username_dialog(url):
|
|||||||
|
|
||||||
|
|
||||||
async def check_features_manually(
|
async def check_features_manually(
|
||||||
db, url_exists, url_mainpage, cookie_file, logger, redirects=True
|
db, url_exists, url_mainpage, cookie_file, logger, redirects=False
|
||||||
):
|
):
|
||||||
|
custom_headers = {}
|
||||||
|
while True:
|
||||||
|
header_key = input(
|
||||||
|
'Specify custom header if you need or just press Enter to skip. Header name: '
|
||||||
|
)
|
||||||
|
if not header_key:
|
||||||
|
break
|
||||||
|
header_value = input('Header value: ')
|
||||||
|
custom_headers[header_key.strip()] = header_value.strip()
|
||||||
|
|
||||||
supposed_username = extract_username_dialog(url_exists)
|
supposed_username = extract_username_dialog(url_exists)
|
||||||
non_exist_username = "noonewouldeverusethis7"
|
non_exist_username = "noonewouldeverusethis7"
|
||||||
|
|
||||||
url_user = url_exists.replace(supposed_username, "{username}")
|
url_user = url_exists.replace(supposed_username, "{username}")
|
||||||
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
|
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
|
||||||
|
|
||||||
|
headers = dict(HEADERS)
|
||||||
|
headers.update(custom_headers)
|
||||||
|
|
||||||
# cookies
|
# cookies
|
||||||
cookie_dict = None
|
cookie_dict = None
|
||||||
if cookie_file:
|
if cookie_file:
|
||||||
logger.info(f'Use {cookie_file} for cookies')
|
logger.info(f'Use {cookie_file} for cookies')
|
||||||
cookie_jar = await import_aiohttp_cookies(cookie_file)
|
cookie_jar = import_aiohttp_cookies(cookie_file)
|
||||||
cookie_dict = {c.key: c.value for c in cookie_jar}
|
cookie_dict = {c.key: c.value for c in cookie_jar}
|
||||||
|
|
||||||
exists_resp = requests.get(
|
exists_resp = requests.get(
|
||||||
url_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects
|
url_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
|
||||||
)
|
)
|
||||||
|
logger.debug(url_exists)
|
||||||
logger.debug(exists_resp.status_code)
|
logger.debug(exists_resp.status_code)
|
||||||
logger.debug(exists_resp.text)
|
logger.debug(exists_resp.text)
|
||||||
|
|
||||||
non_exists_resp = requests.get(
|
non_exists_resp = requests.get(
|
||||||
url_not_exists, cookies=cookie_dict, headers=HEADERS, allow_redirects=redirects
|
url_not_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
|
||||||
)
|
)
|
||||||
|
logger.debug(url_not_exists)
|
||||||
logger.debug(non_exists_resp.status_code)
|
logger.debug(non_exists_resp.status_code)
|
||||||
logger.debug(non_exists_resp.text)
|
logger.debug(non_exists_resp.text)
|
||||||
|
|
||||||
a = exists_resp.text
|
a = exists_resp.text
|
||||||
b = non_exists_resp.text
|
b = non_exists_resp.text
|
||||||
|
|
||||||
tokens_a = set(a.split('"'))
|
tokens_a = set(re.split(f'[{SEPARATORS}]', a))
|
||||||
tokens_b = set(b.split('"'))
|
tokens_b = set(re.split(f'[{SEPARATORS}]', b))
|
||||||
|
|
||||||
a_minus_b = tokens_a.difference(tokens_b)
|
a_minus_b = tokens_a.difference(tokens_b)
|
||||||
b_minus_a = tokens_b.difference(tokens_a)
|
b_minus_a = tokens_b.difference(tokens_a)
|
||||||
@@ -276,6 +293,9 @@ async def check_features_manually(
|
|||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if headers != HEADERS:
|
||||||
|
site_data['headers'] = headers
|
||||||
|
|
||||||
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
||||||
return site
|
return site
|
||||||
|
|
||||||
@@ -283,6 +303,7 @@ async def check_features_manually(
|
|||||||
async def submit_dialog(db, url_exists, cookie_file, logger):
|
async def submit_dialog(db, url_exists, cookie_file, logger):
|
||||||
domain_raw = URL_RE.sub("", url_exists).strip().strip("/")
|
domain_raw = URL_RE.sub("", url_exists).strip().strip("/")
|
||||||
domain_raw = domain_raw.split("/")[0]
|
domain_raw = domain_raw.split("/")[0]
|
||||||
|
logger.info('Domain is %s', domain_raw)
|
||||||
|
|
||||||
# check for existence
|
# check for existence
|
||||||
matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
|
matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
|
||||||
|
|||||||
+2
-1
@@ -1,3 +1,4 @@
|
|||||||
|
aiodns==3.0.0
|
||||||
aiohttp==3.7.4
|
aiohttp==3.7.4
|
||||||
aiohttp-socks==0.5.5
|
aiohttp-socks==0.5.5
|
||||||
arabic-reshaper==2.1.1
|
arabic-reshaper==2.1.1
|
||||||
@@ -26,7 +27,7 @@ python-socks==1.1.2
|
|||||||
requests>=2.24.0
|
requests>=2.24.0
|
||||||
requests-futures==1.0.0
|
requests-futures==1.0.0
|
||||||
six==1.15.0
|
six==1.15.0
|
||||||
socid-extractor>=0.0.20
|
socid-extractor>=0.0.21
|
||||||
soupsieve==2.1
|
soupsieve==2.1
|
||||||
stem==1.8.0
|
stem==1.8.0
|
||||||
torrequest==0.1.0
|
torrequest==0.1.0
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ with open('requirements.txt') as rf:
|
|||||||
requires = rf.read().splitlines()
|
requires = rf.read().splitlines()
|
||||||
|
|
||||||
setup(name='maigret',
|
setup(name='maigret',
|
||||||
version='0.2.4',
|
version='0.3.0',
|
||||||
description='Collect a dossier on a person by username from a huge number of sites',
|
description='Collect a dossier on a person by username from a huge number of sites',
|
||||||
long_description=long_description,
|
long_description=long_description,
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
## List of supported sites (search methods): total 2515
|
## List of supported sites (search methods): total 2560
|
||||||
|
|
||||||
Rank data fetched from Alexa by domains.
|
Rank data fetched from Alexa by domains.
|
||||||
|
|
||||||
@@ -39,8 +39,9 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Tanks (https://tanks.mail.ru)](https://tanks.mail.ru)*: top 50, forum, gaming, ru*
|
1.  [Tanks (https://tanks.mail.ru)](https://tanks.mail.ru)*: top 50, forum, gaming, ru*
|
||||||
1.  [Warface (https://wf.mail.ru)](https://wf.mail.ru)*: top 50, forum, ru*
|
1.  [Warface (https://wf.mail.ru)](https://wf.mail.ru)*: top 50, forum, ru*
|
||||||
1.  [YandexReviews (https://yandex.ru/)](https://yandex.ru/)*: top 50, ru*
|
1.  [YandexReviews (https://yandex.ru/)](https://yandex.ru/)*: top 50, ru*
|
||||||
1.  [YandexBugbounty (https://yandex.ru/bugbounty/)](https://yandex.ru/bugbounty/)*: top 50, hacking, ru*
|
1.  [YandexBugbounty (https://yandex.ru/bugbounty/)](https://yandex.ru/bugbounty/)*: top 50, hacking, ru*, search is disabled
|
||||||
1.  [YandexCollections API (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*
|
1.  [YandexCollections API (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*
|
||||||
|
1.  [YandexCollections API (by yandex_public_id) (https://yandex.ru/collections/)](https://yandex.ru/collections/)*: top 50, ru, sharing*
|
||||||
1.  [YandexMarket (https://market.yandex.ru/)](https://market.yandex.ru/)*: top 50, ru*
|
1.  [YandexMarket (https://market.yandex.ru/)](https://market.yandex.ru/)*: top 50, ru*
|
||||||
1.  [YandexMusic (https://music.yandex.ru/)](https://music.yandex.ru/)*: top 50, music, ru*
|
1.  [YandexMusic (https://music.yandex.ru/)](https://music.yandex.ru/)*: top 50, music, ru*
|
||||||
1.  [YandexZnatoki (https://yandex.ru/q/)](https://yandex.ru/q/)*: top 50, ru*
|
1.  [YandexZnatoki (https://yandex.ru/q/)](https://yandex.ru/q/)*: top 50, ru*
|
||||||
@@ -107,6 +108,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [mercadolivre (https://www.mercadolivre.com.br)](https://www.mercadolivre.com.br)*: top 500, br*
|
1.  [mercadolivre (https://www.mercadolivre.com.br)](https://www.mercadolivre.com.br)*: top 500, br*
|
||||||
1.  [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 500, forum, movies, us*
|
1.  [Crunchyroll (https://www.crunchyroll.com/)](https://www.crunchyroll.com/)*: top 500, forum, movies, us*
|
||||||
1.  [WordPressOrg (https://wordpress.org/)](https://wordpress.org/)*: top 500, in*
|
1.  [WordPressOrg (https://wordpress.org/)](https://wordpress.org/)*: top 500, in*
|
||||||
|
1.  [Ameblo (https://ameblo.jp)](https://ameblo.jp)*: top 500, blog, jp*
|
||||||
1.  [Unsplash (https://unsplash.com/)](https://unsplash.com/)*: top 500, art, photo*
|
1.  [Unsplash (https://unsplash.com/)](https://unsplash.com/)*: top 500, art, photo*
|
||||||
1.  [Steam (https://steamcommunity.com/)](https://steamcommunity.com/)*: top 500, gaming*
|
1.  [Steam (https://steamcommunity.com/)](https://steamcommunity.com/)*: top 500, gaming*
|
||||||
1.  [Steam (by id) (https://steamcommunity.com/)](https://steamcommunity.com/)*: top 500, gaming*
|
1.  [Steam (by id) (https://steamcommunity.com/)](https://steamcommunity.com/)*: top 500, gaming*
|
||||||
@@ -131,12 +133,12 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Kickstarter (https://www.kickstarter.com)](https://www.kickstarter.com)*: top 1K, finance, us*
|
1.  [Kickstarter (https://www.kickstarter.com)](https://www.kickstarter.com)*: top 1K, finance, us*
|
||||||
1.  [forums.ea.com (https://forums.ea.com)](https://forums.ea.com)*: top 1K, forum, gaming, us*
|
1.  [forums.ea.com (https://forums.ea.com)](https://forums.ea.com)*: top 1K, forum, gaming, us*
|
||||||
1.  [Envato (https://forums.envato.com)](https://forums.envato.com)*: top 1K, au, forum, in*
|
1.  [Envato (https://forums.envato.com)](https://forums.envato.com)*: top 1K, au, forum, in*
|
||||||
1.  [Giphy (https://giphy.com/)](https://giphy.com/)*: top 1K, photo, us, video*
|
|
||||||
1.  [Ultimate-Guitar (https://ultimate-guitar.com/)](https://ultimate-guitar.com/)*: top 1K, us*
|
1.  [Ultimate-Guitar (https://ultimate-guitar.com/)](https://ultimate-guitar.com/)*: top 1K, us*
|
||||||
1.  [Freelancer.com (https://www.freelancer.com/)](https://www.freelancer.com/)*: top 1K, freelance, us*
|
1.  [Freelancer.com (https://www.freelancer.com/)](https://www.freelancer.com/)*: top 1K, freelance, us*
|
||||||
1.  [YouPorn (https://youporn.com)](https://youporn.com)*: top 1K, porn, us*
|
1.  [YouPorn (https://youporn.com)](https://youporn.com)*: top 1K, porn, us*
|
||||||
1.  [Dreamstime (https://www.dreamstime.com)](https://www.dreamstime.com)*: top 1K, art, photo, stock*
|
1.  [Dreamstime (https://www.dreamstime.com)](https://www.dreamstime.com)*: top 1K, art, photo, stock*
|
||||||
1.  [TheVerge (https://www.theverge.com)](https://www.theverge.com)*: top 1K, us*
|
1.  [TheVerge (https://www.theverge.com)](https://www.theverge.com)*: top 1K, us*
|
||||||
|
1.  [giphy.com (https://giphy.com)](https://giphy.com)*: top 1K, video*
|
||||||
1.  [Championat (https://www.championat.com/)](https://www.championat.com/)*: top 1K, ru*
|
1.  [Championat (https://www.championat.com/)](https://www.championat.com/)*: top 1K, ru*
|
||||||
1.  [Wattpad (https://www.wattpad.com/)](https://www.wattpad.com/)*: top 1K, reading, writing*
|
1.  [Wattpad (https://www.wattpad.com/)](https://www.wattpad.com/)*: top 1K, reading, writing*
|
||||||
1.  [Disqus (https://disqus.com/)](https://disqus.com/)*: top 1K, discussion*
|
1.  [Disqus (https://disqus.com/)](https://disqus.com/)*: top 1K, discussion*
|
||||||
@@ -246,6 +248,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [4pda (https://4pda.ru/)](https://4pda.ru/)*: top 5K, ru*
|
1.  [4pda (https://4pda.ru/)](https://4pda.ru/)*: top 5K, ru*
|
||||||
1.  [Weforum (https://www.weforum.org)](https://www.weforum.org)*: top 5K, forum, us*
|
1.  [Weforum (https://www.weforum.org)](https://www.weforum.org)*: top 5K, forum, us*
|
||||||
1.  [techspot.com (http://www.techspot.com/community/)](http://www.techspot.com/community/)*: top 5K, forum, us*
|
1.  [techspot.com (http://www.techspot.com/community/)](http://www.techspot.com/community/)*: top 5K, forum, us*
|
||||||
|
1.  [lyricstranslate.com (https://lyricstranslate.com)](https://lyricstranslate.com)*: top 5K, music*
|
||||||
1.  [Venmo (https://venmo.com/)](https://venmo.com/)*: top 5K, finance, us*
|
1.  [Venmo (https://venmo.com/)](https://venmo.com/)*: top 5K, finance, us*
|
||||||
1.  [Wikidot (http://www.wikidot.com/)](http://www.wikidot.com/)*: top 5K, us*
|
1.  [Wikidot (http://www.wikidot.com/)](http://www.wikidot.com/)*: top 5K, us*
|
||||||
1.  [Letterboxd (https://letterboxd.com/)](https://letterboxd.com/)*: top 5K, us*
|
1.  [Letterboxd (https://letterboxd.com/)](https://letterboxd.com/)*: top 5K, us*
|
||||||
@@ -254,6 +257,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [We Heart It (https://weheartit.com/)](https://weheartit.com/)*: top 5K, blog, in, photo*
|
1.  [We Heart It (https://weheartit.com/)](https://weheartit.com/)*: top 5K, blog, in, photo*
|
||||||
1.  [FilmWeb (https://www.filmweb.pl/user/adam)](https://www.filmweb.pl/user/adam)*: top 5K, movies, pl*
|
1.  [FilmWeb (https://www.filmweb.pl/user/adam)](https://www.filmweb.pl/user/adam)*: top 5K, movies, pl*
|
||||||
1.  [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*
|
1.  [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*
|
||||||
|
1.  [videohive.net (https://videohive.net)](https://videohive.net)*: top 5K, video*
|
||||||
1.  [BoardGameGeek (https://www.boardgamegeek.com)](https://www.boardgamegeek.com)*: top 5K, gaming, us*
|
1.  [BoardGameGeek (https://www.boardgamegeek.com)](https://www.boardgamegeek.com)*: top 5K, gaming, us*
|
||||||
1.  [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 5K, us*
|
1.  [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 5K, us*
|
||||||
1.  [Pluralsight (https://app.pluralsight.com)](https://app.pluralsight.com)*: top 5K, in, us*
|
1.  [Pluralsight (https://app.pluralsight.com)](https://app.pluralsight.com)*: top 5K, in, us*
|
||||||
@@ -392,8 +396,8 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [AnimeNewsNetwork (https://www.animenewsnetwork.com)](https://www.animenewsnetwork.com)*: top 100K, gb, us*
|
1.  [AnimeNewsNetwork (https://www.animenewsnetwork.com)](https://www.animenewsnetwork.com)*: top 100K, gb, us*
|
||||||
1.  [Smule (https://www.smule.com/)](https://www.smule.com/)*: top 100K, music*
|
1.  [Smule (https://www.smule.com/)](https://www.smule.com/)*: top 100K, music*
|
||||||
1.  [TVTropes (https://tvtropes.org)](https://tvtropes.org)*: top 100K, us*
|
1.  [TVTropes (https://tvtropes.org)](https://tvtropes.org)*: top 100K, us*
|
||||||
1.  [author.today (https://author.today)](https://author.today)*: top 100K, ru*
|
1.  [author.today (https://author.today)](https://author.today)*: top 100K, reading, ru*
|
||||||
1.  [TheSimsResource (https://www.thesimsresource.com/)](https://www.thesimsresource.com/)*: top 100K, de, gaming, it, us*
|
1.  [TheSimsResource (https://www.thesimsresource.com/)](https://www.thesimsresource.com/)*: top 100K, gaming*
|
||||||
1.  [N4g (https://n4g.com/)](https://n4g.com/)*: top 100K, gaming, news, us*
|
1.  [N4g (https://n4g.com/)](https://n4g.com/)*: top 100K, gaming, news, us*
|
||||||
1.  [Teletype (https://teletype.in)](https://teletype.in)*: top 100K, in, writing*
|
1.  [Teletype (https://teletype.in)](https://teletype.in)*: top 100K, in, writing*
|
||||||
1.  [Empflix (https://www.empflix.com)](https://www.empflix.com)*: top 100K, de, fr, porn*
|
1.  [Empflix (https://www.empflix.com)](https://www.empflix.com)*: top 100K, de, fr, porn*
|
||||||
@@ -453,6 +457,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Jimdo (https://jimdosite.com/)](https://jimdosite.com/)*: top 100K, jp*
|
1.  [Jimdo (https://jimdosite.com/)](https://jimdosite.com/)*: top 100K, jp*
|
||||||
1.  [club.cnews.ru (https://club.cnews.ru/)](https://club.cnews.ru/)*: top 100K, blog, ru*
|
1.  [club.cnews.ru (https://club.cnews.ru/)](https://club.cnews.ru/)*: top 100K, blog, ru*
|
||||||
1.  [PSNProfiles.com (https://psnprofiles.com/)](https://psnprofiles.com/)*: top 100K, gaming*
|
1.  [PSNProfiles.com (https://psnprofiles.com/)](https://psnprofiles.com/)*: top 100K, gaming*
|
||||||
|
1.  [donorbox (https://donorbox.org)](https://donorbox.org)*: top 100K, finance*
|
||||||
1.  [Sbazar.cz (https://www.sbazar.cz/)](https://www.sbazar.cz/)*: top 100K, cz, shopping*
|
1.  [Sbazar.cz (https://www.sbazar.cz/)](https://www.sbazar.cz/)*: top 100K, cz, shopping*
|
||||||
1.  [EuroFootball (https://www.euro-football.ru)](https://www.euro-football.ru)*: top 100K, ru*
|
1.  [EuroFootball (https://www.euro-football.ru)](https://www.euro-football.ru)*: top 100K, ru*
|
||||||
1.  [Raidforums (https://raidforums.com/)](https://raidforums.com/)*: top 100K, cybercriminal, forum*
|
1.  [Raidforums (https://raidforums.com/)](https://raidforums.com/)*: top 100K, cybercriminal, forum*
|
||||||
@@ -488,9 +493,10 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [ESET (https://forum.esetnod32.ru)](https://forum.esetnod32.ru)*: top 100K, forum, ru*
|
1.  [ESET (https://forum.esetnod32.ru)](https://forum.esetnod32.ru)*: top 100K, forum, ru*
|
||||||
1.  [Dreamwidth (https://dreamwidth.org/profile)](https://dreamwidth.org/profile)*: top 100K, in, us*
|
1.  [Dreamwidth (https://dreamwidth.org/profile)](https://dreamwidth.org/profile)*: top 100K, in, us*
|
||||||
1.  [sparkpeople (https://www.sparkpeople.com)](https://www.sparkpeople.com)*: top 100K, us*
|
1.  [sparkpeople (https://www.sparkpeople.com)](https://www.sparkpeople.com)*: top 100K, us*
|
||||||
1.  [Destructoid (https://www.destructoid.com)](https://www.destructoid.com)*: top 100K, us*
|
1.  [Destructoid (https://www.destructoid.com)](https://www.destructoid.com)*: top 100K, us*, search is disabled
|
||||||
1.  [uID.me (by username) (https://uid.me/)](https://uid.me/)*: top 100K, ru*
|
1.  [uID.me (by username) (https://uid.me/)](https://uid.me/)*: top 100K, ru*
|
||||||
1.  [uID.me (by uguid) (https://uid.me/)](https://uid.me/)*: top 100K, ru*
|
1.  [uID.me (by uguid) (https://uid.me/)](https://uid.me/)*: top 100K, ru*
|
||||||
|
1.  [Observable (https://observablehq.com)](https://observablehq.com)*: top 100K, sharing*
|
||||||
1.  [Overclockers (https://overclockers.ru)](https://overclockers.ru)*: top 100K, ru*
|
1.  [Overclockers (https://overclockers.ru)](https://overclockers.ru)*: top 100K, ru*
|
||||||
1.  [HackingWithSwift (https://www.hackingwithswift.com)](https://www.hackingwithswift.com)*: top 100K, us*
|
1.  [HackingWithSwift (https://www.hackingwithswift.com)](https://www.hackingwithswift.com)*: top 100K, us*
|
||||||
1.  [YouNow (https://www.younow.com/)](https://www.younow.com/)*: top 100K, be, us*
|
1.  [YouNow (https://www.younow.com/)](https://www.younow.com/)*: top 100K, be, us*
|
||||||
@@ -526,6 +532,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [forums.battlefield.com (https://forums.battlefield.com)](https://forums.battlefield.com)*: top 100K, forum, gaming, gb, us*, search is disabled
|
1.  [forums.battlefield.com (https://forums.battlefield.com)](https://forums.battlefield.com)*: top 100K, forum, gaming, gb, us*, search is disabled
|
||||||
1.  [GotovimDoma (https://gotovim-doma.ru)](https://gotovim-doma.ru)*: top 100K, ru*
|
1.  [GotovimDoma (https://gotovim-doma.ru)](https://gotovim-doma.ru)*: top 100K, ru*
|
||||||
1.  [prosportsdaily (https://forums.prosportsdaily.com)](https://forums.prosportsdaily.com)*: top 100K, forum, in, us*
|
1.  [prosportsdaily (https://forums.prosportsdaily.com)](https://forums.prosportsdaily.com)*: top 100K, forum, in, us*
|
||||||
|
1.  [clarity.fm (https://clarity.fm)](https://clarity.fm)*: top 100K, business*
|
||||||
1.  [Bukkit (https://bukkit.org/)](https://bukkit.org/)*: top 100K, at, forum, us*
|
1.  [Bukkit (https://bukkit.org/)](https://bukkit.org/)*: top 100K, at, forum, us*
|
||||||
1.  [Elakiri (https://elakiri.com)](https://elakiri.com)*: top 100K, lk*
|
1.  [Elakiri (https://elakiri.com)](https://elakiri.com)*: top 100K, lk*
|
||||||
1.  [Manutd (https://manutd.one)](https://manutd.one)*: top 100K, forum, sport*
|
1.  [Manutd (https://manutd.one)](https://manutd.one)*: top 100K, forum, sport*
|
||||||
@@ -778,6 +785,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Mobile-files (https://www.mobile-files.com/)](https://www.mobile-files.com/)*: top 10M, forum, ru, us*
|
1.  [Mobile-files (https://www.mobile-files.com/)](https://www.mobile-files.com/)*: top 10M, forum, ru, us*
|
||||||
1.  [Fluther (https://www.fluther.com/)](https://www.fluther.com/)*: top 10M, in, us*
|
1.  [Fluther (https://www.fluther.com/)](https://www.fluther.com/)*: top 10M, in, us*
|
||||||
1.  [Comedy (https://www.comedy.co.uk)](https://www.comedy.co.uk)*: top 10M, gb, in, movies, pk, us*
|
1.  [Comedy (https://www.comedy.co.uk)](https://www.comedy.co.uk)*: top 10M, gb, in, movies, pk, us*
|
||||||
|
1.  [sessionize.com (https://sessionize.com)](https://sessionize.com)*: top 10M, business*
|
||||||
1.  [Fireworktv (https://fireworktv.com)](https://fireworktv.com)*: top 10M, in, jp*
|
1.  [Fireworktv (https://fireworktv.com)](https://fireworktv.com)*: top 10M, in, jp*
|
||||||
1.  [funcom (https://forums.funcom.com)](https://forums.funcom.com)*: top 10M, forum, us*
|
1.  [funcom (https://forums.funcom.com)](https://forums.funcom.com)*: top 10M, forum, us*
|
||||||
1.  [RoyalCams (https://royalcams.com)](https://royalcams.com)*: top 10M, gr, in, ng, ru, us, webcam*
|
1.  [RoyalCams (https://royalcams.com)](https://royalcams.com)*: top 10M, gr, in, ng, ru, us, webcam*
|
||||||
@@ -817,6 +825,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Ustream (http://www.ustream.tv)](http://www.ustream.tv)*: top 10M, eg, us*
|
1.  [Ustream (http://www.ustream.tv)](http://www.ustream.tv)*: top 10M, eg, us*
|
||||||
1.  [Geodesist (https://geodesist.ru)](https://geodesist.ru)*: top 10M, forum, ru*
|
1.  [Geodesist (https://geodesist.ru)](https://geodesist.ru)*: top 10M, forum, ru*
|
||||||
1.  [Serveradmin (https://serveradmin.ru/)](https://serveradmin.ru/)*: top 10M, ru*
|
1.  [Serveradmin (https://serveradmin.ru/)](https://serveradmin.ru/)*: top 10M, ru*
|
||||||
|
1.  [telescope.ac (https://telescope.ac)](https://telescope.ac)*: top 10M, blog*
|
||||||
1.  [Ya-uchitel (https://ya-uchitel.ru/)](https://ya-uchitel.ru/)*: top 10M, ru*
|
1.  [Ya-uchitel (https://ya-uchitel.ru/)](https://ya-uchitel.ru/)*: top 10M, ru*
|
||||||
1.  [ResidentAdvisor (https://www.residentadvisor.net)](https://www.residentadvisor.net)*: top 10M, us*
|
1.  [ResidentAdvisor (https://www.residentadvisor.net)](https://www.residentadvisor.net)*: top 10M, us*
|
||||||
1.  [Weburg (https://weburg.net)](https://weburg.net)*: top 10M, ru*
|
1.  [Weburg (https://weburg.net)](https://weburg.net)*: top 10M, ru*
|
||||||
@@ -840,6 +849,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Cqham (http://www.cqham.ru)](http://www.cqham.ru)*: top 10M, ru, tech*
|
1.  [Cqham (http://www.cqham.ru)](http://www.cqham.ru)*: top 10M, ru, tech*
|
||||||
1.  [KharkovForum (https://www.kharkovforum.com/)](https://www.kharkovforum.com/)*: top 10M, forum, ua*
|
1.  [KharkovForum (https://www.kharkovforum.com/)](https://www.kharkovforum.com/)*: top 10M, forum, ua*
|
||||||
1.  [Studwork (https://studwork.org/)](https://studwork.org/)*: top 10M, ru*
|
1.  [Studwork (https://studwork.org/)](https://studwork.org/)*: top 10M, ru*
|
||||||
|
1.  [forum.freeton.org (https://forum.freeton.org)](https://forum.freeton.org)*: top 10M, finance, forum*
|
||||||
1.  [Playlists (https://playlists.net)](https://playlists.net)*: top 10M, in, us*
|
1.  [Playlists (https://playlists.net)](https://playlists.net)*: top 10M, in, us*
|
||||||
1.  [Liberapay (https://liberapay.com)](https://liberapay.com)*: top 10M, eg, finance, in, pk, us, za*
|
1.  [Liberapay (https://liberapay.com)](https://liberapay.com)*: top 10M, eg, finance, in, pk, us, za*
|
||||||
1.  [artinvestment (https://forum.artinvestment.ru/)](https://forum.artinvestment.ru/)*: top 10M, forum, ru*
|
1.  [artinvestment (https://forum.artinvestment.ru/)](https://forum.artinvestment.ru/)*: top 10M, forum, ru*
|
||||||
@@ -871,6 +881,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Paypal (https://www.paypal.me)](https://www.paypal.me)*: top 10M, finance*
|
1.  [Paypal (https://www.paypal.me)](https://www.paypal.me)*: top 10M, finance*
|
||||||
1.  [Seatracker (https://seatracker.ru/)](https://seatracker.ru/)*: top 10M, ru*
|
1.  [Seatracker (https://seatracker.ru/)](https://seatracker.ru/)*: top 10M, ru*
|
||||||
1.  [Hctorpedo (http://hctorpedo.ru)](http://hctorpedo.ru)*: top 10M, ru*
|
1.  [Hctorpedo (http://hctorpedo.ru)](http://hctorpedo.ru)*: top 10M, ru*
|
||||||
|
1.  [getmakerlog.com (https://getmakerlog.com)](https://getmakerlog.com)*: top 10M, business*
|
||||||
1.  [Cmet4uk (https://cmet4uk.ru)](https://cmet4uk.ru)*: top 10M, ru*
|
1.  [Cmet4uk (https://cmet4uk.ru)](https://cmet4uk.ru)*: top 10M, ru*
|
||||||
1.  [popjustice (https://forum.popjustice.com)](https://forum.popjustice.com)*: top 10M, co, forum, in, sg, us*
|
1.  [popjustice (https://forum.popjustice.com)](https://forum.popjustice.com)*: top 10M, co, forum, in, sg, us*
|
||||||
1.  [RPGGeek (https://rpggeek.com)](https://rpggeek.com)*: top 10M, gaming, us*
|
1.  [RPGGeek (https://rpggeek.com)](https://rpggeek.com)*: top 10M, gaming, us*
|
||||||
@@ -2518,5 +2529,39 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [discuss.hashicorp.com (https://discuss.hashicorp.com)](https://discuss.hashicorp.com)*: top 100M, tech*
|
1.  [discuss.hashicorp.com (https://discuss.hashicorp.com)](https://discuss.hashicorp.com)*: top 100M, tech*
|
||||||
1.  [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 100M, blog*
|
1.  [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 100M, blog*
|
||||||
1.  [Weebly (http://weebly.com)](http://weebly.com)*: top 100M, business*
|
1.  [Weebly (http://weebly.com)](http://weebly.com)*: top 100M, business*
|
||||||
|
1.  [HiddenAnswers (http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion)](http://answerszuvs3gg2l64e6hmnryudl5zgrmwm3vh65hzszdghblddvfiqd.onion)*: top 100M, tor*
|
||||||
|
1.  [.com ({username}.com)]({username}.com)*: top 100M*
|
||||||
|
1.  [galactictalk.org (https://galactictalk.org)](https://galactictalk.org)*: top 100M*
|
||||||
|
1.  [discuss.bootstrapped.fm (https://discuss.bootstrapped.fm)](https://discuss.bootstrapped.fm)*: top 100M*
|
||||||
|
1.  [discourse.mozilla.org (https://discourse.mozilla.org)](https://discourse.mozilla.org)*: top 100M*
|
||||||
|
1.  [ipinit.in (http://ipinit.in)](http://ipinit.in)*: top 100M*
|
||||||
|
1.  [boards.theforce.net (https://boards.theforce.net)](https://boards.theforce.net)*: top 100M*
|
||||||
|
1.  [aussiehomebrewer.com (https://aussiehomebrewer.com)](https://aussiehomebrewer.com)*: top 100M*
|
||||||
|
1.  [forum-ukraina.net (https://forum-ukraina.net)](https://forum-ukraina.net)*: top 100M*
|
||||||
|
1.  [forum-history.ru (http://forum-history.ru)](http://forum-history.ru)*: top 100M*
|
||||||
|
1.  [forum.vn.ua (http://forum.vn.ua)](http://forum.vn.ua)*: top 100M*
|
||||||
|
1.  [forum.bestflowers.ru (https://forum.bestflowers.ru)](https://forum.bestflowers.ru)*: top 100M*
|
||||||
|
1.  [forum.alconar.ru (https://forum.alconar.ru)](https://forum.alconar.ru)*: top 100M*
|
||||||
|
1.  [forum.lancerx.ru (https://forum.lancerx.ru)](https://forum.lancerx.ru)*: top 100M*
|
||||||
|
1.  [mfarmer.ru (http://www.mfarmer.ru)](http://www.mfarmer.ru)*: top 100M*
|
||||||
|
1.  [forum.league17.ru (https://forum.league17.ru)](https://forum.league17.ru)*: top 100M*
|
||||||
|
1.  [krskforum.com (https://krskforum.com)](https://krskforum.com)*: top 100M*
|
||||||
|
1.  [forum.rarib.ag (https://forum.rarib.ag)](https://forum.rarib.ag)*: top 100M*
|
||||||
|
1.  [forum.oneclickchicks.com (https://forum.oneclickchicks.com)](https://forum.oneclickchicks.com)*: top 100M*
|
||||||
|
1.  [forum.trade-print.ru (http://forum.trade-print.ru)](http://forum.trade-print.ru)*: top 100M*
|
||||||
|
1.  [forum.setcombg.com (https://forum.setcombg.com)](https://forum.setcombg.com)*: top 100M*
|
||||||
|
1.  [vw-bus.ru (https://vw-bus.ru)](https://vw-bus.ru)*: top 100M*
|
||||||
|
1.  [forum.ya1.ru (https://forum.ya1.ru)](https://forum.ya1.ru)*: top 100M*
|
||||||
|
1.  [forum.wordreference.com (https://forum.wordreference.com)](https://forum.wordreference.com)*: top 100M*
|
||||||
|
1.  [forums.zooclub.ru (https://forums.zooclub.ru)](https://forums.zooclub.ru)*: top 100M*
|
||||||
|
1.  [homebrewtalk.com (https://www.homebrewtalk.com)](https://www.homebrewtalk.com)*: top 100M*
|
||||||
|
1.  [navimba.com (https://navimba.com)](https://navimba.com)*: top 100M*
|
||||||
|
1.  [niva-club.net (https://www.niva-club.net)](https://www.niva-club.net)*: top 100M*
|
||||||
|
1.  [red-forum.com (https://red-forum.com)](https://red-forum.com)*: top 100M*
|
||||||
|
1.  [scaleforum.ru (http://www.scaleforum.ru)](http://www.scaleforum.ru)*: top 100M*
|
||||||
|
1.  [sign-forum.ru (https://sign-forum.ru)](https://sign-forum.ru)*: top 100M*
|
||||||
|
1.  [rec.poker (https://rec.poker)](https://rec.poker)*: top 100M*
|
||||||
|
1.  [uforum.uz (https://uforum.uz)](https://uforum.uz)*: top 100M*
|
||||||
|
1.  [DarkNet Trust (http://dntrustmucd4mwec.onion)](http://dntrustmucd4mwec.onion)*: top 100M, tor*
|
||||||
|
|
||||||
Alexa.com rank data fetched at (2021-05-16 14:01:29.561381 UTC)
|
Alexa.com rank data fetched at (2021-05-31 21:26:56.886650 UTC)
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ async def test_import_aiohttp_cookies():
|
|||||||
with open(cookies_filename, 'w') as f:
|
with open(cookies_filename, 'w') as f:
|
||||||
f.write(COOKIES_TXT)
|
f.write(COOKIES_TXT)
|
||||||
|
|
||||||
cookie_jar = await import_aiohttp_cookies(cookies_filename)
|
cookie_jar = import_aiohttp_cookies(cookies_filename)
|
||||||
assert list(cookie_jar._cookies.keys()) == ['xss.is', 'httpbin.org']
|
assert list(cookie_jar._cookies.keys()) == ['xss.is', 'httpbin.org']
|
||||||
|
|
||||||
url = 'https://httpbin.org/cookies'
|
url = 'https://httpbin.org/cookies'
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ def site_result_except(server, username, **kwargs):
|
|||||||
server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
|
server.expect_request('/url', query_string=query).respond_with_data(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_checking_by_status_code(httpserver, local_test_db):
|
async def test_checking_by_status_code(httpserver, local_test_db):
|
||||||
sites_dict = local_test_db.sites_dict
|
sites_dict = local_test_db.sites_dict
|
||||||
@@ -23,6 +24,7 @@ async def test_checking_by_status_code(httpserver, local_test_db):
|
|||||||
assert result['StatusCode']['status'].is_found() is False
|
assert result['StatusCode']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_checking_by_message_positive_full(httpserver, local_test_db):
|
async def test_checking_by_message_positive_full(httpserver, local_test_db):
|
||||||
sites_dict = local_test_db.sites_dict
|
sites_dict = local_test_db.sites_dict
|
||||||
@@ -37,6 +39,7 @@ async def test_checking_by_message_positive_full(httpserver, local_test_db):
|
|||||||
assert result['Message']['status'].is_found() is False
|
assert result['Message']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_checking_by_message_positive_part(httpserver, local_test_db):
|
async def test_checking_by_message_positive_part(httpserver, local_test_db):
|
||||||
sites_dict = local_test_db.sites_dict
|
sites_dict = local_test_db.sites_dict
|
||||||
@@ -51,6 +54,7 @@ async def test_checking_by_message_positive_part(httpserver, local_test_db):
|
|||||||
assert result['Message']['status'].is_found() is False
|
assert result['Message']['status'].is_found() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_checking_by_message_negative(httpserver, local_test_db):
|
async def test_checking_by_message_negative(httpserver, local_test_db):
|
||||||
sites_dict = local_test_db.sites_dict
|
sites_dict = local_test_db.sites_dict
|
||||||
|
|||||||
@@ -25,17 +25,21 @@ DEFAULT_ARGS: Dict[str, Any] = {
|
|||||||
'print_check_errors': False,
|
'print_check_errors': False,
|
||||||
'print_not_found': False,
|
'print_not_found': False,
|
||||||
'proxy': None,
|
'proxy': None,
|
||||||
|
'reports_sorting': 'default',
|
||||||
'retries': 1,
|
'retries': 1,
|
||||||
'self_check': False,
|
'self_check': False,
|
||||||
'site_list': [],
|
'site_list': [],
|
||||||
'stats': False,
|
'stats': False,
|
||||||
'tags': '',
|
'tags': '',
|
||||||
'timeout': 30,
|
'timeout': 30,
|
||||||
|
'tor_proxy': 'socks5://127.0.0.1:9050',
|
||||||
|
'i2p_proxy': 'http://127.0.0.1:4444',
|
||||||
'top_sites': 500,
|
'top_sites': 500,
|
||||||
'txt': False,
|
'txt': False,
|
||||||
'use_disabled_sites': False,
|
'use_disabled_sites': False,
|
||||||
'username': [],
|
'username': [],
|
||||||
'verbose': False,
|
'verbose': False,
|
||||||
|
'with_domains': False,
|
||||||
'xmind': False,
|
'xmind': False,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -138,6 +138,7 @@ def test_maigret_results(test_db):
|
|||||||
|
|
||||||
assert results['Reddit'].get('future') is None
|
assert results['Reddit'].get('future') is None
|
||||||
del results['GooglePlayStore']['future']
|
del results['GooglePlayStore']['future']
|
||||||
|
del results['GooglePlayStore']['checker']
|
||||||
|
|
||||||
assert results == RESULTS_EXAMPLE
|
assert results == RESULTS_EXAMPLE
|
||||||
|
|
||||||
|
|||||||
+98
-2
@@ -45,6 +45,19 @@ EXAMPLE_RESULTS = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Fixture for the "*_broken" report tests: a single site entry that carries
# no 'status' key at all, unlike the entries the regular report tests use.
BROKEN_RESULTS = {
    'GitHub': {
        'username': 'test',
        'parsing_enabled': True,
        'url_main': 'https://www.github.com/',
        'url_user': 'https://www.github.com/test',
        'http_status': 200,
        'is_similar': False,
        'rank': 78,
        'site': MaigretSite('test', {}),
    }
}
|
||||||
|
|
||||||
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
|
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
|
||||||
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
|
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
|
||||||
GOOD_500PX_RESULT.ids_data = {
|
GOOD_500PX_RESULT.ids_data = {
|
||||||
@@ -239,10 +252,13 @@ TEST = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
|
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
|
||||||
|
SUPPOSED_BROKEN_BRIEF = """Search by username alexaimephotographycars returned 0 accounts. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 2 accounts."""
|
||||||
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
|
|
||||||
|
|
||||||
SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
|
SUPPOSED_GEO = "Geo: us <span class=\"text-muted\">(3)</span>"
|
||||||
|
SUPPOSED_BROKEN_GEO = "Geo: us <span class=\"text-muted\">(2)</span>"
|
||||||
|
|
||||||
|
SUPPOSED_INTERESTS = "Interests: photo <span class=\"text-muted\">(2)</span>, news <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
|
||||||
|
SUPPOSED_BROKEN_INTERESTS = "Interests: news <span class=\"text-muted\">(1)</span>, photo <span class=\"text-muted\">(1)</span>, social <span class=\"text-muted\">(1)</span>"
|
||||||
|
|
||||||
|
|
||||||
def test_generate_report_template():
|
def test_generate_report_template():
|
||||||
@@ -270,6 +286,19 @@ def test_generate_csv_report():
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_csv_report_broken():
    """CSV report for a status-less result still emits a row, marked 'Unknown'."""
    buffer = StringIO()
    generate_csv_report('test', BROKEN_RESULTS, buffer)

    # Rewind so the freshly written report can be read back line by line.
    buffer.seek(0)
    rows = buffer.readlines()

    expected_rows = [
        'username,name,url_main,url_user,exists,http_status\r\n',
        'test,GitHub,https://www.github.com/,https://www.github.com/test,Unknown,200\r\n',
    ]
    assert rows == expected_rows
|
||||||
|
|
||||||
|
|
||||||
def test_generate_txt_report():
|
def test_generate_txt_report():
|
||||||
txtfile = StringIO()
|
txtfile = StringIO()
|
||||||
generate_txt_report('test', EXAMPLE_RESULTS, txtfile)
|
generate_txt_report('test', EXAMPLE_RESULTS, txtfile)
|
||||||
@@ -283,6 +312,18 @@ def test_generate_txt_report():
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_txt_report_broken():
    """TXT report for a status-less result contains only the zero-total line."""
    buffer = StringIO()
    generate_txt_report('test', BROKEN_RESULTS, buffer)

    # Rewind so the freshly written report can be read back line by line.
    buffer.seek(0)
    lines = buffer.readlines()

    expected_lines = ['Total Websites Username Detected On : 0']
    assert lines == expected_lines
|
||||||
|
|
||||||
|
|
||||||
def test_generate_json_simple_report():
|
def test_generate_json_simple_report():
|
||||||
jsonfile = StringIO()
|
jsonfile = StringIO()
|
||||||
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
|
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
|
||||||
@@ -296,6 +337,19 @@ def test_generate_json_simple_report():
|
|||||||
assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2']
|
assert list(json.loads(data[0]).keys()) == ['GitHub', 'GitHub2']
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_json_simple_report_broken():
    """Simple JSON report for a status-less result is one line with no sites.

    The report is generated directly from BROKEN_RESULTS; the expected output
    is a single JSON document whose top-level object has no keys.
    """
    jsonfile = StringIO()
    generate_json_report('test', BROKEN_RESULTS, jsonfile, 'simple')

    # Rewind so the freshly written report can be read back line by line.
    jsonfile.seek(0)
    data = jsonfile.readlines()

    assert len(data) == 1
    assert list(json.loads(data[0]).keys()) == []
|
||||||
|
|
||||||
|
|
||||||
def test_generate_json_ndjson_report():
|
def test_generate_json_ndjson_report():
|
||||||
jsonfile = StringIO()
|
jsonfile = StringIO()
|
||||||
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
|
MODIFIED_RESULTS = dict(EXAMPLE_RESULTS)
|
||||||
@@ -329,6 +383,20 @@ def test_save_xmind_report():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_xmind_report_broken():
    """XMind report for a status-less result has a single 'Undefined' topic."""
    report_path = 'report_test.xmind'
    save_xmind_report(report_path, 'test', BROKEN_RESULTS)

    # Load the saved workbook back and inspect its primary sheet.
    sheet_data = xmind.load(report_path).getPrimarySheet().getData()

    assert sheet_data['title'] == 'test Analysis'
    assert sheet_data['topic']['title'] == 'test'
    assert len(sheet_data['topic']['topics']) == 1
    assert sheet_data['topic']['topics'][0]['title'] == 'Undefined'
|
||||||
|
|
||||||
|
|
||||||
def test_html_report():
|
def test_html_report():
|
||||||
report_name = 'report_test.html'
|
report_name = 'report_test.html'
|
||||||
context = generate_report_context(TEST)
|
context = generate_report_context(TEST)
|
||||||
@@ -341,6 +409,21 @@ def test_html_report():
|
|||||||
assert SUPPOSED_INTERESTS in report_text
|
assert SUPPOSED_INTERESTS in report_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_html_report_broken():
    """HTML report still renders when one result has its status set to None.

    A deep copy of TEST is modified so the '500px' entry of the first search
    has ``status = None``; the saved report must contain the adjusted brief,
    geo and interests summaries.
    """
    report_name = 'report_test_broken.html'
    BROKEN_DATA = copy.deepcopy(TEST)
    BROKEN_DATA[0][2]['500px']['status'] = None

    context = generate_report_context(BROKEN_DATA)
    save_html_report(report_name, context)

    # Use a context manager so the report file handle is closed promptly
    # instead of being left for the garbage collector.
    with open(report_name) as report_file:
        report_text = report_file.read()

    assert SUPPOSED_BROKEN_BRIEF in report_text
    assert SUPPOSED_BROKEN_GEO in report_text
    assert SUPPOSED_BROKEN_INTERESTS in report_text
|
||||||
|
|
||||||
|
|
||||||
def test_pdf_report():
|
def test_pdf_report():
|
||||||
report_name = 'report_test.pdf'
|
report_name = 'report_test.pdf'
|
||||||
context = generate_report_context(TEST)
|
context = generate_report_context(TEST)
|
||||||
@@ -357,3 +440,16 @@ def test_text_report():
|
|||||||
assert brief_part in report_text
|
assert brief_part in report_text
|
||||||
assert 'us' in report_text
|
assert 'us' in report_text
|
||||||
assert 'photo' in report_text
|
assert 'photo' in report_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_text_report_broken():
    """Plain-text report still renders when one result has status None."""
    # Work on a deep copy so the shared TEST fixture is left untouched.
    damaged = copy.deepcopy(TEST)
    damaged[0][2]['500px']['status'] = None

    report_text = get_plaintext_report(generate_report_context(damaged))

    # Every whitespace-separated token of the expected brief must appear.
    for token in SUPPOSED_BROKEN_BRIEF.split():
        assert token in report_text
    assert 'us' in report_text
    assert 'photo' in report_text
|
||||||
|
|||||||
Reference in New Issue
Block a user