mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Refactoring, sites database tests
This commit is contained in:
+56
-69
@@ -25,7 +25,7 @@ from socid_extractor import parse, extract
|
|||||||
|
|
||||||
from .notify import QueryNotifyPrint
|
from .notify import QueryNotifyPrint
|
||||||
from .result import QueryResult, QueryStatus
|
from .result import QueryResult, QueryStatus
|
||||||
from .sites import SitesInformation
|
from .sites import MaigretDatabase, MaigretSite
|
||||||
|
|
||||||
import xmind
|
import xmind
|
||||||
|
|
||||||
@@ -55,7 +55,7 @@ unsupported_characters = '#'
|
|||||||
cookies_file = 'cookies.txt'
|
cookies_file = 'cookies.txt'
|
||||||
|
|
||||||
|
|
||||||
async def get_response(request_future, social_network, logger):
|
async def get_response(request_future, site_name, logger):
|
||||||
html_text = None
|
html_text = None
|
||||||
status_code = 0
|
status_code = 0
|
||||||
|
|
||||||
@@ -92,7 +92,7 @@ async def get_response(request_future, social_network, logger):
|
|||||||
error_text = "Proxy Error"
|
error_text = "Proxy Error"
|
||||||
expection_text = str(err)
|
expection_text = str(err)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.warning(f'Unhandled error while requesting {social_network}: {err}')
|
logger.warning(f'Unhandled error while requesting {site_name}: {err}')
|
||||||
logger.debug(err, exc_info=True)
|
logger.debug(err, exc_info=True)
|
||||||
error_text = "Some Error"
|
error_text = "Some Error"
|
||||||
expection_text = str(err)
|
expection_text = str(err)
|
||||||
@@ -101,19 +101,19 @@ async def get_response(request_future, social_network, logger):
|
|||||||
return html_text, status_code, error_text, expection_text
|
return html_text, status_code, error_text, expection_text
|
||||||
|
|
||||||
|
|
||||||
async def update_site_data_from_response(sitename, site_data, results_info, semaphore, logger, query_notify):
|
async def update_site_dict_from_response(sitename, site_dict, results_info, semaphore, logger, query_notify):
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
site_obj = site_data[sitename]
|
site_obj = site_dict[sitename]
|
||||||
future = site_obj.get('request_future')
|
future = site_obj.request_future
|
||||||
if not future:
|
if not future:
|
||||||
# ignore: search by incompatible id type
|
# ignore: search by incompatible id type
|
||||||
return
|
return
|
||||||
|
|
||||||
response = await get_response(request_future=future,
|
response = await get_response(request_future=future,
|
||||||
social_network=sitename,
|
site_name=sitename,
|
||||||
logger=logger)
|
logger=logger)
|
||||||
|
|
||||||
site_data[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj, sitename)
|
site_dict[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj)
|
||||||
|
|
||||||
|
|
||||||
# TODO: move info separate module
|
# TODO: move info separate module
|
||||||
@@ -137,13 +137,11 @@ def detect_error_page(html_text, status_code, fail_flags, ignore_403):
|
|||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
def process_site_result(response, query_notify, logger, results_info, net_info, social_network):
|
def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
|
||||||
if not response:
|
if not response:
|
||||||
return results_info
|
return results_info
|
||||||
|
|
||||||
fulltags = []
|
fulltags = site.tags
|
||||||
if ("tags" in net_info.keys()):
|
|
||||||
fulltags = net_info["tags"]
|
|
||||||
|
|
||||||
# Retrieve other site information again
|
# Retrieve other site information again
|
||||||
username = results_info['username']
|
username = results_info['username']
|
||||||
@@ -157,14 +155,14 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
|
|||||||
return results_info
|
return results_info
|
||||||
|
|
||||||
# Get the expected error type
|
# Get the expected error type
|
||||||
error_type = net_info["errorType"]
|
error_type = site.check_type
|
||||||
|
|
||||||
# Get the failure messages and comments
|
# Get the failure messages and comments
|
||||||
failure_errors = net_info.get("errors", {})
|
failure_errors = site.errors
|
||||||
|
|
||||||
# TODO: refactor
|
# TODO: refactor
|
||||||
if not response:
|
if not response:
|
||||||
logger.error(f'No response for {social_network}')
|
logger.error(f'No response for {site.name}')
|
||||||
return results_info
|
return results_info
|
||||||
|
|
||||||
html_text, status_code, error_text, expection_text = response
|
html_text, status_code, error_text, expection_text = response
|
||||||
@@ -182,37 +180,37 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
|
|||||||
|
|
||||||
if status_code and not error_text:
|
if status_code and not error_text:
|
||||||
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
|
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
|
||||||
'ignore_403' in net_info)
|
site.ignore_403)
|
||||||
|
|
||||||
# presense flags
|
# presense flags
|
||||||
# True by default
|
# True by default
|
||||||
presense_flags = net_info.get("presenseStrs", [])
|
presense_flags = site.presense_strs
|
||||||
is_presense_detected = html_text and all(
|
is_presense_detected = html_text and all(
|
||||||
[(presense_flag in html_text) for presense_flag in presense_flags]) or not presense_flags
|
[(presense_flag in html_text) for presense_flag in presense_flags]) or not presense_flags
|
||||||
|
|
||||||
if error_text is not None:
|
if error_text is not None:
|
||||||
logger.debug(error_text)
|
logger.debug(error_text)
|
||||||
result = QueryResult(username,
|
result = QueryResult(username,
|
||||||
social_network,
|
site.name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.UNKNOWN,
|
QueryStatus.UNKNOWN,
|
||||||
query_time=response_time,
|
query_time=response_time,
|
||||||
context=f'{error_text}: {site_error_text}', tags=fulltags)
|
context=f'{error_text}: {site_error_text}', tags=fulltags)
|
||||||
elif error_type == "message":
|
elif error_type == "message":
|
||||||
absence_flags = net_info.get("errorMsg")
|
absence_flags = site.absence_strs
|
||||||
is_absence_flags_list = isinstance(absence_flags, list)
|
is_absence_flags_list = isinstance(absence_flags, list)
|
||||||
absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
|
absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
|
||||||
# Checks if the error message is in the HTML
|
# Checks if the error message is in the HTML
|
||||||
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
|
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
|
||||||
if not is_absence_detected and is_presense_detected:
|
if not is_absence_detected and is_presense_detected:
|
||||||
result = QueryResult(username,
|
result = QueryResult(username,
|
||||||
social_network,
|
site.name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.CLAIMED,
|
QueryStatus.CLAIMED,
|
||||||
query_time=response_time, tags=fulltags)
|
query_time=response_time, tags=fulltags)
|
||||||
else:
|
else:
|
||||||
result = QueryResult(username,
|
result = QueryResult(username,
|
||||||
social_network,
|
site.name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.AVAILABLE,
|
QueryStatus.AVAILABLE,
|
||||||
query_time=response_time, tags=fulltags)
|
query_time=response_time, tags=fulltags)
|
||||||
@@ -220,13 +218,13 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
|
|||||||
# Checks if the status code of the response is 2XX
|
# Checks if the status code of the response is 2XX
|
||||||
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
|
if (not status_code >= 300 or status_code < 200) and is_presense_detected:
|
||||||
result = QueryResult(username,
|
result = QueryResult(username,
|
||||||
social_network,
|
site.name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.CLAIMED,
|
QueryStatus.CLAIMED,
|
||||||
query_time=response_time, tags=fulltags)
|
query_time=response_time, tags=fulltags)
|
||||||
else:
|
else:
|
||||||
result = QueryResult(username,
|
result = QueryResult(username,
|
||||||
social_network,
|
site.name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.AVAILABLE,
|
QueryStatus.AVAILABLE,
|
||||||
query_time=response_time, tags=fulltags)
|
query_time=response_time, tags=fulltags)
|
||||||
@@ -238,20 +236,20 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
|
|||||||
# forward to some odd redirect).
|
# forward to some odd redirect).
|
||||||
if 200 <= status_code < 300 and is_presense_detected:
|
if 200 <= status_code < 300 and is_presense_detected:
|
||||||
result = QueryResult(username,
|
result = QueryResult(username,
|
||||||
social_network,
|
site.name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.CLAIMED,
|
QueryStatus.CLAIMED,
|
||||||
query_time=response_time, tags=fulltags)
|
query_time=response_time, tags=fulltags)
|
||||||
else:
|
else:
|
||||||
result = QueryResult(username,
|
result = QueryResult(username,
|
||||||
social_network,
|
site.name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.AVAILABLE,
|
QueryStatus.AVAILABLE,
|
||||||
query_time=response_time, tags=fulltags)
|
query_time=response_time, tags=fulltags)
|
||||||
else:
|
else:
|
||||||
# It should be impossible to ever get here...
|
# It should be impossible to ever get here...
|
||||||
raise ValueError(f"Unknown Error Type '{error_type}' for "
|
raise ValueError(f"Unknown Error Type '{error_type}' for "
|
||||||
f"site '{social_network}'")
|
f"site '{site_name}'")
|
||||||
|
|
||||||
extracted_ids_data = {}
|
extracted_ids_data = {}
|
||||||
|
|
||||||
@@ -259,7 +257,7 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
|
|||||||
try:
|
try:
|
||||||
extracted_ids_data = extract(html_text)
|
extracted_ids_data = extract(html_text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f'Error while parsing {social_network}: {e}', exc_info=True)
|
logger.warning(f'Error while parsing {site_name}: {e}', exc_info=True)
|
||||||
|
|
||||||
if extracted_ids_data:
|
if extracted_ids_data:
|
||||||
new_usernames = {}
|
new_usernames = {}
|
||||||
@@ -272,22 +270,21 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
|
|||||||
results_info['ids_usernames'] = new_usernames
|
results_info['ids_usernames'] = new_usernames
|
||||||
result.ids_data = extracted_ids_data
|
result.ids_data = extracted_ids_data
|
||||||
|
|
||||||
is_similar = net_info.get('similarSearch', False)
|
|
||||||
# Notify caller about results of query.
|
# Notify caller about results of query.
|
||||||
query_notify.update(result, is_similar)
|
query_notify.update(result, site.similar_search)
|
||||||
|
|
||||||
# Save status of request
|
# Save status of request
|
||||||
results_info['status'] = result
|
results_info['status'] = result
|
||||||
|
|
||||||
# Save results from request
|
# Save results from request
|
||||||
results_info['http_status'] = status_code
|
results_info['http_status'] = status_code
|
||||||
results_info['is_similar'] = is_similar
|
results_info['is_similar'] = site.similar_search
|
||||||
# results_site['response_text'] = html_text
|
# results_site['response_text'] = html_text
|
||||||
results_info['rank'] = net_info.get('rank', 0)
|
results_info['rank'] = site.popularity_rank
|
||||||
return results_info
|
return results_info
|
||||||
|
|
||||||
|
|
||||||
async def maigret(username, site_data, query_notify, logger,
|
async def maigret(username, site_dict, query_notify, logger,
|
||||||
proxy=None, timeout=None, recursive_search=False,
|
proxy=None, timeout=None, recursive_search=False,
|
||||||
id_type='username', tags=None, debug=False, forced=False,
|
id_type='username', tags=None, debug=False, forced=False,
|
||||||
max_connections=100):
|
max_connections=100):
|
||||||
@@ -298,7 +295,7 @@ async def maigret(username, site_data, query_notify, logger,
|
|||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
username -- String indicating username that report
|
username -- String indicating username that report
|
||||||
should be created against.
|
should be created against.
|
||||||
site_data -- Dictionary containing all of the site data.
|
site_dict -- Dictionary containing all of the site data.
|
||||||
query_notify -- Object with base type of QueryNotify().
|
query_notify -- Object with base type of QueryNotify().
|
||||||
This will be used to notify the caller about
|
This will be used to notify the caller about
|
||||||
query results.
|
query results.
|
||||||
@@ -345,21 +342,19 @@ async def maigret(username, site_data, query_notify, logger,
|
|||||||
results_total = {}
|
results_total = {}
|
||||||
|
|
||||||
# First create futures for all requests. This allows for the requests to run in parallel
|
# First create futures for all requests. This allows for the requests to run in parallel
|
||||||
for social_network, net_info in site_data.items():
|
for site_name, site in site_dict.items():
|
||||||
|
|
||||||
fulltags = []
|
fulltags = site.tags
|
||||||
if ("tags" in net_info.keys()):
|
|
||||||
fulltags = net_info["tags"]
|
|
||||||
|
|
||||||
if net_info.get('type', 'username') != id_type:
|
if site.type != id_type:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
site_tags = set(net_info.get('tags', []))
|
site_tags = set(fulltags)
|
||||||
if tags:
|
if tags:
|
||||||
if not set(tags).intersection(site_tags):
|
if not set(tags).intersection(site_tags):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if 'disabled' in net_info and net_info['disabled'] and not forced:
|
if site.disabled and not forced:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Results from analysis of this specific site
|
# Results from analysis of this specific site
|
||||||
@@ -368,32 +363,29 @@ async def maigret(username, site_data, query_notify, logger,
|
|||||||
# Record URL of main site and username
|
# Record URL of main site and username
|
||||||
results_site['username'] = username
|
results_site['username'] = username
|
||||||
results_site['parsing_enabled'] = recursive_search
|
results_site['parsing_enabled'] = recursive_search
|
||||||
results_site['url_main'] = net_info.get("urlMain")
|
results_site['url_main'] = site.url_main
|
||||||
|
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
|
||||||
}
|
}
|
||||||
|
|
||||||
if "headers" in net_info:
|
headers.update(site.headers)
|
||||||
# Override/append any extra headers required by a given site.
|
|
||||||
headers.update(net_info["headers"])
|
|
||||||
|
|
||||||
# URL of user on site (if it exists)
|
# URL of user on site (if it exists)
|
||||||
url = net_info.get('url').format(
|
url = site.url_username_format.format(
|
||||||
urlMain=net_info['urlMain'],
|
urlMain=site.url_main,
|
||||||
urlSubpath=net_info.get('urlSubpath', ''),
|
urlSubpath=site.url_subpath,
|
||||||
username=username
|
username=username
|
||||||
)
|
)
|
||||||
# workaround to prevent slash errors
|
# workaround to prevent slash errors
|
||||||
url = url.replace('///', '/')
|
url = url.replace('///', '/')
|
||||||
|
|
||||||
# Don't make request if username is invalid for the site
|
# Don't make request if username is invalid for the site
|
||||||
regex_check = net_info.get("regexCheck")
|
if site.regex_check and re.search(site.regex_check, username) is None:
|
||||||
if regex_check and re.search(regex_check, username) is None:
|
|
||||||
# No need to do the check at the site: this user name is not allowed.
|
# No need to do the check at the site: this user name is not allowed.
|
||||||
results_site['status'] = QueryResult(username,
|
results_site['status'] = QueryResult(username,
|
||||||
social_network,
|
site_name,
|
||||||
url,
|
url,
|
||||||
QueryStatus.ILLEGAL)
|
QueryStatus.ILLEGAL)
|
||||||
results_site["url_user"] = ""
|
results_site["url_user"] = ""
|
||||||
@@ -403,7 +395,7 @@ async def maigret(username, site_data, query_notify, logger,
|
|||||||
else:
|
else:
|
||||||
# URL of user on site (if it exists)
|
# URL of user on site (if it exists)
|
||||||
results_site["url_user"] = url
|
results_site["url_user"] = url
|
||||||
url_probe = net_info.get("urlProbe")
|
url_probe = site.url_probe
|
||||||
if url_probe is None:
|
if url_probe is None:
|
||||||
# Probe URL is normal one seen by people out on the web.
|
# Probe URL is normal one seen by people out on the web.
|
||||||
url_probe = url
|
url_probe = url
|
||||||
@@ -411,13 +403,13 @@ async def maigret(username, site_data, query_notify, logger,
|
|||||||
# There is a special URL for probing existence separate
|
# There is a special URL for probing existence separate
|
||||||
# from where the user profile normally can be found.
|
# from where the user profile normally can be found.
|
||||||
url_probe = url_probe.format(
|
url_probe = url_probe.format(
|
||||||
urlMain=net_info['urlMain'],
|
urlMain=site.url_main,
|
||||||
urlSubpath=net_info.get('urlSubpath', ''),
|
urlSubpath=site.url_subpath,
|
||||||
username=username,
|
username=username,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
if net_info["errorType"] == 'status_code' and net_info.get("request_head_only", True):
|
if site.check_type == 'status_code' and site.request_head_only:
|
||||||
# In most cases when we are detecting by status code,
|
# In most cases when we are detecting by status code,
|
||||||
# it is not necessary to get the entire body: we can
|
# it is not necessary to get the entire body: we can
|
||||||
# detect fine with just the HEAD response.
|
# detect fine with just the HEAD response.
|
||||||
@@ -428,7 +420,7 @@ async def maigret(username, site_data, query_notify, logger,
|
|||||||
# not respond properly unless we request the whole page.
|
# not respond properly unless we request the whole page.
|
||||||
request_method = session.get
|
request_method = session.get
|
||||||
|
|
||||||
if net_info["errorType"] == "response_url":
|
if site.check_type == "response_url":
|
||||||
# Site forwards request to a different URL if username not
|
# Site forwards request to a different URL if username not
|
||||||
# found. Disallow the redirect so we can capture the
|
# found. Disallow the redirect so we can capture the
|
||||||
# http status from the original URL request.
|
# http status from the original URL request.
|
||||||
@@ -454,10 +446,11 @@ async def maigret(username, site_data, query_notify, logger,
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Store future in data for access later
|
# Store future in data for access later
|
||||||
net_info["request_future"] = future
|
# TODO: move to separate obj
|
||||||
|
site.request_future = future
|
||||||
|
|
||||||
# Add this site's results into final dictionary with all of the other results.
|
# Add this site's results into final dictionary with all of the other results.
|
||||||
results_total[social_network] = results_site
|
results_total[site_name] = results_site
|
||||||
|
|
||||||
# TODO: move into top-level function
|
# TODO: move into top-level function
|
||||||
|
|
||||||
@@ -465,7 +458,7 @@ async def maigret(username, site_data, query_notify, logger,
|
|||||||
|
|
||||||
tasks = []
|
tasks = []
|
||||||
for sitename, result_obj in results_total.items():
|
for sitename, result_obj in results_total.items():
|
||||||
update_site_coro = update_site_data_from_response(sitename, site_data, result_obj, sem, logger, query_notify)
|
update_site_coro = update_site_dict_from_response(sitename, site_dict, result_obj, sem, logger, query_notify)
|
||||||
future = asyncio.ensure_future(update_site_coro)
|
future = asyncio.ensure_future(update_site_coro)
|
||||||
tasks.append(future)
|
tasks.append(future)
|
||||||
|
|
||||||
@@ -553,8 +546,9 @@ async def site_self_check(site_name, site_data, logger):
|
|||||||
|
|
||||||
|
|
||||||
async def self_check(json_file, logger):
|
async def self_check(json_file, logger):
|
||||||
data = json.load(open(json_file))
|
db = MaigretDatabase()
|
||||||
sites = SitesInformation(json_file)
|
db.load_from_file(json_file)
|
||||||
|
sites = db.sites
|
||||||
all_sites = {}
|
all_sites = {}
|
||||||
|
|
||||||
def disabled_count(data):
|
def disabled_count(data):
|
||||||
@@ -825,18 +819,11 @@ async def main():
|
|||||||
|
|
||||||
# Create object with all information about sites we are aware of.
|
# Create object with all information about sites we are aware of.
|
||||||
try:
|
try:
|
||||||
sites = SitesInformation(args.json_file)
|
site_data_all = MaigretDatabase().load_from_file(args.json_file).sites_dict
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
print(f"ERROR: {error}")
|
print(f"ERROR: {error}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Create original dictionary from SitesInformation() object.
|
|
||||||
# Eventually, the rest of the code will be updated to use the new object
|
|
||||||
# directly, but this will glue the two pieces together.
|
|
||||||
site_data_all = {}
|
|
||||||
for site in sites:
|
|
||||||
site_data_all[site.name] = site.information
|
|
||||||
|
|
||||||
if args.site_list is None:
|
if args.site_list is None:
|
||||||
# Not desired to look at a sub-set of sites
|
# Not desired to look at a sub-set of sites
|
||||||
site_data = site_data_all
|
site_data = site_data_all
|
||||||
@@ -868,7 +855,7 @@ async def main():
|
|||||||
site_data[site] = site_dataCpy.get(site)
|
site_data[site] = site_dataCpy.get(site)
|
||||||
|
|
||||||
# Database consistency
|
# Database consistency
|
||||||
enabled_count = len(list(filter(lambda x: not x.get('disabled', False), site_data.values())))
|
enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values())))
|
||||||
print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')
|
print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')
|
||||||
|
|
||||||
# Create notify object for query results.
|
# Create notify object for query results.
|
||||||
|
|||||||
+26
-25
@@ -1,6 +1,7 @@
|
|||||||
{
|
{
|
||||||
"engines": {
|
"engines": {
|
||||||
"XenForo": {
|
"XenForo": {
|
||||||
|
"presenseStrs": ["XenForo"],
|
||||||
"site": {
|
"site": {
|
||||||
"errorMsg": [
|
"errorMsg": [
|
||||||
"The specified member cannot be found. Please enter a member's entire name.",
|
"The specified member cannot be found. Please enter a member's entire name.",
|
||||||
@@ -11,7 +12,7 @@
|
|||||||
"errors": {
|
"errors": {
|
||||||
"You must be logged-in to do that.": "Login required"
|
"You must be logged-in to do that.": "Login required"
|
||||||
},
|
},
|
||||||
"url": "{urlMain}/members/?username={username}"
|
"url": "{urlMain}{urlSubpath}/members/?username={username}"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"phpBB": {
|
"phpBB": {
|
||||||
@@ -46,7 +47,7 @@
|
|||||||
"The administrator has banned your IP address": "IP ban",
|
"The administrator has banned your IP address": "IP ban",
|
||||||
"\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0441\u0435\u0440\u0432\u0435\u0440 \u043f\u0435\u0440\u0435\u0433\u0440\u0443\u0436\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u043f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0437\u0430\u0439\u0442\u0438 \u043f\u043e\u0437\u0436\u0435.": "Server is overloaded"
|
"\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0441\u0435\u0440\u0432\u0435\u0440 \u043f\u0435\u0440\u0435\u0433\u0440\u0443\u0436\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u043f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0437\u0430\u0439\u0442\u0438 \u043f\u043e\u0437\u0436\u0435.": "Server is overloaded"
|
||||||
},
|
},
|
||||||
"url": "{urlMain}/{urlSubpath}/member.php?username={username}"
|
"url": "{urlMain}{urlSubpath}/member.php?username={username}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -6845,7 +6846,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://www.infrance.su/",
|
"urlMain": "https://www.infrance.su/",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -7987,7 +7988,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://la.mail.ru",
|
"urlMain": "https://la.mail.ru",
|
||||||
"urlSubpath": "forums",
|
"urlSubpath": "/forums",
|
||||||
"username_claimed": "wizard",
|
"username_claimed": "wizard",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -8554,7 +8555,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://minecraftonly.ru",
|
"urlMain": "https://minecraftonly.ru",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -8639,7 +8640,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"urlMain": "https://www.mobile-files.com/",
|
"urlMain": "https://www.mobile-files.com/",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -8819,7 +8820,7 @@
|
|||||||
"pk"
|
"pk"
|
||||||
],
|
],
|
||||||
"urlMain": "https://www.movie-list.com",
|
"urlMain": "https://www.movie-list.com",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -8859,7 +8860,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"urlMain": "https://www.mpgh.net/",
|
"urlMain": "https://www.mpgh.net/",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -9873,7 +9874,7 @@
|
|||||||
"engine": "vBulletin",
|
"engine": "vBulletin",
|
||||||
"rank": 4840375,
|
"rank": 4840375,
|
||||||
"urlMain": "http://p38forum.com",
|
"urlMain": "http://p38forum.com",
|
||||||
"urlSubpath": "forums",
|
"urlSubpath": "/forums",
|
||||||
"username_claimed": "red",
|
"username_claimed": "red",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -10193,7 +10194,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://pw.mail.ru/",
|
"urlMain": "https://pw.mail.ru/",
|
||||||
"urlSubpath": "forums",
|
"urlSubpath": "/forums",
|
||||||
"username_claimed": "wizard",
|
"username_claimed": "wizard",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -10216,7 +10217,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "http://pesiq.ru/",
|
"urlMain": "http://pesiq.ru/",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -11233,7 +11234,7 @@
|
|||||||
"music"
|
"music"
|
||||||
],
|
],
|
||||||
"urlMain": "http://www.rap-royalty.com",
|
"urlMain": "http://www.rap-royalty.com",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "red",
|
"username_claimed": "red",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -11365,7 +11366,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "http://www.redorchestra.ru",
|
"urlMain": "http://www.redorchestra.ru",
|
||||||
"urlSubpath": "forums",
|
"urlSubpath": "/forums",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -11484,7 +11485,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://rev.mail.ru",
|
"urlMain": "https://rev.mail.ru",
|
||||||
"urlSubpath": "forums",
|
"urlSubpath": "/forums",
|
||||||
"username_claimed": "wizard",
|
"username_claimed": "wizard",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -11523,7 +11524,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://www.rlocman.ru",
|
"urlMain": "https://www.rlocman.ru",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "elnat",
|
"username_claimed": "elnat",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -11700,7 +11701,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"urlMain": "https://www.rpgwatch.com",
|
"urlMain": "https://www.rpgwatch.com",
|
||||||
"urlSubpath": "forums",
|
"urlSubpath": "/forums",
|
||||||
"username_claimed": "blue",
|
"username_claimed": "blue",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -11821,7 +11822,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "http://www.russian.fi/",
|
"urlMain": "http://www.russian.fi/",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -12532,7 +12533,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://solaris-club.net",
|
"urlMain": "https://solaris-club.net",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -12802,7 +12803,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "http://statistika.ru",
|
"urlMain": "http://statistika.ru",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "hamam",
|
"username_claimed": "hamam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -12900,7 +12901,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://www.stratege.ru",
|
"urlMain": "https://www.stratege.ru",
|
||||||
"urlSubpath": "forums",
|
"urlSubpath": "/forums",
|
||||||
"username_claimed": "blue",
|
"username_claimed": "blue",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -13199,7 +13200,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://tanks.mail.ru",
|
"urlMain": "https://tanks.mail.ru",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "red",
|
"username_claimed": "red",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -13751,7 +13752,7 @@
|
|||||||
"in"
|
"in"
|
||||||
],
|
],
|
||||||
"urlMain": "https://www.trainsim.com/",
|
"urlMain": "https://www.trainsim.com/",
|
||||||
"urlSubpath": "vbts",
|
"urlSubpath": "/vbts",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -13986,7 +13987,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "http://tv-games.ru/",
|
"urlMain": "http://tv-games.ru/",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "adam",
|
"username_claimed": "adam",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -14632,7 +14633,7 @@
|
|||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "https://wf.mail.ru",
|
"urlMain": "https://wf.mail.ru",
|
||||||
"urlSubpath": "forums",
|
"urlSubpath": "/forums",
|
||||||
"username_claimed": "wizard",
|
"username_claimed": "wizard",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
@@ -14961,7 +14962,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"urlMain": "http://wirednewyork.com/",
|
"urlMain": "http://wirednewyork.com/",
|
||||||
"urlSubpath": "forum",
|
"urlSubpath": "/forum",
|
||||||
"username_claimed": "blue",
|
"username_claimed": "blue",
|
||||||
"username_unclaimed": "noonewouldeverusethis7"
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
|
|||||||
+114
-126
@@ -1,8 +1,5 @@
|
|||||||
"""Sherlock Sites Information Module
|
"""Maigret Sites Information"""
|
||||||
|
from __future__ import annotations
|
||||||
This module supports storing information about web sites.
|
|
||||||
This is the raw data that will be used to search for usernames.
|
|
||||||
"""
|
|
||||||
import json
|
import json
|
||||||
import operator
|
import operator
|
||||||
import sys
|
import sys
|
||||||
@@ -10,8 +7,14 @@ import sys
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
class SiteInformation():
|
class MaigretEngine:
|
||||||
def __init__(self, name, url_home, url_username_format, popularity_rank,
|
def __init__(self, name, *args, **kwargs):
|
||||||
|
self.name = name
|
||||||
|
self.__dict__.update(kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class MaigretSite:
|
||||||
|
def __init__(self, name, url_main, url_username_format, popularity_rank,
|
||||||
username_claimed, username_unclaimed,
|
username_claimed, username_unclaimed,
|
||||||
information):
|
information):
|
||||||
"""Create Site Information Object.
|
"""Create Site Information Object.
|
||||||
@@ -21,7 +24,7 @@ class SiteInformation():
|
|||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
self -- This object.
|
self -- This object.
|
||||||
name -- String which identifies site.
|
name -- String which identifies site.
|
||||||
url_home -- String containing URL for home of site.
|
url_main -- String containing URL for home of site.
|
||||||
url_username_format -- String containing URL for Username format
|
url_username_format -- String containing URL for Username format
|
||||||
on site.
|
on site.
|
||||||
NOTE: The string should contain the
|
NOTE: The string should contain the
|
||||||
@@ -55,7 +58,7 @@ class SiteInformation():
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
self.name = name
|
self.name = name
|
||||||
self.url_home = url_home
|
self.url_main = url_main
|
||||||
self.url_username_format = url_username_format
|
self.url_username_format = url_username_format
|
||||||
|
|
||||||
if (popularity_rank is None) or (popularity_rank == 0):
|
if (popularity_rank is None) or (popularity_rank == 0):
|
||||||
@@ -66,105 +69,56 @@ class SiteInformation():
|
|||||||
self.username_claimed = username_claimed
|
self.username_claimed = username_claimed
|
||||||
self.username_unclaimed = username_unclaimed
|
self.username_unclaimed = username_unclaimed
|
||||||
self.information = information
|
self.information = information
|
||||||
|
self.disabled = information.get('disabled', False)
|
||||||
|
self.similar_search = information.get('similarSearch', False)
|
||||||
|
self.ignore_403 = information.get('ignore_403', False)
|
||||||
|
self.tags = information.get('tags', [])
|
||||||
|
|
||||||
|
self.type = information.get('type', 'username')
|
||||||
|
self.headers = information.get('headers', {})
|
||||||
|
self.errors = information.get('errors', {})
|
||||||
|
self.url_subpath = information.get('urlSubpath', '')
|
||||||
|
self.regex_check = information.get('regexCheck', None)
|
||||||
|
self.url_probe = information.get('urlProbe', None)
|
||||||
|
self.check_type = information.get('errorType', '')
|
||||||
|
self.request_head_only = information.get('request_head_only', '')
|
||||||
|
|
||||||
|
self.presense_strs = information.get('presenseStrs', [])
|
||||||
|
self.absence_strs = information.get('errorMsg', [])
|
||||||
|
self.request_future = None
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
"""Convert Object To String.
|
return f"{self.name} ({self.url_main})"
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
self -- This object.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
Nicely formatted string to get information about this object.
|
|
||||||
"""
|
|
||||||
|
|
||||||
return f"{self.name} ({self.url_home})"
|
|
||||||
|
|
||||||
|
|
||||||
class SitesInformation():
|
class MaigretDatabase:
|
||||||
def __init__(self, data_file_path=None):
|
def __init__(self):
|
||||||
"""Create Sites Information Object.
|
self._sites = []
|
||||||
|
self._engines = []
|
||||||
|
|
||||||
Contains information about all supported web sites.
|
@property
|
||||||
|
def sites(self: MaigretDatabase):
|
||||||
|
return self._sites
|
||||||
|
|
||||||
Keyword Arguments:
|
@property
|
||||||
self -- This object.
|
def sites_dict(self):
|
||||||
data_file_path -- String which indicates path to data file.
|
return {site.name: site for site in self._sites}
|
||||||
The file name must end in ".json".
|
|
||||||
|
|
||||||
There are 3 possible formats:
|
|
||||||
* Absolute File Format
|
|
||||||
For example, "c:/stuff/data.json".
|
|
||||||
* Relative File Format
|
|
||||||
The current working directory is used
|
|
||||||
as the context.
|
|
||||||
For example, "data.json".
|
|
||||||
* URL Format
|
|
||||||
For example,
|
|
||||||
"https://example.com/data.json", or
|
|
||||||
"http://example.com/data.json".
|
|
||||||
|
|
||||||
An exception will be thrown if the path
|
@property
|
||||||
to the data file is not in the expected
|
def engines(self: MaigretDatabase):
|
||||||
format, or if there was any problem loading
|
return self._engines
|
||||||
the file.
|
|
||||||
|
|
||||||
If this option is not specified, then a
|
|
||||||
default site list will be used.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
Nothing.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Ensure that specified data file has correct extension.
|
|
||||||
if ".json" != data_file_path[-5:].lower():
|
|
||||||
raise FileNotFoundError(f"Incorrect JSON file extension for "
|
|
||||||
f"data file '{data_file_path}'."
|
|
||||||
)
|
|
||||||
|
|
||||||
if (("http://" == data_file_path[:7].lower()) or
|
|
||||||
("https://" == data_file_path[:8].lower())
|
|
||||||
):
|
|
||||||
# Reference is to a URL.
|
|
||||||
try:
|
|
||||||
response = requests.get(url=data_file_path)
|
|
||||||
except Exception as error:
|
|
||||||
raise FileNotFoundError(f"Problem while attempting to access "
|
|
||||||
f"data file URL '{data_file_path}': "
|
|
||||||
f"{str(error)}"
|
|
||||||
)
|
|
||||||
if response.status_code == 200:
|
|
||||||
try:
|
|
||||||
site_data = response.json()
|
|
||||||
except Exception as error:
|
|
||||||
raise ValueError(f"Problem parsing json contents at "
|
|
||||||
f"'{data_file_path}': {str(error)}."
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise FileNotFoundError(f"Bad response while accessing "
|
|
||||||
f"data file URL '{data_file_path}'."
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Reference is to a file.
|
|
||||||
try:
|
|
||||||
with open(data_file_path, "r", encoding="utf-8") as file:
|
|
||||||
try:
|
|
||||||
data = json.load(file)
|
|
||||||
site_data = data.get("sites")
|
|
||||||
engines_data = data.get("engines")
|
|
||||||
except Exception as error:
|
|
||||||
raise ValueError(f"Problem parsing json contents at "
|
|
||||||
f"'{data_file_path}': {str(error)}."
|
|
||||||
)
|
|
||||||
except FileNotFoundError as error:
|
|
||||||
raise FileNotFoundError(f"Problem while attempting to access "
|
|
||||||
f"data file '{data_file_path}'."
|
|
||||||
)
|
|
||||||
|
|
||||||
self.sites = {}
|
|
||||||
|
|
||||||
|
def load_from_json(self: MaigretDatabase, json_data: dict) -> MaigretDatabase:
|
||||||
# Add all of site information from the json file to internal site list.
|
# Add all of site information from the json file to internal site list.
|
||||||
|
site_data = json_data.get("sites")
|
||||||
|
engines_data = json_data.get("engines")
|
||||||
|
|
||||||
|
for engine_name in engines_data:
|
||||||
|
self._engines.append(MaigretEngine(engine_name, engines_data[engine_name]))
|
||||||
|
|
||||||
for site_name in site_data:
|
for site_name in site_data:
|
||||||
try:
|
try:
|
||||||
site = {}
|
site = {}
|
||||||
@@ -178,8 +132,7 @@ class SitesInformation():
|
|||||||
|
|
||||||
site.update(site_user_info)
|
site.update(site_user_info)
|
||||||
|
|
||||||
self.sites[site_name] = \
|
maigret_site = MaigretSite(site_name,
|
||||||
SiteInformation(site_name,
|
|
||||||
site["urlMain"],
|
site["urlMain"],
|
||||||
site["url"],
|
site["url"],
|
||||||
popularity_rank,
|
popularity_rank,
|
||||||
@@ -187,15 +140,74 @@ class SitesInformation():
|
|||||||
site["username_unclaimed"],
|
site["username_unclaimed"],
|
||||||
site
|
site
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self._sites.append(maigret_site)
|
||||||
except KeyError as error:
|
except KeyError as error:
|
||||||
raise ValueError(f"Problem parsing json contents at "
|
raise ValueError(f"Problem parsing json content for site {site_name}: "
|
||||||
f"'{data_file_path}' for site {site_name}: "
|
|
||||||
f"Missing attribute {str(error)}."
|
f"Missing attribute {str(error)}."
|
||||||
)
|
)
|
||||||
|
|
||||||
return
|
return self
|
||||||
|
|
||||||
def site_name_list(self, popularity_rank=False):
|
|
||||||
|
def load_from_str(self: MaigretDatabase, db_str: str) -> MaigretDatabase:
|
||||||
|
try:
|
||||||
|
data = json.loads(db_str)
|
||||||
|
except Exception as error:
|
||||||
|
raise ValueError(f"Problem parsing json contents from str"
|
||||||
|
f"'{db_str[:50]}'...: {str(error)}."
|
||||||
|
)
|
||||||
|
|
||||||
|
return self.load_from_json(data)
|
||||||
|
|
||||||
|
|
||||||
|
def load_from_url(self: MaigretDatabase, url: str) -> MaigretDatabase:
|
||||||
|
is_url_valid = url.startswith('http://') or url.startswith('https://')
|
||||||
|
|
||||||
|
if not is_url_valid:
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = requests.get(url=url)
|
||||||
|
except Exception as error:
|
||||||
|
raise FileNotFoundError(f"Problem while attempting to access "
|
||||||
|
f"data file URL '{url}': "
|
||||||
|
f"{str(error)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
try:
|
||||||
|
data = response.json()
|
||||||
|
except Exception as error:
|
||||||
|
raise ValueError(f"Problem parsing json contents at "
|
||||||
|
f"'{url}': {str(error)}."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise FileNotFoundError(f"Bad response while accessing "
|
||||||
|
f"data file URL '{url}'."
|
||||||
|
)
|
||||||
|
|
||||||
|
return self.load_from_json(data)
|
||||||
|
|
||||||
|
|
||||||
|
def load_from_file(self: MaigretDatabase, filename: str) -> MaigretDatabase:
|
||||||
|
try:
|
||||||
|
with open(filename, 'r', encoding='utf-8') as file:
|
||||||
|
try:
|
||||||
|
data = json.load(file)
|
||||||
|
except Exception as error:
|
||||||
|
raise ValueError(f"Problem parsing json contents from "
|
||||||
|
f"file '{filename}': {str(error)}."
|
||||||
|
)
|
||||||
|
except FileNotFoundError as error:
|
||||||
|
raise FileNotFoundError(f"Problem while attempting to access "
|
||||||
|
f"data file '{filename}'."
|
||||||
|
)
|
||||||
|
|
||||||
|
return self.load_from_json(data)
|
||||||
|
|
||||||
|
|
||||||
|
def site_name_list(self: MaigretDatabase, popularity_rank=False):
|
||||||
"""Get Site Name List.
|
"""Get Site Name List.
|
||||||
|
|
||||||
Keyword Arguments:
|
Keyword Arguments:
|
||||||
@@ -223,27 +235,3 @@ class SitesInformation():
|
|||||||
site_names = sorted([site.name for site in self], key=str.lower)
|
site_names = sorted([site.name for site in self], key=str.lower)
|
||||||
|
|
||||||
return site_names
|
return site_names
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
"""Iterator For Object.
|
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
self -- This object.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
Iterator for sites object.
|
|
||||||
"""
|
|
||||||
|
|
||||||
for site_name in self.sites:
|
|
||||||
yield self.sites[site_name]
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
"""Length For Object.
|
|
||||||
|
|
||||||
Keyword Arguments:
|
|
||||||
self -- This object.
|
|
||||||
|
|
||||||
Return Value:
|
|
||||||
Length of sites object.
|
|
||||||
"""
|
|
||||||
return len(self.sites)
|
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
"""Maigret Database test functions"""
|
||||||
|
from maigret.sites import MaigretDatabase
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_empty_db_from_str():
|
||||||
|
db = MaigretDatabase()
|
||||||
|
db.load_from_str('{"engines": {}, "sites": {}}')
|
||||||
|
|
||||||
|
assert db.sites == []
|
||||||
|
assert db.engines == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_valid_db():
|
||||||
|
db = MaigretDatabase()
|
||||||
|
db.load_from_json({
|
||||||
|
'engines': {
|
||||||
|
"XenForo": {
|
||||||
|
"presenseStrs": ["XenForo"],
|
||||||
|
"site": {
|
||||||
|
"errorMsg": [
|
||||||
|
"The specified member cannot be found. Please enter a member's entire name.",
|
||||||
|
],
|
||||||
|
"errorType": "message",
|
||||||
|
"errors": {
|
||||||
|
"You must be logged-in to do that.": "Login required"
|
||||||
|
},
|
||||||
|
"url": "{urlMain}{urlSubpath}/members/?username={username}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'sites': {
|
||||||
|
"Amperka": {
|
||||||
|
"engine": "XenForo",
|
||||||
|
"rank": 121613,
|
||||||
|
"tags": [
|
||||||
|
"ru"
|
||||||
|
],
|
||||||
|
"urlMain": "http://forum.amperka.ru",
|
||||||
|
"username_claimed": "adam",
|
||||||
|
"username_unclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
assert len(db.sites) == 1
|
||||||
|
assert len(db.engines) == 1
|
||||||
|
|
||||||
|
assert db.sites[0].name == 'Amperka'
|
||||||
|
assert db.engines[0].name == 'XenForo'
|
||||||
Reference in New Issue
Block a user