Refactoring, sites database tests

This commit is contained in:
Soxoj
2021-01-02 00:23:58 +03:00
parent 863e16b1d9
commit 315ecec39f
5 changed files with 246 additions and 221 deletions
+56 -69
View File
@@ -25,7 +25,7 @@ from socid_extractor import parse, extract
from .notify import QueryNotifyPrint from .notify import QueryNotifyPrint
from .result import QueryResult, QueryStatus from .result import QueryResult, QueryStatus
from .sites import SitesInformation from .sites import MaigretDatabase, MaigretSite
import xmind import xmind
@@ -55,7 +55,7 @@ unsupported_characters = '#'
cookies_file = 'cookies.txt' cookies_file = 'cookies.txt'
async def get_response(request_future, social_network, logger): async def get_response(request_future, site_name, logger):
html_text = None html_text = None
status_code = 0 status_code = 0
@@ -92,7 +92,7 @@ async def get_response(request_future, social_network, logger):
error_text = "Proxy Error" error_text = "Proxy Error"
expection_text = str(err) expection_text = str(err)
except Exception as err: except Exception as err:
logger.warning(f'Unhandled error while requesting {social_network}: {err}') logger.warning(f'Unhandled error while requesting {site_name}: {err}')
logger.debug(err, exc_info=True) logger.debug(err, exc_info=True)
error_text = "Some Error" error_text = "Some Error"
expection_text = str(err) expection_text = str(err)
@@ -101,19 +101,19 @@ async def get_response(request_future, social_network, logger):
return html_text, status_code, error_text, expection_text return html_text, status_code, error_text, expection_text
async def update_site_data_from_response(sitename, site_data, results_info, semaphore, logger, query_notify): async def update_site_dict_from_response(sitename, site_dict, results_info, semaphore, logger, query_notify):
async with semaphore: async with semaphore:
site_obj = site_data[sitename] site_obj = site_dict[sitename]
future = site_obj.get('request_future') future = site_obj.request_future
if not future: if not future:
# ignore: search by incompatible id type # ignore: search by incompatible id type
return return
response = await get_response(request_future=future, response = await get_response(request_future=future,
social_network=sitename, site_name=sitename,
logger=logger) logger=logger)
site_data[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj, sitename) site_dict[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj)
# TODO: move info separate module # TODO: move info separate module
@@ -137,13 +137,11 @@ def detect_error_page(html_text, status_code, fail_flags, ignore_403):
return None, None return None, None
def process_site_result(response, query_notify, logger, results_info, net_info, social_network): def process_site_result(response, query_notify, logger, results_info, site: MaigretSite):
if not response: if not response:
return results_info return results_info
fulltags = [] fulltags = site.tags
if ("tags" in net_info.keys()):
fulltags = net_info["tags"]
# Retrieve other site information again # Retrieve other site information again
username = results_info['username'] username = results_info['username']
@@ -157,14 +155,14 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
return results_info return results_info
# Get the expected error type # Get the expected error type
error_type = net_info["errorType"] error_type = site.check_type
# Get the failure messages and comments # Get the failure messages and comments
failure_errors = net_info.get("errors", {}) failure_errors = site.errors
# TODO: refactor # TODO: refactor
if not response: if not response:
logger.error(f'No response for {social_network}') logger.error(f'No response for {site.name}')
return results_info return results_info
html_text, status_code, error_text, expection_text = response html_text, status_code, error_text, expection_text = response
@@ -182,37 +180,37 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
if status_code and not error_text: if status_code and not error_text:
error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors, error_text, site_error_text = detect_error_page(html_text, status_code, failure_errors,
'ignore_403' in net_info) site.ignore_403)
# presense flags # presense flags
# True by default # True by default
presense_flags = net_info.get("presenseStrs", []) presense_flags = site.presense_strs
is_presense_detected = html_text and all( is_presense_detected = html_text and all(
[(presense_flag in html_text) for presense_flag in presense_flags]) or not presense_flags [(presense_flag in html_text) for presense_flag in presense_flags]) or not presense_flags
if error_text is not None: if error_text is not None:
logger.debug(error_text) logger.debug(error_text)
result = QueryResult(username, result = QueryResult(username,
social_network, site.name,
url, url,
QueryStatus.UNKNOWN, QueryStatus.UNKNOWN,
query_time=response_time, query_time=response_time,
context=f'{error_text}: {site_error_text}', tags=fulltags) context=f'{error_text}: {site_error_text}', tags=fulltags)
elif error_type == "message": elif error_type == "message":
absence_flags = net_info.get("errorMsg") absence_flags = site.absence_strs
is_absence_flags_list = isinstance(absence_flags, list) is_absence_flags_list = isinstance(absence_flags, list)
absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags} absence_flags_set = set(absence_flags) if is_absence_flags_list else {absence_flags}
# Checks if the error message is in the HTML # Checks if the error message is in the HTML
is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set]) is_absence_detected = any([(absence_flag in html_text) for absence_flag in absence_flags_set])
if not is_absence_detected and is_presense_detected: if not is_absence_detected and is_presense_detected:
result = QueryResult(username, result = QueryResult(username,
social_network, site.name,
url, url,
QueryStatus.CLAIMED, QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags) query_time=response_time, tags=fulltags)
else: else:
result = QueryResult(username, result = QueryResult(username,
social_network, site.name,
url, url,
QueryStatus.AVAILABLE, QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags) query_time=response_time, tags=fulltags)
@@ -220,13 +218,13 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
# Checks if the status code of the response is 2XX # Checks if the status code of the response is 2XX
if (not status_code >= 300 or status_code < 200) and is_presense_detected: if (not status_code >= 300 or status_code < 200) and is_presense_detected:
result = QueryResult(username, result = QueryResult(username,
social_network, site.name,
url, url,
QueryStatus.CLAIMED, QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags) query_time=response_time, tags=fulltags)
else: else:
result = QueryResult(username, result = QueryResult(username,
social_network, site.name,
url, url,
QueryStatus.AVAILABLE, QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags) query_time=response_time, tags=fulltags)
@@ -238,20 +236,20 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
# forward to some odd redirect). # forward to some odd redirect).
if 200 <= status_code < 300 and is_presense_detected: if 200 <= status_code < 300 and is_presense_detected:
result = QueryResult(username, result = QueryResult(username,
social_network, site.name,
url, url,
QueryStatus.CLAIMED, QueryStatus.CLAIMED,
query_time=response_time, tags=fulltags) query_time=response_time, tags=fulltags)
else: else:
result = QueryResult(username, result = QueryResult(username,
social_network, site.name,
url, url,
QueryStatus.AVAILABLE, QueryStatus.AVAILABLE,
query_time=response_time, tags=fulltags) query_time=response_time, tags=fulltags)
else: else:
# It should be impossible to ever get here... # It should be impossible to ever get here...
raise ValueError(f"Unknown Error Type '{error_type}' for " raise ValueError(f"Unknown Error Type '{error_type}' for "
f"site '{social_network}'") f"site '{site_name}'")
extracted_ids_data = {} extracted_ids_data = {}
@@ -259,7 +257,7 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
try: try:
extracted_ids_data = extract(html_text) extracted_ids_data = extract(html_text)
except Exception as e: except Exception as e:
logger.warning(f'Error while parsing {social_network}: {e}', exc_info=True) logger.warning(f'Error while parsing {site_name}: {e}', exc_info=True)
if extracted_ids_data: if extracted_ids_data:
new_usernames = {} new_usernames = {}
@@ -272,22 +270,21 @@ def process_site_result(response, query_notify, logger, results_info, net_info,
results_info['ids_usernames'] = new_usernames results_info['ids_usernames'] = new_usernames
result.ids_data = extracted_ids_data result.ids_data = extracted_ids_data
is_similar = net_info.get('similarSearch', False)
# Notify caller about results of query. # Notify caller about results of query.
query_notify.update(result, is_similar) query_notify.update(result, site.similar_search)
# Save status of request # Save status of request
results_info['status'] = result results_info['status'] = result
# Save results from request # Save results from request
results_info['http_status'] = status_code results_info['http_status'] = status_code
results_info['is_similar'] = is_similar results_info['is_similar'] = site.similar_search
# results_site['response_text'] = html_text # results_site['response_text'] = html_text
results_info['rank'] = net_info.get('rank', 0) results_info['rank'] = site.popularity_rank
return results_info return results_info
async def maigret(username, site_data, query_notify, logger, async def maigret(username, site_dict, query_notify, logger,
proxy=None, timeout=None, recursive_search=False, proxy=None, timeout=None, recursive_search=False,
id_type='username', tags=None, debug=False, forced=False, id_type='username', tags=None, debug=False, forced=False,
max_connections=100): max_connections=100):
@@ -298,7 +295,7 @@ async def maigret(username, site_data, query_notify, logger,
Keyword Arguments: Keyword Arguments:
username -- String indicating username that report username -- String indicating username that report
should be created against. should be created against.
site_data -- Dictionary containing all of the site data. site_dict -- Dictionary containing all of the site data.
query_notify -- Object with base type of QueryNotify(). query_notify -- Object with base type of QueryNotify().
This will be used to notify the caller about This will be used to notify the caller about
query results. query results.
@@ -345,21 +342,19 @@ async def maigret(username, site_data, query_notify, logger,
results_total = {} results_total = {}
# First create futures for all requests. This allows for the requests to run in parallel # First create futures for all requests. This allows for the requests to run in parallel
for social_network, net_info in site_data.items(): for site_name, site in site_dict.items():
fulltags = [] fulltags = site.tags
if ("tags" in net_info.keys()):
fulltags = net_info["tags"]
if net_info.get('type', 'username') != id_type: if site.type != id_type:
continue continue
site_tags = set(net_info.get('tags', [])) site_tags = set(fulltags)
if tags: if tags:
if not set(tags).intersection(site_tags): if not set(tags).intersection(site_tags):
continue continue
if 'disabled' in net_info and net_info['disabled'] and not forced: if site.disabled and not forced:
continue continue
# Results from analysis of this specific site # Results from analysis of this specific site
@@ -368,32 +363,29 @@ async def maigret(username, site_data, query_notify, logger,
# Record URL of main site and username # Record URL of main site and username
results_site['username'] = username results_site['username'] = username
results_site['parsing_enabled'] = recursive_search results_site['parsing_enabled'] = recursive_search
results_site['url_main'] = net_info.get("urlMain") results_site['url_main'] = site.url_main
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:55.0) Gecko/20100101 Firefox/55.0',
} }
if "headers" in net_info: headers.update(site.headers)
# Override/append any extra headers required by a given site.
headers.update(net_info["headers"])
# URL of user on site (if it exists) # URL of user on site (if it exists)
url = net_info.get('url').format( url = site.url_username_format.format(
urlMain=net_info['urlMain'], urlMain=site.url_main,
urlSubpath=net_info.get('urlSubpath', ''), urlSubpath=site.url_subpath,
username=username username=username
) )
# workaround to prevent slash errors # workaround to prevent slash errors
url = url.replace('///', '/') url = url.replace('///', '/')
# Don't make request if username is invalid for the site # Don't make request if username is invalid for the site
regex_check = net_info.get("regexCheck") if site.regex_check and re.search(site.regex_check, username) is None:
if regex_check and re.search(regex_check, username) is None:
# No need to do the check at the site: this user name is not allowed. # No need to do the check at the site: this user name is not allowed.
results_site['status'] = QueryResult(username, results_site['status'] = QueryResult(username,
social_network, site_name,
url, url,
QueryStatus.ILLEGAL) QueryStatus.ILLEGAL)
results_site["url_user"] = "" results_site["url_user"] = ""
@@ -403,7 +395,7 @@ async def maigret(username, site_data, query_notify, logger,
else: else:
# URL of user on site (if it exists) # URL of user on site (if it exists)
results_site["url_user"] = url results_site["url_user"] = url
url_probe = net_info.get("urlProbe") url_probe = site.url_probe
if url_probe is None: if url_probe is None:
# Probe URL is normal one seen by people out on the web. # Probe URL is normal one seen by people out on the web.
url_probe = url url_probe = url
@@ -411,13 +403,13 @@ async def maigret(username, site_data, query_notify, logger,
# There is a special URL for probing existence separate # There is a special URL for probing existence separate
# from where the user profile normally can be found. # from where the user profile normally can be found.
url_probe = url_probe.format( url_probe = url_probe.format(
urlMain=net_info['urlMain'], urlMain=site.url_main,
urlSubpath=net_info.get('urlSubpath', ''), urlSubpath=site.url_subpath,
username=username, username=username,
) )
if net_info["errorType"] == 'status_code' and net_info.get("request_head_only", True): if site.check_type == 'status_code' and site.request_head_only:
# In most cases when we are detecting by status code, # In most cases when we are detecting by status code,
# it is not necessary to get the entire body: we can # it is not necessary to get the entire body: we can
# detect fine with just the HEAD response. # detect fine with just the HEAD response.
@@ -428,7 +420,7 @@ async def maigret(username, site_data, query_notify, logger,
# not respond properly unless we request the whole page. # not respond properly unless we request the whole page.
request_method = session.get request_method = session.get
if net_info["errorType"] == "response_url": if site.check_type == "response_url":
# Site forwards request to a different URL if username not # Site forwards request to a different URL if username not
# found. Disallow the redirect so we can capture the # found. Disallow the redirect so we can capture the
# http status from the original URL request. # http status from the original URL request.
@@ -454,10 +446,11 @@ async def maigret(username, site_data, query_notify, logger,
) )
# Store future in data for access later # Store future in data for access later
net_info["request_future"] = future # TODO: move to separate obj
site.request_future = future
# Add this site's results into final dictionary with all of the other results. # Add this site's results into final dictionary with all of the other results.
results_total[social_network] = results_site results_total[site_name] = results_site
# TODO: move into top-level function # TODO: move into top-level function
@@ -465,7 +458,7 @@ async def maigret(username, site_data, query_notify, logger,
tasks = [] tasks = []
for sitename, result_obj in results_total.items(): for sitename, result_obj in results_total.items():
update_site_coro = update_site_data_from_response(sitename, site_data, result_obj, sem, logger, query_notify) update_site_coro = update_site_dict_from_response(sitename, site_dict, result_obj, sem, logger, query_notify)
future = asyncio.ensure_future(update_site_coro) future = asyncio.ensure_future(update_site_coro)
tasks.append(future) tasks.append(future)
@@ -553,8 +546,9 @@ async def site_self_check(site_name, site_data, logger):
async def self_check(json_file, logger): async def self_check(json_file, logger):
data = json.load(open(json_file)) db = MaigretDatabase()
sites = SitesInformation(json_file) db.load_from_file(json_file)
sites = db.sites
all_sites = {} all_sites = {}
def disabled_count(data): def disabled_count(data):
@@ -825,18 +819,11 @@ async def main():
# Create object with all information about sites we are aware of. # Create object with all information about sites we are aware of.
try: try:
sites = SitesInformation(args.json_file) site_data_all = MaigretDatabase().load_from_file(args.json_file).sites_dict
except Exception as error: except Exception as error:
print(f"ERROR: {error}") print(f"ERROR: {error}")
sys.exit(1) sys.exit(1)
# Create original dictionary from SitesInformation() object.
# Eventually, the rest of the code will be updated to use the new object
# directly, but this will glue the two pieces together.
site_data_all = {}
for site in sites:
site_data_all[site.name] = site.information
if args.site_list is None: if args.site_list is None:
# Not desired to look at a sub-set of sites # Not desired to look at a sub-set of sites
site_data = site_data_all site_data = site_data_all
@@ -868,7 +855,7 @@ async def main():
site_data[site] = site_dataCpy.get(site) site_data[site] = site_dataCpy.get(site)
# Database consistency # Database consistency
enabled_count = len(list(filter(lambda x: not x.get('disabled', False), site_data.values()))) enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values())))
print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}') print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')
# Create notify object for query results. # Create notify object for query results.
+26 -25
View File
@@ -1,6 +1,7 @@
{ {
"engines": { "engines": {
"XenForo": { "XenForo": {
"presenseStrs": ["XenForo"],
"site": { "site": {
"errorMsg": [ "errorMsg": [
"The specified member cannot be found. Please enter a member's entire name.", "The specified member cannot be found. Please enter a member's entire name.",
@@ -11,7 +12,7 @@
"errors": { "errors": {
"You must be logged-in to do that.": "Login required" "You must be logged-in to do that.": "Login required"
}, },
"url": "{urlMain}/members/?username={username}" "url": "{urlMain}{urlSubpath}/members/?username={username}"
} }
}, },
"phpBB": { "phpBB": {
@@ -46,7 +47,7 @@
"The administrator has banned your IP address": "IP ban", "The administrator has banned your IP address": "IP ban",
"\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0441\u0435\u0440\u0432\u0435\u0440 \u043f\u0435\u0440\u0435\u0433\u0440\u0443\u0436\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u043f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0437\u0430\u0439\u0442\u0438 \u043f\u043e\u0437\u0436\u0435.": "Server is overloaded" "\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0441\u0435\u0440\u0432\u0435\u0440 \u043f\u0435\u0440\u0435\u0433\u0440\u0443\u0436\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u043f\u043e\u043f\u0440\u043e\u0431\u0443\u0439\u0442\u0435 \u0437\u0430\u0439\u0442\u0438 \u043f\u043e\u0437\u0436\u0435.": "Server is overloaded"
}, },
"url": "{urlMain}/{urlSubpath}/member.php?username={username}" "url": "{urlMain}{urlSubpath}/member.php?username={username}"
} }
} }
}, },
@@ -6845,7 +6846,7 @@
"ru" "ru"
], ],
"urlMain": "https://www.infrance.su/", "urlMain": "https://www.infrance.su/",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -7987,7 +7988,7 @@
"ru" "ru"
], ],
"urlMain": "https://la.mail.ru", "urlMain": "https://la.mail.ru",
"urlSubpath": "forums", "urlSubpath": "/forums",
"username_claimed": "wizard", "username_claimed": "wizard",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -8554,7 +8555,7 @@
"ru" "ru"
], ],
"urlMain": "https://minecraftonly.ru", "urlMain": "https://minecraftonly.ru",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -8639,7 +8640,7 @@
"us" "us"
], ],
"urlMain": "https://www.mobile-files.com/", "urlMain": "https://www.mobile-files.com/",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -8819,7 +8820,7 @@
"pk" "pk"
], ],
"urlMain": "https://www.movie-list.com", "urlMain": "https://www.movie-list.com",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -8859,7 +8860,7 @@
"us" "us"
], ],
"urlMain": "https://www.mpgh.net/", "urlMain": "https://www.mpgh.net/",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -9873,7 +9874,7 @@
"engine": "vBulletin", "engine": "vBulletin",
"rank": 4840375, "rank": 4840375,
"urlMain": "http://p38forum.com", "urlMain": "http://p38forum.com",
"urlSubpath": "forums", "urlSubpath": "/forums",
"username_claimed": "red", "username_claimed": "red",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -10193,7 +10194,7 @@
"ru" "ru"
], ],
"urlMain": "https://pw.mail.ru/", "urlMain": "https://pw.mail.ru/",
"urlSubpath": "forums", "urlSubpath": "/forums",
"username_claimed": "wizard", "username_claimed": "wizard",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -10216,7 +10217,7 @@
"ru" "ru"
], ],
"urlMain": "http://pesiq.ru/", "urlMain": "http://pesiq.ru/",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -11233,7 +11234,7 @@
"music" "music"
], ],
"urlMain": "http://www.rap-royalty.com", "urlMain": "http://www.rap-royalty.com",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "red", "username_claimed": "red",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -11365,7 +11366,7 @@
"ru" "ru"
], ],
"urlMain": "http://www.redorchestra.ru", "urlMain": "http://www.redorchestra.ru",
"urlSubpath": "forums", "urlSubpath": "/forums",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -11484,7 +11485,7 @@
"ru" "ru"
], ],
"urlMain": "https://rev.mail.ru", "urlMain": "https://rev.mail.ru",
"urlSubpath": "forums", "urlSubpath": "/forums",
"username_claimed": "wizard", "username_claimed": "wizard",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -11523,7 +11524,7 @@
"ru" "ru"
], ],
"urlMain": "https://www.rlocman.ru", "urlMain": "https://www.rlocman.ru",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "elnat", "username_claimed": "elnat",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -11700,7 +11701,7 @@
"us" "us"
], ],
"urlMain": "https://www.rpgwatch.com", "urlMain": "https://www.rpgwatch.com",
"urlSubpath": "forums", "urlSubpath": "/forums",
"username_claimed": "blue", "username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -11821,7 +11822,7 @@
"ru" "ru"
], ],
"urlMain": "http://www.russian.fi/", "urlMain": "http://www.russian.fi/",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -12532,7 +12533,7 @@
"ru" "ru"
], ],
"urlMain": "https://solaris-club.net", "urlMain": "https://solaris-club.net",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -12802,7 +12803,7 @@
"ru" "ru"
], ],
"urlMain": "http://statistika.ru", "urlMain": "http://statistika.ru",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "hamam", "username_claimed": "hamam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -12900,7 +12901,7 @@
"ru" "ru"
], ],
"urlMain": "https://www.stratege.ru", "urlMain": "https://www.stratege.ru",
"urlSubpath": "forums", "urlSubpath": "/forums",
"username_claimed": "blue", "username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -13199,7 +13200,7 @@
"ru" "ru"
], ],
"urlMain": "https://tanks.mail.ru", "urlMain": "https://tanks.mail.ru",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "red", "username_claimed": "red",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -13751,7 +13752,7 @@
"in" "in"
], ],
"urlMain": "https://www.trainsim.com/", "urlMain": "https://www.trainsim.com/",
"urlSubpath": "vbts", "urlSubpath": "/vbts",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -13986,7 +13987,7 @@
"ru" "ru"
], ],
"urlMain": "http://tv-games.ru/", "urlMain": "http://tv-games.ru/",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "adam", "username_claimed": "adam",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -14632,7 +14633,7 @@
"ru" "ru"
], ],
"urlMain": "https://wf.mail.ru", "urlMain": "https://wf.mail.ru",
"urlSubpath": "forums", "urlSubpath": "/forums",
"username_claimed": "wizard", "username_claimed": "wizard",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
@@ -14961,7 +14962,7 @@
"us" "us"
], ],
"urlMain": "http://wirednewyork.com/", "urlMain": "http://wirednewyork.com/",
"urlSubpath": "forum", "urlSubpath": "/forum",
"username_claimed": "blue", "username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis7" "username_unclaimed": "noonewouldeverusethis7"
}, },
+115 -127
View File
@@ -1,8 +1,5 @@
"""Sherlock Sites Information Module """Maigret Sites Information"""
from __future__ import annotations
This module supports storing information about web sites.
This is the raw data that will be used to search for usernames.
"""
import json import json
import operator import operator
import sys import sys
@@ -10,8 +7,14 @@ import sys
import requests import requests
class SiteInformation(): class MaigretEngine:
def __init__(self, name, url_home, url_username_format, popularity_rank, def __init__(self, name, *args, **kwargs):
self.name = name
self.__dict__.update(kwargs)
class MaigretSite:
def __init__(self, name, url_main, url_username_format, popularity_rank,
username_claimed, username_unclaimed, username_claimed, username_unclaimed,
information): information):
"""Create Site Information Object. """Create Site Information Object.
@@ -21,7 +24,7 @@ class SiteInformation():
Keyword Arguments: Keyword Arguments:
self -- This object. self -- This object.
name -- String which identifies site. name -- String which identifies site.
url_home -- String containing URL for home of site. url_main -- String containing URL for home of site.
url_username_format -- String containing URL for Username format url_username_format -- String containing URL for Username format
on site. on site.
NOTE: The string should contain the NOTE: The string should contain the
@@ -55,7 +58,7 @@ class SiteInformation():
""" """
self.name = name self.name = name
self.url_home = url_home self.url_main = url_main
self.url_username_format = url_username_format self.url_username_format = url_username_format
if (popularity_rank is None) or (popularity_rank == 0): if (popularity_rank is None) or (popularity_rank == 0):
@@ -66,105 +69,56 @@ class SiteInformation():
self.username_claimed = username_claimed self.username_claimed = username_claimed
self.username_unclaimed = username_unclaimed self.username_unclaimed = username_unclaimed
self.information = information self.information = information
self.disabled = information.get('disabled', False)
self.similar_search = information.get('similarSearch', False)
self.ignore_403 = information.get('ignore_403', False)
self.tags = information.get('tags', [])
self.type = information.get('type', 'username')
self.headers = information.get('headers', {})
self.errors = information.get('errors', {})
self.url_subpath = information.get('urlSubpath', '')
self.regex_check = information.get('regexCheck', None)
self.url_probe = information.get('urlProbe', None)
self.check_type = information.get('errorType', '')
self.request_head_only = information.get('request_head_only', '')
self.presense_strs = information.get('presenseStrs', [])
self.absence_strs = information.get('errorMsg', [])
self.request_future = None
return
def __str__(self): def __str__(self):
"""Convert Object To String. return f"{self.name} ({self.url_main})"
Keyword Arguments:
self -- This object.
Return Value:
Nicely formatted string to get information about this object.
"""
return f"{self.name} ({self.url_home})"
class SitesInformation(): class MaigretDatabase:
def __init__(self, data_file_path=None): def __init__(self):
"""Create Sites Information Object. self._sites = []
self._engines = []
Contains information about all supported web sites. @property
def sites(self: MaigretDatabase):
return self._sites
Keyword Arguments: @property
self -- This object. def sites_dict(self):
data_file_path -- String which indicates path to data file. return {site.name: site for site in self._sites}
The file name must end in ".json".
There are 3 possible formats: @property
* Absolute File Format def engines(self: MaigretDatabase):
For example, "c:/stuff/data.json". return self._engines
* Relative File Format
The current working directory is used
as the context.
For example, "data.json".
* URL Format
For example,
"https://example.com/data.json", or
"http://example.com/data.json".
An exception will be thrown if the path
to the data file is not in the expected
format, or if there was any problem loading
the file.
If this option is not specified, then a
default site list will be used.
Return Value:
Nothing.
"""
# Ensure that specified data file has correct extension.
if ".json" != data_file_path[-5:].lower():
raise FileNotFoundError(f"Incorrect JSON file extension for "
f"data file '{data_file_path}'."
)
if (("http://" == data_file_path[:7].lower()) or
("https://" == data_file_path[:8].lower())
):
# Reference is to a URL.
try:
response = requests.get(url=data_file_path)
except Exception as error:
raise FileNotFoundError(f"Problem while attempting to access "
f"data file URL '{data_file_path}': "
f"{str(error)}"
)
if response.status_code == 200:
try:
site_data = response.json()
except Exception as error:
raise ValueError(f"Problem parsing json contents at "
f"'{data_file_path}': {str(error)}."
)
else:
raise FileNotFoundError(f"Bad response while accessing "
f"data file URL '{data_file_path}'."
)
else:
# Reference is to a file.
try:
with open(data_file_path, "r", encoding="utf-8") as file:
try:
data = json.load(file)
site_data = data.get("sites")
engines_data = data.get("engines")
except Exception as error:
raise ValueError(f"Problem parsing json contents at "
f"'{data_file_path}': {str(error)}."
)
except FileNotFoundError as error:
raise FileNotFoundError(f"Problem while attempting to access "
f"data file '{data_file_path}'."
)
self.sites = {}
def load_from_json(self: MaigretDatabase, json_data: dict) -> MaigretDatabase:
# Add all of site information from the json file to internal site list. # Add all of site information from the json file to internal site list.
site_data = json_data.get("sites")
engines_data = json_data.get("engines")
for engine_name in engines_data:
self._engines.append(MaigretEngine(engine_name, engines_data[engine_name]))
for site_name in site_data: for site_name in site_data:
try: try:
site = {} site = {}
@@ -178,8 +132,7 @@ class SitesInformation():
site.update(site_user_info) site.update(site_user_info)
self.sites[site_name] = \ maigret_site = MaigretSite(site_name,
SiteInformation(site_name,
site["urlMain"], site["urlMain"],
site["url"], site["url"],
popularity_rank, popularity_rank,
@@ -187,15 +140,74 @@ class SitesInformation():
site["username_unclaimed"], site["username_unclaimed"],
site site
) )
self._sites.append(maigret_site)
except KeyError as error: except KeyError as error:
raise ValueError(f"Problem parsing json contents at " raise ValueError(f"Problem parsing json content for site {site_name}: "
f"'{data_file_path}' for site {site_name}: "
f"Missing attribute {str(error)}." f"Missing attribute {str(error)}."
) )
return return self
def site_name_list(self, popularity_rank=False):
def load_from_str(self: "MaigretDatabase", db_str: str) -> "MaigretDatabase":
    """Load the sites/engines database from a JSON string.

    Keyword Arguments:
    db_str -- String with a JSON document containing "sites" and
              "engines" sections.

    Return Value:
    This MaigretDatabase object (allows call chaining).

    Raises ValueError if db_str is not valid JSON.
    """
    try:
        data = json.loads(db_str)
    except Exception as error:
        # Only a 50-char prefix of the (possibly huge) input goes into the
        # error text; note the space before the quote so the message reads
        # "from str '<prefix>'" rather than "from str'<prefix>'".
        raise ValueError(f"Problem parsing json contents from str "
                         f"'{db_str[:50]}'...: {str(error)}."
                         ) from error
    return self.load_from_json(data)
def load_from_url(self: MaigretDatabase, url: str) -> MaigretDatabase:
    """Load the sites/engines database from a JSON document at an HTTP(S) URL.

    Keyword Arguments:
    url -- HTTP or HTTPS URL of the JSON database document.

    Return Value:
    This MaigretDatabase object on success; False for a non-HTTP(S) url.

    Raises FileNotFoundError on network failure or a non-200 response,
    ValueError if the response body is not valid JSON.
    """
    if not url.startswith(('http://', 'https://')):
        # NOTE(review): non-HTTP(S) input yields a falsy result instead of
        # raising; callers appear to rely on this — confirm before changing.
        return False

    try:
        response = requests.get(url=url)
    except Exception as error:
        raise FileNotFoundError(f"Problem while attempting to access "
                                f"data file URL '{url}': "
                                f"{str(error)}"
                                )

    if response.status_code != 200:
        raise FileNotFoundError(f"Bad response while accessing "
                                f"data file URL '{url}'."
                                )

    try:
        data = response.json()
    except Exception as error:
        raise ValueError(f"Problem parsing json contents at "
                         f"'{url}': {str(error)}."
                         )

    return self.load_from_json(data)
def load_from_file(self: "MaigretDatabase", filename: str) -> "MaigretDatabase":
    """Load the sites/engines database from a local JSON file.

    Keyword Arguments:
    filename -- Path to a UTF-8 encoded JSON database file.

    Return Value:
    This MaigretDatabase object (allows call chaining).

    Raises FileNotFoundError if the file cannot be opened,
    ValueError if its contents are not valid JSON.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            try:
                data = json.load(file)
            except Exception as error:
                # Interpolate the actual path so the user can tell which
                # file is broken.
                raise ValueError(f"Problem parsing json contents from "
                                 f"file '{filename}': {str(error)}."
                                 ) from error
    except FileNotFoundError as error:
        raise FileNotFoundError(f"Problem while attempting to access "
                                f"data file '{filename}'."
                                ) from error
    return self.load_from_json(data)
def site_name_list(self: MaigretDatabase, popularity_rank=False):
"""Get Site Name List. """Get Site Name List.
Keyword Arguments: Keyword Arguments:
@@ -223,27 +235,3 @@ class SitesInformation():
site_names = sorted([site.name for site in self], key=str.lower) site_names = sorted([site.name for site in self], key=str.lower)
return site_names return site_names
def __iter__(self):
"""Iterator For Object.
Keyword Arguments:
self -- This object.
Return Value:
Iterator for sites object.
"""
for site_name in self.sites:
yield self.sites[site_name]
def __len__(self):
"""Length For Object.
Keyword Arguments:
self -- This object.
Return Value:
Length of sites object.
"""
return len(self.sites)
View File
+49
View File
@@ -0,0 +1,49 @@
"""Maigret Database test functions"""
from maigret.sites import MaigretDatabase
def test_load_empty_db_from_str():
    """An empty "sites"/"engines" document yields a database with no entries."""
    empty_db = MaigretDatabase().load_from_str('{"engines": {}, "sites": {}}')
    assert empty_db.engines == []
    assert empty_db.sites == []
def test_load_valid_db():
    """A one-engine/one-site document is fully loaded into the database."""
    db_json = {
        'engines': {
            "XenForo": {
                "presenseStrs": ["XenForo"],
                "site": {
                    "errorMsg": [
                        "The specified member cannot be found. Please enter a member's entire name.",
                    ],
                    "errorType": "message",
                    "errors": {
                        "You must be logged-in to do that.": "Login required"
                    },
                    "url": "{urlMain}{urlSubpath}/members/?username={username}"
                }
            },
        },
        'sites': {
            "Amperka": {
                "engine": "XenForo",
                "rank": 121613,
                "tags": [
                    "ru"
                ],
                "urlMain": "http://forum.amperka.ru",
                "username_claimed": "adam",
                "username_unclaimed": "noonewouldeverusethis7"
            },
        }
    }

    db = MaigretDatabase().load_from_json(db_json)

    # Exactly one engine and one site, each identified by its name.
    assert len(db.engines) == 1
    assert db.engines[0].name == 'XenForo'
    assert len(db.sites) == 1
    assert db.sites[0].name == 'Amperka'