From a2ddb15f09ebb8e8eedbe2a4aa2402aa7c3fb18a Mon Sep 17 00:00:00 2001 From: Soxoj Date: Sun, 21 Mar 2021 18:34:57 +0300 Subject: [PATCH] Improving "parse" mode for extracting usernames and other info for a further search --- maigret/maigret.py | 33 ++++++++++++++++++++++++--------- maigret/notify.py | 19 ++----------------- maigret/resources/data.json | 5 ++++- maigret/utils.py | 21 +++++++++++++++++++++ requirements.txt | 2 +- tests/test_utils.py | 21 ++++++++++++++++++++- 6 files changed, 72 insertions(+), 29 deletions(-) diff --git a/maigret/maigret.py b/maigret/maigret.py index b9578ad..9e4e343 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -19,6 +19,7 @@ from .report import save_csv_report, save_xmind_report, save_html_report, save_p save_json_report from .sites import MaigretDatabase from .submit import submit_dialog +from .utils import get_dict_ascii_tree __version__ = '0.1.15' @@ -218,15 +219,29 @@ async def main(): print("Using the proxy: " + args.proxy) if args.parse_url: - page, _ = parse(args.parse_url, cookies_str='') - info = extract(page) - text = 'Extracted ID data from webpage: ' + ', '.join([f'{a}: {b}' for a, b in info.items()]) - print(text) - for k, v in info.items(): - if 'username' in k: - usernames[v] = 'username' - if k in supported_recursive_search_ids: - usernames[v] = k + # url, headers + reqs = [(args.parse_url, set())] + try: + # temporary workaround for URL mutations MVP + from socid_extractor import mutate_url + reqs += list(mutate_url(args.parse_url)) + except: + pass + + for req in reqs: + url, headers = req + print(f'Scanning webpage by URL {url}...') + page, _ = parse(url, cookies_str='', headers=headers) + info = extract(page) + if not info: + print('Nothing extracted') + else: + print(get_dict_ascii_tree(info.items(), new_line=False), ' ') + for k, v in info.items(): + if 'username' in k: + usernames[v] = 'username' + if k in supported_recursive_search_ids: + usernames[v] = k if args.tags: args.tags = 
list(set(str(args.tags).split(','))) diff --git a/maigret/notify.py b/maigret/notify.py index ea3186d..86dabda 100644 --- a/maigret/notify.py +++ b/maigret/notify.py @@ -8,6 +8,7 @@ import sys from colorama import Fore, Style, init from .result import QueryStatus +from .utils import get_dict_ascii_tree class QueryNotify(): @@ -176,22 +177,6 @@ class QueryNotifyPrint(QueryNotify): else: print(msg) - def get_additional_data_text(self, items, prepend=''): - text = '' - for num, item in enumerate(items): - box_symbol = '┣╸' if num != len(items) - 1 else '┗╸' - - if type(item) == tuple: - field_name, field_value = item - if field_value.startswith('[\''): - is_last_item = num == len(items) - 1 - prepend_symbols = ' ' * 3 if is_last_item else ' ┃ ' - field_value = self.get_additional_data_text(eval(field_value), prepend_symbols) - text += f'\n{prepend}{box_symbol}{field_name}: {field_value}' - else: - text += f'\n{prepend}{box_symbol} {item}' - - return text def update(self, result, is_similar=False): """Notify Update. 
@@ -211,7 +196,7 @@ class QueryNotifyPrint(QueryNotify): if not self.result.ids_data: ids_data_text = "" else: - ids_data_text = self.get_additional_data_text(self.result.ids_data.items(), ' ') + ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), ' ') def make_colored_terminal_notify(status, text, status_color, text_color, appendix): text = [ diff --git a/maigret/resources/data.json b/maigret/resources/data.json index c9ade31..a197370 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -22761,8 +22761,11 @@ }, "codeforces.com": { "tags": [ - "in" + "coding" ], + "errors": { + "The page is temporarily blocked by administrator.": "IP ban" + }, "engine": "engineRedirect", "alexaRank": 8156, "url": "http://codeforces.com/profile/{username}", diff --git a/maigret/utils.py b/maigret/utils.py index ce11b59..86da659 100644 --- a/maigret/utils.py +++ b/maigret/utils.py @@ -55,3 +55,24 @@ class URLMatcher: regexp_str = self._HTTP_URL_RE_STR.replace('(.+)', url_regexp) return re.compile(regexp_str) + + +def get_dict_ascii_tree(items, prepend='', new_line=True): + text = '' + for num, item in enumerate(items): + box_symbol = '┣╸' if num != len(items) - 1 else '┗╸' + + if type(item) == tuple: + field_name, field_value = item + if field_value.startswith('[\''): + is_last_item = num == len(items) - 1 + prepend_symbols = ' ' * 3 if is_last_item else ' ┃ ' + field_value = get_dict_ascii_tree(eval(field_value), prepend_symbols) + text += f'\n{prepend}{box_symbol}{field_name}: {field_value}' + else: + text += f'\n{prepend}{box_symbol} {item}' + + if not new_line: + text = text[1:] + + return text diff --git a/requirements.txt b/requirements.txt index 98bf068..b6cd8c0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,7 @@ reportlab==3.5.59 requests>=2.24.0 requests-futures==1.0.0 six==1.15.0 -socid-extractor>=0.0.13 +socid-extractor>=0.0.15 soupsieve==2.1 stem==1.8.0 torrequest==0.1.0 diff --git a/tests/test_utils.py 
b/tests/test_utils.py index fee4cb3..98f63f8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,7 +2,7 @@ import itertools import re -from maigret.utils import CaseConverter, is_country_tag, enrich_link_str, URLMatcher +from maigret.utils import CaseConverter, is_country_tag, enrich_link_str, URLMatcher, get_dict_ascii_tree def test_case_convert_camel_to_snake(): @@ -72,3 +72,22 @@ def test_url_make_profile_url_regexp(): for url_parts in itertools.product(*parts): url = ''.join(url_parts) assert URLMatcher.make_profile_url_regexp(url).pattern == r'^https?://(www.)?flickr\.com/photos/(.+?)$' + + +def test_get_dict_ascii_tree(): + data = {'uid': 'dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==', 'legacy_id': '26403415', 'username': 'alexaimephotographycars', 'name': 'Alex Aimé', 'created_at': '2018-05-04T10:17:01.000+0000', 'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b', 'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201', 'website': 'www.instagram.com/street.reality.photography/', 'facebook_link': ' www.instagram.com/street.reality.photography/', 'instagram_username': 'Street.Reality.Photography', 'twitter_username': 'Alexaimephotogr'} + + ascii_tree = get_dict_ascii_tree(data.items()) + + assert ascii_tree == """ +┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ== +┣╸legacy_id: 26403415 +┣╸username: alexaimephotographycars +┣╸name: Alex Aimé +┣╸created_at: 2018-05-04T10:17:01.000+0000 +┣╸image: https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b +┣╸image_bg: https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201 +┣╸website: 
www.instagram.com/street.reality.photography/ +┣╸facebook_link: www.instagram.com/street.reality.photography/ +┣╸instagram_username: Street.Reality.Photography +┗╸twitter_username: Alexaimephotogr""" \ No newline at end of file