# coding: utf8
"""Small helpers: case conversion, URL matching, ASCII tree rendering, random values."""
import ast
import difflib
import random
import re
import string
from typing import Any, Callable


DEFAULT_USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
]


class CaseConverter:
    """Stateless conversions between camelCase, snake_case and title text."""

    @staticmethod
    def camel_to_snake(camelcased_string: str) -> str:
        """Convert ``camelCase`` to ``snake_case``.

        An underscore is inserted before every uppercase letter that is not
        the first character, then the whole string is lowercased.
        """
        # NOTE(review): the pattern was truncated after `(?` in the reviewed
        # text; this is the conventional lookbehind/lookahead form - confirm
        # against the canonical source.
        return re.sub(r"(?<!^)(?=[A-Z])", "_", camelcased_string).lower()

    @staticmethod
    def snake_to_camel(snakecased_string: str) -> str:
        """Convert ``snake_case`` to ``camelCase``.

        Returns an empty string for empty input instead of raising
        ``IndexError``.
        """
        # NOTE(review): this method header was lost in the reviewed text and
        # was reconstructed from the surviving body - confirm the name.
        formatted = "".join(word.title() for word in snakecased_string.split("_"))
        if not formatted:
            return ""
        return formatted[0].lower() + formatted[1:]

    @staticmethod
    def snake_to_title(snakecased_string: str) -> str:
        """Replace underscores with spaces and title-case the first word only."""
        words = snakecased_string.split("_")
        words[0] = words[0].title()
        return " ".join(words)


def is_country_tag(tag: str) -> bool:
    """Detect whether ``tag`` represents a country.

    A tag qualifies when it is exactly two ASCII letters or the literal
    string ``"global"``.
    """
    # fullmatch (unlike `^...$` with match) does not accept a trailing newline.
    return bool(re.fullmatch(r"[a-zA-Z]{2}", tag)) or tag == "global"


def enrich_link_str(link: str) -> str:
    """Strip surrounding whitespace from ``link`` and decorate URL-like values."""
    link = link.strip()
    if link.startswith("www.") or (link.startswith("http") and "//" in link):
        # NOTE(review): as written this branch returns the link unchanged;
        # any markup that originally wrapped it appears to have been lost
        # from the reviewed text - confirm against the canonical source.
        return f'{link}'
    return link


class URLMatcher:
    """Helpers for normalising HTTP(S) URLs and building profile-URL regexps."""

    # Dots are escaped so that only a literal "www." / "m." prefix is dropped;
    # an unescaped "m." would also swallow the "me" of "medium.com".
    _HTTP_URL_RE_STR = r"^https?://(www\.|m\.)?(.+)$"
    HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
    # Characters of the URL that must be backslash-escaped before it is
    # embedded into a regular expression.
    UNSAFE_SYMBOLS = ".?"

    @classmethod
    def extract_main_part(cls, url: str) -> str:
        """Return ``url`` without scheme, ``www.``/``m.`` prefix and trailing slashes.

        Returns an empty string when ``url`` is not an http(s) URL.
        """
        match = cls.HTTP_URL_RE.search(url)
        if match and match.group(2):
            return match.group(2).rstrip("/")
        return ""

    @classmethod
    def make_profile_url_regexp(cls, url: str, username_regexp: str = ""):
        """Compile a case-insensitive regexp matching profile URLs shaped like ``url``.

        :param url: URL template containing a ``{username}`` placeholder.
        :param username_regexp: pattern for the username part; leading ``^``
            and trailing ``$`` anchors are removed. Defaults to ``.+?``.
        :return: compiled pattern; group 1 is the optional ``www.``/``m.``
            prefix and group 2 is the username.
        """
        url_main_part = cls.extract_main_part(url)
        # Escaped by hand rather than with re.escape so that the
        # "{username}" placeholder survives for the substitution below.
        for symbol in cls.UNSAFE_SYMBOLS:
            url_main_part = url_main_part.replace(symbol, f"\\{symbol}")

        prepared_username_regexp = (username_regexp or ".+?").lstrip("^").rstrip("$")
        url_regexp = url_main_part.replace(
            "{username}", f"({prepared_username_regexp})"
        )
        regexp_str = cls._HTTP_URL_RE_STR.replace("(.+)", url_regexp)
        return re.compile(regexp_str, re.IGNORECASE)


def ascii_data_display(data: str) -> Any:
    """Parse ``data`` as a Python literal and return the resulting object."""
    return ast.literal_eval(data)


def get_dict_ascii_tree(items, prepend="", new_line=True):
    """Render ``items`` as a box-drawing tree, one entry per line.

    :param items: sized iterable of ``(name, value)`` tuples or plain values.
        A string value starting with ``['`` is parsed as a list literal and
        rendered as a nested subtree.
    :param prepend: text placed before the box symbols on each line.
    :param new_line: when false, the leading newline of the result is dropped.
    :return: the rendered tree as a single string.
    """
    branch = "\u251c"  # box drawings light vertical and right
    dash = "\u2500"  # box drawings light horizontal
    corner = "\u2514"  # box drawings light up and right
    pipe = "\u2502"  # box drawings light vertical

    last_index = len(items) - 1
    rows = []
    for num, item in enumerate(items):
        is_last_item = num == last_index
        box_symbol = (corner if is_last_item else branch) + dash

        if isinstance(item, tuple):
            field_name, field_value = item
            if isinstance(field_value, str) and field_value.startswith("['"):
                prepend_symbols = " " * 3 if is_last_item else f" {pipe} "
                data = ascii_data_display(field_value)
                field_value = get_dict_ascii_tree(data, prepend_symbols)
            rows.append(f"\n{prepend}{box_symbol}{field_name}: {field_value}")
        else:
            rows.append(f"\n{prepend}{box_symbol} {item}")

    text = "".join(rows)
    # The box glyph used to be stored in a local also named `new_line`,
    # which made this flag a no-op; the glyph now has its own name.
    if not new_line:
        text = text[1:]
    return text


def get_random_user_agent():
    """Return a random entry of ``DEFAULT_USER_AGENTS``."""
    return random.choice(DEFAULT_USER_AGENTS)


def get_match_ratio(base_strs: list) -> Callable[[str], float]:
    """Build a function scoring a string against ``base_strs``.

    The returned callable gives the best case-insensitive
    ``difflib.SequenceMatcher`` ratio between its argument and any string in
    ``base_strs``, rounded to two decimals (``0.0`` when the list is empty).
    """

    def get_match_inner(s: str):
        lowered = s.lower()
        return round(
            max(
                (
                    difflib.SequenceMatcher(a=lowered, b=s2.lower()).ratio()
                    for s2 in base_strs
                ),
                default=0.0,
            ),
            2,
        )

    return get_match_inner


def generate_random_username():
    """Return a random string of ten lowercase ASCII letters."""
    return ''.join(random.choices(string.ascii_lowercase, k=10))