mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
127 lines
3.6 KiB
Python
127 lines
3.6 KiB
Python
# coding: utf8
|
|
import ast
|
|
import difflib
|
|
import re
|
|
import random
|
|
import string
|
|
from typing import Any
|
|
|
|
|
|
# Pool of User-Agent header values that get_random_user_agent() draws from.
DEFAULT_USER_AGENTS = [
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/89.0.4389.114 Safari/537.36"
    ),
]
|
|
|
|
|
|
class CaseConverter:
    """Stateless helpers for converting identifiers between naming styles."""

    @staticmethod
    def camel_to_snake(camelcased_string: str) -> str:
        """Convert ``CamelCase`` / ``camelCase`` to ``snake_case``.

        An underscore is inserted before every uppercase ASCII letter that is
        not the first character, then the whole string is lower-cased.
        """
        return re.sub(r"(?<!^)(?=[A-Z])", "_", camelcased_string).lower()

    @staticmethod
    def snake_to_camel(snakecased_string: str) -> str:
        """Convert ``snake_case`` to ``camelCase``.

        Every underscore-separated word is title-cased and the words are
        joined; the first character of the result is then lower-cased.
        Returns an empty string when the input holds no characters other
        than underscores.
        """
        formatted = "".join(word.title() for word in snakecased_string.split("_"))
        # Slice instead of indexing so that an empty result ("" or "_" as
        # input) does not raise IndexError.
        return formatted[:1].lower() + formatted[1:]

    @staticmethod
    def snake_to_title(snakecased_string: str) -> str:
        """Convert ``snake_case`` to a space-separated phrase.

        Only the first word is title-cased; the remaining words are kept
        exactly as they were.
        """
        words = snakecased_string.split("_")
        words[0] = words[0].title()
        return " ".join(words)
|
|
|
|
|
|
def is_country_tag(tag: str) -> bool:
    """Detect whether a tag represents a country.

    A tag counts as a country tag when it consists of exactly two ASCII
    letters (either case) or equals the special value ``"global"``.
    """
    # fullmatch is used instead of match with "$": "$" also matches just
    # before a trailing newline, which would wrongly accept "us\n".
    return tag == "global" or re.fullmatch(r"[a-zA-Z]{2}", tag) is not None
|
|
|
|
|
|
def enrich_link_str(link: str) -> str:
    """Wrap a URL-looking string into an HTML anchor element.

    The input is stripped of surrounding whitespace. If it starts with
    ``www.``, or starts with ``http`` and contains ``//``, an
    ``<a class="auto-link">`` element pointing at it is returned; otherwise
    the stripped string is returned unchanged.

    The link is HTML-escaped before being placed into the ``href`` attribute
    and the anchor text, so quotes or angle brackets inside it cannot break
    out of the generated markup.
    """
    # Imported locally so the function does not rely on a module-level import.
    from html import escape

    link = link.strip()
    looks_like_url = link.startswith("www.") or (
        link.startswith("http") and "//" in link
    )
    if not looks_like_url:
        return link

    safe_link = escape(link, quote=True)
    return f'<a class="auto-link" href="{safe_link}">{safe_link}</a>'
|
|
|
|
|
|
class URLMatcher:
    """Helpers for normalising site URLs and building profile-URL regexps."""

    # Template: scheme, optional "www." / "m." host prefix, then the main part.
    # The dots are escaped so the prefix group only consumes a literal "www."
    # or "m."; an unescaped "m." would also swallow e.g. the "my" of
    # "myspace.com" and corrupt the extracted main part.
    _HTTP_URL_RE_STR = r"^https?://(www\.|m\.)?(.+)$"
    HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
    # URL characters that are regex metacharacters and must be backslash-escaped
    # before the URL is embedded into a pattern.
    UNSAFE_SYMBOLS = ".?"

    @classmethod
    def extract_main_part(cls, url: str) -> str:
        """Return the URL without scheme, ``www.``/``m.`` prefix and trailing slashes.

        Returns an empty string when ``url`` is not an ``http``/``https`` URL.
        """
        match = cls.HTTP_URL_RE.search(url)
        if match and match.group(2):
            return match.group(2).rstrip("/")

        return ""

    @classmethod
    def make_profile_url_regexp(cls, url: str, username_regexp: str = ""):
        """Compile a case-insensitive regexp matching profile URLs of a site.

        Args:
            url: URL template containing the ``{username}`` placeholder.
            username_regexp: pattern the username must satisfy; leading ``^``
                and trailing ``$`` are removed. Defaults to ``.+?``.

        Returns:
            A compiled pattern that accepts ``http``/``https`` and an optional
            ``www.``/``m.`` prefix; the username is captured by the group that
            replaces the ``{username}`` placeholder.
        """
        url_main_part = cls.extract_main_part(url)
        for c in cls.UNSAFE_SYMBOLS:
            url_main_part = url_main_part.replace(c, f"\\{c}")
        # Anchors are dropped because the pattern is embedded into a larger one.
        prepared_username_regexp = (username_regexp or ".+?").lstrip('^').rstrip('$')

        url_regexp = url_main_part.replace(
            "{username}", f"({prepared_username_regexp})"
        )
        # Substitute the generic "main part" group of the template with the
        # site-specific pattern.
        regexp_str = cls._HTTP_URL_RE_STR.replace("(.+)", url_regexp)

        return re.compile(regexp_str, re.IGNORECASE)
|
|
|
|
|
|
def ascii_data_display(data: str) -> Any:
    """Parse a string containing a Python literal and return the resulting object.

    The parsing is delegated to ``ast.literal_eval``.
    """
    parsed = ast.literal_eval(data)
    return parsed
|
|
|
|
|
|
def get_dict_ascii_tree(items, prepend="", new_line=True):
    """Render items as a tree drawn with box-drawing characters.

    Args:
        items: iterable of entries. A tuple is unpacked as
            ``(field_name, field_value)`` and rendered as ``name: value``;
            any other entry is rendered as is. A string value starting with
            ``['`` is parsed with ``ascii_data_display`` and rendered as a
            nested subtree.
        prepend: string placed before the branch symbol of every line.
        new_line: when True (default) the result starts with a newline;
            when False that leading newline is removed.

    Returns:
        The rendered tree; an empty string for empty ``items``.
    """
    # Box-drawing glyphs. They use their own names so the ``new_line``
    # parameter is not shadowed (shadowing made ``new_line=False`` a no-op).
    branch_glyph = "\u251c"  # "├" - entry followed by more entries
    dash_glyph = "\u2500"  # "─" - horizontal connector
    corner_glyph = "\u2514"  # "└" - last entry
    pipe_glyph = "\u2502"  # "│" - continuation of the parent branch

    # Materialise so that len() works for any iterable, not only sized ones.
    items = list(items)
    last_index = len(items) - 1

    lines = []
    for num, item in enumerate(items):
        is_last_item = num == last_index
        box_symbol = (corner_glyph if is_last_item else branch_glyph) + dash_glyph

        if isinstance(item, tuple):
            field_name, field_value = item
            # Only strings can hold a serialised list; other value types are
            # printed directly instead of failing on .startswith().
            if isinstance(field_value, str) and field_value.startswith("['"):
                prepend_symbols = " " * 3 if is_last_item else f" {pipe_glyph} "
                data = ascii_data_display(field_value)
                field_value = get_dict_ascii_tree(data, prepend_symbols)
            lines.append(f"\n{prepend}{box_symbol}{field_name}: {field_value}")
        else:
            lines.append(f"\n{prepend}{box_symbol} {item}")

    text = "".join(lines)
    if not new_line:
        # Drop the leading "\n" produced for the first entry.
        text = text[1:]

    return text
|
|
|
|
|
|
def get_random_user_agent():
    """Return one entry of DEFAULT_USER_AGENTS chosen at random."""
    chosen_agent = random.choice(DEFAULT_USER_AGENTS)
    return chosen_agent
|
|
|
|
|
|
def get_match_ratio(base_strs: list):
    """Build a scorer that rates how closely a string resembles the base strings.

    Args:
        base_strs: strings to compare candidates against.

    Returns:
        A function taking a string and returning the highest
        ``difflib.SequenceMatcher`` ratio between that string and any base
        string, compared case-insensitively and rounded to two decimals.
        The function returns ``0.0`` when ``base_strs`` is empty.
    """
    # Lower-case the base strings once instead of on every scorer call.
    lowered_bases = [base.lower() for base in base_strs]

    def get_match_inner(s: str):
        candidate = s.lower()
        best_ratio = max(
            (
                difflib.SequenceMatcher(a=candidate, b=base).ratio()
                for base in lowered_bases
            ),
            # Without a default, max() raises ValueError on an empty sequence.
            default=0.0,
        )
        return round(best_ratio, 2)

    return get_match_inner
|
|
|
|
|
|
def generate_random_username():
    """Return a random username made of ten lowercase ASCII letters."""
    letters = random.choices(string.ascii_lowercase, k=10)
    return "".join(letters)
|