Improved "submit new site" mode, added tests, fixed top-500 sites (#1952)

This commit is contained in:
Soxoj
2024-12-10 18:02:43 +01:00
committed by GitHub
parent 51ab988e36
commit 81a817a39f
12 changed files with 691 additions and 194 deletions
+107 -46
View File
@@ -1970,6 +1970,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"BeerMoneyForum": {
"disabled": true,
"ignore403": true,
"tags": [
"finance",
@@ -2366,19 +2367,30 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"BoardGameGeek": {
"checkType": "message",
"tags": [
"gaming",
"us"
],
"checkType": "message",
"absenceStrs": [
"User does not exist."
"\t\tUser not found",
"messagebox error",
">\t<div class=",
"\t\t\t<title>Profile | BoardGameGeek</title>",
"\t</div></div>"
],
"alexaRank": 4327,
"urlMain": "https://www.boardgamegeek.com",
"url": "https://www.boardgamegeek.com/user/{username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
"urlMain": "https://boardgamegeek.com",
"url": "https://boardgamegeek.com/user/{username}",
"usernameClaimed": "ZakuBG",
"usernameUnclaimed": "uzytnhstvj",
"presenseStrs": [
"username",
" style=",
"mail",
" \tstyle=",
" data-username="
]
},
"Bobrdobr": {
"tags": [
@@ -3005,7 +3017,8 @@
"alexaRank": 2689,
"urlMain": "https://community.cbr.com",
"usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
"disabled": true
},
"Ccdi": {
"tags": [
@@ -4645,21 +4658,6 @@
"usernameUnclaimed": "noonewouldeverusethis7",
"alexaRank": 301125
},
"Eksisozluk": {
"tags": [
"tr"
],
"checkType": "message",
"absenceStrs": [
"isimli bir yazar kayd\u0131 mevcut de\u011fil",
"olmaz \u00f6yle \u015fey"
],
"alexaRank": 977,
"urlMain": "https://eksisozluk.com/biri/",
"url": "https://eksisozluk.com/biri/{username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Elakiri": {
"tags": [
"lk"
@@ -5747,6 +5745,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"Folkd": {
"disabled": true,
"tags": [
"eu",
"in"
@@ -7678,17 +7677,28 @@
},
"Hotcopper": {
"tags": [
"au"
"finance"
],
"checkType": "message",
"absenceStrs": [
"The following error occurred"
"error-page",
"error-page home container",
"card-footer-item",
"><main id=",
"card-content"
],
"alexaRank": 7767,
"urlMain": "https://hotcopper.com.au",
"url": "https://hotcopper.com.au/search/search?type=post&users={username}",
"usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "ggyeplcpod",
"presenseStrs": [
"title-td",
"title is-1",
"pagination ",
"toggle",
"active "
]
},
"House-Mixes.com": {
"tags": [
@@ -9056,16 +9066,27 @@
"Lichess": {
"checkType": "message",
"absenceStrs": [
"Page not found!"
"page-small box box-pad page",
"><h1 class=",
">No such player</h1><div><p>This username doesn",
"})()</script></body></html>",
"IR0Cf7qpkpcOhvI9r03a0QbI"
],
"alexaRank": 2374,
"urlMain": "https://lichess.org",
"url": "https://lichess.org/@/{username}",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7",
"usernameClaimed": "adam",
"usernameUnclaimed": "efxvyhnwrh",
"tags": [
"gaming",
"hobby"
],
"presenseStrs": [
"us_profile",
"og:title",
"profile-side",
" data-username=",
"og:site_name"
]
},
"Liebe69": {
@@ -14739,16 +14760,25 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"SlideShare": {
"tags": [
"documents",
"sharing"
],
"checkType": "status_code",
"checkType": "message",
"alexaRank": 158,
"urlMain": "https://slideshare.net/",
"url": "https://slideshare.net/{username}",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7"
"urlMain": "https://www.slideshare.net",
"url": "https://www.slideshare.net/{username}",
"usernameClaimed": "KumarSurya7",
"usernameUnclaimed": "kwbmsonxvp",
"presenseStrs": [
"user-name",
"pageInfo",
"listitem",
"polite",
"strippedTitle"
],
"absenceStrs": [
"blankProfile",
"username-available",
"robots",
"noindex,nofollow"
]
},
"Slides": {
"tags": [
@@ -15447,7 +15477,8 @@
"urlMain": "https://www.strava.com/",
"url": "https://www.strava.com/athletes/{username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
"disabled": true
},
"Studfile": {
"tags": [
@@ -16771,13 +16802,20 @@
"regexCheck": "^[^\\.]+$",
"checkType": "message",
"absenceStrs": [
"There's nothing here."
"Not found.",
":404,",
"userAgent",
",displayStatus:"
],
"alexaRank": 112,
"urlMain": "https://tumblr.com/",
"url": "https://{username}.tumblr.com/",
"usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7"
"urlMain": "https://www.tumblr.com",
"url": "https://www.tumblr.com/{username}",
"usernameClaimed": "soxoj",
"usernameUnclaimed": "zdbimdoqyt",
"presenseStrs": [
"profile",
" title="
]
},
"Tunefind": {
"checkType": "message",
@@ -17114,7 +17152,8 @@
"urlMain": "https://vc.ru",
"url": "https://vc.ru/search/v2/subsite/relevant?query={username}",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
"disabled": true
},
"Viddler": {
"checkType": "message",
@@ -17377,7 +17416,7 @@
"video"
],
"headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM2MTc5MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNGYxM2M4N2ItYWMwMy00Y2JhLWExMDctNmNiODhmM2U3NjZjIn0.Y7CWEWckdSMsmJ8ROPmhHR6el2QCYJRDl0RLPpdJOKc"
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4MzkwODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiOWNjMjk0ZjktZGZhOS00NDI0LWE0OGEtN2JjYzkwYjM2NTMyIn0.wG0kC7fWtrdKI9ccS-LE81lVgQRfYobrqCAPWxr1wzc"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -18971,7 +19010,8 @@
"urlMain": "https://aminoapps.com/",
"url": "https://aminoapps.com/u/{username}",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis77777"
"usernameUnclaimed": "noonewouldeverusethis77777",
"disabled": true
},
"analitika-forex.ru": {
"engine": "uCoz",
@@ -35419,6 +35459,27 @@
"Cache-Control": "no-cache",
"TE": "trailers"
}
},
"Eksisozluk": {
"absenceStrs": [
" <h1>b\u00f6yle bir yazar yok</h1>\r"
],
"presenseStrs": [
"profile-dots",
"profile-logo",
"profile-cards",
"profile-biography",
" data-title="
],
"alexaRank": 977,
"url": "https://eksisozluk.com/biri/{username}",
"urlMain": "https://eksisozluk.com",
"usernameClaimed": "kartalbafilerrr",
"usernameUnclaimed": "rlcvuwlxqh",
"checkType": "message",
"tags": [
"tr"
]
}
},
"engines": {
+1 -1
View File
@@ -96,7 +96,7 @@ class MaigretCheckResult:
return self.status == MaigretCheckStatus.CLAIMED
def __repr__(self):
return f"'{self.__str__()}'"
return f"<{self.__str__()}>"
def __str__(self):
"""Convert Object To String.
+262 -121
View File
@@ -2,7 +2,8 @@ import asyncio
import json
import re
import os
from typing import Any, Dict, List, Optional
import logging
from typing import Any, Dict, List, Optional, Tuple
from aiohttp import ClientSession, TCPConnector
from aiohttp_socks import ProxyConnector
@@ -15,7 +16,7 @@ from .settings import Settings
from .sites import MaigretDatabase, MaigretEngine, MaigretSite
from .utils import get_random_user_agent
from .checking import site_self_check
from .utils import get_match_ratio
from .utils import get_match_ratio, generate_random_username
class CloudflareSession:
@@ -125,21 +126,13 @@ class Submitter:
return fields
async def detect_known_engine(
self, url_exists, url_mainpage
self, url_exists, url_mainpage, session, follow_redirects, headers
) -> [List[MaigretSite], str]:
resp_text = ''
try:
r = await self.session.get(url_mainpage)
content = await r.content.read()
charset = r.charset or "utf-8"
resp_text = content.decode(charset, "ignore")
self.logger.debug(resp_text)
except Exception as e:
self.logger.warning(e, exc_info=True)
print(f"Some error while checking main page: {e}")
return [], resp_text
session = session or self.session
resp_text, _ = await self.get_html_response_to_compare(
url_exists, session, follow_redirects, headers
)
for engine in self.db.engines:
strs_to_check = engine.__dict__.get("presenseStrs")
@@ -195,113 +188,134 @@ class Submitter:
)
return entered_username if entered_username else supposed_username
async def check_features_manually(
self, url_exists, url_mainpage, cookie_file, redirects=False
@staticmethod
async def get_html_response_to_compare(
url: str, session: ClientSession = None, redirects=False, headers: Dict = None
):
custom_headers = {}
while self.args.verbose:
header_key = input(
'Specify custom header if you need or just press Enter to skip. Header name: '
async with session.get(
url, allow_redirects=redirects, headers=headers
) as response:
# Try different encodings or fallback to 'ignore' errors
try:
html_response = await response.text(encoding='utf-8')
except UnicodeDecodeError:
try:
html_response = await response.text(encoding='latin1')
except UnicodeDecodeError:
html_response = await response.text(errors='ignore')
return html_response, response.status
async def check_features_manually(
self,
username: str,
url_exists: str,
cookie_filename="", # TODO: use cookies
session: ClientSession = None,
follow_redirects=False,
headers: dict = None,
) -> Tuple[List[str], List[str], str, str]:
random_username = generate_random_username()
url_of_non_existing_account = url_exists.lower().replace(
username.lower(), random_username
)
if not header_key:
break
header_value = input('Header value: ')
custom_headers[header_key.strip()] = header_value.strip()
supposed_username = self.extract_username_dialog(url_exists)
non_exist_username = "noonewouldeverusethis7"
url_user = url_exists.replace(supposed_username, "{username}")
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
headers = dict(self.HEADERS)
headers.update(custom_headers)
exists_resp = await self.session.get(
url_exists,
headers=headers,
allow_redirects=redirects,
try:
session = session or self.session
first_html_response, first_status = await self.get_html_response_to_compare(
url_exists, session, follow_redirects, headers
)
exists_resp_text = await exists_resp.text()
self.logger.debug(url_exists)
self.logger.debug(exists_resp.status)
self.logger.debug(exists_resp_text)
non_exists_resp = await self.session.get(
url_not_exists,
headers=headers,
allow_redirects=redirects,
second_html_response, second_status = (
await self.get_html_response_to_compare(
url_of_non_existing_account, session, follow_redirects, headers
)
non_exists_resp_text = await non_exists_resp.text()
self.logger.debug(url_not_exists)
self.logger.debug(non_exists_resp.status)
self.logger.debug(non_exists_resp_text)
)
await session.close()
except Exception as e:
self.logger.error(
f"Error while getting HTTP response for username {username}: {e}",
exc_info=True,
)
return None, None, str(e), random_username
a = exists_resp_text
b = non_exists_resp_text
self.logger.info(f"URL with existing account: {url_exists}")
self.logger.info(
f"HTTP response status for URL with existing account: {first_status}"
)
self.logger.info(
f"HTTP response length URL with existing account: {len(first_html_response)}"
)
self.logger.debug(first_html_response)
tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
self.logger.info(f"URL with existing account: {url_of_non_existing_account}")
self.logger.info(
f"HTTP response status for URL with non-existing account: {second_status}"
)
self.logger.info(
f"HTTP response length URL with non-existing account: {len(second_html_response)}"
)
self.logger.debug(second_html_response)
# TODO: filter by errors, move to dialog function
if (
"/cdn-cgi/challenge-platform" in first_html_response
or "\t\t\t\tnow: " in first_html_response
or "Sorry, you have been blocked" in first_html_response
):
self.logger.info("Cloudflare detected, skipping")
return None, None, "Cloudflare detected, skipping", random_username
tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response))
tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response))
a_minus_b = tokens_a.difference(tokens_b)
b_minus_a = tokens_b.difference(tokens_a)
# additional filtering by html response
a_minus_b = [t for t in a_minus_b if t not in non_exists_resp_text]
b_minus_a = [t for t in b_minus_a if t not in exists_resp_text]
a_minus_b = list(map(lambda x: x.strip('\\'), a_minus_b))
b_minus_a = list(map(lambda x: x.strip('\\'), b_minus_a))
# Filter out strings containing usernames
a_minus_b = [s for s in a_minus_b if username.lower() not in s.lower()]
b_minus_a = [s for s in b_minus_a if random_username.lower() not in s.lower()]
def filter_tokens(token: str, html_response: str) -> bool:
is_in_html = token in html_response
is_long_str = len(token) >= 50
is_number = re.match(r'^\d\.?\d+$', token) or re.match(r':^\d+$', token)
is_whitelisted_number = token in ['200', '404', '403']
return not (
is_in_html or is_long_str or (is_number and not is_whitelisted_number)
)
a_minus_b = list(
filter(lambda t: filter_tokens(t, second_html_response), a_minus_b)
)
b_minus_a = list(
filter(lambda t: filter_tokens(t, first_html_response), b_minus_a)
)
if len(a_minus_b) == len(b_minus_a) == 0:
print("The pages for existing and non-existing account are the same!")
top_features_count = int(
input(
f"Specify count of features to extract [default {self.TOP_FEATURES}]: "
)
or self.TOP_FEATURES
return (
None,
None,
"HTTP responses for pages with existing and non-existing accounts are the same",
random_username,
)
match_fun = get_match_ratio(self.settings.presence_strings)
presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[
:top_features_count
: self.TOP_FEATURES
]
self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list])
print("Detected text features of existing account: " + ", ".join(presence_list))
features = input("If features was not detected correctly, write it manually: ")
if features:
presence_list = list(map(str.strip, features.split(",")))
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
:top_features_count
: self.TOP_FEATURES
]
self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list])
print(
"Detected text features of non-existing account: " + ", ".join(absence_list)
)
features = input("If features was not detected correctly, write it manually: ")
self.logger.info(f"Detected presence features: {presence_list}")
self.logger.info(f"Detected absence features: {absence_list}")
if features:
absence_list = list(map(str.strip, features.split(",")))
site_data = {
"absenceStrs": absence_list,
"presenseStrs": presence_list,
"url": url_user,
"urlMain": url_mainpage,
"usernameClaimed": supposed_username,
"usernameUnclaimed": non_exist_username,
"checkType": "message",
}
if headers != self.HEADERS:
site_data['headers'] = headers
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
return site
return presence_list, absence_list, "Found", random_username
async def add_site(self, site):
sem = asyncio.Semaphore(1)
@@ -376,6 +390,12 @@ class Submitter:
}
async def dialog(self, url_exists, cookie_file):
old_site = None
additional_options_enabled = self.logger.level in (
logging.DEBUG,
logging.WARNING,
)
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
domain_raw = domain_raw.split("/")[0]
self.logger.info('Domain is %s', domain_raw)
@@ -386,9 +406,11 @@ class Submitter:
)
if matched_sites:
# TODO: update the existing site
print(
f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
f"{Fore.YELLOW}[!] Sites with domain \"{domain_raw}\" already exists in the Maigret database!{Style.RESET_ALL}"
)
status = lambda s: "(disabled)" if s.disabled else ""
url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
print(
@@ -400,16 +422,62 @@ class Submitter:
)
)
if input("Do you want to continue? [yN] ").lower() in "n":
if (
input(
f"{Fore.GREEN}[?] Do you want to continue? [yN] {Style.RESET_ALL}"
).lower()
in "n"
):
return False
site_names = [site.name for site in matched_sites]
site_name = (
input(
f"{Fore.GREEN}[?] Which site do you want to update in case of success? 1st by default. [{', '.join(site_names)}] {Style.RESET_ALL}"
)
or matched_sites[0].name
)
old_site = next(
(site for site in matched_sites if site.name == site_name), None
)
print(
f'{Fore.GREEN}[+] We will update site "{old_site.name}" in case of success.{Style.RESET_ALL}'
)
url_mainpage = self.extract_mainpage_url(url_exists)
# headers update
custom_headers = dict(self.HEADERS)
while additional_options_enabled:
header_key = input(
f'{Fore.GREEN}[?] Specify custom header if you need or just press Enter to skip. Header name: {Style.RESET_ALL}'
)
if not header_key:
break
header_value = input(f'{Fore.GREEN}[?] Header value: {Style.RESET_ALL}')
custom_headers[header_key.strip()] = header_value.strip()
# redirects settings update
redirects = False
if additional_options_enabled:
redirects = (
'y'
in input(
f'{Fore.GREEN}[?] Should we do redirects automatically? [yN] {Style.RESET_ALL}'
).lower()
)
print('Detecting site engine, please wait...')
sites = []
text = None
try:
sites, text = await self.detect_known_engine(url_exists, url_exists)
sites, text = await self.detect_known_engine(
url_exists,
url_exists,
session=None,
follow_redirects=redirects,
headers=custom_headers,
)
except KeyboardInterrupt:
print('Engine detect process is interrupted.')
@@ -422,26 +490,48 @@ class Submitter:
if not sites:
print("Unable to detect site engine, lets generate checking features")
redirects = False
if self.args.verbose:
redirects = (
'y' in input('Should we do redirects automatically? [yN] ').lower()
supposed_username = self.extract_username_dialog(url_exists)
self.logger.info(f"Supposed username: {supposed_username}")
presence_list, absence_list, status, non_exist_username = (
await self.check_features_manually(
username=supposed_username,
url_exists=url_exists,
cookie_filename=cookie_file,
follow_redirects=redirects,
headers=custom_headers,
)
)
sites = [
await self.check_features_manually(
url_exists,
url_mainpage,
cookie_file,
redirects,
if status == "Found":
site_data = {
"absenceStrs": absence_list,
"presenseStrs": presence_list,
"url": url_exists.replace(supposed_username, '{username}'),
"urlMain": url_mainpage,
"usernameClaimed": supposed_username,
"usernameUnclaimed": non_exist_username,
"checkType": "message",
}
self.logger.info(json.dumps(site_data, indent=4))
if custom_headers != self.HEADERS:
site_data['headers'] = custom_headers
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
sites.append(site)
else:
print(
f"{Fore.RED}[!] The check for site failed! Reason: {status}{Style.RESET_ALL}"
)
]
return False
self.logger.debug(sites[0].__dict__)
sem = asyncio.Semaphore(1)
print("Checking, please wait...")
print(f"{Fore.GREEN}[*] Checking, please wait...{Style.RESET_ALL}")
found = False
chosen_site = None
for s in sites:
@@ -463,7 +553,7 @@ class Submitter:
else:
if (
input(
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
f"{Fore.GREEN}[?] Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] {Style.RESET_ALL}"
)
.lower()
.strip("y")
@@ -471,22 +561,73 @@ class Submitter:
return False
if self.args.verbose:
source = input("Name the source site if it is mirror: ")
self.logger.info(
"Verbose mode is enabled, additional settings are available"
)
source = input(
f"{Fore.GREEN}[?] Name the source site if it is mirror: {Style.RESET_ALL}"
)
if source:
chosen_site.source = source
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
default_site_name = old_site.name if old_site else chosen_site.name
new_name = (
input(
f"{Fore.GREEN}[?] Change site name if you want [{default_site_name}]: {Style.RESET_ALL}"
)
or default_site_name
)
if new_name != default_site_name:
self.logger.info(f"New site name is {new_name}")
chosen_site.name = new_name
# TODO: remove empty tags
new_tags = input(f"{Fore.GREEN}[?] Site tags: {Style.RESET_ALL}")
if new_tags:
chosen_site.tags = list(map(str.strip, new_tags.split(',')))
else:
chosen_site.tags = []
self.logger.info(f"Site tags are: {', '.join(chosen_site.tags)}")
# rank = Submitter.get_alexa_rank(chosen_site.url_main)
# if rank:
# print(f'New alexa rank: {rank}')
# chosen_site.alexa_rank = rank
self.logger.debug(chosen_site.json)
self.logger.info(chosen_site.json)
site_data = chosen_site.strip_engine_data()
self.logger.debug(site_data.json)
self.db.update_site(site_data)
self.logger.info(site_data.json)
if old_site:
# Update old site with new values and log changes
fields_to_check = {
'url': 'URL',
'url_main': 'Main URL',
'username_claimed': 'Username claimed',
'username_unclaimed': 'Username unclaimed',
'check_type': 'Check type',
'presense_strs': 'Presence strings',
'absence_strs': 'Absence strings',
'tags': 'Tags',
'source': 'Source',
'headers': 'Headers',
}
for field, display_name in fields_to_check.items():
old_value = getattr(old_site, field)
new_value = getattr(site_data, field)
if field == 'tags' and not new_tags:
continue
if str(old_value) != str(new_value):
print(
f"{Fore.YELLOW}[*] '{display_name}' updated: {Fore.RED}{old_value} {Fore.YELLOW}to {Fore.GREEN}{new_value}{Style.RESET_ALL}"
)
old_site.__dict__[field] = new_value
# update the site
final_site = old_site if old_site else site_data
self.db.update_site(final_site)
# save the db in file
if self.args.db_file != self.settings.sites_db_path:
print(
f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}"
+5
View File
@@ -3,6 +3,7 @@ import ast
import difflib
import re
import random
import string
from typing import Any
@@ -119,3 +120,7 @@ def get_match_ratio(base_strs: list):
)
return get_match_inner
def generate_random_username():
return ''.join(random.choices(string.ascii_lowercase, k=10))
+22 -22
View File
@@ -77,7 +77,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://open.spotify.com/) [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.tiktok.com/) [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video*
1. ![](https://www.google.com/s2/favicons?domain=https://xvideos.com/) [Xvideos (https://xvideos.com/)](https://xvideos.com/)*: top 500, porn, us*
1. ![](https://www.google.com/s2/favicons?domain=https://tumblr.com/) [Tumblr (https://tumblr.com/)](https://tumblr.com/)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.tumblr.com) [Tumblr (https://www.tumblr.com)](https://www.tumblr.com)*: top 500, blog*
1. ![](https://www.google.com/s2/favicons?domain=https://www.roblox.com/) [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://soundcloud.com/) [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music*
1. ![](https://www.google.com/s2/favicons?domain=https://www.udemy.com) [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*
@@ -92,7 +92,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.pinterest.com/) [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://www.fiverr.com/) [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
1. ![](https://www.google.com/s2/favicons?domain=https://t.me/) [Telegram (https://t.me/)](https://t.me/)*: top 500, messaging*
1. ![](https://www.google.com/s2/favicons?domain=https://slideshare.net/) [SlideShare (https://slideshare.net/)](https://slideshare.net/)*: top 500, documents, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://www.slideshare.net) [SlideShare (https://www.slideshare.net)](https://www.slideshare.net)*: top 500*
1. ![](https://www.google.com/s2/favicons?domain=https://theguardian.com) [TheGuardian (https://theguardian.com)](https://theguardian.com)*: top 500, news, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://trello.com/) [Trello (https://trello.com/)](https://trello.com/)*: top 500, tasks*
1. ![](https://www.google.com/s2/favicons?domain=https://support.mozilla.org) [Mozilla Support (https://support.mozilla.org)](https://support.mozilla.org)*: top 500, us*
@@ -187,7 +187,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://community.brave.com) [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://tinder.com/) [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us*
1. ![](https://www.google.com/s2/favicons?domain=https://community.cloudflare.com/) [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, forum, tech*
1. ![](https://www.google.com/s2/favicons?domain=https://eksisozluk.com/biri/) [Eksisozluk (https://eksisozluk.com/biri/)](https://eksisozluk.com/biri/)*: top 1K, tr*
1. ![](https://www.google.com/s2/favicons?domain=https://eksisozluk.com) [Eksisozluk (https://eksisozluk.com)](https://eksisozluk.com)*: top 1K, tr*
1. ![](https://www.google.com/s2/favicons?domain=https://www.allrecipes.com/) [AllRecipes (https://www.allrecipes.com/)](https://www.allrecipes.com/)*: top 1K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://support.t-mobile.com) [T-MobileSupport (https://support.t-mobile.com)](https://support.t-mobile.com)*: top 1K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.tinkoff.ru/invest/) [Tinkoff Invest (https://www.tinkoff.ru/invest/)](https://www.tinkoff.ru/invest/)*: top 5K, ru*
@@ -195,7 +195,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://discuss.python.org/) [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.nairaland.com/) [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng*
1. ![](https://www.google.com/s2/favicons?domain=https://ru.redtube.com/) [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.strava.com/) [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.strava.com/) [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://profile.ameba.jp) [Ameba (https://profile.ameba.jp)](https://profile.ameba.jp)*: top 5K, jp*
1. ![](https://www.google.com/s2/favicons?domain=https://adblockplus.org) [adblockplus.org (https://adblockplus.org)](https://adblockplus.org)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://houzz.com/) [Houzz (https://houzz.com/)](https://houzz.com/)*: top 5K, us*, search is disabled
@@ -265,7 +265,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://lichess.org) [Lichess (https://lichess.org)](https://lichess.org)*: top 5K, gaming, hobby*
1. ![](https://www.google.com/s2/favicons?domain=https://jsfiddle.net) [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing*
1. ![](https://www.google.com/s2/favicons?domain=https://ru.pathofexile.com) [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us*
1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://vc.ru) [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.metacritic.com/) [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.digitalocean.com/) [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech*
1. ![](https://www.google.com/s2/favicons?domain=http://www.jeuxvideo.com) [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming*
@@ -281,7 +281,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://archiveofourown.org) [ArchiveOfOurOwn (https://archiveofourown.org)](https://archiveofourown.org)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://bit.ly) [Bit.ly (https://bit.ly)](https://bit.ly)*: top 5K, links*
1. ![](https://www.google.com/s2/favicons?domain=https://infourok.ru) [Infourok (https://infourok.ru)](https://infourok.ru)*: top 5K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://community.cbr.com) [Cbr (https://community.cbr.com)](https://community.cbr.com)*: top 5K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://community.cbr.com) [Cbr (https://community.cbr.com)](https://community.cbr.com)*: top 5K, forum, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://segmentfault.com/) [segmentfault (https://segmentfault.com/)](https://segmentfault.com/)*: top 5K, cn*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.warriorforum.com/) [Warrior Forum (https://www.warriorforum.com/)](https://www.warriorforum.com/)*: top 5K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://hub.docker.com/) [Docker Hub (https://hub.docker.com/)](https://hub.docker.com/)*: top 5K, coding*
@@ -295,7 +295,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://creativemarket.com/) [CreativeMarket (https://creativemarket.com/)](https://creativemarket.com/)*: top 5K, art, stock*
1. ![](https://www.google.com/s2/favicons?domain=https://bitbucket.org/) [BitBucket (https://bitbucket.org/)](https://bitbucket.org/)*: top 5K, coding*
1. ![](https://www.google.com/s2/favicons?domain=https://www.techrepublic.com) [Techrepublic (https://www.techrepublic.com)](https://www.techrepublic.com)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://aminoapps.com/) [aminoapp (https://aminoapps.com/)](https://aminoapps.com/)*: top 5K, br, us*
1. ![](https://www.google.com/s2/favicons?domain=https://aminoapps.com/) [aminoapp (https://aminoapps.com/)](https://aminoapps.com/)*: top 5K, br, us*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.mixcloud.com/) [MixCloud (https://www.mixcloud.com/)](https://www.mixcloud.com/)*: top 5K, music*
1. ![](https://www.google.com/s2/favicons?domain=https://forum.xda-developers.com) [XDA (https://forum.xda-developers.com)](https://forum.xda-developers.com)*: top 5K, apps, forum*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://thechive.com/) [Thechive (https://thechive.com/)](https://thechive.com/)*: top 5K, us*
@@ -321,7 +321,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=http://forums.bulbagarden.net) [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://videohive.net) [videohive.net (https://videohive.net)](https://videohive.net)*: top 5K, video*
1. ![](https://www.google.com/s2/favicons?domain=https://imginn.com) [ImgInn (https://imginn.com)](https://imginn.com)*: top 5K, photo*
1. ![](https://www.google.com/s2/favicons?domain=https://www.boardgamegeek.com) [BoardGameGeek (https://www.boardgamegeek.com)](https://www.boardgamegeek.com)*: top 5K, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://boardgamegeek.com) [BoardGameGeek (https://boardgamegeek.com)](https://boardgamegeek.com)*: top 5K, gaming, us*
1. ![](https://www.google.com/s2/favicons?domain=https://osu.ppy.sh/) [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 5K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://app.pluralsight.com) [Pluralsight (https://app.pluralsight.com)](https://app.pluralsight.com)*: top 5K, in, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.techpowerup.com) [TechPowerUp (https://www.techpowerup.com)](https://www.techpowerup.com)*: top 5K, us*
@@ -406,7 +406,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://www.reverbnation.com/) [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.glavbukh.ru) [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.trakt.tv/) [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr*
1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, au*
1. ![](https://www.google.com/s2/favicons?domain=https://hotcopper.com.au) [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, finance*
1. ![](https://www.google.com/s2/favicons?domain=https://pandia.ru) [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://forums.majorgeeks.com) [forums.majorgeeks.com (https://forums.majorgeeks.com)](https://forums.majorgeeks.com)*: top 10K, forum, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.hackerearth.com) [Hackerearth (https://www.hackerearth.com)](https://www.hackerearth.com)*: top 10K, freelance*
@@ -472,7 +472,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://3ddd.ru) [3ddd (https://3ddd.ru)](https://3ddd.ru)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://namemc.com/) [NameMC (https://namemc.com/)](https://namemc.com/)*: top 100K, us*
1. ![](https://www.google.com/s2/favicons?domain=https://www.b17.ru/) [B17 (https://www.b17.ru/)](https://www.b17.ru/)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.beermoneyforum.com) [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*
1. ![](https://www.google.com/s2/favicons?domain=https://www.beermoneyforum.com) [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://diary.ru) [Diary.ru (https://diary.ru)](https://diary.ru)*: top 100K, blog, nl, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.americanthinker.com/) [Americanthinker (https://www.americanthinker.com/)](https://www.americanthinker.com/)*: top 100K*
1. ![](https://www.google.com/s2/favicons?domain=https://contently.com/) [Contently (https://contently.com/)](https://contently.com/)*: top 100K, freelance, in*
@@ -497,7 +497,7 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://pbase.com/) [Pbase (https://pbase.com/)](https://pbase.com/)*: top 100K, in*
1. ![](https://www.google.com/s2/favicons?domain=https://www.native-instruments.com/forum/) [NICommunityForum (https://www.native-instruments.com/forum/)](https://www.native-instruments.com/forum/)*: top 100K, forum*
1. ![](https://www.google.com/s2/favicons?domain=https://spletnik.ru/) [spletnik (https://spletnik.ru/)](https://spletnik.ru/)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=http://www.folkd.com/profile/) [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*
1. ![](https://www.google.com/s2/favicons?domain=http://www.folkd.com/profile/) [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*, search is disabled
1. ![](https://www.google.com/s2/favicons?domain=https://www.iphones.ru) [Iphones.ru (https://www.iphones.ru)](https://www.iphones.ru)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.oper.ru/) [Oper (https://www.oper.ru/)](https://www.oper.ru/)*: top 100K, ru*
1. ![](https://www.google.com/s2/favicons?domain=https://www.interpals.net/) [interpals (https://www.interpals.net/)](https://www.interpals.net/)*: top 100K, dating*
@@ -3141,20 +3141,20 @@ Rank data fetched from Alexa by domains.
1. ![](https://www.google.com/s2/favicons?domain=https://pubg.op.gg) [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
1. ![](https://www.google.com/s2/favicons?domain=https://valorant.op.gg) [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
The list was updated at (2024-12-09)
The list was updated at (2024-12-10)
## Statistics
Enabled/total sites: 2699/3137 = 86.04%
Enabled/total sites: 2693/3137 = 85.85%
Incomplete message checks: 406/2699 = 15.04% (false positive risks)
Incomplete message checks: 397/2693 = 14.74% (false positive risks)
Status code checks: 720/2699 = 26.68% (false positive risks)
Status code checks: 719/2693 = 26.7% (false positive risks)
False positive risk (total): 41.72%
False positive risk (total): 41.44%
Top 20 profile URLs:
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
- (300) `/{username}`
- (301) `/{username}`
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
- (161) `/user/{username}`
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
@@ -3165,8 +3165,8 @@ Top 20 profile URLs:
- (87) `{urlMain}/u/{username}/summary (Discourse)`
- (54) `/wiki/User:{username}`
- (52) `/@{username}`
- (42) `SUBDOMAIN`
- (41) `/members/?username={username}`
- (41) `SUBDOMAIN`
- (32) `/members/{username}`
- (29) `/author/{username}`
- (27) `{urlMain}{urlSubpath}/memberlist.php?username={username} (phpBB)`
@@ -3177,21 +3177,21 @@ Top 20 profile URLs:
Top 20 tags:
- (328) `NO_TAGS` (non-standard)
- (307) `forum`
- (52) `gaming`
- (50) `gaming`
- (26) `coding`
- (21) `photo`
- (21) `blog`
- (20) `blog`
- (19) `news`
- (15) `music`
- (14) `tech`
- (12) `sharing`
- (12) `freelance`
- (12) `finance`
- (11) `sharing`
- (10) `dating`
- (10) `art`
- (10) `shopping`
- (10) `movies`
- (8) `hobby`
- (8) `crypto`
- (7) `sport`
- (7) `hobby`
- (7) `hacking`
+7
View File
@@ -79,6 +79,13 @@ def reports_autoclean():
remove_test_reports()
@pytest.fixture(scope='session')
def settings():
    """Session-wide Maigret Settings instance loaded from the test settings file."""
    loaded = Settings()
    loaded.load([SETTINGS_FILE])
    return loaded
@pytest.fixture(scope='session')
def argparser():
settings = Settings()
+4 -4
View File
@@ -26,7 +26,7 @@
"alexaRank": 1,
"url": "https://play.google.com/store/apps/developer?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "OpenAI",
"usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"InvalidActive": {
@@ -36,7 +36,7 @@
"alexaRank": 1,
"url": "https://play.google.com/store/apps/dev?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "OpenAI",
"usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"ValidInactive": {
@@ -46,7 +46,7 @@
"alexaRank": 1,
"url": "https://play.google.com/store/apps/developer?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "OpenAI",
"usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7"
},
"InvalidInactive": {
@@ -56,7 +56,7 @@
"alexaRank": 1,
"url": "https://play.google.com/store/apps/dev?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "OpenAI",
"usernameClaimed": "KONAMI",
"usernameUnclaimed": "noonewouldeverusethis7"
}
}
+1
View File
@@ -34,6 +34,7 @@ def test_vimeo_activation(default_db):
assert token1 != token2
@pytest.mark.slow
@pytest.mark.asyncio
async def test_import_aiohttp_cookies():
cookies_filename = 'cookies_test.txt'
+2
View File
@@ -1,8 +1,10 @@
"""Maigret data test functions"""
import pytest
from maigret.utils import is_country_tag
@pytest.mark.slow
def test_tags_validity(default_db):
unknown_tags = set()
+1
View File
@@ -49,6 +49,7 @@ async def test_asyncio_progressbar_semaphore_executor():
assert executor.execution_time < 0.4
@pytest.mark.slow
@pytest.mark.asyncio
async def test_asyncio_progressbar_queue_executor():
tasks = [(func, [n], {}) for n in range(10)]
+1
View File
@@ -84,6 +84,7 @@ def test_maigret_results(test_db):
assert results == RESULTS_EXAMPLE
@pytest.mark.slow
def test_extract_ids_from_url(default_db):
assert default_db.extract_ids_from_url('https://www.reddit.com/user/test') == {
'test': 'username'
+278
View File
@@ -0,0 +1,278 @@
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from maigret.submit import Submitter, MaigretSite, MaigretEngine
from aiohttp import ClientSession
from maigret.sites import MaigretDatabase
from maigret.settings import Settings
import logging
@pytest.mark.slow
@pytest.mark.asyncio
async def test_detect_known_engine(test_db, local_test_db):
    """Engine detection should recognize a Discourse forum from a profile URL
    and build candidate site entries; with an engine-less database it should
    detect nothing.

    Fix: removed the dead local ``mock_db = test_db`` — it was assigned but
    never used (the Submitter is constructed with ``test_db`` directly).
    """
    mock_settings = MagicMock()
    mock_logger = MagicMock()
    mock_args = MagicMock()
    mock_args.cookie_file = ""
    mock_args.proxy = ""
    # Candidate usernames the submitter may probe against the site
    mock_settings.supposed_usernames = ["adam"]

    # Create the Submitter instance backed by the full test database
    submitter = Submitter(test_db, mock_settings, mock_logger, mock_args)

    # Test URLs: an existing profile and the site's main page
    url_exists = "https://devforum.zoom.us/u/adam"
    url_mainpage = "https://devforum.zoom.us/"

    # Bypass the interactive username prompt
    submitter.extract_username_dialog = MagicMock(return_value="adam")

    sites, resp_text = await submitter.detect_known_engine(
        url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
    )

    # Assertions
    assert len(sites) == 2
    assert sites[0].name == "devforum.zoom.us"
    assert sites[0].url_main == "https://devforum.zoom.us/"
    assert sites[0].engine == "Discourse"
    assert sites[0].username_claimed == "adam"
    assert sites[0].username_unclaimed == "noonewouldeverusethis7"
    assert resp_text != ""
    await submitter.close()

    # A Submitter over a database without engine definitions finds nothing
    submitter = Submitter(local_test_db, mock_settings, mock_logger, mock_args)
    sites, resp_text = await submitter.detect_known_engine(
        url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
    )
    assert len(sites) == 0
    await submitter.close()
@pytest.mark.slow
@pytest.mark.asyncio
async def test_check_features_manually_success(settings):
    """Manual feature extraction against a live Google Play developer page:
    the profile must be reported as found with stable presence/absence markers."""
    database = MaigretDatabase()
    log = logging.getLogger("test_logger")
    cli_args = type(
        'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
    )()
    submitter = Submitter(database, settings, log, cli_args)

    username = "KONAMI"
    url_exists = "https://play.google.com/store/apps/developer?id=KONAMI"

    # Run the manual check against the live page
    presence_list, absence_list, status, random_username = (
        await submitter.check_features_manually(
            username=username,
            url_exists=url_exists,
            session=ClientSession(),
            follow_redirects=False,
            headers=None,
        )
    )
    await submitter.close()

    # Verify outcome and returned types
    assert status == "Found", "Expected status to be 'Found'"
    assert isinstance(presence_list, list), "Presence list should be a list"
    assert isinstance(absence_list, list), "Absence list should be a list"
    assert isinstance(random_username, str), "Random username should be a string"
    assert (
        random_username != username
    ), "Random username should not be the same as the input username"

    expected_presence = [
        ' title=',
        'og:title',
        'display: none;',
        '4;0',
        'main-title',
    ]
    expected_absence = [
        ' body {',
        ' </style>',
        '><title>Not Found</title>',
        ' <style nonce=',
        ' .rounded {',
    ]
    assert sorted(presence_list) == sorted(expected_presence)
    assert sorted(absence_list) == sorted(expected_absence)
@pytest.mark.slow
@pytest.mark.asyncio
async def test_check_features_manually_cloudflare(settings):
    """Manual feature check must bail out when Cloudflare protection is detected.

    Fix: this function was originally named ``test_check_features_manually_success``,
    duplicating the previous test's name — Python module semantics made the second
    definition shadow the first, so pytest collected only one of the two tests.
    Renamed so both tests run.
    """
    db = MaigretDatabase()
    logger = logging.getLogger("test_logger")
    args = type(
        'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
    )()
    submitter = Submitter(db, settings, logger, args)
    username = "abel"
    url_exists = "https://community.cloudflare.com/badges/1/basic?username=abel"

    # Execute against a Cloudflare-protected page
    presence_list, absence_list, status, random_username = (
        await submitter.check_features_manually(
            username=username,
            url_exists=url_exists,
            session=ClientSession(),
            follow_redirects=False,
            headers=None,
        )
    )
    await submitter.close()

    # No feature lists can be extracted; an explicit skip status is expected
    assert status == "Cloudflare detected, skipping"
    assert presence_list is None
    assert absence_list is None
    assert random_username != username
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_adds_site_positive(settings):
    """End-to-end submit dialog: a new site is detected from a profile URL
    and saved into an initially empty database."""
    database = MaigretDatabase()
    log = logging.getLogger("test_logger")
    log.setLevel(logging.INFO)
    arg_fields = {
        'proxy': None,
        'cookie_file': None,
        'verbose': False,
        'db_file': 'test_db.json',
        'db': 'test_db.json',
    }
    cli_args = type('Args', (object,), arg_fields)()
    submitter = Submitter(database, settings, log, cli_args)

    # Scripted answers for the interactive dialog prompts
    scripted_answers = [
        'KONAMI',  # Manually input username
        'y',  # Save the site in the Maigret DB
        'GooglePlayStore',  # Custom site name
        '',  # no custom tags
    ]
    with patch('builtins.input', side_effect=scripted_answers):
        result = await submitter.dialog(
            "https://play.google.com/store/apps/developer?id=KONAMI", None
        )
    await submitter.close()

    assert result is True
    assert len(database.sites) == 1

    new_site = database.sites[0]
    assert new_site.url_main == "https://play.google.com"
    assert new_site.name == "GooglePlayStore"
    assert new_site.tags == []
    assert new_site.presense_strs != []
    assert new_site.absence_strs != []
    assert new_site.username_claimed == "KONAMI"
    assert new_site.check_type == "message"
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_replace_site(settings, test_db):
    """Submit dialog over a pre-populated database: the user chooses to
    replace an existing similar entry, so the site count stays the same."""
    db = test_db
    log = logging.getLogger("test_logger")
    log.setLevel(logging.DEBUG)
    arg_fields = {
        'proxy': None,
        'cookie_file': None,
        'verbose': False,
        'db_file': 'test_db.json',
        'db': 'test_db.json',
    }
    cli_args = type('Args', (object,), arg_fields)()

    # Sanity check the fixture's initial size before running the dialog
    assert len(db.sites) == 4

    submitter = Submitter(db, settings, log, cli_args)

    # Scripted answers for the interactive dialog prompts
    scripted_answers = [
        'y',  # Similar sites found, continue
        'InvalidActive',  # Choose site to replace
        '',  # Custom headers
        'y',  # Should we do redirects automatically?
        'KONAMI',  # Manually input username
        'y',  # Save the site in the Maigret DB
        '',  # Custom site name
        '',  # no custom tags
    ]
    with patch('builtins.input', side_effect=scripted_answers):
        result = await submitter.dialog(
            "https://play.google.com/store/apps/developer?id=KONAMI", None
        )
    await submitter.close()

    assert result is True
    # Replacement, not addition: the count is unchanged
    assert len(db.sites) == 4

    replaced = db.sites_dict["InvalidActive"]
    assert replaced.name == "InvalidActive"
    assert replaced.url_main == "https://play.google.com"
    assert replaced.tags == ['global', 'us']
    assert replaced.presense_strs != []
    assert replaced.absence_strs != []
    assert replaced.username_claimed == "KONAMI"
    assert replaced.check_type == "message"
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_adds_site_negative(settings):
    """Submit dialog failure path: a URL the submitter cannot turn into a
    valid site entry must return False and leave nothing in the database."""
    database = MaigretDatabase()
    log = logging.getLogger("test_logger")
    log.setLevel(logging.INFO)
    arg_fields = {
        'proxy': None,
        'cookie_file': None,
        'verbose': False,
        'db_file': 'test_db.json',
        'db': 'test_db.json',
    }
    cli_args = type('Args', (object,), arg_fields)()
    submitter = Submitter(database, settings, log, cli_args)

    # Scripted answers for the interactive dialog prompts
    scripted_answers = [
        'sokrat',  # Manually input username
        'y',  # Save the site in the Maigret DB
    ]
    with patch('builtins.input', side_effect=scripted_answers):
        result = await submitter.dialog("https://icq.im/sokrat", None)
    await submitter.close()

    assert result is False