mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
Improved "submit new site" mode, added tests, fixed top-500 sites (#1952)
This commit is contained in:
+107
-46
@@ -1970,6 +1970,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"BeerMoneyForum": {
|
"BeerMoneyForum": {
|
||||||
|
"disabled": true,
|
||||||
"ignore403": true,
|
"ignore403": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"finance",
|
"finance",
|
||||||
@@ -2366,19 +2367,30 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"BoardGameGeek": {
|
"BoardGameGeek": {
|
||||||
|
"checkType": "message",
|
||||||
"tags": [
|
"tags": [
|
||||||
"gaming",
|
"gaming",
|
||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"checkType": "message",
|
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
"User does not exist."
|
"\t\tUser not found",
|
||||||
|
"messagebox error",
|
||||||
|
">\t<div class=",
|
||||||
|
"\t\t\t<title>Profile | BoardGameGeek</title>",
|
||||||
|
"\t</div></div>"
|
||||||
],
|
],
|
||||||
"alexaRank": 4327,
|
"alexaRank": 4327,
|
||||||
"urlMain": "https://www.boardgamegeek.com",
|
"urlMain": "https://boardgamegeek.com",
|
||||||
"url": "https://www.boardgamegeek.com/user/{username}",
|
"url": "https://boardgamegeek.com/user/{username}",
|
||||||
"usernameClaimed": "adam",
|
"usernameClaimed": "ZakuBG",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "uzytnhstvj",
|
||||||
|
"presenseStrs": [
|
||||||
|
"username",
|
||||||
|
" style=",
|
||||||
|
"mail",
|
||||||
|
" \tstyle=",
|
||||||
|
" data-username="
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"Bobrdobr": {
|
"Bobrdobr": {
|
||||||
"tags": [
|
"tags": [
|
||||||
@@ -3005,7 +3017,8 @@
|
|||||||
"alexaRank": 2689,
|
"alexaRank": 2689,
|
||||||
"urlMain": "https://community.cbr.com",
|
"urlMain": "https://community.cbr.com",
|
||||||
"usernameClaimed": "red",
|
"usernameClaimed": "red",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"disabled": true
|
||||||
},
|
},
|
||||||
"Ccdi": {
|
"Ccdi": {
|
||||||
"tags": [
|
"tags": [
|
||||||
@@ -4645,21 +4658,6 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
"alexaRank": 301125
|
"alexaRank": 301125
|
||||||
},
|
},
|
||||||
"Eksisozluk": {
|
|
||||||
"tags": [
|
|
||||||
"tr"
|
|
||||||
],
|
|
||||||
"checkType": "message",
|
|
||||||
"absenceStrs": [
|
|
||||||
"isimli bir yazar kayd\u0131 mevcut de\u011fil",
|
|
||||||
"olmaz \u00f6yle \u015fey"
|
|
||||||
],
|
|
||||||
"alexaRank": 977,
|
|
||||||
"urlMain": "https://eksisozluk.com/biri/",
|
|
||||||
"url": "https://eksisozluk.com/biri/{username}",
|
|
||||||
"usernameClaimed": "adam",
|
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
|
||||||
},
|
|
||||||
"Elakiri": {
|
"Elakiri": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"lk"
|
"lk"
|
||||||
@@ -5747,6 +5745,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"Folkd": {
|
"Folkd": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"eu",
|
"eu",
|
||||||
"in"
|
"in"
|
||||||
@@ -7678,17 +7677,28 @@
|
|||||||
},
|
},
|
||||||
"Hotcopper": {
|
"Hotcopper": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"au"
|
"finance"
|
||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
"The following error occurred"
|
"error-page",
|
||||||
|
"error-page home container",
|
||||||
|
"card-footer-item",
|
||||||
|
"><main id=",
|
||||||
|
"card-content"
|
||||||
],
|
],
|
||||||
"alexaRank": 7767,
|
"alexaRank": 7767,
|
||||||
"urlMain": "https://hotcopper.com.au",
|
"urlMain": "https://hotcopper.com.au",
|
||||||
"url": "https://hotcopper.com.au/search/search?type=post&users={username}",
|
"url": "https://hotcopper.com.au/search/search?type=post&users={username}",
|
||||||
"usernameClaimed": "red",
|
"usernameClaimed": "red",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "ggyeplcpod",
|
||||||
|
"presenseStrs": [
|
||||||
|
"title-td",
|
||||||
|
"title is-1",
|
||||||
|
"pagination ",
|
||||||
|
"toggle",
|
||||||
|
"active "
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"House-Mixes.com": {
|
"House-Mixes.com": {
|
||||||
"tags": [
|
"tags": [
|
||||||
@@ -9056,16 +9066,27 @@
|
|||||||
"Lichess": {
|
"Lichess": {
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
"Page not found!"
|
"page-small box box-pad page",
|
||||||
|
"><h1 class=",
|
||||||
|
">No such player</h1><div><p>This username doesn",
|
||||||
|
"})()</script></body></html>",
|
||||||
|
"IR0Cf7qpkpcOhvI9r03a0QbI"
|
||||||
],
|
],
|
||||||
"alexaRank": 2374,
|
"alexaRank": 2374,
|
||||||
"urlMain": "https://lichess.org",
|
"urlMain": "https://lichess.org",
|
||||||
"url": "https://lichess.org/@/{username}",
|
"url": "https://lichess.org/@/{username}",
|
||||||
"usernameClaimed": "blue",
|
"usernameClaimed": "adam",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
"usernameUnclaimed": "efxvyhnwrh",
|
||||||
"tags": [
|
"tags": [
|
||||||
"gaming",
|
"gaming",
|
||||||
"hobby"
|
"hobby"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"us_profile",
|
||||||
|
"og:title",
|
||||||
|
"profile-side",
|
||||||
|
" data-username=",
|
||||||
|
"og:site_name"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"Liebe69": {
|
"Liebe69": {
|
||||||
@@ -14739,16 +14760,25 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"SlideShare": {
|
"SlideShare": {
|
||||||
"tags": [
|
"checkType": "message",
|
||||||
"documents",
|
|
||||||
"sharing"
|
|
||||||
],
|
|
||||||
"checkType": "status_code",
|
|
||||||
"alexaRank": 158,
|
"alexaRank": 158,
|
||||||
"urlMain": "https://slideshare.net/",
|
"urlMain": "https://www.slideshare.net",
|
||||||
"url": "https://slideshare.net/{username}",
|
"url": "https://www.slideshare.net/{username}",
|
||||||
"usernameClaimed": "blue",
|
"usernameClaimed": "KumarSurya7",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "kwbmsonxvp",
|
||||||
|
"presenseStrs": [
|
||||||
|
"user-name",
|
||||||
|
"pageInfo",
|
||||||
|
"listitem",
|
||||||
|
"polite",
|
||||||
|
"strippedTitle"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"blankProfile",
|
||||||
|
"username-available",
|
||||||
|
"robots",
|
||||||
|
"noindex,nofollow"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"Slides": {
|
"Slides": {
|
||||||
"tags": [
|
"tags": [
|
||||||
@@ -15447,7 +15477,8 @@
|
|||||||
"urlMain": "https://www.strava.com/",
|
"urlMain": "https://www.strava.com/",
|
||||||
"url": "https://www.strava.com/athletes/{username}",
|
"url": "https://www.strava.com/athletes/{username}",
|
||||||
"usernameClaimed": "adam",
|
"usernameClaimed": "adam",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"disabled": true
|
||||||
},
|
},
|
||||||
"Studfile": {
|
"Studfile": {
|
||||||
"tags": [
|
"tags": [
|
||||||
@@ -16771,13 +16802,20 @@
|
|||||||
"regexCheck": "^[^\\.]+$",
|
"regexCheck": "^[^\\.]+$",
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
"There's nothing here."
|
"Not found.",
|
||||||
|
":404,",
|
||||||
|
"userAgent",
|
||||||
|
",displayStatus:"
|
||||||
],
|
],
|
||||||
"alexaRank": 112,
|
"alexaRank": 112,
|
||||||
"urlMain": "https://tumblr.com/",
|
"urlMain": "https://www.tumblr.com",
|
||||||
"url": "https://{username}.tumblr.com/",
|
"url": "https://www.tumblr.com/{username}",
|
||||||
"usernameClaimed": "red",
|
"usernameClaimed": "soxoj",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "zdbimdoqyt",
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile",
|
||||||
|
" title="
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"Tunefind": {
|
"Tunefind": {
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
@@ -17114,7 +17152,8 @@
|
|||||||
"urlMain": "https://vc.ru",
|
"urlMain": "https://vc.ru",
|
||||||
"url": "https://vc.ru/search/v2/subsite/relevant?query={username}",
|
"url": "https://vc.ru/search/v2/subsite/relevant?query={username}",
|
||||||
"usernameClaimed": "adam",
|
"usernameClaimed": "adam",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"disabled": true
|
||||||
},
|
},
|
||||||
"Viddler": {
|
"Viddler": {
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
@@ -17377,7 +17416,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM2MTc5MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNGYxM2M4N2ItYWMwMy00Y2JhLWExMDctNmNiODhmM2U3NjZjIn0.Y7CWEWckdSMsmJ8ROPmhHR6el2QCYJRDl0RLPpdJOKc"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4MzkwODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiOWNjMjk0ZjktZGZhOS00NDI0LWE0OGEtN2JjYzkwYjM2NTMyIn0.wG0kC7fWtrdKI9ccS-LE81lVgQRfYobrqCAPWxr1wzc"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
@@ -18971,7 +19010,8 @@
|
|||||||
"urlMain": "https://aminoapps.com/",
|
"urlMain": "https://aminoapps.com/",
|
||||||
"url": "https://aminoapps.com/u/{username}",
|
"url": "https://aminoapps.com/u/{username}",
|
||||||
"usernameClaimed": "blue",
|
"usernameClaimed": "blue",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis77777"
|
"usernameUnclaimed": "noonewouldeverusethis77777",
|
||||||
|
"disabled": true
|
||||||
},
|
},
|
||||||
"analitika-forex.ru": {
|
"analitika-forex.ru": {
|
||||||
"engine": "uCoz",
|
"engine": "uCoz",
|
||||||
@@ -35419,6 +35459,27 @@
|
|||||||
"Cache-Control": "no-cache",
|
"Cache-Control": "no-cache",
|
||||||
"TE": "trailers"
|
"TE": "trailers"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"Eksisozluk": {
|
||||||
|
"absenceStrs": [
|
||||||
|
" <h1>b\u00f6yle bir yazar yok</h1>\r"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"profile-dots",
|
||||||
|
"profile-logo",
|
||||||
|
"profile-cards",
|
||||||
|
"profile-biography",
|
||||||
|
" data-title="
|
||||||
|
],
|
||||||
|
"alexaRank": 977,
|
||||||
|
"url": "https://eksisozluk.com/biri/{username}",
|
||||||
|
"urlMain": "https://eksisozluk.com",
|
||||||
|
"usernameClaimed": "kartalbafilerrr",
|
||||||
|
"usernameUnclaimed": "rlcvuwlxqh",
|
||||||
|
"checkType": "message",
|
||||||
|
"tags": [
|
||||||
|
"tr"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|||||||
+1
-1
@@ -96,7 +96,7 @@ class MaigretCheckResult:
|
|||||||
return self.status == MaigretCheckStatus.CLAIMED
|
return self.status == MaigretCheckStatus.CLAIMED
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"'{self.__str__()}'"
|
return f"<{self.__str__()}>"
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
"""Convert Object To String.
|
"""Convert Object To String.
|
||||||
|
|||||||
+262
-121
@@ -2,7 +2,8 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
from typing import Any, Dict, List, Optional
|
import logging
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
from aiohttp import ClientSession, TCPConnector
|
from aiohttp import ClientSession, TCPConnector
|
||||||
from aiohttp_socks import ProxyConnector
|
from aiohttp_socks import ProxyConnector
|
||||||
@@ -15,7 +16,7 @@ from .settings import Settings
|
|||||||
from .sites import MaigretDatabase, MaigretEngine, MaigretSite
|
from .sites import MaigretDatabase, MaigretEngine, MaigretSite
|
||||||
from .utils import get_random_user_agent
|
from .utils import get_random_user_agent
|
||||||
from .checking import site_self_check
|
from .checking import site_self_check
|
||||||
from .utils import get_match_ratio
|
from .utils import get_match_ratio, generate_random_username
|
||||||
|
|
||||||
|
|
||||||
class CloudflareSession:
|
class CloudflareSession:
|
||||||
@@ -125,21 +126,13 @@ class Submitter:
|
|||||||
return fields
|
return fields
|
||||||
|
|
||||||
async def detect_known_engine(
|
async def detect_known_engine(
|
||||||
self, url_exists, url_mainpage
|
self, url_exists, url_mainpage, session, follow_redirects, headers
|
||||||
) -> [List[MaigretSite], str]:
|
) -> [List[MaigretSite], str]:
|
||||||
|
|
||||||
resp_text = ''
|
session = session or self.session
|
||||||
|
resp_text, _ = await self.get_html_response_to_compare(
|
||||||
try:
|
url_exists, session, follow_redirects, headers
|
||||||
r = await self.session.get(url_mainpage)
|
)
|
||||||
content = await r.content.read()
|
|
||||||
charset = r.charset or "utf-8"
|
|
||||||
resp_text = content.decode(charset, "ignore")
|
|
||||||
self.logger.debug(resp_text)
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(e, exc_info=True)
|
|
||||||
print(f"Some error while checking main page: {e}")
|
|
||||||
return [], resp_text
|
|
||||||
|
|
||||||
for engine in self.db.engines:
|
for engine in self.db.engines:
|
||||||
strs_to_check = engine.__dict__.get("presenseStrs")
|
strs_to_check = engine.__dict__.get("presenseStrs")
|
||||||
@@ -195,113 +188,134 @@ class Submitter:
|
|||||||
)
|
)
|
||||||
return entered_username if entered_username else supposed_username
|
return entered_username if entered_username else supposed_username
|
||||||
|
|
||||||
async def check_features_manually(
|
@staticmethod
|
||||||
self, url_exists, url_mainpage, cookie_file, redirects=False
|
async def get_html_response_to_compare(
|
||||||
|
url: str, session: ClientSession = None, redirects=False, headers: Dict = None
|
||||||
):
|
):
|
||||||
custom_headers = {}
|
async with session.get(
|
||||||
while self.args.verbose:
|
url, allow_redirects=redirects, headers=headers
|
||||||
header_key = input(
|
) as response:
|
||||||
'Specify custom header if you need or just press Enter to skip. Header name: '
|
# Try different encodings or fallback to 'ignore' errors
|
||||||
|
try:
|
||||||
|
html_response = await response.text(encoding='utf-8')
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
try:
|
||||||
|
html_response = await response.text(encoding='latin1')
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
html_response = await response.text(errors='ignore')
|
||||||
|
return html_response, response.status
|
||||||
|
|
||||||
|
async def check_features_manually(
|
||||||
|
self,
|
||||||
|
username: str,
|
||||||
|
url_exists: str,
|
||||||
|
cookie_filename="", # TODO: use cookies
|
||||||
|
session: ClientSession = None,
|
||||||
|
follow_redirects=False,
|
||||||
|
headers: dict = None,
|
||||||
|
) -> Tuple[List[str], List[str], str, str]:
|
||||||
|
|
||||||
|
random_username = generate_random_username()
|
||||||
|
url_of_non_existing_account = url_exists.lower().replace(
|
||||||
|
username.lower(), random_username
|
||||||
)
|
)
|
||||||
if not header_key:
|
|
||||||
break
|
|
||||||
header_value = input('Header value: ')
|
|
||||||
custom_headers[header_key.strip()] = header_value.strip()
|
|
||||||
|
|
||||||
supposed_username = self.extract_username_dialog(url_exists)
|
try:
|
||||||
non_exist_username = "noonewouldeverusethis7"
|
session = session or self.session
|
||||||
|
first_html_response, first_status = await self.get_html_response_to_compare(
|
||||||
url_user = url_exists.replace(supposed_username, "{username}")
|
url_exists, session, follow_redirects, headers
|
||||||
url_not_exists = url_exists.replace(supposed_username, non_exist_username)
|
|
||||||
|
|
||||||
headers = dict(self.HEADERS)
|
|
||||||
headers.update(custom_headers)
|
|
||||||
|
|
||||||
exists_resp = await self.session.get(
|
|
||||||
url_exists,
|
|
||||||
headers=headers,
|
|
||||||
allow_redirects=redirects,
|
|
||||||
)
|
)
|
||||||
exists_resp_text = await exists_resp.text()
|
second_html_response, second_status = (
|
||||||
self.logger.debug(url_exists)
|
await self.get_html_response_to_compare(
|
||||||
self.logger.debug(exists_resp.status)
|
url_of_non_existing_account, session, follow_redirects, headers
|
||||||
self.logger.debug(exists_resp_text)
|
|
||||||
|
|
||||||
non_exists_resp = await self.session.get(
|
|
||||||
url_not_exists,
|
|
||||||
headers=headers,
|
|
||||||
allow_redirects=redirects,
|
|
||||||
)
|
)
|
||||||
non_exists_resp_text = await non_exists_resp.text()
|
)
|
||||||
self.logger.debug(url_not_exists)
|
await session.close()
|
||||||
self.logger.debug(non_exists_resp.status)
|
except Exception as e:
|
||||||
self.logger.debug(non_exists_resp_text)
|
self.logger.error(
|
||||||
|
f"Error while getting HTTP response for username {username}: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
return None, None, str(e), random_username
|
||||||
|
|
||||||
a = exists_resp_text
|
self.logger.info(f"URL with existing account: {url_exists}")
|
||||||
b = non_exists_resp_text
|
self.logger.info(
|
||||||
|
f"HTTP response status for URL with existing account: {first_status}"
|
||||||
|
)
|
||||||
|
self.logger.info(
|
||||||
|
f"HTTP response length URL with existing account: {len(first_html_response)}"
|
||||||
|
)
|
||||||
|
self.logger.debug(first_html_response)
|
||||||
|
|
||||||
tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
|
self.logger.info(f"URL with existing account: {url_of_non_existing_account}")
|
||||||
tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
|
self.logger.info(
|
||||||
|
f"HTTP response status for URL with non-existing account: {second_status}"
|
||||||
|
)
|
||||||
|
self.logger.info(
|
||||||
|
f"HTTP response length URL with non-existing account: {len(second_html_response)}"
|
||||||
|
)
|
||||||
|
self.logger.debug(second_html_response)
|
||||||
|
|
||||||
|
# TODO: filter by errors, move to dialog function
|
||||||
|
if (
|
||||||
|
"/cdn-cgi/challenge-platform" in first_html_response
|
||||||
|
or "\t\t\t\tnow: " in first_html_response
|
||||||
|
or "Sorry, you have been blocked" in first_html_response
|
||||||
|
):
|
||||||
|
self.logger.info("Cloudflare detected, skipping")
|
||||||
|
return None, None, "Cloudflare detected, skipping", random_username
|
||||||
|
|
||||||
|
tokens_a = set(re.split(f'[{self.SEPARATORS}]', first_html_response))
|
||||||
|
tokens_b = set(re.split(f'[{self.SEPARATORS}]', second_html_response))
|
||||||
|
|
||||||
a_minus_b = tokens_a.difference(tokens_b)
|
a_minus_b = tokens_a.difference(tokens_b)
|
||||||
b_minus_a = tokens_b.difference(tokens_a)
|
b_minus_a = tokens_b.difference(tokens_a)
|
||||||
|
|
||||||
# additional filtering by html response
|
a_minus_b = list(map(lambda x: x.strip('\\'), a_minus_b))
|
||||||
a_minus_b = [t for t in a_minus_b if t not in non_exists_resp_text]
|
b_minus_a = list(map(lambda x: x.strip('\\'), b_minus_a))
|
||||||
b_minus_a = [t for t in b_minus_a if t not in exists_resp_text]
|
|
||||||
|
# Filter out strings containing usernames
|
||||||
|
a_minus_b = [s for s in a_minus_b if username.lower() not in s.lower()]
|
||||||
|
b_minus_a = [s for s in b_minus_a if random_username.lower() not in s.lower()]
|
||||||
|
|
||||||
|
def filter_tokens(token: str, html_response: str) -> bool:
|
||||||
|
is_in_html = token in html_response
|
||||||
|
is_long_str = len(token) >= 50
|
||||||
|
is_number = re.match(r'^\d\.?\d+$', token) or re.match(r':^\d+$', token)
|
||||||
|
is_whitelisted_number = token in ['200', '404', '403']
|
||||||
|
|
||||||
|
return not (
|
||||||
|
is_in_html or is_long_str or (is_number and not is_whitelisted_number)
|
||||||
|
)
|
||||||
|
|
||||||
|
a_minus_b = list(
|
||||||
|
filter(lambda t: filter_tokens(t, second_html_response), a_minus_b)
|
||||||
|
)
|
||||||
|
b_minus_a = list(
|
||||||
|
filter(lambda t: filter_tokens(t, first_html_response), b_minus_a)
|
||||||
|
)
|
||||||
|
|
||||||
if len(a_minus_b) == len(b_minus_a) == 0:
|
if len(a_minus_b) == len(b_minus_a) == 0:
|
||||||
print("The pages for existing and non-existing account are the same!")
|
return (
|
||||||
|
None,
|
||||||
top_features_count = int(
|
None,
|
||||||
input(
|
"HTTP responses for pages with existing and non-existing accounts are the same",
|
||||||
f"Specify count of features to extract [default {self.TOP_FEATURES}]: "
|
random_username,
|
||||||
)
|
|
||||||
or self.TOP_FEATURES
|
|
||||||
)
|
)
|
||||||
|
|
||||||
match_fun = get_match_ratio(self.settings.presence_strings)
|
match_fun = get_match_ratio(self.settings.presence_strings)
|
||||||
|
|
||||||
presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[
|
presence_list = sorted(a_minus_b, key=match_fun, reverse=True)[
|
||||||
:top_features_count
|
: self.TOP_FEATURES
|
||||||
]
|
]
|
||||||
|
|
||||||
self.logger.debug([(keyword, match_fun(keyword)) for keyword in presence_list])
|
|
||||||
|
|
||||||
print("Detected text features of existing account: " + ", ".join(presence_list))
|
|
||||||
features = input("If features was not detected correctly, write it manually: ")
|
|
||||||
|
|
||||||
if features:
|
|
||||||
presence_list = list(map(str.strip, features.split(",")))
|
|
||||||
|
|
||||||
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
|
absence_list = sorted(b_minus_a, key=match_fun, reverse=True)[
|
||||||
:top_features_count
|
: self.TOP_FEATURES
|
||||||
]
|
]
|
||||||
self.logger.debug([(keyword, match_fun(keyword)) for keyword in absence_list])
|
|
||||||
|
|
||||||
print(
|
self.logger.info(f"Detected presence features: {presence_list}")
|
||||||
"Detected text features of non-existing account: " + ", ".join(absence_list)
|
self.logger.info(f"Detected absence features: {absence_list}")
|
||||||
)
|
|
||||||
features = input("If features was not detected correctly, write it manually: ")
|
|
||||||
|
|
||||||
if features:
|
return presence_list, absence_list, "Found", random_username
|
||||||
absence_list = list(map(str.strip, features.split(",")))
|
|
||||||
|
|
||||||
site_data = {
|
|
||||||
"absenceStrs": absence_list,
|
|
||||||
"presenseStrs": presence_list,
|
|
||||||
"url": url_user,
|
|
||||||
"urlMain": url_mainpage,
|
|
||||||
"usernameClaimed": supposed_username,
|
|
||||||
"usernameUnclaimed": non_exist_username,
|
|
||||||
"checkType": "message",
|
|
||||||
}
|
|
||||||
|
|
||||||
if headers != self.HEADERS:
|
|
||||||
site_data['headers'] = headers
|
|
||||||
|
|
||||||
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
|
||||||
return site
|
|
||||||
|
|
||||||
async def add_site(self, site):
|
async def add_site(self, site):
|
||||||
sem = asyncio.Semaphore(1)
|
sem = asyncio.Semaphore(1)
|
||||||
@@ -376,6 +390,12 @@ class Submitter:
|
|||||||
}
|
}
|
||||||
|
|
||||||
async def dialog(self, url_exists, cookie_file):
|
async def dialog(self, url_exists, cookie_file):
|
||||||
|
old_site = None
|
||||||
|
additional_options_enabled = self.logger.level in (
|
||||||
|
logging.DEBUG,
|
||||||
|
logging.WARNING,
|
||||||
|
)
|
||||||
|
|
||||||
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
|
domain_raw = self.URL_RE.sub("", url_exists).strip().strip("/")
|
||||||
domain_raw = domain_raw.split("/")[0]
|
domain_raw = domain_raw.split("/")[0]
|
||||||
self.logger.info('Domain is %s', domain_raw)
|
self.logger.info('Domain is %s', domain_raw)
|
||||||
@@ -386,9 +406,11 @@ class Submitter:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if matched_sites:
|
if matched_sites:
|
||||||
|
# TODO: update the existing site
|
||||||
print(
|
print(
|
||||||
f'Sites with domain "{domain_raw}" already exists in the Maigret database!'
|
f"{Fore.YELLOW}[!] Sites with domain \"{domain_raw}\" already exists in the Maigret database!{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
|
|
||||||
status = lambda s: "(disabled)" if s.disabled else ""
|
status = lambda s: "(disabled)" if s.disabled else ""
|
||||||
url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
|
url_block = lambda s: f"\n\t{s.url_main}\n\t{s.url}"
|
||||||
print(
|
print(
|
||||||
@@ -400,16 +422,62 @@ class Submitter:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if input("Do you want to continue? [yN] ").lower() in "n":
|
if (
|
||||||
|
input(
|
||||||
|
f"{Fore.GREEN}[?] Do you want to continue? [yN] {Style.RESET_ALL}"
|
||||||
|
).lower()
|
||||||
|
in "n"
|
||||||
|
):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
site_names = [site.name for site in matched_sites]
|
||||||
|
site_name = (
|
||||||
|
input(
|
||||||
|
f"{Fore.GREEN}[?] Which site do you want to update in case of success? 1st by default. [{', '.join(site_names)}] {Style.RESET_ALL}"
|
||||||
|
)
|
||||||
|
or matched_sites[0].name
|
||||||
|
)
|
||||||
|
old_site = next(
|
||||||
|
(site for site in matched_sites if site.name == site_name), None
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f'{Fore.GREEN}[+] We will update site "{old_site.name}" in case of success.{Style.RESET_ALL}'
|
||||||
|
)
|
||||||
|
|
||||||
url_mainpage = self.extract_mainpage_url(url_exists)
|
url_mainpage = self.extract_mainpage_url(url_exists)
|
||||||
|
|
||||||
|
# headers update
|
||||||
|
custom_headers = dict(self.HEADERS)
|
||||||
|
while additional_options_enabled:
|
||||||
|
header_key = input(
|
||||||
|
f'{Fore.GREEN}[?] Specify custom header if you need or just press Enter to skip. Header name: {Style.RESET_ALL}'
|
||||||
|
)
|
||||||
|
if not header_key:
|
||||||
|
break
|
||||||
|
header_value = input(f'{Fore.GREEN}[?] Header value: {Style.RESET_ALL}')
|
||||||
|
custom_headers[header_key.strip()] = header_value.strip()
|
||||||
|
|
||||||
|
# redirects settings update
|
||||||
|
redirects = False
|
||||||
|
if additional_options_enabled:
|
||||||
|
redirects = (
|
||||||
|
'y'
|
||||||
|
in input(
|
||||||
|
f'{Fore.GREEN}[?] Should we do redirects automatically? [yN] {Style.RESET_ALL}'
|
||||||
|
).lower()
|
||||||
|
)
|
||||||
|
|
||||||
print('Detecting site engine, please wait...')
|
print('Detecting site engine, please wait...')
|
||||||
sites = []
|
sites = []
|
||||||
text = None
|
text = None
|
||||||
try:
|
try:
|
||||||
sites, text = await self.detect_known_engine(url_exists, url_exists)
|
sites, text = await self.detect_known_engine(
|
||||||
|
url_exists,
|
||||||
|
url_exists,
|
||||||
|
session=None,
|
||||||
|
follow_redirects=redirects,
|
||||||
|
headers=custom_headers,
|
||||||
|
)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print('Engine detect process is interrupted.')
|
print('Engine detect process is interrupted.')
|
||||||
|
|
||||||
@@ -422,26 +490,48 @@ class Submitter:
|
|||||||
if not sites:
|
if not sites:
|
||||||
print("Unable to detect site engine, lets generate checking features")
|
print("Unable to detect site engine, lets generate checking features")
|
||||||
|
|
||||||
redirects = False
|
supposed_username = self.extract_username_dialog(url_exists)
|
||||||
if self.args.verbose:
|
self.logger.info(f"Supposed username: {supposed_username}")
|
||||||
redirects = (
|
|
||||||
'y' in input('Should we do redirects automatically? [yN] ').lower()
|
presence_list, absence_list, status, non_exist_username = (
|
||||||
|
await self.check_features_manually(
|
||||||
|
username=supposed_username,
|
||||||
|
url_exists=url_exists,
|
||||||
|
cookie_filename=cookie_file,
|
||||||
|
follow_redirects=redirects,
|
||||||
|
headers=custom_headers,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
sites = [
|
if status == "Found":
|
||||||
await self.check_features_manually(
|
site_data = {
|
||||||
url_exists,
|
"absenceStrs": absence_list,
|
||||||
url_mainpage,
|
"presenseStrs": presence_list,
|
||||||
cookie_file,
|
"url": url_exists.replace(supposed_username, '{username}'),
|
||||||
redirects,
|
"urlMain": url_mainpage,
|
||||||
|
"usernameClaimed": supposed_username,
|
||||||
|
"usernameUnclaimed": non_exist_username,
|
||||||
|
"checkType": "message",
|
||||||
|
}
|
||||||
|
self.logger.info(json.dumps(site_data, indent=4))
|
||||||
|
|
||||||
|
if custom_headers != self.HEADERS:
|
||||||
|
site_data['headers'] = custom_headers
|
||||||
|
|
||||||
|
site = MaigretSite(url_mainpage.split("/")[-1], site_data)
|
||||||
|
sites.append(site)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
f"{Fore.RED}[!] The check for site failed! Reason: {status}{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
]
|
return False
|
||||||
|
|
||||||
self.logger.debug(sites[0].__dict__)
|
self.logger.debug(sites[0].__dict__)
|
||||||
|
|
||||||
sem = asyncio.Semaphore(1)
|
sem = asyncio.Semaphore(1)
|
||||||
|
|
||||||
print("Checking, please wait...")
|
print(f"{Fore.GREEN}[*] Checking, please wait...{Style.RESET_ALL}")
|
||||||
found = False
|
found = False
|
||||||
chosen_site = None
|
chosen_site = None
|
||||||
for s in sites:
|
for s in sites:
|
||||||
@@ -463,7 +553,7 @@ class Submitter:
|
|||||||
else:
|
else:
|
||||||
if (
|
if (
|
||||||
input(
|
input(
|
||||||
f"Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] "
|
f"{Fore.GREEN}[?] Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] {Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
.lower()
|
.lower()
|
||||||
.strip("y")
|
.strip("y")
|
||||||
@@ -471,22 +561,73 @@ class Submitter:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
if self.args.verbose:
|
if self.args.verbose:
|
||||||
source = input("Name the source site if it is mirror: ")
|
self.logger.info(
|
||||||
|
"Verbose mode is enabled, additional settings are available"
|
||||||
|
)
|
||||||
|
source = input(
|
||||||
|
f"{Fore.GREEN}[?] Name the source site if it is mirror: {Style.RESET_ALL}"
|
||||||
|
)
|
||||||
if source:
|
if source:
|
||||||
chosen_site.source = source
|
chosen_site.source = source
|
||||||
|
|
||||||
chosen_site.name = input("Change site name if you want: ") or chosen_site.name
|
default_site_name = old_site.name if old_site else chosen_site.name
|
||||||
chosen_site.tags = list(map(str.strip, input("Site tags: ").split(',')))
|
new_name = (
|
||||||
|
input(
|
||||||
|
f"{Fore.GREEN}[?] Change site name if you want [{default_site_name}]: {Style.RESET_ALL}"
|
||||||
|
)
|
||||||
|
or default_site_name
|
||||||
|
)
|
||||||
|
if new_name != default_site_name:
|
||||||
|
self.logger.info(f"New site name is {new_name}")
|
||||||
|
chosen_site.name = new_name
|
||||||
|
|
||||||
|
# TODO: remove empty tags
|
||||||
|
new_tags = input(f"{Fore.GREEN}[?] Site tags: {Style.RESET_ALL}")
|
||||||
|
if new_tags:
|
||||||
|
chosen_site.tags = list(map(str.strip, new_tags.split(',')))
|
||||||
|
else:
|
||||||
|
chosen_site.tags = []
|
||||||
|
self.logger.info(f"Site tags are: {', '.join(chosen_site.tags)}")
|
||||||
# rank = Submitter.get_alexa_rank(chosen_site.url_main)
|
# rank = Submitter.get_alexa_rank(chosen_site.url_main)
|
||||||
# if rank:
|
# if rank:
|
||||||
# print(f'New alexa rank: {rank}')
|
# print(f'New alexa rank: {rank}')
|
||||||
# chosen_site.alexa_rank = rank
|
# chosen_site.alexa_rank = rank
|
||||||
|
|
||||||
self.logger.debug(chosen_site.json)
|
self.logger.info(chosen_site.json)
|
||||||
site_data = chosen_site.strip_engine_data()
|
site_data = chosen_site.strip_engine_data()
|
||||||
self.logger.debug(site_data.json)
|
self.logger.info(site_data.json)
|
||||||
self.db.update_site(site_data)
|
|
||||||
|
|
||||||
|
if old_site:
|
||||||
|
# Update old site with new values and log changes
|
||||||
|
fields_to_check = {
|
||||||
|
'url': 'URL',
|
||||||
|
'url_main': 'Main URL',
|
||||||
|
'username_claimed': 'Username claimed',
|
||||||
|
'username_unclaimed': 'Username unclaimed',
|
||||||
|
'check_type': 'Check type',
|
||||||
|
'presense_strs': 'Presence strings',
|
||||||
|
'absence_strs': 'Absence strings',
|
||||||
|
'tags': 'Tags',
|
||||||
|
'source': 'Source',
|
||||||
|
'headers': 'Headers',
|
||||||
|
}
|
||||||
|
|
||||||
|
for field, display_name in fields_to_check.items():
|
||||||
|
old_value = getattr(old_site, field)
|
||||||
|
new_value = getattr(site_data, field)
|
||||||
|
if field == 'tags' and not new_tags:
|
||||||
|
continue
|
||||||
|
if str(old_value) != str(new_value):
|
||||||
|
print(
|
||||||
|
f"{Fore.YELLOW}[*] '{display_name}' updated: {Fore.RED}{old_value} {Fore.YELLOW}to {Fore.GREEN}{new_value}{Style.RESET_ALL}"
|
||||||
|
)
|
||||||
|
old_site.__dict__[field] = new_value
|
||||||
|
|
||||||
|
# update the site
|
||||||
|
final_site = old_site if old_site else site_data
|
||||||
|
self.db.update_site(final_site)
|
||||||
|
|
||||||
|
# save the db in file
|
||||||
if self.args.db_file != self.settings.sites_db_path:
|
if self.args.db_file != self.settings.sites_db_path:
|
||||||
print(
|
print(
|
||||||
f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}"
|
f"{Fore.GREEN}[+] Maigret DB is saved to {self.args.db}.{Style.RESET_ALL}"
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import ast
|
|||||||
import difflib
|
import difflib
|
||||||
import re
|
import re
|
||||||
import random
|
import random
|
||||||
|
import string
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
@@ -119,3 +120,7 @@ def get_match_ratio(base_strs: list):
|
|||||||
)
|
)
|
||||||
|
|
||||||
return get_match_inner
|
return get_match_inner
|
||||||
|
|
||||||
|
|
||||||
|
def generate_random_username():
|
||||||
|
return ''.join(random.choices(string.ascii_lowercase, k=10))
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled
|
1.  [Spotify (https://open.spotify.com/)](https://open.spotify.com/)*: top 100, music, us*, search is disabled
|
||||||
1.  [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video*
|
1.  [TikTok (https://www.tiktok.com/)](https://www.tiktok.com/)*: top 100, video*
|
||||||
1.  [Xvideos (https://xvideos.com/)](https://xvideos.com/)*: top 500, porn, us*
|
1.  [Xvideos (https://xvideos.com/)](https://xvideos.com/)*: top 500, porn, us*
|
||||||
1.  [Tumblr (https://tumblr.com/)](https://tumblr.com/)*: top 500, blog*
|
1.  [Tumblr (https://www.tumblr.com)](https://www.tumblr.com)*: top 500, blog*
|
||||||
1.  [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us*
|
1.  [Roblox (https://www.roblox.com/)](https://www.roblox.com/)*: top 500, gaming, us*
|
||||||
1.  [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music*
|
1.  [SoundCloud (https://soundcloud.com/)](https://soundcloud.com/)*: top 500, music*
|
||||||
1.  [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*
|
1.  [Udemy (https://www.udemy.com)](https://www.udemy.com)*: top 500, in*
|
||||||
@@ -92,7 +92,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
|
1.  [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
|
||||||
1.  [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
|
1.  [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
|
||||||
1.  [Telegram (https://t.me/)](https://t.me/)*: top 500, messaging*
|
1.  [Telegram (https://t.me/)](https://t.me/)*: top 500, messaging*
|
||||||
1.  [SlideShare (https://slideshare.net/)](https://slideshare.net/)*: top 500, documents, sharing*
|
1.  [SlideShare (https://www.slideshare.net)](https://www.slideshare.net)*: top 500*
|
||||||
1.  [TheGuardian (https://theguardian.com)](https://theguardian.com)*: top 500, news, us*, search is disabled
|
1.  [TheGuardian (https://theguardian.com)](https://theguardian.com)*: top 500, news, us*, search is disabled
|
||||||
1.  [Trello (https://trello.com/)](https://trello.com/)*: top 500, tasks*
|
1.  [Trello (https://trello.com/)](https://trello.com/)*: top 500, tasks*
|
||||||
1.  [Mozilla Support (https://support.mozilla.org)](https://support.mozilla.org)*: top 500, us*
|
1.  [Mozilla Support (https://support.mozilla.org)](https://support.mozilla.org)*: top 500, us*
|
||||||
@@ -187,7 +187,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us*
|
1.  [community.brave.com (https://community.brave.com)](https://community.brave.com)*: top 1K, forum, us*
|
||||||
1.  [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us*
|
1.  [Tinder (https://tinder.com/)](https://tinder.com/)*: top 1K, dating, us*
|
||||||
1.  [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, forum, tech*
|
1.  [CloudflareCommunity (https://community.cloudflare.com/)](https://community.cloudflare.com/)*: top 1K, forum, tech*
|
||||||
1.  [Eksisozluk (https://eksisozluk.com/biri/)](https://eksisozluk.com/biri/)*: top 1K, tr*
|
1.  [Eksisozluk (https://eksisozluk.com)](https://eksisozluk.com)*: top 1K, tr*
|
||||||
1.  [AllRecipes (https://www.allrecipes.com/)](https://www.allrecipes.com/)*: top 1K, us*
|
1.  [AllRecipes (https://www.allrecipes.com/)](https://www.allrecipes.com/)*: top 1K, us*
|
||||||
1.  [T-MobileSupport (https://support.t-mobile.com)](https://support.t-mobile.com)*: top 1K, us*, search is disabled
|
1.  [T-MobileSupport (https://support.t-mobile.com)](https://support.t-mobile.com)*: top 1K, us*, search is disabled
|
||||||
1.  [Tinkoff Invest (https://www.tinkoff.ru/invest/)](https://www.tinkoff.ru/invest/)*: top 5K, ru*
|
1.  [Tinkoff Invest (https://www.tinkoff.ru/invest/)](https://www.tinkoff.ru/invest/)*: top 5K, ru*
|
||||||
@@ -195,7 +195,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us*
|
1.  [DiscussPython (https://discuss.python.org/)](https://discuss.python.org/)*: top 5K, coding, forum, us*
|
||||||
1.  [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng*
|
1.  [Nairaland Forum (https://www.nairaland.com/)](https://www.nairaland.com/)*: top 5K, ng*
|
||||||
1.  [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us*
|
1.  [Redtube (https://ru.redtube.com/)](https://ru.redtube.com/)*: top 5K, porn, us*
|
||||||
1.  [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*
|
1.  [Strava (https://www.strava.com/)](https://www.strava.com/)*: top 5K, us*, search is disabled
|
||||||
1.  [Ameba (https://profile.ameba.jp)](https://profile.ameba.jp)*: top 5K, jp*
|
1.  [Ameba (https://profile.ameba.jp)](https://profile.ameba.jp)*: top 5K, jp*
|
||||||
1.  [adblockplus.org (https://adblockplus.org)](https://adblockplus.org)*: top 5K, us*
|
1.  [adblockplus.org (https://adblockplus.org)](https://adblockplus.org)*: top 5K, us*
|
||||||
1.  [Houzz (https://houzz.com/)](https://houzz.com/)*: top 5K, us*, search is disabled
|
1.  [Houzz (https://houzz.com/)](https://houzz.com/)*: top 5K, us*, search is disabled
|
||||||
@@ -265,7 +265,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Lichess (https://lichess.org)](https://lichess.org)*: top 5K, gaming, hobby*
|
1.  [Lichess (https://lichess.org)](https://lichess.org)*: top 5K, gaming, hobby*
|
||||||
1.  [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing*
|
1.  [jsfiddle.net (https://jsfiddle.net)](https://jsfiddle.net)*: top 5K, coding, sharing*
|
||||||
1.  [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us*
|
1.  [Pathofexile (https://ru.pathofexile.com)](https://ru.pathofexile.com)*: top 5K, ru, us*
|
||||||
1.  [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*
|
1.  [VC.ru (https://vc.ru)](https://vc.ru)*: top 5K, ru*, search is disabled
|
||||||
1.  [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*, search is disabled
|
1.  [metacritic (https://www.metacritic.com/)](https://www.metacritic.com/)*: top 5K, us*, search is disabled
|
||||||
1.  [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech*
|
1.  [DigitalOcean (https://www.digitalocean.com/)](https://www.digitalocean.com/)*: top 5K, forum, in, tech*
|
||||||
1.  [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming*
|
1.  [jeuxvideo (http://www.jeuxvideo.com)](http://www.jeuxvideo.com)*: top 5K, fr, gaming*
|
||||||
@@ -281,7 +281,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [ArchiveOfOurOwn (https://archiveofourown.org)](https://archiveofourown.org)*: top 5K, us*
|
1.  [ArchiveOfOurOwn (https://archiveofourown.org)](https://archiveofourown.org)*: top 5K, us*
|
||||||
1.  [Bit.ly (https://bit.ly)](https://bit.ly)*: top 5K, links*
|
1.  [Bit.ly (https://bit.ly)](https://bit.ly)*: top 5K, links*
|
||||||
1.  [Infourok (https://infourok.ru)](https://infourok.ru)*: top 5K, ru*
|
1.  [Infourok (https://infourok.ru)](https://infourok.ru)*: top 5K, ru*
|
||||||
1.  [Cbr (https://community.cbr.com)](https://community.cbr.com)*: top 5K, forum, us*
|
1.  [Cbr (https://community.cbr.com)](https://community.cbr.com)*: top 5K, forum, us*, search is disabled
|
||||||
1.  [segmentfault (https://segmentfault.com/)](https://segmentfault.com/)*: top 5K, cn*, search is disabled
|
1.  [segmentfault (https://segmentfault.com/)](https://segmentfault.com/)*: top 5K, cn*, search is disabled
|
||||||
1.  [Warrior Forum (https://www.warriorforum.com/)](https://www.warriorforum.com/)*: top 5K, forum, us*
|
1.  [Warrior Forum (https://www.warriorforum.com/)](https://www.warriorforum.com/)*: top 5K, forum, us*
|
||||||
1.  [Docker Hub (https://hub.docker.com/)](https://hub.docker.com/)*: top 5K, coding*
|
1.  [Docker Hub (https://hub.docker.com/)](https://hub.docker.com/)*: top 5K, coding*
|
||||||
@@ -295,7 +295,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [CreativeMarket (https://creativemarket.com/)](https://creativemarket.com/)*: top 5K, art, stock*
|
1.  [CreativeMarket (https://creativemarket.com/)](https://creativemarket.com/)*: top 5K, art, stock*
|
||||||
1.  [BitBucket (https://bitbucket.org/)](https://bitbucket.org/)*: top 5K, coding*
|
1.  [BitBucket (https://bitbucket.org/)](https://bitbucket.org/)*: top 5K, coding*
|
||||||
1.  [Techrepublic (https://www.techrepublic.com)](https://www.techrepublic.com)*: top 5K, us*
|
1.  [Techrepublic (https://www.techrepublic.com)](https://www.techrepublic.com)*: top 5K, us*
|
||||||
1.  [aminoapp (https://aminoapps.com/)](https://aminoapps.com/)*: top 5K, br, us*
|
1.  [aminoapp (https://aminoapps.com/)](https://aminoapps.com/)*: top 5K, br, us*, search is disabled
|
||||||
1.  [MixCloud (https://www.mixcloud.com/)](https://www.mixcloud.com/)*: top 5K, music*
|
1.  [MixCloud (https://www.mixcloud.com/)](https://www.mixcloud.com/)*: top 5K, music*
|
||||||
1.  [XDA (https://forum.xda-developers.com)](https://forum.xda-developers.com)*: top 5K, apps, forum*, search is disabled
|
1.  [XDA (https://forum.xda-developers.com)](https://forum.xda-developers.com)*: top 5K, apps, forum*, search is disabled
|
||||||
1.  [Thechive (https://thechive.com/)](https://thechive.com/)*: top 5K, us*
|
1.  [Thechive (https://thechive.com/)](https://thechive.com/)*: top 5K, us*
|
||||||
@@ -321,7 +321,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*
|
1.  [forums.bulbagarden.net (http://forums.bulbagarden.net)](http://forums.bulbagarden.net)*: top 5K, forum, us*
|
||||||
1.  [videohive.net (https://videohive.net)](https://videohive.net)*: top 5K, video*
|
1.  [videohive.net (https://videohive.net)](https://videohive.net)*: top 5K, video*
|
||||||
1.  [ImgInn (https://imginn.com)](https://imginn.com)*: top 5K, photo*
|
1.  [ImgInn (https://imginn.com)](https://imginn.com)*: top 5K, photo*
|
||||||
1.  [BoardGameGeek (https://www.boardgamegeek.com)](https://www.boardgamegeek.com)*: top 5K, gaming, us*
|
1.  [BoardGameGeek (https://boardgamegeek.com)](https://boardgamegeek.com)*: top 5K, gaming, us*
|
||||||
1.  [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 5K, us*
|
1.  [osu! (https://osu.ppy.sh/)](https://osu.ppy.sh/)*: top 5K, us*
|
||||||
1.  [Pluralsight (https://app.pluralsight.com)](https://app.pluralsight.com)*: top 5K, in, us*
|
1.  [Pluralsight (https://app.pluralsight.com)](https://app.pluralsight.com)*: top 5K, in, us*
|
||||||
1.  [TechPowerUp (https://www.techpowerup.com)](https://www.techpowerup.com)*: top 5K, us*
|
1.  [TechPowerUp (https://www.techpowerup.com)](https://www.techpowerup.com)*: top 5K, us*
|
||||||
@@ -406,7 +406,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us*
|
1.  [ReverbNation (https://www.reverbnation.com/)](https://www.reverbnation.com/)*: top 10K, us*
|
||||||
1.  [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*, search is disabled
|
1.  [Scorcher (https://www.glavbukh.ru)](https://www.glavbukh.ru)*: top 10K, ru*, search is disabled
|
||||||
1.  [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr*
|
1.  [Trakt (https://www.trakt.tv/)](https://www.trakt.tv/)*: top 10K, de, fr*
|
||||||
1.  [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, au*
|
1.  [Hotcopper (https://hotcopper.com.au)](https://hotcopper.com.au)*: top 10K, finance*
|
||||||
1.  [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru*
|
1.  [Pandia (https://pandia.ru)](https://pandia.ru)*: top 10K, news, ru*
|
||||||
1.  [forums.majorgeeks.com (https://forums.majorgeeks.com)](https://forums.majorgeeks.com)*: top 10K, forum, us*
|
1.  [forums.majorgeeks.com (https://forums.majorgeeks.com)](https://forums.majorgeeks.com)*: top 10K, forum, us*
|
||||||
1.  [Hackerearth (https://www.hackerearth.com)](https://www.hackerearth.com)*: top 10K, freelance*
|
1.  [Hackerearth (https://www.hackerearth.com)](https://www.hackerearth.com)*: top 10K, freelance*
|
||||||
@@ -472,7 +472,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [3ddd (https://3ddd.ru)](https://3ddd.ru)*: top 100K, ru*
|
1.  [3ddd (https://3ddd.ru)](https://3ddd.ru)*: top 100K, ru*
|
||||||
1.  [NameMC (https://namemc.com/)](https://namemc.com/)*: top 100K, us*
|
1.  [NameMC (https://namemc.com/)](https://namemc.com/)*: top 100K, us*
|
||||||
1.  [B17 (https://www.b17.ru/)](https://www.b17.ru/)*: top 100K, ru*
|
1.  [B17 (https://www.b17.ru/)](https://www.b17.ru/)*: top 100K, ru*
|
||||||
1.  [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*
|
1.  [BeerMoneyForum (https://www.beermoneyforum.com)](https://www.beermoneyforum.com)*: top 100K, finance, forum, gambling*, search is disabled
|
||||||
1.  [Diary.ru (https://diary.ru)](https://diary.ru)*: top 100K, blog, nl, ru*
|
1.  [Diary.ru (https://diary.ru)](https://diary.ru)*: top 100K, blog, nl, ru*
|
||||||
1.  [Americanthinker (https://www.americanthinker.com/)](https://www.americanthinker.com/)*: top 100K*
|
1.  [Americanthinker (https://www.americanthinker.com/)](https://www.americanthinker.com/)*: top 100K*
|
||||||
1.  [Contently (https://contently.com/)](https://contently.com/)*: top 100K, freelance, in*
|
1.  [Contently (https://contently.com/)](https://contently.com/)*: top 100K, freelance, in*
|
||||||
@@ -497,7 +497,7 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [Pbase (https://pbase.com/)](https://pbase.com/)*: top 100K, in*
|
1.  [Pbase (https://pbase.com/)](https://pbase.com/)*: top 100K, in*
|
||||||
1.  [NICommunityForum (https://www.native-instruments.com/forum/)](https://www.native-instruments.com/forum/)*: top 100K, forum*
|
1.  [NICommunityForum (https://www.native-instruments.com/forum/)](https://www.native-instruments.com/forum/)*: top 100K, forum*
|
||||||
1.  [spletnik (https://spletnik.ru/)](https://spletnik.ru/)*: top 100K, ru*
|
1.  [spletnik (https://spletnik.ru/)](https://spletnik.ru/)*: top 100K, ru*
|
||||||
1.  [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*
|
1.  [Folkd (http://www.folkd.com/profile/)](http://www.folkd.com/profile/)*: top 100K, eu, in*, search is disabled
|
||||||
1.  [Iphones.ru (https://www.iphones.ru)](https://www.iphones.ru)*: top 100K, ru*
|
1.  [Iphones.ru (https://www.iphones.ru)](https://www.iphones.ru)*: top 100K, ru*
|
||||||
1.  [Oper (https://www.oper.ru/)](https://www.oper.ru/)*: top 100K, ru*
|
1.  [Oper (https://www.oper.ru/)](https://www.oper.ru/)*: top 100K, ru*
|
||||||
1.  [interpals (https://www.interpals.net/)](https://www.interpals.net/)*: top 100K, dating*
|
1.  [interpals (https://www.interpals.net/)](https://www.interpals.net/)*: top 100K, dating*
|
||||||
@@ -3141,20 +3141,20 @@ Rank data fetched from Alexa by domains.
|
|||||||
1.  [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
|
1.  [OP.GG [PUBG] (https://pubg.op.gg)](https://pubg.op.gg)*: top 100M, gaming*
|
||||||
1.  [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
|
1.  [OP.GG [Valorant] (https://valorant.op.gg)](https://valorant.op.gg)*: top 100M, gaming*
|
||||||
|
|
||||||
The list was updated at (2024-12-09)
|
The list was updated at (2024-12-10)
|
||||||
## Statistics
|
## Statistics
|
||||||
|
|
||||||
Enabled/total sites: 2699/3137 = 86.04%
|
Enabled/total sites: 2693/3137 = 85.85%
|
||||||
|
|
||||||
Incomplete message checks: 406/2699 = 15.04% (false positive risks)
|
Incomplete message checks: 397/2693 = 14.74% (false positive risks)
|
||||||
|
|
||||||
Status code checks: 720/2699 = 26.68% (false positive risks)
|
Status code checks: 719/2693 = 26.7% (false positive risks)
|
||||||
|
|
||||||
False positive risk (total): 41.72%
|
False positive risk (total): 41.44%
|
||||||
|
|
||||||
Top 20 profile URLs:
|
Top 20 profile URLs:
|
||||||
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
|
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
|
||||||
- (300) `/{username}`
|
- (301) `/{username}`
|
||||||
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
|
- (221) `{urlMain}{urlSubpath}/members/?username={username} (XenForo)`
|
||||||
- (161) `/user/{username}`
|
- (161) `/user/{username}`
|
||||||
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
|
- (133) `{urlMain}{urlSubpath}/member.php?username={username} (vBulletin)`
|
||||||
@@ -3165,8 +3165,8 @@ Top 20 profile URLs:
|
|||||||
- (87) `{urlMain}/u/{username}/summary (Discourse)`
|
- (87) `{urlMain}/u/{username}/summary (Discourse)`
|
||||||
- (54) `/wiki/User:{username}`
|
- (54) `/wiki/User:{username}`
|
||||||
- (52) `/@{username}`
|
- (52) `/@{username}`
|
||||||
- (42) `SUBDOMAIN`
|
|
||||||
- (41) `/members/?username={username}`
|
- (41) `/members/?username={username}`
|
||||||
|
- (41) `SUBDOMAIN`
|
||||||
- (32) `/members/{username}`
|
- (32) `/members/{username}`
|
||||||
- (29) `/author/{username}`
|
- (29) `/author/{username}`
|
||||||
- (27) `{urlMain}{urlSubpath}/memberlist.php?username={username} (phpBB)`
|
- (27) `{urlMain}{urlSubpath}/memberlist.php?username={username} (phpBB)`
|
||||||
@@ -3177,21 +3177,21 @@ Top 20 profile URLs:
|
|||||||
Top 20 tags:
|
Top 20 tags:
|
||||||
- (328) `NO_TAGS` (non-standard)
|
- (328) `NO_TAGS` (non-standard)
|
||||||
- (307) `forum`
|
- (307) `forum`
|
||||||
- (52) `gaming`
|
- (50) `gaming`
|
||||||
- (26) `coding`
|
- (26) `coding`
|
||||||
- (21) `photo`
|
- (21) `photo`
|
||||||
- (21) `blog`
|
- (20) `blog`
|
||||||
- (19) `news`
|
- (19) `news`
|
||||||
- (15) `music`
|
- (15) `music`
|
||||||
- (14) `tech`
|
- (14) `tech`
|
||||||
- (12) `sharing`
|
|
||||||
- (12) `freelance`
|
- (12) `freelance`
|
||||||
- (12) `finance`
|
- (12) `finance`
|
||||||
|
- (11) `sharing`
|
||||||
- (10) `dating`
|
- (10) `dating`
|
||||||
- (10) `art`
|
- (10) `art`
|
||||||
- (10) `shopping`
|
- (10) `shopping`
|
||||||
- (10) `movies`
|
- (10) `movies`
|
||||||
- (8) `hobby`
|
|
||||||
- (8) `crypto`
|
- (8) `crypto`
|
||||||
- (7) `sport`
|
- (7) `sport`
|
||||||
|
- (7) `hobby`
|
||||||
- (7) `hacking`
|
- (7) `hacking`
|
||||||
|
|||||||
@@ -79,6 +79,13 @@ def reports_autoclean():
|
|||||||
remove_test_reports()
|
remove_test_reports()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope='session')
|
||||||
|
def settings():
|
||||||
|
settings = Settings()
|
||||||
|
settings.load([SETTINGS_FILE])
|
||||||
|
return settings
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session')
|
@pytest.fixture(scope='session')
|
||||||
def argparser():
|
def argparser():
|
||||||
settings = Settings()
|
settings = Settings()
|
||||||
|
|||||||
+4
-4
@@ -26,7 +26,7 @@
|
|||||||
"alexaRank": 1,
|
"alexaRank": 1,
|
||||||
"url": "https://play.google.com/store/apps/developer?id={username}",
|
"url": "https://play.google.com/store/apps/developer?id={username}",
|
||||||
"urlMain": "https://play.google.com/store",
|
"urlMain": "https://play.google.com/store",
|
||||||
"usernameClaimed": "OpenAI",
|
"usernameClaimed": "KONAMI",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"InvalidActive": {
|
"InvalidActive": {
|
||||||
@@ -36,7 +36,7 @@
|
|||||||
"alexaRank": 1,
|
"alexaRank": 1,
|
||||||
"url": "https://play.google.com/store/apps/dev?id={username}",
|
"url": "https://play.google.com/store/apps/dev?id={username}",
|
||||||
"urlMain": "https://play.google.com/store",
|
"urlMain": "https://play.google.com/store",
|
||||||
"usernameClaimed": "OpenAI",
|
"usernameClaimed": "KONAMI",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"ValidInactive": {
|
"ValidInactive": {
|
||||||
@@ -46,7 +46,7 @@
|
|||||||
"alexaRank": 1,
|
"alexaRank": 1,
|
||||||
"url": "https://play.google.com/store/apps/developer?id={username}",
|
"url": "https://play.google.com/store/apps/developer?id={username}",
|
||||||
"urlMain": "https://play.google.com/store",
|
"urlMain": "https://play.google.com/store",
|
||||||
"usernameClaimed": "OpenAI",
|
"usernameClaimed": "KONAMI",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"InvalidInactive": {
|
"InvalidInactive": {
|
||||||
@@ -56,7 +56,7 @@
|
|||||||
"alexaRank": 1,
|
"alexaRank": 1,
|
||||||
"url": "https://play.google.com/store/apps/dev?id={username}",
|
"url": "https://play.google.com/store/apps/dev?id={username}",
|
||||||
"urlMain": "https://play.google.com/store",
|
"urlMain": "https://play.google.com/store",
|
||||||
"usernameClaimed": "OpenAI",
|
"usernameClaimed": "KONAMI",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ def test_vimeo_activation(default_db):
|
|||||||
assert token1 != token2
|
assert token1 != token2
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_import_aiohttp_cookies():
|
async def test_import_aiohttp_cookies():
|
||||||
cookies_filename = 'cookies_test.txt'
|
cookies_filename = 'cookies_test.txt'
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
"""Maigret data test functions"""
|
"""Maigret data test functions"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
from maigret.utils import is_country_tag
|
from maigret.utils import is_country_tag
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
def test_tags_validity(default_db):
|
def test_tags_validity(default_db):
|
||||||
unknown_tags = set()
|
unknown_tags = set()
|
||||||
|
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ async def test_asyncio_progressbar_semaphore_executor():
|
|||||||
assert executor.execution_time < 0.4
|
assert executor.execution_time < 0.4
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_asyncio_progressbar_queue_executor():
|
async def test_asyncio_progressbar_queue_executor():
|
||||||
tasks = [(func, [n], {}) for n in range(10)]
|
tasks = [(func, [n], {}) for n in range(10)]
|
||||||
|
|||||||
@@ -84,6 +84,7 @@ def test_maigret_results(test_db):
|
|||||||
assert results == RESULTS_EXAMPLE
|
assert results == RESULTS_EXAMPLE
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
def test_extract_ids_from_url(default_db):
|
def test_extract_ids_from_url(default_db):
|
||||||
assert default_db.extract_ids_from_url('https://www.reddit.com/user/test') == {
|
assert default_db.extract_ids_from_url('https://www.reddit.com/user/test') == {
|
||||||
'test': 'username'
|
'test': 'username'
|
||||||
|
|||||||
@@ -0,0 +1,278 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
from maigret.submit import Submitter, MaigretSite, MaigretEngine
|
||||||
|
from aiohttp import ClientSession
|
||||||
|
from maigret.sites import MaigretDatabase
|
||||||
|
from maigret.settings import Settings
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_detect_known_engine(test_db, local_test_db):
|
||||||
|
# Use the database fixture instead of mocking
|
||||||
|
mock_db = test_db
|
||||||
|
mock_settings = MagicMock()
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
mock_args = MagicMock()
|
||||||
|
mock_args.cookie_file = ""
|
||||||
|
mock_args.proxy = ""
|
||||||
|
|
||||||
|
# Mock the supposed usernames
|
||||||
|
mock_settings.supposed_usernames = ["adam"]
|
||||||
|
# Create the Submitter instance
|
||||||
|
submitter = Submitter(test_db, mock_settings, mock_logger, mock_args)
|
||||||
|
|
||||||
|
# Call the method with test URLs
|
||||||
|
url_exists = "https://devforum.zoom.us/u/adam"
|
||||||
|
url_mainpage = "https://devforum.zoom.us/"
|
||||||
|
# Mock extract_username_dialog to return "adam"
|
||||||
|
submitter.extract_username_dialog = MagicMock(return_value="adam")
|
||||||
|
|
||||||
|
sites, resp_text = await submitter.detect_known_engine(
|
||||||
|
url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assertions
|
||||||
|
assert len(sites) == 2
|
||||||
|
assert sites[0].name == "devforum.zoom.us"
|
||||||
|
assert sites[0].url_main == "https://devforum.zoom.us/"
|
||||||
|
assert sites[0].engine == "Discourse"
|
||||||
|
assert sites[0].username_claimed == "adam"
|
||||||
|
assert sites[0].username_unclaimed == "noonewouldeverusethis7"
|
||||||
|
assert resp_text != ""
|
||||||
|
|
||||||
|
await submitter.close()
|
||||||
|
|
||||||
|
# Create the Submitter instance without engines
|
||||||
|
submitter = Submitter(local_test_db, mock_settings, mock_logger, mock_args)
|
||||||
|
sites, resp_text = await submitter.detect_known_engine(
|
||||||
|
url_exists, url_mainpage, session=None, follow_redirects=False, headers=None
|
||||||
|
)
|
||||||
|
assert len(sites) == 0
|
||||||
|
|
||||||
|
await submitter.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_check_features_manually_success(settings):
|
||||||
|
# Setup
|
||||||
|
db = MaigretDatabase()
|
||||||
|
logger = logging.getLogger("test_logger")
|
||||||
|
args = type(
|
||||||
|
'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
|
||||||
|
)()
|
||||||
|
|
||||||
|
submitter = Submitter(db, settings, logger, args)
|
||||||
|
|
||||||
|
username = "KONAMI"
|
||||||
|
url_exists = "https://play.google.com/store/apps/developer?id=KONAMI"
|
||||||
|
|
||||||
|
# Execute
|
||||||
|
presence_list, absence_list, status, random_username = (
|
||||||
|
await submitter.check_features_manually(
|
||||||
|
username=username,
|
||||||
|
url_exists=url_exists,
|
||||||
|
session=ClientSession(),
|
||||||
|
follow_redirects=False,
|
||||||
|
headers=None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
await submitter.close()
|
||||||
|
# Assert
|
||||||
|
assert status == "Found", "Expected status to be 'Found'"
|
||||||
|
assert isinstance(presence_list, list), "Presence list should be a list"
|
||||||
|
assert isinstance(absence_list, list), "Absence list should be a list"
|
||||||
|
assert isinstance(random_username, str), "Random username should be a string"
|
||||||
|
assert (
|
||||||
|
random_username != username
|
||||||
|
), "Random username should not be the same as the input username"
|
||||||
|
assert sorted(presence_list) == sorted(
|
||||||
|
[
|
||||||
|
' title=',
|
||||||
|
'og:title',
|
||||||
|
'display: none;',
|
||||||
|
'4;0',
|
||||||
|
'main-title',
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert sorted(absence_list) == sorted(
|
||||||
|
[
|
||||||
|
' body {',
|
||||||
|
' </style>',
|
||||||
|
'><title>Not Found</title>',
|
||||||
|
' <style nonce=',
|
||||||
|
' .rounded {',
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_check_features_manually_success(settings):
|
||||||
|
# Setup
|
||||||
|
db = MaigretDatabase()
|
||||||
|
logger = logging.getLogger("test_logger")
|
||||||
|
args = type(
|
||||||
|
'Args', (object,), {'proxy': None, 'cookie_file': None, 'verbose': False}
|
||||||
|
)()
|
||||||
|
|
||||||
|
submitter = Submitter(db, settings, logger, args)
|
||||||
|
|
||||||
|
username = "abel"
|
||||||
|
url_exists = "https://community.cloudflare.com/badges/1/basic?username=abel"
|
||||||
|
|
||||||
|
# Execute
|
||||||
|
presence_list, absence_list, status, random_username = (
|
||||||
|
await submitter.check_features_manually(
|
||||||
|
username=username,
|
||||||
|
url_exists=url_exists,
|
||||||
|
session=ClientSession(),
|
||||||
|
follow_redirects=False,
|
||||||
|
headers=None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
await submitter.close()
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert status == "Cloudflare detected, skipping"
|
||||||
|
assert presence_list is None
|
||||||
|
assert absence_list is None
|
||||||
|
assert random_username != username
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_dialog_adds_site_positive(settings):
|
||||||
|
# Initialize necessary objects
|
||||||
|
db = MaigretDatabase()
|
||||||
|
logger = logging.getLogger("test_logger")
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
args = type(
|
||||||
|
'Args',
|
||||||
|
(object,),
|
||||||
|
{
|
||||||
|
'proxy': None,
|
||||||
|
'cookie_file': None,
|
||||||
|
'verbose': False,
|
||||||
|
'db_file': 'test_db.json',
|
||||||
|
'db': 'test_db.json',
|
||||||
|
},
|
||||||
|
)()
|
||||||
|
|
||||||
|
submitter = Submitter(db, settings, logger, args)
|
||||||
|
|
||||||
|
# Mock user inputs
|
||||||
|
user_inputs = [
|
||||||
|
'KONAMI', # Manually input username
|
||||||
|
'y', # Save the site in the Maigret DB
|
||||||
|
'GooglePlayStore', # Custom site name
|
||||||
|
'', # no custom tags
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch('builtins.input', side_effect=user_inputs):
|
||||||
|
result = await submitter.dialog(
|
||||||
|
"https://play.google.com/store/apps/developer?id=KONAMI", None
|
||||||
|
)
|
||||||
|
await submitter.close()
|
||||||
|
|
||||||
|
assert result is True
|
||||||
|
assert len(db.sites) == 1
|
||||||
|
|
||||||
|
site = db.sites[0]
|
||||||
|
assert site.url_main == "https://play.google.com"
|
||||||
|
assert site.name == "GooglePlayStore"
|
||||||
|
assert site.tags == []
|
||||||
|
assert site.presense_strs != []
|
||||||
|
assert site.absence_strs != []
|
||||||
|
assert site.username_claimed == "KONAMI"
|
||||||
|
assert site.check_type == "message"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_replace_site(settings, test_db):
    """Submitter.dialog replaces an existing similar site instead of adding one.

    Uses the pre-populated ``test_db`` fixture (4 sites) and verifies that
    after the dialog the site count is unchanged and the chosen entry
    ("InvalidActive") has been overwritten with the newly detected data.
    """
    # Set up the pre-populated database, a logger, and a CLI-args stand-in.
    db = test_db
    logger = logging.getLogger("test_logger")
    logger.setLevel(logging.DEBUG)
    arg_fields = {
        'proxy': None,
        'cookie_file': None,
        'verbose': False,
        'db_file': 'test_db.json',
        'db': 'test_db.json',
    }
    args = type('Args', (object,), arg_fields)()

    # Fixture sanity check before the dialog runs.
    assert len(db.sites) == 4

    submitter = Submitter(db, settings, logger, args)

    # Scripted answers for each interactive prompt, in order.
    scripted_answers = [
        'y',              # Similar sites found, continue
        'InvalidActive',  # Choose site to replace
        '',               # Custom headers
        'y',              # Should we do redirects automatically?
        'KONAMI',         # Manually input username
        'y',              # Save the site in the Maigret DB
        '',               # Custom site name
        '',               # no custom tags
    ]

    with patch('builtins.input', side_effect=scripted_answers):
        result = await submitter.dialog(
            "https://play.google.com/store/apps/developer?id=KONAMI", None
        )
        await submitter.close()

    assert result is True
    # Replacement must not change the total number of sites.
    assert len(db.sites) == 4

    # The replaced entry keeps its name/tags but carries the new detection data.
    site = db.sites_dict["InvalidActive"]
    assert site.name == "InvalidActive"
    assert site.url_main == "https://play.google.com"
    assert site.tags == ['global', 'us']
    assert site.presense_strs != []
    assert site.absence_strs != []
    assert site.username_claimed == "KONAMI"
    assert site.check_type == "message"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
@pytest.mark.asyncio
async def test_dialog_adds_site_negative(settings):
    """Submitter.dialog returns False when no checkable site can be derived.

    Feeds a URL for which detection fails and asserts the dialog reports
    failure instead of saving anything.
    """
    # Set up a fresh database, a logger, and a throwaway CLI-args stand-in.
    db = MaigretDatabase()
    logger = logging.getLogger("test_logger")
    logger.setLevel(logging.INFO)
    arg_fields = {
        'proxy': None,
        'cookie_file': None,
        'verbose': False,
        'db_file': 'test_db.json',
        'db': 'test_db.json',
    }
    args = type('Args', (object,), arg_fields)()

    submitter = Submitter(db, settings, logger, args)

    # Scripted answers for each interactive prompt, in order.
    scripted_answers = [
        'sokrat',  # Manually input username
        'y',       # Save the site in the Maigret DB
    ]

    with patch('builtins.input', side_effect=scripted_answers):
        result = await submitter.dialog("https://icq.im/sokrat", None)
        await submitter.close()

    assert result is False
|
||||||
Reference in New Issue
Block a user