Added a couple of sites, fixed false positives (#286)

This commit is contained in:
Soxoj
2022-01-03 01:35:53 +03:00
committed by GitHub
parent 8801f7e6de
commit ecabf88c3a
4 changed files with 194 additions and 29 deletions
+1 -1
View File
@@ -536,7 +536,7 @@ async def main():
site_data = get_top_sites_for_id(args.id_type) site_data = get_top_sites_for_id(args.id_type)
if args.new_site_to_submit: if args.new_site_to_submit:
submitter = Submitter(db=db, logger=logger, settings=settings) submitter = Submitter(db=db, logger=logger, settings=settings, args=args)
is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file) is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
if is_submitted: if is_submitted:
db.save_to_file(db_file) db.save_to_file(db_file)
+152
View File
@@ -1833,6 +1833,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Bestfantasybooks": { "Bestfantasybooks": {
"disabled": true,
"tags": [ "tags": [
"us" "us"
], ],
@@ -4432,6 +4433,7 @@
] ]
}, },
"Facenama": { "Facenama": {
"disabled": true,
"tags": [ "tags": [
"ir" "ir"
], ],
@@ -28440,6 +28442,156 @@
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message", "checkType": "message",
"alexaRank": 6859 "alexaRank": 6859
},
"Worldis.me": {
"absenceStrs": [
"user_password",
"send_email"
],
"presenseStrs": [
"my_profile",
"profile_upi",
"UserInfo"
],
"url": "http://en.worldis.me/{username}",
"urlMain": "http://en.worldis.me",
"usernameClaimed": "admin",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 3233509,
"tags": [
"ru"
]
},
"photoshop-kopona.com": {
"absenceStrs": [
"<title>noonewouldeverusethis7 &raquo; \u0420\u0435\u0441\u0443\u0440\u0441\u044b \u0434\u043b\u044f \u0424\u043e\u0442\u043e\u0448\u043e\u043f\u0430</title>"
],
"presenseStrs": [
"offline",
"uspusertitle"
],
"url": "https://photoshop-kopona.com/ru/user/{username}/",
"urlMain": "https://photoshop-kopona.com",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 44106,
"tags": [
"ru"
]
},
"dumskaya.net": {
"absenceStrs": [
"><img class=nobo src=/banner/ps2_/ alt="
],
"presenseStrs": [
"><img class=nobo src=/banner/prague_/ alt="
],
"url": "https://dumskaya.net/user/{username}/",
"urlMain": "https://dumskaya.net",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 73617,
"tags": [
"ru"
]
},
"rblx.trade": {
"absenceStrs": [
"isRblxTradeException"
],
"presenseStrs": [
"userId"
],
"url": "https://rblx.trade/p/{username}",
"urlMain": "https://rblx.trade",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 362185,
"tags": [
"gaming"
]
},
"monitoringminecraft.ru": {
"absenceStrs": [
"shadowi"
],
"presenseStrs": [
"small"
],
"url": "https://monitoringminecraft.ru/player/{username}",
"urlMain": "https://monitoringminecraft.ru",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 115209,
"tags": [
"gaming"
]
},
"profi.ru": {
"absenceStrs": [
"page-404__paragraph"
],
"presenseStrs": [
"PROFILE",
"profiles",
"profileOIO",
"fullProfile",
"profileUGC2"
],
"url": "https://profi.ru/profile/{username}/",
"urlMain": "https://profi.ru",
"usernameClaimed": "EgorovRV",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 12037,
"tags": [
"freelance"
]
},
"app.airnfts.com": {
"absenceStrs": [
"user-not-found-div"
],
"presenseStrs": [
"username",
"ownerUsername",
"creatorUsername",
"name",
"user"
],
"url": "https://app.airnfts.com/creators/{username}",
"urlMain": "https://app.airnfts.com",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 30223
},
"xgm.guru": {
"absenceStrs": [
">Username:</label>"
],
"presenseStrs": [
"email",
"usernamereg",
"username-top",
"\u041e\u043f\u044b\u0442 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044f",
"check-username"
],
"url": "https://xgm.guru/user/{username}",
"urlMain": "https://xgm.guru",
"usernameClaimed": "test",
"usernameUnclaimed": "noonewouldeverusethis7",
"checkType": "message",
"alexaRank": 692341,
"tags": [
"forum",
"gaming"
]
} }
}, },
"engines": { "engines": {
+40 -27
View File
@@ -3,6 +3,7 @@ import json
import re import re
from typing import List from typing import List
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from aiohttp import TCPConnector, ClientSession
import requests import requests
from .activation import import_aiohttp_cookies from .activation import import_aiohttp_cookies
@@ -24,11 +25,24 @@ class Submitter:
TOP_FEATURES = 5 TOP_FEATURES = 5
URL_RE = re.compile(r"https?://(www\.)?") URL_RE = re.compile(r"https?://(www\.)?")
def __init__(self, db: MaigretDatabase, settings: Settings, logger): def __init__(self, db: MaigretDatabase, settings: Settings, logger, args):
self.settings = settings self.settings = settings
self.args = args
self.db = db self.db = db
self.logger = logger self.logger = logger
from aiohttp_socks import ProxyConnector
proxy = self.args.proxy
cookie_jar = None
if args.cookie_file:
cookie_jar = import_aiohttp_cookies(args.cookie_file)
connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
connector.verify_ssl = False
self.session = ClientSession(
connector=connector, trust_env=True, cookie_jar=cookie_jar
)
@staticmethod @staticmethod
def get_alexa_rank(site_url_main): def get_alexa_rank(site_url_main):
url = f"http://data.alexa.com/data?cli=10&url={site_url_main}" url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
@@ -63,6 +77,7 @@ class Submitter:
results_dict = await maigret( results_dict = await maigret(
username=username, username=username,
site_dict={site.name: site}, site_dict={site.name: site},
proxy=self.args.proxy,
logger=self.logger, logger=self.logger,
timeout=30, timeout=30,
id_type=site.type, id_type=site.type,
@@ -126,9 +141,11 @@ class Submitter:
return fields return fields
async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]: async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
resp_text = ''
try: try:
r = requests.get(url_mainpage) r = await self.session.get(url_mainpage)
self.logger.debug(r.text) resp_text = await r.text()
self.logger.debug(resp_text)
except Exception as e: except Exception as e:
self.logger.warning(e) self.logger.warning(e)
print("Some error while checking main page") print("Some error while checking main page")
@@ -136,10 +153,10 @@ class Submitter:
for engine in self.db.engines: for engine in self.db.engines:
strs_to_check = engine.__dict__.get("presenseStrs") strs_to_check = engine.__dict__.get("presenseStrs")
if strs_to_check and r and r.text: if strs_to_check and resp_text:
all_strs_in_response = True all_strs_in_response = True
for s in strs_to_check: for s in strs_to_check:
if s not in r.text: if s not in resp_text:
all_strs_in_response = False all_strs_in_response = False
sites = [] sites = []
if all_strs_in_response: if all_strs_in_response:
@@ -209,32 +226,28 @@ class Submitter:
headers = dict(self.HEADERS) headers = dict(self.HEADERS)
headers.update(custom_headers) headers.update(custom_headers)
# cookies exists_resp = await self.session.get(
cookie_dict = None url_exists,
if cookie_file:
self.logger.info(f'Use {cookie_file} for cookies')
cookie_jar = import_aiohttp_cookies(cookie_file)
cookie_dict = {c.key: c.value for c in cookie_jar}
exists_resp = requests.get(
url_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
)
self.logger.debug(url_exists)
self.logger.debug(exists_resp.status_code)
self.logger.debug(exists_resp.text)
non_exists_resp = requests.get(
url_not_exists,
cookies=cookie_dict,
headers=headers, headers=headers,
allow_redirects=redirects, allow_redirects=redirects,
) )
self.logger.debug(url_not_exists) exists_resp_text = await exists_resp.text()
self.logger.debug(non_exists_resp.status_code) self.logger.debug(url_exists)
self.logger.debug(non_exists_resp.text) self.logger.debug(exists_resp.status)
self.logger.debug(exists_resp_text)
a = exists_resp.text non_exists_resp = await self.session.get(
b = non_exists_resp.text url_not_exists,
headers=headers,
allow_redirects=redirects,
)
non_exists_resp_text = await non_exists_resp.text()
self.logger.debug(url_not_exists)
self.logger.debug(non_exists_resp.status)
self.logger.debug(non_exists_resp_text)
a = exists_resp_text
b = non_exists_resp_text
tokens_a = set(re.split(f'[{self.SEPARATORS}]', a)) tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
tokens_b = set(re.split(f'[{self.SEPARATORS}]', b)) tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
+1 -1
View File
@@ -25,7 +25,7 @@ RANKS.update({
'100000000': '100M', '100000000': '100M',
}) })
SEMAPHORE = threading.Semaphore(10) SEMAPHORE = threading.Semaphore(20)
def get_rank(domain_to_query, site, print_errors=True): def get_rank(domain_to_query, site, print_errors=True):
with SEMAPHORE: with SEMAPHORE: