mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
Added a couple of sites, fixed false positives (#286)
This commit is contained in:
+1
-1
@@ -536,7 +536,7 @@ async def main():
|
|||||||
site_data = get_top_sites_for_id(args.id_type)
|
site_data = get_top_sites_for_id(args.id_type)
|
||||||
|
|
||||||
if args.new_site_to_submit:
|
if args.new_site_to_submit:
|
||||||
submitter = Submitter(db=db, logger=logger, settings=settings)
|
submitter = Submitter(db=db, logger=logger, settings=settings, args=args)
|
||||||
is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
|
is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file)
|
||||||
if is_submitted:
|
if is_submitted:
|
||||||
db.save_to_file(db_file)
|
db.save_to_file(db_file)
|
||||||
|
|||||||
@@ -1833,6 +1833,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"Bestfantasybooks": {
|
"Bestfantasybooks": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
@@ -4432,6 +4433,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"Facenama": {
|
"Facenama": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"ir"
|
"ir"
|
||||||
],
|
],
|
||||||
@@ -28440,6 +28442,156 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7",
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"alexaRank": 6859
|
"alexaRank": 6859
|
||||||
|
},
|
||||||
|
"Worldis.me": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"user_password",
|
||||||
|
"send_email"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"my_profile",
|
||||||
|
"profile_upi",
|
||||||
|
"UserInfo"
|
||||||
|
],
|
||||||
|
"url": "http://en.worldis.me/{username}",
|
||||||
|
"urlMain": "http://en.worldis.me",
|
||||||
|
"usernameClaimed": "admin",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 3233509,
|
||||||
|
"tags": [
|
||||||
|
"ru"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"photoshop-kopona.com": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"<title>noonewouldeverusethis7 » \u0420\u0435\u0441\u0443\u0440\u0441\u044b \u0434\u043b\u044f \u0424\u043e\u0442\u043e\u0448\u043e\u043f\u0430</title>"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"offline",
|
||||||
|
"uspusertitle"
|
||||||
|
],
|
||||||
|
"url": "https://photoshop-kopona.com/ru/user/{username}/",
|
||||||
|
"urlMain": "https://photoshop-kopona.com",
|
||||||
|
"usernameClaimed": "test",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 44106,
|
||||||
|
"tags": [
|
||||||
|
"ru"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"dumskaya.net": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"><img class=nobo src=/banner/ps2_/ alt="
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"><img class=nobo src=/banner/prague_/ alt="
|
||||||
|
],
|
||||||
|
"url": "https://dumskaya.net/user/{username}/",
|
||||||
|
"urlMain": "https://dumskaya.net",
|
||||||
|
"usernameClaimed": "test",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 73617,
|
||||||
|
"tags": [
|
||||||
|
"ru"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"rblx.trade": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"isRblxTradeException"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"userId"
|
||||||
|
],
|
||||||
|
"url": "https://rblx.trade/p/{username}",
|
||||||
|
"urlMain": "https://rblx.trade",
|
||||||
|
"usernameClaimed": "test",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 362185,
|
||||||
|
"tags": [
|
||||||
|
"gaming"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"monitoringminecraft.ru": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"shadowi"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"small"
|
||||||
|
],
|
||||||
|
"url": "https://monitoringminecraft.ru/player/{username}",
|
||||||
|
"urlMain": "https://monitoringminecraft.ru",
|
||||||
|
"usernameClaimed": "test",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 115209,
|
||||||
|
"tags": [
|
||||||
|
"gaming"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"profi.ru": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"page-404__paragraph"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"PROFILE",
|
||||||
|
"profiles",
|
||||||
|
"profileOIO",
|
||||||
|
"fullProfile",
|
||||||
|
"profileUGC2"
|
||||||
|
],
|
||||||
|
"url": "https://profi.ru/profile/{username}/",
|
||||||
|
"urlMain": "https://profi.ru",
|
||||||
|
"usernameClaimed": "EgorovRV",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 12037,
|
||||||
|
"tags": [
|
||||||
|
"freelance"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"app.airnfts.com": {
|
||||||
|
"absenceStrs": [
|
||||||
|
"user-not-found-div"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"username",
|
||||||
|
"ownerUsername",
|
||||||
|
"creatorUsername",
|
||||||
|
"name",
|
||||||
|
"user"
|
||||||
|
],
|
||||||
|
"url": "https://app.airnfts.com/creators/{username}",
|
||||||
|
"urlMain": "https://app.airnfts.com",
|
||||||
|
"usernameClaimed": "test",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 30223
|
||||||
|
},
|
||||||
|
"xgm.guru": {
|
||||||
|
"absenceStrs": [
|
||||||
|
">Username:</label>"
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"email",
|
||||||
|
"usernamereg",
|
||||||
|
"username-top",
|
||||||
|
"\u041e\u043f\u044b\u0442 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044f",
|
||||||
|
"check-username"
|
||||||
|
],
|
||||||
|
"url": "https://xgm.guru/user/{username}",
|
||||||
|
"urlMain": "https://xgm.guru",
|
||||||
|
"usernameClaimed": "test",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7",
|
||||||
|
"checkType": "message",
|
||||||
|
"alexaRank": 692341,
|
||||||
|
"tags": [
|
||||||
|
"forum",
|
||||||
|
"gaming"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|||||||
+40
-27
@@ -3,6 +3,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
from typing import List
|
from typing import List
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
from aiohttp import TCPConnector, ClientSession
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from .activation import import_aiohttp_cookies
|
from .activation import import_aiohttp_cookies
|
||||||
@@ -24,11 +25,24 @@ class Submitter:
|
|||||||
TOP_FEATURES = 5
|
TOP_FEATURES = 5
|
||||||
URL_RE = re.compile(r"https?://(www\.)?")
|
URL_RE = re.compile(r"https?://(www\.)?")
|
||||||
|
|
||||||
def __init__(self, db: MaigretDatabase, settings: Settings, logger):
|
def __init__(self, db: MaigretDatabase, settings: Settings, logger, args):
|
||||||
self.settings = settings
|
self.settings = settings
|
||||||
|
self.args = args
|
||||||
self.db = db
|
self.db = db
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
|
|
||||||
|
from aiohttp_socks import ProxyConnector
|
||||||
|
proxy = self.args.proxy
|
||||||
|
cookie_jar = None
|
||||||
|
if args.cookie_file:
|
||||||
|
cookie_jar = import_aiohttp_cookies(args.cookie_file)
|
||||||
|
|
||||||
|
connector = ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
|
||||||
|
connector.verify_ssl = False
|
||||||
|
self.session = ClientSession(
|
||||||
|
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_alexa_rank(site_url_main):
|
def get_alexa_rank(site_url_main):
|
||||||
url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
|
url = f"http://data.alexa.com/data?cli=10&url={site_url_main}"
|
||||||
@@ -63,6 +77,7 @@ class Submitter:
|
|||||||
results_dict = await maigret(
|
results_dict = await maigret(
|
||||||
username=username,
|
username=username,
|
||||||
site_dict={site.name: site},
|
site_dict={site.name: site},
|
||||||
|
proxy=self.args.proxy,
|
||||||
logger=self.logger,
|
logger=self.logger,
|
||||||
timeout=30,
|
timeout=30,
|
||||||
id_type=site.type,
|
id_type=site.type,
|
||||||
@@ -126,9 +141,11 @@ class Submitter:
|
|||||||
return fields
|
return fields
|
||||||
|
|
||||||
async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
|
async def detect_known_engine(self, url_exists, url_mainpage) -> List[MaigretSite]:
|
||||||
|
resp_text = ''
|
||||||
try:
|
try:
|
||||||
r = requests.get(url_mainpage)
|
r = await self.session.get(url_mainpage)
|
||||||
self.logger.debug(r.text)
|
resp_text = await r.text()
|
||||||
|
self.logger.debug(resp_text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(e)
|
self.logger.warning(e)
|
||||||
print("Some error while checking main page")
|
print("Some error while checking main page")
|
||||||
@@ -136,10 +153,10 @@ class Submitter:
|
|||||||
|
|
||||||
for engine in self.db.engines:
|
for engine in self.db.engines:
|
||||||
strs_to_check = engine.__dict__.get("presenseStrs")
|
strs_to_check = engine.__dict__.get("presenseStrs")
|
||||||
if strs_to_check and r and r.text:
|
if strs_to_check and resp_text:
|
||||||
all_strs_in_response = True
|
all_strs_in_response = True
|
||||||
for s in strs_to_check:
|
for s in strs_to_check:
|
||||||
if s not in r.text:
|
if s not in resp_text:
|
||||||
all_strs_in_response = False
|
all_strs_in_response = False
|
||||||
sites = []
|
sites = []
|
||||||
if all_strs_in_response:
|
if all_strs_in_response:
|
||||||
@@ -209,32 +226,28 @@ class Submitter:
|
|||||||
headers = dict(self.HEADERS)
|
headers = dict(self.HEADERS)
|
||||||
headers.update(custom_headers)
|
headers.update(custom_headers)
|
||||||
|
|
||||||
# cookies
|
exists_resp = await self.session.get(
|
||||||
cookie_dict = None
|
url_exists,
|
||||||
if cookie_file:
|
|
||||||
self.logger.info(f'Use {cookie_file} for cookies')
|
|
||||||
cookie_jar = import_aiohttp_cookies(cookie_file)
|
|
||||||
cookie_dict = {c.key: c.value for c in cookie_jar}
|
|
||||||
|
|
||||||
exists_resp = requests.get(
|
|
||||||
url_exists, cookies=cookie_dict, headers=headers, allow_redirects=redirects
|
|
||||||
)
|
|
||||||
self.logger.debug(url_exists)
|
|
||||||
self.logger.debug(exists_resp.status_code)
|
|
||||||
self.logger.debug(exists_resp.text)
|
|
||||||
|
|
||||||
non_exists_resp = requests.get(
|
|
||||||
url_not_exists,
|
|
||||||
cookies=cookie_dict,
|
|
||||||
headers=headers,
|
headers=headers,
|
||||||
allow_redirects=redirects,
|
allow_redirects=redirects,
|
||||||
)
|
)
|
||||||
self.logger.debug(url_not_exists)
|
exists_resp_text = await exists_resp.text()
|
||||||
self.logger.debug(non_exists_resp.status_code)
|
self.logger.debug(url_exists)
|
||||||
self.logger.debug(non_exists_resp.text)
|
self.logger.debug(exists_resp.status)
|
||||||
|
self.logger.debug(exists_resp_text)
|
||||||
|
|
||||||
a = exists_resp.text
|
non_exists_resp = await self.session.get(
|
||||||
b = non_exists_resp.text
|
url_not_exists,
|
||||||
|
headers=headers,
|
||||||
|
allow_redirects=redirects,
|
||||||
|
)
|
||||||
|
non_exists_resp_text = await non_exists_resp.text()
|
||||||
|
self.logger.debug(url_not_exists)
|
||||||
|
self.logger.debug(non_exists_resp.status)
|
||||||
|
self.logger.debug(non_exists_resp_text)
|
||||||
|
|
||||||
|
a = exists_resp_text
|
||||||
|
b = non_exists_resp_text
|
||||||
|
|
||||||
tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
|
tokens_a = set(re.split(f'[{self.SEPARATORS}]', a))
|
||||||
tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
|
tokens_b = set(re.split(f'[{self.SEPARATORS}]', b))
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ RANKS.update({
|
|||||||
'100000000': '100M',
|
'100000000': '100M',
|
||||||
})
|
})
|
||||||
|
|
||||||
SEMAPHORE = threading.Semaphore(10)
|
SEMAPHORE = threading.Semaphore(20)
|
||||||
|
|
||||||
def get_rank(domain_to_query, site, print_errors=True):
|
def get_rank(domain_to_query, site, print_errors=True):
|
||||||
with SEMAPHORE:
|
with SEMAPHORE:
|
||||||
|
|||||||
Reference in New Issue
Block a user