Improved submit mode, several sites added

This commit is contained in:
Soxoj
2021-03-21 13:59:59 +03:00
parent b586a4cd06
commit 2bb01f7019
6 changed files with 2746 additions and 2864 deletions
+2 -1
View File
@@ -44,6 +44,7 @@ unsupported_characters = '#'
QueryDraft = Tuple[Callable, Any, Any]
QueriesDraft = Iterable[QueryDraft]
class AsyncExecutor:
def __init__(self, *args, **kwargs):
self.logger = kwargs['logger']
@@ -472,7 +473,7 @@ async def maigret(username, site_dict, query_notify, logger,
headers.update(site.headers)
if not 'url' in site.__dict__:
if 'url' not in site.__dict__:
logger.error('No URL for site %s', site.name)
# URL of user on site (if it exists)
url = site.url.format(
+7 -4
View File
@@ -1,19 +1,23 @@
"""
Maigret main module
"""
import aiohttp
import asyncio
import logging
import os
import sys
import platform
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import requests
from socid_extractor import parse, __version__ as socid_version
from socid_extractor import extract, parse, __version__ as socid_version
from .checking import *
from .checking import timeout_check, supported_recursive_search_ids, self_check, unsupported_characters, maigret
from .notify import QueryNotifyPrint
from .report import save_csv_report, save_xmind_report, save_html_report, save_pdf_report, \
generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, check_supported_json_format, \
save_json_report
from .sites import MaigretDatabase
from .submit import submit_dialog
__version__ = '0.1.15'
@@ -273,7 +277,6 @@ async def main():
# Create the reports folder if it does not exist
os.makedirs(args.folderoutput, exist_ok=True)
report_path = args.folderoutput
# Define one report filename template
report_filepath_tpl = os.path.join(args.folderoutput, 'report_{username}{postfix}')
+1751 -1950
View File
File diff suppressed because it is too large Load Diff
+2
View File
@@ -15,6 +15,7 @@ SUPPORTED_TAGS = [
'discussion', 'sharing', 'writing', 'wiki', 'business', 'shopping', 'sport',
'books', 'news', 'documents', 'travel', 'maps', 'hobby', 'apps', 'classified',
'career', 'geosocial', 'streaming', 'education', 'networking', 'torrent',
'science', 'medicine',
]
@@ -154,6 +155,7 @@ class MaigretSite:
# remove dict keys
if isinstance(engine_data[k], dict) and is_exists:
for f in engine_data[k].keys():
if f in self_copy.__dict__[field]:
del self_copy.__dict__[field][f]
continue
# remove list items
+80 -23
View File
@@ -1,13 +1,15 @@
import difflib
import requests
from mock import Mock
from .checking import *
DESIRED_STRINGS = ["username", "not found", "пользователь", "profile", "lastname", "firstname", "biography",
"birthday", "репутация", "информация", "e-mail"]
SUPPOSED_USERNAMES = ['alex', 'god', 'admin', 'red', 'blue', 'john']
RATIO = 0.6
TOP_FEATURES = 5
URL_RE = re.compile(r'https?://(www\.)?')
@@ -20,7 +22,7 @@ def get_match_ratio(x):
]), 2)
def extract_domain(url):
def extract_mainpage_url(url):
    """Return *url* cut down to its first three '/'-separated pieces.

    For an absolute URL such as ``https://example.com/user/alex`` those
    pieces are the scheme, the empty piece between the two slashes and the
    host, so the result is ``https://example.com``.
    """
    pieces = url.split('/', 3)
    # Anything after the third piece is the path; drop it.
    del pieces[3:]
    return '/'.join(pieces)
@@ -38,7 +40,6 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
logger.info(f'Checking {site.name}...')
for username, status in check_data:
async with semaphore:
results_dict = await maigret(
username,
{site.name: site},
@@ -84,18 +85,45 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
return changes
async def submit_dialog(db, url_exists, cookie_file):
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
domain_raw = domain_raw.split('/')[0]
async def detect_known_engine(db, url_exists, url_mainpage):
    """Try to recognise a known site engine from the site's main page.

    Downloads ``url_mainpage`` and picks the first engine in ``db.engines``
    whose ``presenseStrs`` markers are all contained in the response text.
    For that engine one candidate site is built per entry of
    ``SUPPOSED_USERNAMES`` (each candidate differs only in its claimed
    username), and every candidate is updated from the engine definition.

    :param db: database object exposing ``engines`` and ``engines_dict``.
    :param url_exists: URL of an existing profile; not used by this function.
    :param url_mainpage: main page URL of the site, e.g. ``https://example.com``.
    :return: list of candidate ``MaigretSite`` objects, or ``None`` when the
        page could not be fetched, the response was falsy/empty, or no
        engine matched.
    """
    try:
        response = requests.get(url_mainpage)
    except Exception as error:
        # Best effort: any failure to fetch the page just means "no engine
        # detected", so report it and let the caller fall back.
        print(error)
        print('Some error while checking main page')
        return None

    # A falsy response or an empty body can never match any engine, so bail
    # out before walking the engine list.
    page_text = response.text if response else None
    if not page_text:
        return None

    # The same name is used for the positional MaigretSite name and for the
    # 'name' entry of the site data. The previous split('//')[0] produced the
    # URL scheme ('https:') instead of the host.
    site_name = url_mainpage.split('/')[-1]

    for engine in db.engines:
        markers = engine.__dict__.get('presenseStrs')
        if not markers or not all(marker in page_text for marker in markers):
            continue

        engine_name = engine.__dict__.get('name')
        print(f'Detected engine {engine_name} for site {url_mainpage}')

        sites = []
        for username in SUPPOSED_USERNAMES:
            site_data = {
                'urlMain': url_mainpage,
                'name': site_name,
                'engine': engine_name,
                'usernameClaimed': username,
                'usernameUnclaimed': 'noonewouldeverusethis7',
            }

            maigret_site = MaigretSite(site_name, site_data)
            maigret_site.update_from_engine(db.engines_dict[engine_name])
            sites.append(maigret_site)

        # Only the first matching engine is used.
        return sites

    return None
async def check_features_manually(db, url_exists, url_mainpage, cookie_file):
url_parts = url_exists.split('/')
supposed_username = url_parts[-1]
new_name = input(f'Is "{supposed_username}" a valid username? If not, write it manually: ')
@@ -138,21 +166,40 @@ async def submit_dialog(db, url_exists, cookie_file):
if features:
absence_list = features.split(',')
url_main = extract_domain(url_exists)
site_data = {
'absenceStrs': absence_list,
'presenseStrs': presence_list,
'url': url_user,
'urlMain': url_main,
'urlMain': url_mainpage,
'usernameClaimed': supposed_username,
'usernameUnclaimed': non_exist_username,
'checkType': 'message',
}
site = MaigretSite(url_main.split('/')[-1], site_data)
site = MaigretSite(url_mainpage.split('/')[-1], site_data)
return site
print(site.__dict__)
async def submit_dialog(db, url_exists, cookie_file):
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
domain_raw = domain_raw.split('/')[0]
# check for existence
matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
if matched_sites:
print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
status = lambda s: '(disabled)' if s.disabled else ''
url_block = lambda s: f'\n\t{s.url_main}\n\t{s.url}'
print('\n'.join([f'{site.name} {status(site)}{url_block(site)}' for site in matched_sites]))
return False
url_mainpage = extract_mainpage_url(url_exists)
sites = await detect_known_engine(db, url_exists, url_mainpage)
if not sites:
print('Unable to detect site engine, lets generate checking features')
sites = [await check_features_manually(db, url_exists, url_mainpage, cookie_file)]
print(sites[0].__dict__)
sem = asyncio.Semaphore(1)
log_level = logging.INFO
@@ -164,14 +211,24 @@ async def submit_dialog(db, url_exists, cookie_file):
logger = logging.getLogger('site-submit')
logger.setLevel(log_level)
result = await site_self_check(site, logger, sem, db)
found = False
chosen_site = None
for s in sites:
chosen_site = s
result = await site_self_check(s, logger, sem, db)
if not result['disabled']:
found = True
break
if result['disabled']:
print(f'Sorry, we couldn\'t find params to detect account presence/absence in {site.name}.')
if not found:
print(f'Sorry, we couldn\'t find params to detect account presence/absence in {chosen_site.name}.')
print('Try to run this mode again and increase features count or choose others.')
else:
if input(f'Site {site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y':
db.update_site(site)
if input(f'Site {chosen_site.name} successfully checked. Do you want to save it in the Maigret DB? [Yn] ').lower() in 'y':
print(chosen_site.json)
site_data = chosen_site.strip_engine_data()
print(site_data.json)
db.update_site(site_data)
return True
return False
+886 -868
View File
File diff suppressed because it is too large Load Diff