mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-06 22:19:01 +00:00
Reformat code, some sites added
This commit is contained in:
@@ -1,11 +1,9 @@
|
|||||||
import aiohttp
|
|
||||||
from aiohttp import CookieJar
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
from http.cookiejar import MozillaCookieJar
|
from http.cookiejar import MozillaCookieJar
|
||||||
from http.cookies import Morsel
|
from http.cookies import Morsel
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from aiohttp import CookieJar
|
||||||
|
|
||||||
|
|
||||||
class ParsingActivator:
|
class ParsingActivator:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
+6
-2
@@ -467,8 +467,12 @@ async def maigret(username, site_dict, query_notify, logger,
|
|||||||
if no_progressbar:
|
if no_progressbar:
|
||||||
await asyncio.gather(*tasks)
|
await asyncio.gather(*tasks)
|
||||||
else:
|
else:
|
||||||
for f in tqdm.asyncio.tqdm.as_completed(tasks):
|
for f in tqdm.asyncio.tqdm.as_completed(tasks, timeout=timeout):
|
||||||
await f
|
try:
|
||||||
|
await f
|
||||||
|
except asyncio.exceptions.TimeoutError:
|
||||||
|
# TODO: write timeout to results
|
||||||
|
pass
|
||||||
|
|
||||||
await session.close()
|
await session.close()
|
||||||
|
|
||||||
|
|||||||
+2
-4
@@ -4,7 +4,6 @@ Maigret main module
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import sys
|
|
||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -176,7 +175,7 @@ async def main():
|
|||||||
action="store", metavar='REPORT_TYPE',
|
action="store", metavar='REPORT_TYPE',
|
||||||
dest="json", default='', type=check_supported_json_format,
|
dest="json", default='', type=check_supported_json_format,
|
||||||
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
|
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
|
||||||
" (one report per username)."
|
" (one report per username)."
|
||||||
)
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@@ -204,7 +203,7 @@ async def main():
|
|||||||
u: args.id_type
|
u: args.id_type
|
||||||
for u in args.username
|
for u in args.username
|
||||||
if u not in ['-']
|
if u not in ['-']
|
||||||
and u not in args.ignore_ids_list
|
and u not in args.ignore_ids_list
|
||||||
}
|
}
|
||||||
|
|
||||||
parsing_enabled = not args.disable_extracting
|
parsing_enabled = not args.disable_extracting
|
||||||
@@ -380,7 +379,6 @@ async def main():
|
|||||||
save_json_report(filename, username, results, report_type=args.json)
|
save_json_report(filename, username, results, report_type=args.json)
|
||||||
query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
|
query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
|
||||||
|
|
||||||
|
|
||||||
# reporting for all the result
|
# reporting for all the result
|
||||||
if general_results:
|
if general_results:
|
||||||
if args.html or args.pdf:
|
if args.html or args.pdf:
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ This module defines the objects for notifying the caller about the
|
|||||||
results of queries.
|
results of queries.
|
||||||
"""
|
"""
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from colorama import Fore, Style, init
|
from colorama import Fore, Style, init
|
||||||
|
|
||||||
from .result import QueryStatus
|
from .result import QueryStatus
|
||||||
|
|||||||
+25
-15
@@ -1,15 +1,16 @@
|
|||||||
import csv
|
import csv
|
||||||
import json
|
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from argparse import ArgumentTypeError
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
import pycountry
|
import pycountry
|
||||||
import xmind
|
import xmind
|
||||||
from datetime import datetime
|
from dateutil.parser import parse as parse_datetime_str
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
from xhtml2pdf import pisa
|
from xhtml2pdf import pisa
|
||||||
from argparse import ArgumentTypeError
|
|
||||||
from dateutil.parser import parse as parse_datetime_str
|
|
||||||
|
|
||||||
from .result import QueryStatus
|
from .result import QueryStatus
|
||||||
from .utils import is_country_tag, CaseConverter, enrich_link_str
|
from .utils import is_country_tag, CaseConverter, enrich_link_str
|
||||||
@@ -19,10 +20,11 @@ SUPPORTED_JSON_REPORT_FORMATS = [
|
|||||||
'ndjson',
|
'ndjson',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
UTILS
|
UTILS
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
def filter_supposed_data(data):
|
def filter_supposed_data(data):
|
||||||
### interesting fields
|
### interesting fields
|
||||||
allowed_fields = ['fullname', 'gender', 'location', 'age']
|
allowed_fields = ['fullname', 'gender', 'location', 'age']
|
||||||
@@ -35,6 +37,8 @@ def filter_supposed_data(data):
|
|||||||
'''
|
'''
|
||||||
REPORTS SAVING
|
REPORTS SAVING
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
def save_csv_report(filename: str, username: str, results: dict):
|
def save_csv_report(filename: str, username: str, results: dict):
|
||||||
with open(filename, 'w', newline='', encoding='utf-8') as f:
|
with open(filename, 'w', newline='', encoding='utf-8') as f:
|
||||||
generate_csv_report(username, results, f)
|
generate_csv_report(username, results, f)
|
||||||
@@ -58,6 +62,7 @@ def save_pdf_report(filename: str, context: dict):
|
|||||||
with open(filename, 'w+b') as f:
|
with open(filename, 'w+b') as f:
|
||||||
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
|
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
|
||||||
|
|
||||||
|
|
||||||
def save_json_report(filename: str, username: str, results: dict, report_type: str):
|
def save_json_report(filename: str, username: str, results: dict, report_type: str):
|
||||||
with open(filename, 'w', encoding='utf-8') as f:
|
with open(filename, 'w', encoding='utf-8') as f:
|
||||||
generate_json_report(username, results, f, report_type=report_type)
|
generate_json_report(username, results, f, report_type=report_type)
|
||||||
@@ -66,10 +71,13 @@ def save_json_report(filename: str, username: str, results: dict, report_type: s
|
|||||||
'''
|
'''
|
||||||
REPORTS GENERATING
|
REPORTS GENERATING
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
def generate_report_template(is_pdf: bool):
|
def generate_report_template(is_pdf: bool):
|
||||||
"""
|
"""
|
||||||
HTML/PDF template generation
|
HTML/PDF template generation
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def get_resource_content(filename):
|
def get_resource_content(filename):
|
||||||
return open(os.path.join(maigret_path, 'resources', filename)).read()
|
return open(os.path.join(maigret_path, 'resources', filename)).read()
|
||||||
|
|
||||||
@@ -112,6 +120,9 @@ def generate_report_context(username_results: list):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
status = dictionary.get('status')
|
status = dictionary.get('status')
|
||||||
|
if not status: # FIXME: currently in case of timeout
|
||||||
|
continue
|
||||||
|
|
||||||
if status.ids_data:
|
if status.ids_data:
|
||||||
dictionary['ids_data'] = status.ids_data
|
dictionary['ids_data'] = status.ids_data
|
||||||
extended_info_count += 1
|
extended_info_count += 1
|
||||||
@@ -166,7 +177,6 @@ def generate_report_context(username_results: list):
|
|||||||
for t in status.tags:
|
for t in status.tags:
|
||||||
tags[t] = tags.get(t, 0) + 1
|
tags[t] = tags.get(t, 0) + 1
|
||||||
|
|
||||||
|
|
||||||
brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')
|
brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')
|
||||||
|
|
||||||
if new_ids:
|
if new_ids:
|
||||||
@@ -177,8 +187,6 @@ def generate_report_context(username_results: list):
|
|||||||
|
|
||||||
brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
|
brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
brief = ' '.join(brief_text).strip()
|
brief = ' '.join(brief_text).strip()
|
||||||
tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
|
tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
|
||||||
|
|
||||||
@@ -221,7 +229,7 @@ def generate_csv_report(username: str, results: dict, csvfile):
|
|||||||
results[site]['url_user'],
|
results[site]['url_user'],
|
||||||
str(results[site]['status'].status),
|
str(results[site]['status'].status),
|
||||||
results[site]['http_status'],
|
results[site]['http_status'],
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
def generate_txt_report(username: str, results: dict, file):
|
def generate_txt_report(username: str, results: dict, file):
|
||||||
@@ -253,16 +261,19 @@ def generate_json_report(username: str, results: dict, file, report_type):
|
|||||||
|
|
||||||
if is_report_per_line:
|
if is_report_per_line:
|
||||||
data['sitename'] = sitename
|
data['sitename'] = sitename
|
||||||
file.write(json.dumps(data)+'\n')
|
file.write(json.dumps(data) + '\n')
|
||||||
else:
|
else:
|
||||||
all_json[sitename] = data
|
all_json[sitename] = data
|
||||||
|
|
||||||
if not is_report_per_line:
|
if not is_report_per_line:
|
||||||
file.write(json.dumps(all_json))
|
file.write(json.dumps(all_json))
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
XMIND 8 Functions
|
XMIND 8 Functions
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
def save_xmind_report(filename, username, results):
|
def save_xmind_report(filename, username, results):
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
os.remove(filename)
|
os.remove(filename)
|
||||||
@@ -277,9 +288,9 @@ def design_sheet(sheet, username, results):
|
|||||||
alltags = {}
|
alltags = {}
|
||||||
supposed_data = {}
|
supposed_data = {}
|
||||||
|
|
||||||
sheet.setTitle("%s Analysis"%(username))
|
sheet.setTitle("%s Analysis" % (username))
|
||||||
root_topic1 = sheet.getRootTopic()
|
root_topic1 = sheet.getRootTopic()
|
||||||
root_topic1.setTitle("%s"%(username))
|
root_topic1.setTitle("%s" % (username))
|
||||||
|
|
||||||
undefinedsection = root_topic1.addSubTopic()
|
undefinedsection = root_topic1.addSubTopic()
|
||||||
undefinedsection.setTitle("Undefined")
|
undefinedsection.setTitle("Undefined")
|
||||||
@@ -333,7 +344,7 @@ def design_sheet(sheet, username, results):
|
|||||||
currentsublabel.setTitle("%s: %s" % (k, currentval))
|
currentsublabel.setTitle("%s: %s" % (k, currentval))
|
||||||
### Add Supposed DATA
|
### Add Supposed DATA
|
||||||
filterede_supposed_data = filter_supposed_data(supposed_data)
|
filterede_supposed_data = filter_supposed_data(supposed_data)
|
||||||
if(len(filterede_supposed_data) >0):
|
if (len(filterede_supposed_data) > 0):
|
||||||
undefinedsection = root_topic1.addSubTopic()
|
undefinedsection = root_topic1.addSubTopic()
|
||||||
undefinedsection.setTitle("SUPPOSED DATA")
|
undefinedsection.setTitle("SUPPOSED DATA")
|
||||||
for k, v in filterede_supposed_data.items():
|
for k, v in filterede_supposed_data.items():
|
||||||
@@ -344,6 +355,5 @@ def design_sheet(sheet, username, results):
|
|||||||
def check_supported_json_format(value):
|
def check_supported_json_format(value):
|
||||||
if value and not value in SUPPORTED_JSON_REPORT_FORMATS:
|
if value and not value in SUPPORTED_JSON_REPORT_FORMATS:
|
||||||
raise ArgumentTypeError(f'JSON report type must be one of the following types: '
|
raise ArgumentTypeError(f'JSON report type must be one of the following types: '
|
||||||
+ ', '.join(SUPPORTED_JSON_REPORT_FORMATS))
|
+ ', '.join(SUPPORTED_JSON_REPORT_FORMATS))
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|||||||
@@ -12349,7 +12349,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"authorization": "Bearer BQCEWXdzCPImYp4zhhbEssMRKqvUasJb9vVoe2A3J5eFMhTfn0b5jPkUHGJ9Fe0_HCaF81AMeRnSD9KzIPg"
|
"authorization": "Bearer BQA6sdhtUg3hadjln7DCoAK6sLn7KrHfsn2DObW2gr-W3HgF0h1KZGVYgwispRDR1tqRntVeTd0Duvb2q4g"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||||
@@ -14062,7 +14062,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTYwOTgwODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.tTecsUjIJ0KCcMxOT8OgkCp-P3ezg5RR0FGqtiejqE8"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTYxMDcyNjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.kzWxBf1qCJwjpZYUP6w-Pf4VptBMKpKUaMw8VnYwtPU"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
@@ -14969,6 +14969,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"YandexLocal": {
|
"YandexLocal": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
@@ -23595,6 +23596,67 @@
|
|||||||
"urlMain": "https://calendly.com",
|
"urlMain": "https://calendly.com",
|
||||||
"usernameClaimed": "john",
|
"usernameClaimed": "john",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"depop.com": {
|
||||||
|
"checkType": "message",
|
||||||
|
"presenseStrs": [
|
||||||
|
"first_name"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"invalidUrlError__message"
|
||||||
|
],
|
||||||
|
"url": "https://www.depop.com/{username}",
|
||||||
|
"urlMain": "https://www.depop.com",
|
||||||
|
"usernameClaimed": "blue",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"community.brave.com": {
|
||||||
|
"engine": "Discourse",
|
||||||
|
"urlMain": "https://community.brave.com",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"community.endlessos.com": {
|
||||||
|
"engine": "Discourse",
|
||||||
|
"urlMain": "https://community.endlessos.com",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.endeavouros.com": {
|
||||||
|
"engine": "Discourse",
|
||||||
|
"urlMain": "https://forum.endeavouros.com",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.garudalinux.org": {
|
||||||
|
"engine": "Discourse",
|
||||||
|
"urlMain": "https://forum.garudalinux.org",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.snapcraft.io": {
|
||||||
|
"engine": "Discourse",
|
||||||
|
"urlMain": "https://forum.snapcraft.io",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"forum.zorin.com": {
|
||||||
|
"engine": "Discourse",
|
||||||
|
"urlMain": "https://forum.zorin.com",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"codeseller.ru": {
|
||||||
|
"engine": "Wordpress/Author",
|
||||||
|
"urlMain": "https://codeseller.ru",
|
||||||
|
"usernameClaimed": "alex",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
|
},
|
||||||
|
"linuxpip.org": {
|
||||||
|
"engine": "Wordpress/Author",
|
||||||
|
"urlMain": "https://linuxpip.org",
|
||||||
|
"usernameClaimed": "diehard",
|
||||||
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
@@ -23689,6 +23751,24 @@
|
|||||||
"<meta name=\"generator\" content=\"Discourse"
|
"<meta name=\"generator\" content=\"Discourse"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"Wordpress/Author": {
|
||||||
|
"name": "Wordpress/Author",
|
||||||
|
"site": {
|
||||||
|
"presenseStrs": [
|
||||||
|
"author-",
|
||||||
|
"author/"
|
||||||
|
],
|
||||||
|
"absenceStrs": [
|
||||||
|
"error404"
|
||||||
|
],
|
||||||
|
"checkType": "message",
|
||||||
|
"url": "{urlMain}/author/{username}/"
|
||||||
|
},
|
||||||
|
"presenseStrs": [
|
||||||
|
"/wp-admin",
|
||||||
|
"/wp-includes/wlwmanifest.xml"
|
||||||
|
]
|
||||||
|
},
|
||||||
"engine404": {
|
"engine404": {
|
||||||
"name": "engine404",
|
"name": "engine404",
|
||||||
"site": {
|
"site": {
|
||||||
|
|||||||
+3
-9
@@ -2,7 +2,6 @@
|
|||||||
"""Maigret Sites Information"""
|
"""Maigret Sites Information"""
|
||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -87,13 +86,12 @@ class MaigretSite:
|
|||||||
url = self.url
|
url = self.url
|
||||||
for group in ['urlMain', 'urlSubpath']:
|
for group in ['urlMain', 'urlSubpath']:
|
||||||
if group in url:
|
if group in url:
|
||||||
url = url.replace('{'+group+'}', self.__dict__[CaseConverter.camel_to_snake(group)])
|
url = url.replace('{' + group + '}', self.__dict__[CaseConverter.camel_to_snake(group)])
|
||||||
|
|
||||||
self.url_regexp = URLMatcher.make_profile_url_regexp(url, self.regex_check)
|
self.url_regexp = URLMatcher.make_profile_url_regexp(url, self.regex_check)
|
||||||
|
|
||||||
def detect_username(self, url: str) -> str:
|
def detect_username(self, url: str) -> str:
|
||||||
if self.url_regexp:
|
if self.url_regexp:
|
||||||
import logging
|
|
||||||
match_groups = self.url_regexp.match(url)
|
match_groups = self.url_regexp.match(url)
|
||||||
if match_groups:
|
if match_groups:
|
||||||
return match_groups.groups()[-1].rstrip('/')
|
return match_groups.groups()[-1].rstrip('/')
|
||||||
@@ -238,7 +236,6 @@ class MaigretDatabase:
|
|||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
def load_from_json(self, json_data: dict) -> MaigretDatabase:
|
def load_from_json(self, json_data: dict) -> MaigretDatabase:
|
||||||
# Add all of site information from the json file to internal site list.
|
# Add all of site information from the json file to internal site list.
|
||||||
site_data = json_data.get("sites", {})
|
site_data = json_data.get("sites", {})
|
||||||
@@ -263,7 +260,6 @@ class MaigretDatabase:
|
|||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
def load_from_str(self, db_str: str) -> MaigretDatabase:
|
def load_from_str(self, db_str: str) -> MaigretDatabase:
|
||||||
try:
|
try:
|
||||||
data = json.loads(db_str)
|
data = json.loads(db_str)
|
||||||
@@ -274,7 +270,6 @@ class MaigretDatabase:
|
|||||||
|
|
||||||
return self.load_from_json(data)
|
return self.load_from_json(data)
|
||||||
|
|
||||||
|
|
||||||
def load_from_url(self, url: str) -> MaigretDatabase:
|
def load_from_url(self, url: str) -> MaigretDatabase:
|
||||||
is_url_valid = url.startswith('http://') or url.startswith('https://')
|
is_url_valid = url.startswith('http://') or url.startswith('https://')
|
||||||
|
|
||||||
@@ -303,7 +298,6 @@ class MaigretDatabase:
|
|||||||
|
|
||||||
return self.load_from_json(data)
|
return self.load_from_json(data)
|
||||||
|
|
||||||
|
|
||||||
def load_from_file(self, filename: str) -> MaigretDatabase:
|
def load_from_file(self, filename: str) -> MaigretDatabase:
|
||||||
try:
|
try:
|
||||||
with open(filename, 'r', encoding='utf-8') as file:
|
with open(filename, 'r', encoding='utf-8') as file:
|
||||||
@@ -364,7 +358,7 @@ class MaigretDatabase:
|
|||||||
continue
|
continue
|
||||||
tags[tag] = tags.get(tag, 0) + 1
|
tags[tag] = tags.get(tag, 0) + 1
|
||||||
|
|
||||||
output += f'Enabled/total sites: {total_count-disabled_count}/{total_count}\n'
|
output += f'Enabled/total sites: {total_count - disabled_count}/{total_count}\n'
|
||||||
output += 'Top sites\' profile URLs:\n'
|
output += 'Top sites\' profile URLs:\n'
|
||||||
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
|
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
|
||||||
if count == 1:
|
if count == 1:
|
||||||
@@ -377,4 +371,4 @@ class MaigretDatabase:
|
|||||||
mark = ' (non-standard)'
|
mark = ' (non-standard)'
|
||||||
output += f'{count}\t{tag}{mark}\n'
|
output += f'{count}\t{tag}{mark}\n'
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|||||||
+1
-2
@@ -1,5 +1,4 @@
|
|||||||
import difflib
|
import difflib
|
||||||
import json
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from mock import Mock
|
from mock import Mock
|
||||||
@@ -89,7 +88,7 @@ async def submit_dialog(db, url_exists, cookie_file):
|
|||||||
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
|
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
|
||||||
domain_raw = domain_raw.split('/')[0]
|
domain_raw = domain_raw.split('/')[0]
|
||||||
|
|
||||||
matched_sites = list(filter(lambda x: domain_raw in x.url_main+x.url, db.sites))
|
matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
|
||||||
if matched_sites:
|
if matched_sites:
|
||||||
print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
|
print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
|
||||||
status = lambda s: '(disabled)' if s.disabled else ''
|
status = lambda s: '(disabled)' if s.disabled else ''
|
||||||
|
|||||||
+1
-2
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
class CaseConverter:
|
class CaseConverter:
|
||||||
@@ -55,4 +54,4 @@ class URLMatcher:
|
|||||||
url_regexp = url_main_part.replace('{username}', f'({username_regexp})')
|
url_regexp = url_main_part.replace('{username}', f'({username_regexp})')
|
||||||
regexp_str = self._HTTP_URL_RE_STR.replace('(.+)', url_regexp)
|
regexp_str = self._HTTP_URL_RE_STR.replace('(.+)', url_regexp)
|
||||||
|
|
||||||
return re.compile(regexp_str)
|
return re.compile(regexp_str)
|
||||||
|
|||||||
+2
-2
@@ -1,11 +1,11 @@
|
|||||||
import glob
|
import glob
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from _pytest.mark import Mark
|
from _pytest.mark import Mark
|
||||||
from mock import Mock
|
|
||||||
|
|
||||||
from maigret.sites import MaigretDatabase, MaigretSite
|
from maigret.sites import MaigretDatabase
|
||||||
|
|
||||||
CUR_PATH = os.path.dirname(os.path.realpath(__file__))
|
CUR_PATH = os.path.dirname(os.path.realpath(__file__))
|
||||||
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
|
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""Maigret activation test functions"""
|
"""Maigret activation test functions"""
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import pytest
|
import pytest
|
||||||
from mock import Mock
|
from mock import Mock
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
"""Maigret main module test functions"""
|
"""Maigret main module test functions"""
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from mock import Mock
|
from mock import Mock
|
||||||
|
|
||||||
from maigret.maigret import self_check
|
from maigret.maigret import self_check
|
||||||
from maigret.sites import MaigretDatabase, MaigretSite
|
from maigret.sites import MaigretDatabase
|
||||||
|
|
||||||
EXAMPLE_DB = {
|
EXAMPLE_DB = {
|
||||||
'engines': {
|
'engines': {
|
||||||
|
|||||||
+20
-20
@@ -1,33 +1,32 @@
|
|||||||
"""Maigret Database test functions"""
|
"""Maigret Database test functions"""
|
||||||
from maigret.sites import MaigretDatabase, MaigretSite
|
from maigret.sites import MaigretDatabase, MaigretSite
|
||||||
|
|
||||||
|
|
||||||
EXAMPLE_DB = {
|
EXAMPLE_DB = {
|
||||||
'engines': {
|
'engines': {
|
||||||
"XenForo": {
|
"XenForo": {
|
||||||
"presenseStrs": ["XenForo"],
|
"presenseStrs": ["XenForo"],
|
||||||
"site": {
|
"site": {
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
"The specified member cannot be found. Please enter a member's entire name.",
|
"The specified member cannot be found. Please enter a member's entire name.",
|
||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"errors": {
|
"errors": {
|
||||||
"You must be logged-in to do that.": "Login required"
|
"You must be logged-in to do that.": "Login required"
|
||||||
},
|
},
|
||||||
"url": "{urlMain}{urlSubpath}/members/?username={username}"
|
"url": "{urlMain}{urlSubpath}/members/?username={username}"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'sites': {
|
'sites': {
|
||||||
"Amperka": {
|
"Amperka": {
|
||||||
"engine": "XenForo",
|
"engine": "XenForo",
|
||||||
"rank": 121613,
|
"rank": 121613,
|
||||||
"tags": [
|
"tags": [
|
||||||
"ru"
|
"ru"
|
||||||
],
|
],
|
||||||
"urlMain": "http://forum.amperka.ru",
|
"urlMain": "http://forum.amperka.ru",
|
||||||
"usernameClaimed": "adam",
|
"usernameClaimed": "adam",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -167,6 +166,7 @@ def test_ranked_sites_dict_disabled():
|
|||||||
assert len(db.ranked_sites_dict()) == 2
|
assert len(db.ranked_sites_dict()) == 2
|
||||||
assert len(db.ranked_sites_dict(disabled=False)) == 1
|
assert len(db.ranked_sites_dict(disabled=False)) == 1
|
||||||
|
|
||||||
|
|
||||||
def test_ranked_sites_dict_id_type():
|
def test_ranked_sites_dict_id_type():
|
||||||
db = MaigretDatabase()
|
db = MaigretDatabase()
|
||||||
db.update_site(MaigretSite('1', {}))
|
db.update_site(MaigretSite('1', {}))
|
||||||
|
|||||||
+46
-38
@@ -1,66 +1,74 @@
|
|||||||
"""Maigret utils test functions"""
|
"""Maigret utils test functions"""
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from maigret.utils import CaseConverter, is_country_tag, enrich_link_str, URLMatcher
|
from maigret.utils import CaseConverter, is_country_tag, enrich_link_str, URLMatcher
|
||||||
|
|
||||||
|
|
||||||
def test_case_convert_camel_to_snake():
|
def test_case_convert_camel_to_snake():
|
||||||
a = 'SnakeCasedString'
|
a = 'SnakeCasedString'
|
||||||
b = CaseConverter.camel_to_snake(a)
|
b = CaseConverter.camel_to_snake(a)
|
||||||
|
|
||||||
|
assert b == 'snake_cased_string'
|
||||||
|
|
||||||
assert b == 'snake_cased_string'
|
|
||||||
|
|
||||||
def test_case_convert_snake_to_camel():
|
def test_case_convert_snake_to_camel():
|
||||||
a = 'camel_cased_string'
|
a = 'camel_cased_string'
|
||||||
b = CaseConverter.snake_to_camel(a)
|
b = CaseConverter.snake_to_camel(a)
|
||||||
|
|
||||||
|
assert b == 'camelCasedString'
|
||||||
|
|
||||||
assert b == 'camelCasedString'
|
|
||||||
|
|
||||||
def test_case_convert_snake_to_title():
|
def test_case_convert_snake_to_title():
|
||||||
a = 'camel_cased_string'
|
a = 'camel_cased_string'
|
||||||
b = CaseConverter.snake_to_title(a)
|
b = CaseConverter.snake_to_title(a)
|
||||||
|
|
||||||
|
assert b == 'Camel cased string'
|
||||||
|
|
||||||
assert b == 'Camel cased string'
|
|
||||||
|
|
||||||
def test_is_country_tag():
|
def test_is_country_tag():
|
||||||
assert is_country_tag('ru') == True
|
assert is_country_tag('ru') == True
|
||||||
assert is_country_tag('FR') == True
|
assert is_country_tag('FR') == True
|
||||||
|
|
||||||
assert is_country_tag('a1') == False
|
assert is_country_tag('a1') == False
|
||||||
assert is_country_tag('dating') == False
|
assert is_country_tag('dating') == False
|
||||||
|
|
||||||
|
assert is_country_tag('global') == True
|
||||||
|
|
||||||
assert is_country_tag('global') == True
|
|
||||||
|
|
||||||
def test_enrich_link_str():
|
def test_enrich_link_str():
|
||||||
assert enrich_link_str('test') == 'test'
|
assert enrich_link_str('test') == 'test'
|
||||||
assert enrich_link_str(' www.flickr.com/photos/alexaimephotography/') == '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'
|
assert enrich_link_str(
|
||||||
|
' www.flickr.com/photos/alexaimephotography/') == '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'
|
||||||
|
|
||||||
|
|
||||||
def test_url_extract_main_part():
|
def test_url_extract_main_part():
|
||||||
url_main_part = 'flickr.com/photos/alexaimephotography'
|
url_main_part = 'flickr.com/photos/alexaimephotography'
|
||||||
|
|
||||||
parts = [
|
parts = [
|
||||||
['http://', 'https://'],
|
['http://', 'https://'],
|
||||||
['www.', ''],
|
['www.', ''],
|
||||||
[url_main_part],
|
[url_main_part],
|
||||||
['/', ''],
|
['/', ''],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
|
||||||
|
for url_parts in itertools.product(*parts):
|
||||||
|
url = ''.join(url_parts)
|
||||||
|
assert URLMatcher.extract_main_part(url) == url_main_part
|
||||||
|
assert not url_regexp.match(url) is None
|
||||||
|
|
||||||
url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
|
|
||||||
for url_parts in itertools.product(*parts):
|
|
||||||
url = ''.join(url_parts)
|
|
||||||
assert URLMatcher.extract_main_part(url) == url_main_part
|
|
||||||
assert not url_regexp.match(url) is None
|
|
||||||
|
|
||||||
def test_url_make_profile_url_regexp():
|
def test_url_make_profile_url_regexp():
|
||||||
url_main_part = 'flickr.com/photos/{username}'
|
url_main_part = 'flickr.com/photos/{username}'
|
||||||
|
|
||||||
parts = [
|
parts = [
|
||||||
['http://', 'https://'],
|
['http://', 'https://'],
|
||||||
['www.', ''],
|
['www.', ''],
|
||||||
[url_main_part],
|
[url_main_part],
|
||||||
['/', ''],
|
['/', ''],
|
||||||
]
|
]
|
||||||
|
|
||||||
for url_parts in itertools.product(*parts):
|
for url_parts in itertools.product(*parts):
|
||||||
url = ''.join(url_parts)
|
url = ''.join(url_parts)
|
||||||
assert URLMatcher.make_profile_url_regexp(url).pattern == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
|
assert URLMatcher.make_profile_url_regexp(url).pattern == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
|
||||||
|
|||||||
Reference in New Issue
Block a user