diff --git a/maigret/activation.py b/maigret/activation.py
index af2b542..a4346ad 100644
--- a/maigret/activation.py
+++ b/maigret/activation.py
@@ -1,11 +1,9 @@
-import aiohttp
-from aiohttp import CookieJar
-import asyncio
-import json
from http.cookiejar import MozillaCookieJar
from http.cookies import Morsel
import requests
+from aiohttp import CookieJar
+
class ParsingActivator:
@staticmethod
diff --git a/maigret/checking.py b/maigret/checking.py
index 4a4f5d5..deb99e6 100644
--- a/maigret/checking.py
+++ b/maigret/checking.py
@@ -467,8 +467,12 @@ async def maigret(username, site_dict, query_notify, logger,
if no_progressbar:
await asyncio.gather(*tasks)
else:
- for f in tqdm.asyncio.tqdm.as_completed(tasks):
- await f
+ for f in tqdm.asyncio.tqdm.as_completed(tasks, timeout=timeout):
+ try:
+ await f
+ except asyncio.exceptions.TimeoutError:
+ # TODO: record the timeout in the results instead of silently ignoring it
+ pass
await session.close()
diff --git a/maigret/maigret.py b/maigret/maigret.py
index d4e5b05..b613d76 100755
--- a/maigret/maigret.py
+++ b/maigret/maigret.py
@@ -4,7 +4,6 @@ Maigret main module
import os
import platform
-import sys
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import requests
@@ -176,7 +175,7 @@ async def main():
action="store", metavar='REPORT_TYPE',
dest="json", default='', type=check_supported_json_format,
help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
- " (one report per username)."
+ " (one report per username)."
)
args = parser.parse_args()
@@ -204,7 +203,7 @@ async def main():
u: args.id_type
for u in args.username
if u not in ['-']
- and u not in args.ignore_ids_list
+ and u not in args.ignore_ids_list
}
parsing_enabled = not args.disable_extracting
@@ -380,7 +379,6 @@ async def main():
save_json_report(filename, username, results, report_type=args.json)
query_notify.warning(f'JSON {args.json} report for {username} saved in {filename}')
-
# reporting for all the result
if general_results:
if args.html or args.pdf:
diff --git a/maigret/notify.py b/maigret/notify.py
index e9af924..ea3186d 100644
--- a/maigret/notify.py
+++ b/maigret/notify.py
@@ -4,6 +4,7 @@ This module defines the objects for notifying the caller about the
results of queries.
"""
import sys
+
from colorama import Fore, Style, init
from .result import QueryStatus
diff --git a/maigret/report.py b/maigret/report.py
index b30417b..3a5cb94 100644
--- a/maigret/report.py
+++ b/maigret/report.py
@@ -1,15 +1,16 @@
import csv
-import json
import io
+import json
import logging
import os
+from argparse import ArgumentTypeError
+from datetime import datetime
+
import pycountry
import xmind
-from datetime import datetime
+from dateutil.parser import parse as parse_datetime_str
from jinja2 import Template
from xhtml2pdf import pisa
-from argparse import ArgumentTypeError
-from dateutil.parser import parse as parse_datetime_str
from .result import QueryStatus
from .utils import is_country_tag, CaseConverter, enrich_link_str
@@ -19,10 +20,11 @@ SUPPORTED_JSON_REPORT_FORMATS = [
'ndjson',
]
-
'''
UTILS
'''
+
+
def filter_supposed_data(data):
### interesting fields
allowed_fields = ['fullname', 'gender', 'location', 'age']
@@ -35,6 +37,8 @@ def filter_supposed_data(data):
'''
REPORTS SAVING
'''
+
+
def save_csv_report(filename: str, username: str, results: dict):
with open(filename, 'w', newline='', encoding='utf-8') as f:
generate_csv_report(username, results, f)
@@ -58,6 +62,7 @@ def save_pdf_report(filename: str, context: dict):
with open(filename, 'w+b') as f:
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
+
def save_json_report(filename: str, username: str, results: dict, report_type: str):
with open(filename, 'w', encoding='utf-8') as f:
generate_json_report(username, results, f, report_type=report_type)
@@ -66,10 +71,13 @@ def save_json_report(filename: str, username: str, results: dict, report_type: s
'''
REPORTS GENERATING
'''
+
+
def generate_report_template(is_pdf: bool):
"""
HTML/PDF template generation
"""
+
def get_resource_content(filename):
return open(os.path.join(maigret_path, 'resources', filename)).read()
@@ -112,6 +120,9 @@ def generate_report_context(username_results: list):
continue
status = dictionary.get('status')
+ if not status: # FIXME: currently in case of timeout
+ continue
+
if status.ids_data:
dictionary['ids_data'] = status.ids_data
extended_info_count += 1
@@ -166,7 +177,6 @@ def generate_report_context(username_results: list):
for t in status.tags:
tags[t] = tags.get(t, 0) + 1
-
brief_text.append(f'Search by {id_type} {username} returned {found_accounts} accounts.')
if new_ids:
@@ -177,8 +187,6 @@ def generate_report_context(username_results: list):
brief_text.append(f'Extended info extracted from {extended_info_count} accounts.')
-
-
brief = ' '.join(brief_text).strip()
tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True)
@@ -221,7 +229,7 @@ def generate_csv_report(username: str, results: dict, csvfile):
results[site]['url_user'],
str(results[site]['status'].status),
results[site]['http_status'],
- ])
+ ])
def generate_txt_report(username: str, results: dict, file):
@@ -253,16 +261,19 @@ def generate_json_report(username: str, results: dict, file, report_type):
if is_report_per_line:
data['sitename'] = sitename
- file.write(json.dumps(data)+'\n')
+ file.write(json.dumps(data) + '\n')
else:
all_json[sitename] = data
if not is_report_per_line:
file.write(json.dumps(all_json))
+
'''
XMIND 8 Functions
'''
+
+
def save_xmind_report(filename, username, results):
if os.path.exists(filename):
os.remove(filename)
@@ -277,9 +288,9 @@ def design_sheet(sheet, username, results):
alltags = {}
supposed_data = {}
- sheet.setTitle("%s Analysis"%(username))
+ sheet.setTitle("%s Analysis" % (username))
root_topic1 = sheet.getRootTopic()
- root_topic1.setTitle("%s"%(username))
+ root_topic1.setTitle("%s" % (username))
undefinedsection = root_topic1.addSubTopic()
undefinedsection.setTitle("Undefined")
@@ -333,7 +344,7 @@ def design_sheet(sheet, username, results):
currentsublabel.setTitle("%s: %s" % (k, currentval))
### Add Supposed DATA
filterede_supposed_data = filter_supposed_data(supposed_data)
- if(len(filterede_supposed_data) >0):
+ if (len(filterede_supposed_data) > 0):
undefinedsection = root_topic1.addSubTopic()
undefinedsection.setTitle("SUPPOSED DATA")
for k, v in filterede_supposed_data.items():
@@ -344,6 +355,5 @@ def design_sheet(sheet, username, results):
def check_supported_json_format(value):
if value and not value in SUPPORTED_JSON_REPORT_FORMATS:
raise ArgumentTypeError(f'JSON report type must be one of the following types: '
- + ', '.join(SUPPORTED_JSON_REPORT_FORMATS))
+ + ', '.join(SUPPORTED_JSON_REPORT_FORMATS))
return value
-
diff --git a/maigret/resources/data.json b/maigret/resources/data.json
index b253953..64e1277 100644
--- a/maigret/resources/data.json
+++ b/maigret/resources/data.json
@@ -12349,7 +12349,7 @@
"us"
],
"headers": {
- "authorization": "Bearer BQCEWXdzCPImYp4zhhbEssMRKqvUasJb9vVoe2A3J5eFMhTfn0b5jPkUHGJ9Fe0_HCaF81AMeRnSD9KzIPg"
+ "authorization": "Bearer BQA6sdhtUg3hadjln7DCoAK6sLn7KrHfsn2DObW2gr-W3HgF0h1KZGVYgwispRDR1tqRntVeTd0Duvb2q4g"
},
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14062,7 +14062,7 @@
"video"
],
"headers": {
- "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTYwOTgwODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.tTecsUjIJ0KCcMxOT8OgkCp-P3ezg5RR0FGqtiejqE8"
+ "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MTYxMDcyNjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.kzWxBf1qCJwjpZYUP6w-Pf4VptBMKpKUaMw8VnYwtPU"
},
"activation": {
"url": "https://vimeo.com/_rv/viewer",
@@ -14969,6 +14969,7 @@
"usernameUnclaimed": "noonewouldeverusethis7"
},
"YandexLocal": {
+ "disabled": true,
"tags": [
"ru"
],
@@ -23595,6 +23596,67 @@
"urlMain": "https://calendly.com",
"usernameClaimed": "john",
"usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "depop.com": {
+ "checkType": "message",
+ "presenseStrs": [
+ "first_name"
+ ],
+ "absenceStrs": [
+ "invalidUrlError__message"
+ ],
+ "url": "https://www.depop.com/{username}",
+ "urlMain": "https://www.depop.com",
+ "usernameClaimed": "blue",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "community.brave.com": {
+ "engine": "Discourse",
+ "urlMain": "https://community.brave.com",
+ "usernameClaimed": "alex",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "community.endlessos.com": {
+ "engine": "Discourse",
+ "urlMain": "https://community.endlessos.com",
+ "usernameClaimed": "alex",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "forum.endeavouros.com": {
+ "engine": "Discourse",
+ "urlMain": "https://forum.endeavouros.com",
+ "usernameClaimed": "alex",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "forum.garudalinux.org": {
+ "engine": "Discourse",
+ "urlMain": "https://forum.garudalinux.org",
+ "usernameClaimed": "alex",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "forum.snapcraft.io": {
+ "engine": "Discourse",
+ "urlMain": "https://forum.snapcraft.io",
+ "usernameClaimed": "alex",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "forum.zorin.com": {
+ "engine": "Discourse",
+ "urlMain": "https://forum.zorin.com",
+ "usernameClaimed": "alex",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "codeseller.ru": {
+ "engine": "Wordpress/Author",
+ "urlMain": "https://codeseller.ru",
+ "usernameClaimed": "alex",
+ "usernameUnclaimed": "noonewouldeverusethis7"
+ },
+ "linuxpip.org": {
+ "engine": "Wordpress/Author",
+ "urlMain": "https://linuxpip.org",
+ "usernameClaimed": "diehard",
+ "usernameUnclaimed": "noonewouldeverusethis7"
}
},
"engines": {
@@ -23689,6 +23751,24 @@
" str:
if self.url_regexp:
- import logging
match_groups = self.url_regexp.match(url)
if match_groups:
return match_groups.groups()[-1].rstrip('/')
@@ -238,7 +236,6 @@ class MaigretDatabase:
return self
-
def load_from_json(self, json_data: dict) -> MaigretDatabase:
# Add all of site information from the json file to internal site list.
site_data = json_data.get("sites", {})
@@ -263,7 +260,6 @@ class MaigretDatabase:
return self
-
def load_from_str(self, db_str: str) -> MaigretDatabase:
try:
data = json.loads(db_str)
@@ -274,7 +270,6 @@ class MaigretDatabase:
return self.load_from_json(data)
-
def load_from_url(self, url: str) -> MaigretDatabase:
is_url_valid = url.startswith('http://') or url.startswith('https://')
@@ -303,7 +298,6 @@ class MaigretDatabase:
return self.load_from_json(data)
-
def load_from_file(self, filename: str) -> MaigretDatabase:
try:
with open(filename, 'r', encoding='utf-8') as file:
@@ -364,7 +358,7 @@ class MaigretDatabase:
continue
tags[tag] = tags.get(tag, 0) + 1
- output += f'Enabled/total sites: {total_count-disabled_count}/{total_count}\n'
+ output += f'Enabled/total sites: {total_count - disabled_count}/{total_count}\n'
output += 'Top sites\' profile URLs:\n'
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:20]:
if count == 1:
@@ -377,4 +371,4 @@ class MaigretDatabase:
mark = ' (non-standard)'
output += f'{count}\t{tag}{mark}\n'
- return output
\ No newline at end of file
+ return output
diff --git a/maigret/submit.py b/maigret/submit.py
index 974c828..9f88b03 100644
--- a/maigret/submit.py
+++ b/maigret/submit.py
@@ -1,5 +1,4 @@
import difflib
-import json
import requests
from mock import Mock
@@ -89,7 +88,7 @@ async def submit_dialog(db, url_exists, cookie_file):
domain_raw = URL_RE.sub('', url_exists).strip().strip('/')
domain_raw = domain_raw.split('/')[0]
- matched_sites = list(filter(lambda x: domain_raw in x.url_main+x.url, db.sites))
+ matched_sites = list(filter(lambda x: domain_raw in x.url_main + x.url, db.sites))
if matched_sites:
print(f'Sites with domain "{domain_raw}" already exists in the Maigret database!')
status = lambda s: '(disabled)' if s.disabled else ''
diff --git a/maigret/utils.py b/maigret/utils.py
index d7125ef..ce11b59 100644
--- a/maigret/utils.py
+++ b/maigret/utils.py
@@ -1,5 +1,4 @@
import re
-import sys
class CaseConverter:
@@ -55,4 +54,4 @@ class URLMatcher:
url_regexp = url_main_part.replace('{username}', f'({username_regexp})')
regexp_str = self._HTTP_URL_RE_STR.replace('(.+)', url_regexp)
- return re.compile(regexp_str)
\ No newline at end of file
+ return re.compile(regexp_str)
diff --git a/tests/conftest.py b/tests/conftest.py
index 754f67c..79edd9b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,11 +1,11 @@
import glob
import logging
import os
+
import pytest
from _pytest.mark import Mark
-from mock import Mock
-from maigret.sites import MaigretDatabase, MaigretSite
+from maigret.sites import MaigretDatabase
CUR_PATH = os.path.dirname(os.path.realpath(__file__))
JSON_FILE = os.path.join(CUR_PATH, '../maigret/resources/data.json')
diff --git a/tests/test_activation.py b/tests/test_activation.py
index e37a9a1..d0d718f 100644
--- a/tests/test_activation.py
+++ b/tests/test_activation.py
@@ -1,5 +1,6 @@
"""Maigret activation test functions"""
import json
+
import aiohttp
import pytest
from mock import Mock
diff --git a/tests/test_maigret.py b/tests/test_maigret.py
index 1809bc6..6fde946 100644
--- a/tests/test_maigret.py
+++ b/tests/test_maigret.py
@@ -1,10 +1,11 @@
"""Maigret main module test functions"""
import asyncio
+
import pytest
from mock import Mock
from maigret.maigret import self_check
-from maigret.sites import MaigretDatabase, MaigretSite
+from maigret.sites import MaigretDatabase
EXAMPLE_DB = {
'engines': {
diff --git a/tests/test_sites.py b/tests/test_sites.py
index ff33a9a..7ad834d 100644
--- a/tests/test_sites.py
+++ b/tests/test_sites.py
@@ -1,33 +1,32 @@
"""Maigret Database test functions"""
from maigret.sites import MaigretDatabase, MaigretSite
-
EXAMPLE_DB = {
'engines': {
"XenForo": {
- "presenseStrs": ["XenForo"],
- "site": {
- "absenceStrs": [
- "The specified member cannot be found. Please enter a member's entire name.",
- ],
- "checkType": "message",
- "errors": {
- "You must be logged-in to do that.": "Login required"
- },
- "url": "{urlMain}{urlSubpath}/members/?username={username}"
- }
+ "presenseStrs": ["XenForo"],
+ "site": {
+ "absenceStrs": [
+ "The specified member cannot be found. Please enter a member's entire name.",
+ ],
+ "checkType": "message",
+ "errors": {
+ "You must be logged-in to do that.": "Login required"
+ },
+ "url": "{urlMain}{urlSubpath}/members/?username={username}"
+ }
},
},
'sites': {
"Amperka": {
- "engine": "XenForo",
- "rank": 121613,
- "tags": [
- "ru"
- ],
- "urlMain": "http://forum.amperka.ru",
- "usernameClaimed": "adam",
- "usernameUnclaimed": "noonewouldeverusethis7"
+ "engine": "XenForo",
+ "rank": 121613,
+ "tags": [
+ "ru"
+ ],
+ "urlMain": "http://forum.amperka.ru",
+ "usernameClaimed": "adam",
+ "usernameUnclaimed": "noonewouldeverusethis7"
},
}
}
@@ -167,6 +166,7 @@ def test_ranked_sites_dict_disabled():
assert len(db.ranked_sites_dict()) == 2
assert len(db.ranked_sites_dict(disabled=False)) == 1
+
def test_ranked_sites_dict_id_type():
db = MaigretDatabase()
db.update_site(MaigretSite('1', {}))
diff --git a/tests/test_utils.py b/tests/test_utils.py
index e2a1bed..fee4cb3 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,66 +1,74 @@
"""Maigret utils test functions"""
import itertools
import re
+
from maigret.utils import CaseConverter, is_country_tag, enrich_link_str, URLMatcher
def test_case_convert_camel_to_snake():
- a = 'SnakeCasedString'
- b = CaseConverter.camel_to_snake(a)
+ a = 'SnakeCasedString'
+ b = CaseConverter.camel_to_snake(a)
+
+ assert b == 'snake_cased_string'
- assert b == 'snake_cased_string'
def test_case_convert_snake_to_camel():
- a = 'camel_cased_string'
- b = CaseConverter.snake_to_camel(a)
+ a = 'camel_cased_string'
+ b = CaseConverter.snake_to_camel(a)
+
+ assert b == 'camelCasedString'
- assert b == 'camelCasedString'
def test_case_convert_snake_to_title():
- a = 'camel_cased_string'
- b = CaseConverter.snake_to_title(a)
+ a = 'camel_cased_string'
+ b = CaseConverter.snake_to_title(a)
+
+ assert b == 'Camel cased string'
- assert b == 'Camel cased string'
def test_is_country_tag():
- assert is_country_tag('ru') == True
- assert is_country_tag('FR') == True
+ assert is_country_tag('ru') == True
+ assert is_country_tag('FR') == True
- assert is_country_tag('a1') == False
- assert is_country_tag('dating') == False
+ assert is_country_tag('a1') == False
+ assert is_country_tag('dating') == False
+
+ assert is_country_tag('global') == True
- assert is_country_tag('global') == True
def test_enrich_link_str():
- assert enrich_link_str('test') == 'test'
- assert enrich_link_str(' www.flickr.com/photos/alexaimephotography/') == 'www.flickr.com/photos/alexaimephotography/'
+ assert enrich_link_str('test') == 'test'
+ assert enrich_link_str(
+ ' www.flickr.com/photos/alexaimephotography/') == 'www.flickr.com/photos/alexaimephotography/'
+
def test_url_extract_main_part():
- url_main_part = 'flickr.com/photos/alexaimephotography'
+ url_main_part = 'flickr.com/photos/alexaimephotography'
- parts = [
- ['http://', 'https://'],
- ['www.', ''],
- [url_main_part],
- ['/', ''],
- ]
+ parts = [
+ ['http://', 'https://'],
+ ['www.', ''],
+ [url_main_part],
+ ['/', ''],
+ ]
+
+ url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
+ for url_parts in itertools.product(*parts):
+ url = ''.join(url_parts)
+ assert URLMatcher.extract_main_part(url) == url_main_part
+ assert not url_regexp.match(url) is None
- url_regexp = re.compile('^https?://(www.)?flickr.com/photos/(.+?)$')
- for url_parts in itertools.product(*parts):
- url = ''.join(url_parts)
- assert URLMatcher.extract_main_part(url) == url_main_part
- assert not url_regexp.match(url) is None
def test_url_make_profile_url_regexp():
- url_main_part = 'flickr.com/photos/{username}'
+ url_main_part = 'flickr.com/photos/{username}'
- parts = [
- ['http://', 'https://'],
- ['www.', ''],
- [url_main_part],
- ['/', ''],
- ]
+ parts = [
+ ['http://', 'https://'],
+ ['www.', ''],
+ [url_main_part],
+ ['/', ''],
+ ]
- for url_parts in itertools.product(*parts):
- url = ''.join(url_parts)
- assert URLMatcher.make_profile_url_regexp(url).pattern == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
+ for url_parts in itertools.product(*parts):
+ url = ''.join(url_parts)
+ assert URLMatcher.make_profile_url_regexp(url).pattern == r'^https?://(www.)?flickr\.com/photos/(.+?)$'