Introduced the --retries flag and performed a thorough refactoring

- updated sites list
- linted test scripts
This commit is contained in:
Soxoj
2021-05-01 23:51:48 +03:00
parent 7fd4a2c516
commit 5ee91f6659
18 changed files with 6182 additions and 4943 deletions
+2 -1
View File
@@ -26,7 +26,8 @@ def get_test_reports_filenames():
def remove_test_reports():
reports_list = get_test_reports_filenames()
for f in reports_list: os.remove(f)
for f in reports_list:
os.remove(f)
logging.error(f'Removed test reports {reports_list}')
+3 -2
View File
@@ -44,8 +44,9 @@ async def test_import_aiohttp_cookies():
url = 'https://httpbin.org/cookies'
connector = aiohttp.TCPConnector(ssl=False)
session = aiohttp.ClientSession(connector=connector, trust_env=True,
cookie_jar=cookie_jar)
session = aiohttp.ClientSession(
connector=connector, trust_env=True, cookie_jar=cookie_jar
)
response = await session.get(url=url)
result = json.loads(await response.content.read())
+9 -3
View File
@@ -2,11 +2,16 @@
import pytest
import asyncio
import logging
from maigret.executors import AsyncioSimpleExecutor, AsyncioProgressbarExecutor, \
AsyncioProgressbarSemaphoreExecutor, AsyncioProgressbarQueueExecutor
from maigret.executors import (
AsyncioSimpleExecutor,
AsyncioProgressbarExecutor,
AsyncioProgressbarSemaphoreExecutor,
AsyncioProgressbarQueueExecutor,
)
logger = logging.getLogger(__name__)
async def func(n):
    """Pause for ``0.1 * (n % 3)`` seconds, then return ``n`` unchanged.

    The delay depends only on ``n % 3``, so it is 0, 0.1 or 0.2 seconds.
    """
    pause_seconds = 0.1 * (n % 3)
    await asyncio.sleep(pause_seconds)
    return n
@@ -20,6 +25,7 @@ async def test_simple_asyncio_executor():
assert executor.execution_time > 0.2
assert executor.execution_time < 0.3
@pytest.mark.asyncio
async def test_asyncio_progressbar_executor():
tasks = [(func, [n], {}) for n in range(10)]
@@ -64,4 +70,4 @@ async def test_asyncio_progressbar_queue_executor():
executor = AsyncioProgressbarQueueExecutor(logger=logger, in_parallel=10)
assert await executor.run(tasks) == [0, 3, 6, 9, 1, 4, 7, 2, 5, 8]
assert executor.execution_time > 0.2
assert executor.execution_time < 0.3
assert executor.execution_time < 0.3
+7 -17
View File
@@ -8,40 +8,30 @@ from maigret.maigret import self_check
from maigret.sites import MaigretDatabase
EXAMPLE_DB = {
'engines': {
},
'engines': {},
'sites': {
"GooglePlayStore": {
"tags": [
"global",
"us"
],
"tags": ["global", "us"],
"disabled": False,
"checkType": "status_code",
"alexaRank": 1,
"url": "https://play.google.com/store/apps/developer?id={username}",
"urlMain": "https://play.google.com/store",
"usernameClaimed": "Facebook_nosuchname",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
},
"Reddit": {
"tags": [
"news",
"social",
"us"
],
"tags": ["news", "social", "us"],
"checkType": "status_code",
"presenseStrs": [
"totalKarma"
],
"presenseStrs": ["totalKarma"],
"disabled": True,
"alexaRank": 17,
"url": "https://www.reddit.com/user/{username}",
"urlMain": "https://www.reddit.com/",
"usernameClaimed": "blue",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
},
}
},
}
+202 -67
View File
@@ -7,8 +7,16 @@ from io import StringIO
import xmind
from jinja2 import Template
from maigret.report import generate_csv_report, generate_txt_report, save_xmind_report, save_html_report, \
save_pdf_report, generate_report_template, generate_report_context, generate_json_report
from maigret.report import (
generate_csv_report,
generate_txt_report,
save_xmind_report,
save_html_report,
save_pdf_report,
generate_report_template,
generate_report_context,
generate_json_report,
)
from maigret.result import QueryResult, QueryStatus
EXAMPLE_RESULTS = {
@@ -17,14 +25,16 @@ EXAMPLE_RESULTS = {
'parsing_enabled': True,
'url_main': 'https://www.github.com/',
'url_user': 'https://www.github.com/test',
'status': QueryResult('test',
'GitHub',
'https://www.github.com/test',
QueryStatus.CLAIMED,
tags=['test_tag']),
'status': QueryResult(
'test',
'GitHub',
'https://www.github.com/test',
QueryStatus.CLAIMED,
tags=['test_tag'],
),
'http_status': 200,
'is_similar': False,
'rank': 78
'rank': 78,
}
}
@@ -33,74 +43,196 @@ BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
GOOD_500PX_RESULT.ids_data = {"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==", "legacy_id": "26403415",
"username": "alexaimephotographycars", "name": "Alex Aim\u00e9",
"website": "www.flickr.com/photos/alexaimephotography/",
"facebook_link": " www.instagram.com/street.reality.photography/",
"instagram_username": "alexaimephotography", "twitter_username": "Alexaimephotogr"}
GOOD_500PX_RESULT.ids_data = {
"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==",
"legacy_id": "26403415",
"username": "alexaimephotographycars",
"name": "Alex Aim\u00e9",
"website": "www.flickr.com/photos/alexaimephotography/",
"facebook_link": " www.instagram.com/street.reality.photography/",
"instagram_username": "alexaimephotography",
"twitter_username": "Alexaimephotogr",
}
GOOD_REDDIT_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_REDDIT_RESULT.tags = ['news', 'us']
GOOD_REDDIT_RESULT.ids_data = {"reddit_id": "t5_1nytpy", "reddit_username": "alexaimephotography",
"fullname": "alexaimephotography",
"image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e",
"is_employee": "False", "is_nsfw": "False", "is_mod": "True", "is_following": "True",
"has_user_profile": "True", "hide_from_robots": "False",
"created_at": "2019-07-10 12:20:03", "total_karma": "53959", "post_karma": "52738"}
GOOD_REDDIT_RESULT.ids_data = {
"reddit_id": "t5_1nytpy",
"reddit_username": "alexaimephotography",
"fullname": "alexaimephotography",
"image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e",
"is_employee": "False",
"is_nsfw": "False",
"is_mod": "True",
"is_following": "True",
"has_user_profile": "True",
"hide_from_robots": "False",
"created_at": "2019-07-10 12:20:03",
"total_karma": "53959",
"post_karma": "52738",
}
GOOD_IG_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_IG_RESULT.tags = ['photo', 'global']
GOOD_IG_RESULT.ids_data = {"instagram_username": "alexaimephotography", "fullname": "Alexaimephotography",
"id": "6828488620",
"image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F",
"bio": "Photographer \nChild of fine street arts",
"external_url": "https://www.flickr.com/photos/alexaimephotography2020/"}
GOOD_IG_RESULT.ids_data = {
"instagram_username": "alexaimephotography",
"fullname": "Alexaimephotography",
"id": "6828488620",
"image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F",
"bio": "Photographer \nChild of fine street arts",
"external_url": "https://www.flickr.com/photos/alexaimephotography2020/",
}
GOOD_TWITTER_RESULT = copy.deepcopy(GOOD_RESULT)
GOOD_TWITTER_RESULT.tags = ['social', 'us']
TEST = [('alexaimephotographycars', 'username', {
'500px': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
'url_user': 'https://500px.com/p/alexaimephotographycars',
'ids_usernames': {'alexaimephotographycars': 'username', 'alexaimephotography': 'username',
'Alexaimephotogr': 'username'}, 'status': GOOD_500PX_RESULT, 'http_status': 200,
'is_similar': False, 'rank': 2981},
'Reddit': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
'url_user': 'https://www.reddit.com/user/alexaimephotographycars', 'status': BAD_RESULT,
'http_status': 404, 'is_similar': False, 'rank': 17},
'Twitter': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
'url_user': 'https://twitter.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 400,
'is_similar': False, 'rank': 55},
'Instagram': {'username': 'alexaimephotographycars', 'parsing_enabled': True,
'url_main': 'https://www.instagram.com/',
'url_user': 'https://www.instagram.com/alexaimephotographycars', 'status': BAD_RESULT,
'http_status': 404, 'is_similar': False, 'rank': 29}}), ('alexaimephotography', 'username', {
'500px': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
'url_user': 'https://500px.com/p/alexaimephotography', 'status': BAD_RESULT, 'http_status': 200,
'is_similar': False, 'rank': 2981},
'Reddit': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
'url_user': 'https://www.reddit.com/user/alexaimephotography',
'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_REDDIT_RESULT, 'http_status': 200,
'is_similar': False, 'rank': 17},
'Twitter': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
'url_user': 'https://twitter.com/alexaimephotography', 'status': BAD_RESULT, 'http_status': 400,
'is_similar': False, 'rank': 55},
'Instagram': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/',
'url_user': 'https://www.instagram.com/alexaimephotography',
'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_IG_RESULT, 'http_status': 200,
'is_similar': False, 'rank': 29}}), ('Alexaimephotogr', 'username', {
'500px': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
'url_user': 'https://500px.com/p/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 200,
'is_similar': False, 'rank': 2981},
'Reddit': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
'url_user': 'https://www.reddit.com/user/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404,
'is_similar': False, 'rank': 17},
'Twitter': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
'url_user': 'https://twitter.com/Alexaimephotogr', 'status': GOOD_TWITTER_RESULT, 'http_status': 400,
'is_similar': False, 'rank': 55},
'Instagram': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/',
'url_user': 'https://www.instagram.com/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404,
'is_similar': False, 'rank': 29}})]
TEST = [
(
'alexaimephotographycars',
'username',
{
'500px': {
'username': 'alexaimephotographycars',
'parsing_enabled': True,
'url_main': 'https://500px.com/',
'url_user': 'https://500px.com/p/alexaimephotographycars',
'ids_usernames': {
'alexaimephotographycars': 'username',
'alexaimephotography': 'username',
'Alexaimephotogr': 'username',
},
'status': GOOD_500PX_RESULT,
'http_status': 200,
'is_similar': False,
'rank': 2981,
},
'Reddit': {
'username': 'alexaimephotographycars',
'parsing_enabled': True,
'url_main': 'https://www.reddit.com/',
'url_user': 'https://www.reddit.com/user/alexaimephotographycars',
'status': BAD_RESULT,
'http_status': 404,
'is_similar': False,
'rank': 17,
},
'Twitter': {
'username': 'alexaimephotographycars',
'parsing_enabled': True,
'url_main': 'https://www.twitter.com/',
'url_user': 'https://twitter.com/alexaimephotographycars',
'status': BAD_RESULT,
'http_status': 400,
'is_similar': False,
'rank': 55,
},
'Instagram': {
'username': 'alexaimephotographycars',
'parsing_enabled': True,
'url_main': 'https://www.instagram.com/',
'url_user': 'https://www.instagram.com/alexaimephotographycars',
'status': BAD_RESULT,
'http_status': 404,
'is_similar': False,
'rank': 29,
},
},
),
(
'alexaimephotography',
'username',
{
'500px': {
'username': 'alexaimephotography',
'parsing_enabled': True,
'url_main': 'https://500px.com/',
'url_user': 'https://500px.com/p/alexaimephotography',
'status': BAD_RESULT,
'http_status': 200,
'is_similar': False,
'rank': 2981,
},
'Reddit': {
'username': 'alexaimephotography',
'parsing_enabled': True,
'url_main': 'https://www.reddit.com/',
'url_user': 'https://www.reddit.com/user/alexaimephotography',
'ids_usernames': {'alexaimephotography': 'username'},
'status': GOOD_REDDIT_RESULT,
'http_status': 200,
'is_similar': False,
'rank': 17,
},
'Twitter': {
'username': 'alexaimephotography',
'parsing_enabled': True,
'url_main': 'https://www.twitter.com/',
'url_user': 'https://twitter.com/alexaimephotography',
'status': BAD_RESULT,
'http_status': 400,
'is_similar': False,
'rank': 55,
},
'Instagram': {
'username': 'alexaimephotography',
'parsing_enabled': True,
'url_main': 'https://www.instagram.com/',
'url_user': 'https://www.instagram.com/alexaimephotography',
'ids_usernames': {'alexaimephotography': 'username'},
'status': GOOD_IG_RESULT,
'http_status': 200,
'is_similar': False,
'rank': 29,
},
},
),
(
'Alexaimephotogr',
'username',
{
'500px': {
'username': 'Alexaimephotogr',
'parsing_enabled': True,
'url_main': 'https://500px.com/',
'url_user': 'https://500px.com/p/Alexaimephotogr',
'status': BAD_RESULT,
'http_status': 200,
'is_similar': False,
'rank': 2981,
},
'Reddit': {
'username': 'Alexaimephotogr',
'parsing_enabled': True,
'url_main': 'https://www.reddit.com/',
'url_user': 'https://www.reddit.com/user/Alexaimephotogr',
'status': BAD_RESULT,
'http_status': 404,
'is_similar': False,
'rank': 17,
},
'Twitter': {
'username': 'Alexaimephotogr',
'parsing_enabled': True,
'url_main': 'https://www.twitter.com/',
'url_user': 'https://twitter.com/Alexaimephotogr',
'status': GOOD_TWITTER_RESULT,
'http_status': 400,
'is_similar': False,
'rank': 55,
},
'Instagram': {
'username': 'Alexaimephotogr',
'parsing_enabled': True,
'url_main': 'https://www.instagram.com/',
'url_user': 'https://www.instagram.com/Alexaimephotogr',
'status': BAD_RESULT,
'http_status': 404,
'is_similar': False,
'rank': 29,
},
},
),
]
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
@@ -187,7 +319,10 @@ def test_save_xmind_report():
assert data['topic']['topics'][0]['title'] == 'Undefined'
assert data['topic']['topics'][1]['title'] == 'test_tag'
assert len(data['topic']['topics'][1]['topics']) == 1
assert data['topic']['topics'][1]['topics'][0]['label'] == 'https://www.github.com/test'
assert (
data['topic']['topics'][1]['topics'][0]['label']
== 'https://www.github.com/test'
)
def test_html_report():
+14 -12
View File
@@ -10,25 +10,21 @@ EXAMPLE_DB = {
"The specified member cannot be found. Please enter a member's entire name.",
],
"checkType": "message",
"errors": {
"You must be logged-in to do that.": "Login required"
},
"url": "{urlMain}{urlSubpath}/members/?username={username}"
}
"errors": {"You must be logged-in to do that.": "Login required"},
"url": "{urlMain}{urlSubpath}/members/?username={username}",
},
},
},
'sites': {
"Amperka": {
"engine": "XenForo",
"rank": 121613,
"tags": [
"ru"
],
"tags": ["ru"],
"urlMain": "http://forum.amperka.ru",
"usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7"
"usernameUnclaimed": "noonewouldeverusethis7",
},
}
},
}
@@ -116,8 +112,14 @@ def test_site_url_detector():
db = MaigretDatabase()
db.load_from_json(EXAMPLE_DB)
assert db.sites[0].url_regexp.pattern == r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
assert db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test') == 'test'
assert (
db.sites[0].url_regexp.pattern
== r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
)
assert (
db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
== 'test'
)
def test_ranked_sites_dict():
+33 -7
View File
@@ -2,7 +2,13 @@
import itertools
import re
from maigret.utils import CaseConverter, is_country_tag, enrich_link_str, URLMatcher, get_dict_ascii_tree
from maigret.utils import (
CaseConverter,
is_country_tag,
enrich_link_str,
URLMatcher,
get_dict_ascii_tree,
)
def test_case_convert_camel_to_snake():
@@ -45,8 +51,10 @@ def test_is_country_tag():
def test_enrich_link_str():
assert enrich_link_str('test') == 'test'
assert enrich_link_str(
' www.flickr.com/photos/alexaimephotography/') == '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'
assert (
enrich_link_str(' www.flickr.com/photos/alexaimephotography/')
== '<a class="auto-link" href="www.flickr.com/photos/alexaimephotography/">www.flickr.com/photos/alexaimephotography/</a>'
)
def test_url_extract_main_part():
@@ -78,15 +86,32 @@ def test_url_make_profile_url_regexp():
for url_parts in itertools.product(*parts):
url = ''.join(url_parts)
assert URLMatcher.make_profile_url_regexp(url).pattern == r'^https?://(www.)?flickr\.com/photos/(.+?)$'
assert (
URLMatcher.make_profile_url_regexp(url).pattern
== r'^https?://(www.)?flickr\.com/photos/(.+?)$'
)
def test_get_dict_ascii_tree():
data = {'uid': 'dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==', 'legacy_id': '26403415', 'username': 'alexaimephotographycars', 'name': 'Alex Aimé', 'created_at': '2018-05-04T10:17:01.000+0000', 'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b', 'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201', 'website': 'www.instagram.com/street.reality.photography/', 'facebook_link': ' www.instagram.com/street.reality.photography/', 'instagram_username': 'Street.Reality.Photography', 'twitter_username': 'Alexaimephotogr'}
data = {
'uid': 'dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==',
'legacy_id': '26403415',
'username': 'alexaimephotographycars',
'name': 'Alex Aimé',
'created_at': '2018-05-04T10:17:01.000+0000',
'image': 'https://drscdn.500px.org/user_avatar/26403415/q%3D85_w%3D300_h%3D300/v2?webp=true&v=2&sig=0235678a4f7b65e007e864033ebfaf5ef6d87fad34f80a8639d985320c20fe3b',
'image_bg': 'https://drscdn.500px.org/user_cover/26403415/q%3D65_m%3D2048/v2?webp=true&v=1&sig=bea411fb158391a4fdad498874ff17088f91257e59dfb376ff67e3a44c3a4201',
'website': 'www.instagram.com/street.reality.photography/',
'facebook_link': ' www.instagram.com/street.reality.photography/',
'instagram_username': 'Street.Reality.Photography',
'twitter_username': 'Alexaimephotogr',
}
ascii_tree = get_dict_ascii_tree(data.items())
assert ascii_tree == """
assert (
ascii_tree
== """
┣╸uid: dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==
┣╸legacy_id: 26403415
┣╸username: alexaimephotographycars
@@ -97,4 +122,5 @@ def test_get_dict_ascii_tree():
┣╸website: www.instagram.com/street.reality.photography/
┣╸facebook_link: www.instagram.com/street.reality.photography/
┣╸instagram_username: Street.Reality.Photography
┗╸twitter_username: Alexaimephotogr"""
┗╸twitter_username: Alexaimephotogr"""
)