mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Tags and custom checks bugfixes
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
* tags bugfix
|
||||||
|
* custom data checks bugfix
|
||||||
|
|
||||||
## [0.1.10] - 2021-01-13
|
## [0.1.10] - 2021-01-13
|
||||||
* added report static resources into package
|
* added report static resources into package
|
||||||
|
|||||||
+51
-38
@@ -116,7 +116,6 @@ async def update_site_dict_from_response(sitename, site_dict, results_info, sema
|
|||||||
|
|
||||||
site_dict[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj)
|
site_dict[sitename] = process_site_result(response, query_notify, logger, results_info, site_obj)
|
||||||
|
|
||||||
|
|
||||||
# TODO: move info separate module
|
# TODO: move info separate module
|
||||||
def detect_error_page(html_text, status_code, fail_flags, ignore_403):
|
def detect_error_page(html_text, status_code, fail_flags, ignore_403):
|
||||||
# Detect service restrictions such as a country restriction
|
# Detect service restrictions such as a country restriction
|
||||||
@@ -197,8 +196,18 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
|||||||
# presense flags
|
# presense flags
|
||||||
# True by default
|
# True by default
|
||||||
presense_flags = site.presense_strs
|
presense_flags = site.presense_strs
|
||||||
is_presense_detected = html_text and all(
|
if html_text:
|
||||||
[(presense_flag in html_text) for presense_flag in presense_flags]) or not presense_flags
|
is_presense_detected = False
|
||||||
|
if not presense_flags:
|
||||||
|
is_presense_detected = True
|
||||||
|
site.stats['presense_flag'] = None
|
||||||
|
else:
|
||||||
|
for presense_flag in presense_flags:
|
||||||
|
if presense_flag in html_text:
|
||||||
|
is_presense_detected = True
|
||||||
|
site.stats['presense_flag'] = presense_flag
|
||||||
|
logger.info(presense_flag)
|
||||||
|
break
|
||||||
|
|
||||||
if error_text is not None:
|
if error_text is not None:
|
||||||
logger.debug(error_text)
|
logger.debug(error_text)
|
||||||
@@ -300,7 +309,7 @@ def process_site_result(response, query_notify, logger, results_info, site: Maig
|
|||||||
|
|
||||||
async def maigret(username, site_dict, query_notify, logger,
|
async def maigret(username, site_dict, query_notify, logger,
|
||||||
proxy=None, timeout=None, recursive_search=False,
|
proxy=None, timeout=None, recursive_search=False,
|
||||||
id_type='username', tags=None, debug=False, forced=False,
|
id_type='username', debug=False, forced=False,
|
||||||
max_connections=100, no_progressbar=False):
|
max_connections=100, no_progressbar=False):
|
||||||
"""Main search func
|
"""Main search func
|
||||||
|
|
||||||
@@ -333,8 +342,6 @@ async def maigret(username, site_dict, query_notify, logger,
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Notify caller that we are starting the query.
|
# Notify caller that we are starting the query.
|
||||||
if tags is None:
|
|
||||||
tags = set()
|
|
||||||
query_notify.start(username, id_type)
|
query_notify.start(username, id_type)
|
||||||
|
|
||||||
# TODO: connector
|
# TODO: connector
|
||||||
@@ -358,17 +365,11 @@ async def maigret(username, site_dict, query_notify, logger,
|
|||||||
# First create futures for all requests. This allows for the requests to run in parallel
|
# First create futures for all requests. This allows for the requests to run in parallel
|
||||||
for site_name, site in site_dict.items():
|
for site_name, site in site_dict.items():
|
||||||
|
|
||||||
fulltags = site.tags
|
|
||||||
|
|
||||||
if site.type != id_type:
|
if site.type != id_type:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
site_tags = set(fulltags)
|
|
||||||
if tags:
|
|
||||||
if not set(tags).intersection(site_tags):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if site.disabled and not forced:
|
if site.disabled and not forced:
|
||||||
|
logger.debug(f'Site {site.name} is disabled, skipping...')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Results from analysis of this specific site
|
# Results from analysis of this specific site
|
||||||
@@ -579,13 +580,13 @@ async def site_self_check(site, logger, semaphore, db: MaigretDatabase, silent=F
|
|||||||
site.disabled = changes['disabled']
|
site.disabled = changes['disabled']
|
||||||
db.update_site(site)
|
db.update_site(site)
|
||||||
if not silent:
|
if not silent:
|
||||||
action = 'Disabled' if not site.disabled else 'Enabled'
|
action = 'Disabled' if site.disabled else 'Enabled'
|
||||||
print(f'{action} site {site.name}...')
|
print(f'{action} site {site.name}...')
|
||||||
|
|
||||||
return changes
|
return changes
|
||||||
|
|
||||||
|
|
||||||
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False):
|
async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False) -> bool:
|
||||||
sem = asyncio.Semaphore(10)
|
sem = asyncio.Semaphore(10)
|
||||||
tasks = []
|
tasks = []
|
||||||
all_sites = site_data
|
all_sites = site_data
|
||||||
@@ -613,7 +614,9 @@ async def self_check(db: MaigretDatabase, site_data: dict, logger, silent=False)
|
|||||||
total_disabled *= -1
|
total_disabled *= -1
|
||||||
|
|
||||||
if not silent:
|
if not silent:
|
||||||
print(f'{message} {total_disabled} checked sites. Run with `--info` flag to get more information')
|
print(f'{message} {total_disabled} ({disabled_old_count} => {disabled_new_count}) checked sites. Run with `--info` flag to get more information')
|
||||||
|
|
||||||
|
return total_disabled != 0
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
@@ -664,9 +667,18 @@ async def main():
|
|||||||
"A longer timeout will be more likely to get results from slow sites."
|
"A longer timeout will be more likely to get results from slow sites."
|
||||||
"On the other hand, this may cause a long delay to gather all results."
|
"On the other hand, this may cause a long delay to gather all results."
|
||||||
)
|
)
|
||||||
|
parser.add_argument("-n", "--max-connections",
|
||||||
|
action="store", type=int,
|
||||||
|
dest="connections", default=100,
|
||||||
|
help="Allowed number of concurrent connections."
|
||||||
|
)
|
||||||
|
parser.add_argument("-a", "--all-sites",
|
||||||
|
action="store_true", dest="all_sites", default=False,
|
||||||
|
help="Use all sites for scan."
|
||||||
|
)
|
||||||
parser.add_argument("--top-sites",
|
parser.add_argument("--top-sites",
|
||||||
action="store", default=500, type=int,
|
action="store", default=500, type=int,
|
||||||
help="Count of sites for checking ranked by Alexa Top (default: 500)."
|
help="Count of sites for scan ranked by Alexa Top (default: 500)."
|
||||||
)
|
)
|
||||||
parser.add_argument("--print-not-found",
|
parser.add_argument("--print-not-found",
|
||||||
action="store_true", dest="print_not_found", default=False,
|
action="store_true", dest="print_not_found", default=False,
|
||||||
@@ -789,7 +801,7 @@ async def main():
|
|||||||
"resources/data.json"
|
"resources/data.json"
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.top_sites == 0:
|
if args.top_sites == 0 or args.all_sites:
|
||||||
args.top_sites = sys.maxsize
|
args.top_sites = sys.maxsize
|
||||||
|
|
||||||
# Create object with all information about sites we are aware of.
|
# Create object with all information about sites we are aware of.
|
||||||
@@ -803,12 +815,14 @@ async def main():
|
|||||||
# Database self-checking
|
# Database self-checking
|
||||||
if args.self_check:
|
if args.self_check:
|
||||||
print('Maigret sites database self-checking...')
|
print('Maigret sites database self-checking...')
|
||||||
await self_check(db, site_data, logger)
|
is_need_update = await self_check(db, site_data, logger)
|
||||||
if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
|
if is_need_update:
|
||||||
db.save_to_file(args.json_file)
|
if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
|
||||||
print('Database was successfully updated.')
|
db.save_to_file(args.json_file)
|
||||||
else:
|
print('Database was successfully updated.')
|
||||||
print('Updates will be applied only for current search session.')
|
else:
|
||||||
|
print('Updates will be applied only for current search session.')
|
||||||
|
print(db.get_stats(site_data))
|
||||||
|
|
||||||
# Make reports folder is not exists
|
# Make reports folder is not exists
|
||||||
os.makedirs(args.folderoutput, exist_ok=True)
|
os.makedirs(args.folderoutput, exist_ok=True)
|
||||||
@@ -865,10 +879,10 @@ async def main():
|
|||||||
timeout=args.timeout,
|
timeout=args.timeout,
|
||||||
recursive_search=recursive_search_enabled,
|
recursive_search=recursive_search_enabled,
|
||||||
id_type=id_type,
|
id_type=id_type,
|
||||||
tags=args.tags,
|
|
||||||
debug=args.verbose,
|
debug=args.verbose,
|
||||||
logger=logger,
|
logger=logger,
|
||||||
forced=args.use_disabled_sites,
|
forced=args.use_disabled_sites,
|
||||||
|
max_connections=args.connections,
|
||||||
)
|
)
|
||||||
|
|
||||||
username_result = (username, id_type, results)
|
username_result = (username, id_type, results)
|
||||||
@@ -902,21 +916,20 @@ async def main():
|
|||||||
print(f'TXT report for {username} saved in {filename}')
|
print(f'TXT report for {username} saved in {filename}')
|
||||||
|
|
||||||
# reporting for all the result
|
# reporting for all the result
|
||||||
report_context = generate_report_context(general_results)
|
if general_results:
|
||||||
# determine main username
|
report_context = generate_report_context(general_results)
|
||||||
username = report_context['username']
|
# determine main username
|
||||||
|
username = report_context['username']
|
||||||
if args.html:
|
|
||||||
filename = report_filepath_tpl.format(username=username, postfix='.html')
|
|
||||||
save_html_report(filename, report_context)
|
|
||||||
print(f'HTML report on all usernames saved in {filename}')
|
|
||||||
|
|
||||||
if args.pdf:
|
|
||||||
filename = report_filepath_tpl.format(username=username, postfix='.pdf')
|
|
||||||
save_pdf_report(filename, report_context)
|
|
||||||
print(f'PDF report on all usernames saved in {filename}')
|
|
||||||
|
|
||||||
|
if args.html:
|
||||||
|
filename = report_filepath_tpl.format(username=username, postfix='.html')
|
||||||
|
save_html_report(filename, report_context)
|
||||||
|
print(f'HTML report on all usernames saved in {filename}')
|
||||||
|
|
||||||
|
if args.pdf:
|
||||||
|
filename = report_filepath_tpl.format(username=username, postfix='.pdf')
|
||||||
|
save_pdf_report(filename, report_context)
|
||||||
|
print(f'PDF report on all usernames saved in {filename}')
|
||||||
# update database
|
# update database
|
||||||
db.save_to_file(args.json_file)
|
db.save_to_file(args.json_file)
|
||||||
|
|
||||||
|
|||||||
+1777
-1625
File diff suppressed because it is too large
Load Diff
+29
-9
@@ -41,6 +41,7 @@ class MaigretSite:
|
|||||||
|
|
||||||
self.presense_strs = []
|
self.presense_strs = []
|
||||||
self.absence_strs = []
|
self.absence_strs = []
|
||||||
|
self.stats = {}
|
||||||
|
|
||||||
self.engine = None
|
self.engine = None
|
||||||
self.engine_data = {}
|
self.engine_data = {}
|
||||||
@@ -68,7 +69,7 @@ class MaigretSite:
|
|||||||
# strip empty elements
|
# strip empty elements
|
||||||
if v in (False, '', [], {}, None, sys.maxsize, 'username'):
|
if v in (False, '', [], {}, None, sys.maxsize, 'username'):
|
||||||
continue
|
continue
|
||||||
if field in ['name', 'engineData', 'requestFuture', 'detectedEngine', 'engineObj']:
|
if field in ['name', 'engineData', 'requestFuture', 'detectedEngine', 'engineObj', 'stats']:
|
||||||
continue
|
continue
|
||||||
result[field] = v
|
result[field] = v
|
||||||
|
|
||||||
@@ -87,6 +88,8 @@ class MaigretSite:
|
|||||||
# TODO: assertion of intersecting keys
|
# TODO: assertion of intersecting keys
|
||||||
# update dicts like errors
|
# update dicts like errors
|
||||||
self.__dict__.get(field, {}).update(v)
|
self.__dict__.get(field, {}).update(v)
|
||||||
|
elif isinstance(v, list):
|
||||||
|
self.__dict__[field] = self.__dict__.get(field, []) + v
|
||||||
else:
|
else:
|
||||||
self.__dict__[field] = v
|
self.__dict__[field] = v
|
||||||
|
|
||||||
@@ -101,16 +104,23 @@ class MaigretSite:
|
|||||||
self.request_future = None
|
self.request_future = None
|
||||||
self_copy = copy.deepcopy(self)
|
self_copy = copy.deepcopy(self)
|
||||||
engine_data = self_copy.engine_obj.site
|
engine_data = self_copy.engine_obj.site
|
||||||
for field in engine_data.keys():
|
site_data_keys = list(self_copy.__dict__.keys())
|
||||||
if isinstance(engine_data[field], dict):
|
|
||||||
for k in engine_data[field].keys():
|
|
||||||
del self_copy.__dict__[field][k]
|
|
||||||
continue
|
|
||||||
|
|
||||||
if field in list(self_copy.__dict__.keys()):
|
for k in engine_data.keys():
|
||||||
|
field = CaseConverter.camel_to_snake(k)
|
||||||
|
is_exists = field in site_data_keys
|
||||||
|
# remove dict keys
|
||||||
|
if isinstance(engine_data[k], dict) and is_exists:
|
||||||
|
for f in engine_data[k].keys():
|
||||||
|
del self_copy.__dict__[field][f]
|
||||||
|
continue
|
||||||
|
# remove list items
|
||||||
|
if isinstance(engine_data[k], list) and is_exists:
|
||||||
|
for f in engine_data[k]:
|
||||||
|
self_copy.__dict__[field].remove(f)
|
||||||
|
continue
|
||||||
|
if is_exists:
|
||||||
del self_copy.__dict__[field]
|
del self_copy.__dict__[field]
|
||||||
if CaseConverter.camel_to_snake(field) in list(self_copy.__dict__.keys()):
|
|
||||||
del self_copy.__dict__[CaseConverter.camel_to_snake(field)]
|
|
||||||
|
|
||||||
return self_copy
|
return self_copy
|
||||||
|
|
||||||
@@ -255,3 +265,13 @@ class MaigretDatabase:
|
|||||||
)
|
)
|
||||||
|
|
||||||
return self.load_from_json(data)
|
return self.load_from_json(data)
|
||||||
|
|
||||||
|
def get_stats(self, sites_dict):
|
||||||
|
sites = sites_dict or self.sites_dict
|
||||||
|
found_flags = {}
|
||||||
|
for _, s in sites.items():
|
||||||
|
if 'presense_flag' in s.stats:
|
||||||
|
flag = s.stats['presense_flag']
|
||||||
|
found_flags[flag] = found_flags.get(flag, 0) + 1
|
||||||
|
|
||||||
|
return found_flags
|
||||||
|
|||||||
+58
-7
@@ -27,28 +27,79 @@ EXAMPLE_RESULTS = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
|
GOOD_RESULT = QueryResult('', '', '', QueryStatus.CLAIMED)
|
||||||
BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
|
BAD_RESULT = QueryResult('', '', '', QueryStatus.AVAILABLE)
|
||||||
|
|
||||||
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
|
GOOD_500PX_RESULT = copy.deepcopy(GOOD_RESULT)
|
||||||
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
|
GOOD_500PX_RESULT.tags = ['photo', 'us', 'global']
|
||||||
GOOD_500PX_RESULT.ids_data = {"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==", "legacy_id": "26403415", "username": "alexaimephotographycars", "name": "Alex Aim\u00e9", "website": "www.flickr.com/photos/alexaimephotography/", "facebook_link": " www.instagram.com/street.reality.photography/", "instagram_username": "alexaimephotography", "twitter_username": "Alexaimephotogr"}
|
GOOD_500PX_RESULT.ids_data = {"uid": "dXJpOm5vZGU6VXNlcjoyNjQwMzQxNQ==", "legacy_id": "26403415",
|
||||||
|
"username": "alexaimephotographycars", "name": "Alex Aim\u00e9",
|
||||||
|
"website": "www.flickr.com/photos/alexaimephotography/",
|
||||||
|
"facebook_link": " www.instagram.com/street.reality.photography/",
|
||||||
|
"instagram_username": "alexaimephotography", "twitter_username": "Alexaimephotogr"}
|
||||||
|
|
||||||
GOOD_REDDIT_RESULT = copy.deepcopy(GOOD_RESULT)
|
GOOD_REDDIT_RESULT = copy.deepcopy(GOOD_RESULT)
|
||||||
GOOD_REDDIT_RESULT.tags = ['news', 'us']
|
GOOD_REDDIT_RESULT.tags = ['news', 'us']
|
||||||
GOOD_REDDIT_RESULT.ids_data = {"reddit_id": "t5_1nytpy", "reddit_username": "alexaimephotography", "fullname": "alexaimephotography", "image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e", "is_employee": "False", "is_nsfw": "False", "is_mod": "True", "is_following": "True", "has_user_profile": "True", "hide_from_robots": "False", "created_at": "2019-07-10 12:20:03", "total_karma": "53959", "post_karma": "52738"}
|
GOOD_REDDIT_RESULT.ids_data = {"reddit_id": "t5_1nytpy", "reddit_username": "alexaimephotography",
|
||||||
|
"fullname": "alexaimephotography",
|
||||||
|
"image": "https://styles.redditmedia.com/t5_1nytpy/styles/profileIcon_7vmhdwzd3g931.jpg?width=256&height=256&crop=256:256,smart&frame=1&s=4f355f16b4920844a3f4eacd4237a7bf76b2e97e",
|
||||||
|
"is_employee": "False", "is_nsfw": "False", "is_mod": "True", "is_following": "True",
|
||||||
|
"has_user_profile": "True", "hide_from_robots": "False",
|
||||||
|
"created_at": "2019-07-10 12:20:03", "total_karma": "53959", "post_karma": "52738"}
|
||||||
|
|
||||||
GOOD_IG_RESULT = copy.deepcopy(GOOD_RESULT)
|
GOOD_IG_RESULT = copy.deepcopy(GOOD_RESULT)
|
||||||
GOOD_IG_RESULT.tags = ['photo', 'global']
|
GOOD_IG_RESULT.tags = ['photo', 'global']
|
||||||
GOOD_IG_RESULT.ids_data = {"instagram_username": "alexaimephotography", "fullname": "Alexaimephotography", "id": "6828488620", "image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F", "bio": "Photographer \nChild of fine street arts", "external_url": "https://www.flickr.com/photos/alexaimephotography2020/"}
|
GOOD_IG_RESULT.ids_data = {"instagram_username": "alexaimephotography", "fullname": "Alexaimephotography",
|
||||||
|
"id": "6828488620",
|
||||||
|
"image": "https://scontent-hel3-1.cdninstagram.com/v/t51.2885-19/s320x320/95420076_1169632876707608_8741505804647006208_n.jpg?_nc_ht=scontent-hel3-1.cdninstagram.com&_nc_ohc=jd87OUGsX4MAX_Ym5GX&tp=1&oh=0f42badd68307ba97ec7fb1ef7b4bfd4&oe=601E5E6F",
|
||||||
|
"bio": "Photographer \nChild of fine street arts",
|
||||||
|
"external_url": "https://www.flickr.com/photos/alexaimephotography2020/"}
|
||||||
|
|
||||||
GOOD_TWITTER_RESULT = copy.deepcopy(GOOD_RESULT)
|
GOOD_TWITTER_RESULT = copy.deepcopy(GOOD_RESULT)
|
||||||
GOOD_TWITTER_RESULT.tags = ['social', 'us']
|
GOOD_TWITTER_RESULT.tags = ['social', 'us']
|
||||||
|
|
||||||
|
TEST = [('alexaimephotographycars', 'username', {
|
||||||
TEST = [('alexaimephotographycars', 'username', {'500px': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotographycars', 'ids_usernames': {'alexaimephotographycars': 'username', 'alexaimephotography': 'username', 'Alexaimephotogr': 'username'}, 'status': GOOD_500PX_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}}), ('alexaimephotography', 'username', {'500px': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/alexaimephotography', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_REDDIT_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/alexaimephotography', 'status': BAD_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/alexaimephotography', 'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_IG_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 29}}), ('Alexaimephotogr', 'username', {'500px': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://500px.com/', 'url_user': 'https://500px.com/p/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 200, 'is_similar': False, 'rank': 2981}, 'Reddit': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/', 'url_user': 'https://www.reddit.com/user/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 17}, 'Twitter': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/', 'url_user': 'https://twitter.com/Alexaimephotogr', 'status': GOOD_TWITTER_RESULT, 'http_status': 400, 'is_similar': False, 'rank': 55}, 'Instagram': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/', 'url_user': 'https://www.instagram.com/Alexaimephotogr', 'status':BAD_RESULT, 'http_status': 404, 'is_similar': False, 'rank': 29}})]
|
'500px': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
|
||||||
|
'url_user': 'https://500px.com/p/alexaimephotographycars',
|
||||||
|
'ids_usernames': {'alexaimephotographycars': 'username', 'alexaimephotography': 'username',
|
||||||
|
'Alexaimephotogr': 'username'}, 'status': GOOD_500PX_RESULT, 'http_status': 200,
|
||||||
|
'is_similar': False, 'rank': 2981},
|
||||||
|
'Reddit': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
|
||||||
|
'url_user': 'https://www.reddit.com/user/alexaimephotographycars', 'status': BAD_RESULT,
|
||||||
|
'http_status': 404, 'is_similar': False, 'rank': 17},
|
||||||
|
'Twitter': {'username': 'alexaimephotographycars', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
|
||||||
|
'url_user': 'https://twitter.com/alexaimephotographycars', 'status': BAD_RESULT, 'http_status': 400,
|
||||||
|
'is_similar': False, 'rank': 55},
|
||||||
|
'Instagram': {'username': 'alexaimephotographycars', 'parsing_enabled': True,
|
||||||
|
'url_main': 'https://www.instagram.com/',
|
||||||
|
'url_user': 'https://www.instagram.com/alexaimephotographycars', 'status': BAD_RESULT,
|
||||||
|
'http_status': 404, 'is_similar': False, 'rank': 29}}), ('alexaimephotography', 'username', {
|
||||||
|
'500px': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
|
||||||
|
'url_user': 'https://500px.com/p/alexaimephotography', 'status': BAD_RESULT, 'http_status': 200,
|
||||||
|
'is_similar': False, 'rank': 2981},
|
||||||
|
'Reddit': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
|
||||||
|
'url_user': 'https://www.reddit.com/user/alexaimephotography',
|
||||||
|
'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_REDDIT_RESULT, 'http_status': 200,
|
||||||
|
'is_similar': False, 'rank': 17},
|
||||||
|
'Twitter': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
|
||||||
|
'url_user': 'https://twitter.com/alexaimephotography', 'status': BAD_RESULT, 'http_status': 400,
|
||||||
|
'is_similar': False, 'rank': 55},
|
||||||
|
'Instagram': {'username': 'alexaimephotography', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/',
|
||||||
|
'url_user': 'https://www.instagram.com/alexaimephotography',
|
||||||
|
'ids_usernames': {'alexaimephotography': 'username'}, 'status': GOOD_IG_RESULT, 'http_status': 200,
|
||||||
|
'is_similar': False, 'rank': 29}}), ('Alexaimephotogr', 'username', {
|
||||||
|
'500px': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://500px.com/',
|
||||||
|
'url_user': 'https://500px.com/p/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 200,
|
||||||
|
'is_similar': False, 'rank': 2981},
|
||||||
|
'Reddit': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.reddit.com/',
|
||||||
|
'url_user': 'https://www.reddit.com/user/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404,
|
||||||
|
'is_similar': False, 'rank': 17},
|
||||||
|
'Twitter': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.twitter.com/',
|
||||||
|
'url_user': 'https://twitter.com/Alexaimephotogr', 'status': GOOD_TWITTER_RESULT, 'http_status': 400,
|
||||||
|
'is_similar': False, 'rank': 55},
|
||||||
|
'Instagram': {'username': 'Alexaimephotogr', 'parsing_enabled': True, 'url_main': 'https://www.instagram.com/',
|
||||||
|
'url_user': 'https://www.instagram.com/Alexaimephotogr', 'status': BAD_RESULT, 'http_status': 404,
|
||||||
|
'is_similar': False, 'rank': 29}})]
|
||||||
|
|
||||||
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
|
SUPPOSED_BRIEF = """Search by username alexaimephotographycars returned 1 accounts. Found target's other IDs: alexaimephotography, Alexaimephotogr. Search by username alexaimephotography returned 2 accounts. Search by username Alexaimephotogr returned 1 accounts. Extended info extracted from 3 accounts."""
|
||||||
|
|
||||||
|
|||||||
@@ -86,6 +86,18 @@ def test_site_strip_engine_data():
|
|||||||
assert amperka_stripped.json == EXAMPLE_DB['sites']['Amperka']
|
assert amperka_stripped.json == EXAMPLE_DB['sites']['Amperka']
|
||||||
|
|
||||||
|
|
||||||
|
def test_site_strip_engine_data_with_site_prior_updates():
|
||||||
|
db = MaigretDatabase()
|
||||||
|
UPDATED_EXAMPLE_DB = dict(EXAMPLE_DB)
|
||||||
|
UPDATED_EXAMPLE_DB['sites']['Amperka']['absenceStrs'] = ["test"]
|
||||||
|
db.load_from_json(UPDATED_EXAMPLE_DB)
|
||||||
|
|
||||||
|
amperka = db.sites[0]
|
||||||
|
amperka_stripped = amperka.strip_engine_data()
|
||||||
|
|
||||||
|
assert amperka_stripped.json == UPDATED_EXAMPLE_DB['sites']['Amperka']
|
||||||
|
|
||||||
|
|
||||||
def test_saving_site_error():
|
def test_saving_site_error():
|
||||||
db = MaigretDatabase()
|
db = MaigretDatabase()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user