Fixed false positives, updated networkx dep, some lint fixes (#894)

* Fixed false positives, updated networkx dep, some lint fixes

* Downgraded networkx version
This commit is contained in:
Soxoj
2023-04-16 18:24:29 +02:00
committed by GitHub
parent 0064fad85c
commit 0ad2cdef2c
5 changed files with 77 additions and 34 deletions
+12 -4
View File
@@ -529,7 +529,9 @@ def make_site_result(
async def check_site_for_username( async def check_site_for_username(
site, username, options: QueryOptions, logger, query_notify, *args, **kwargs site, username, options: QueryOptions, logger, query_notify, *args, **kwargs
) -> Tuple[str, QueryResultWrapper]: ) -> Tuple[str, QueryResultWrapper]:
default_result = make_site_result(site, username, options, logger, retry=kwargs.get('retry')) default_result = make_site_result(
site, username, options, logger, retry=kwargs.get('retry')
)
future = default_result.get("future") future = default_result.get("future")
if not future: if not future:
return site.name, default_result return site.name, default_result
@@ -667,8 +669,11 @@ async def maigret(
executor = AsyncioSimpleExecutor(logger=logger) executor = AsyncioSimpleExecutor(logger=logger)
else: else:
executor = AsyncioProgressbarQueueExecutor( executor = AsyncioProgressbarQueueExecutor(
logger=logger, in_parallel=max_connections, timeout=timeout + 0.5, logger=logger,
*args, **kwargs in_parallel=max_connections,
timeout=timeout + 0.5,
*args,
**kwargs,
) )
# make options objects for all the requests # make options objects for all the requests
@@ -710,7 +715,10 @@ async def maigret(
tasks_dict[sitename] = ( tasks_dict[sitename] = (
check_site_for_username, check_site_for_username,
[site, username, options, logger, query_notify], [site, username, options, logger, query_notify],
{'default': (sitename, default_result), 'retry': retries-attempts+1}, {
'default': (sitename, default_result),
'retry': retries - attempts + 1,
},
) )
cur_results = await executor.run(tasks_dict.values()) cur_results = await executor.run(tasks_dict.values())
+28 -15
View File
@@ -1255,8 +1255,14 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Arduino": { "Arduino": {
"checkType": "status_code", "checkType": "message",
"url": "https://create.arduino.cc/projecthub/{username}", "presenseStrs": [
"Arduino Project Hub</title>"
],
"absenceStrs": [
"<title>Arduino Project Hub</title>"
],
"url": "https://projecthub.arduino.cc/{username}",
"usernameClaimed": "uehkon", "usernameClaimed": "uehkon",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
@@ -1639,11 +1645,14 @@
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"\u0423\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d. \u041f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430, \u0432\u0432\u0435\u0434\u0438\u0442\u0435 \u0434\u0440\u0443\u0433\u043e\u0435 \u0438\u043c\u044f." "Go to the homepage"
],
"presenseStrs": [
"\u041f\u043e\u0441\u0442\u044b \u043e\u0442 "
], ],
"alexaRank": 8074009, "alexaRank": 8074009,
"urlMain": "https://automania.ru", "urlMain": "https://automania.ru",
"url": "https://automania.ru/forums/members/?username={username}", "url": "https://automania.ru/author/{username}/",
"usernameClaimed": "Bones", "usernameClaimed": "Bones",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
@@ -2024,7 +2033,7 @@
"This user page is currently not available" "This user page is currently not available"
], ],
"presenseStrs": [ "presenseStrs": [
"BentBox photos and videos" "id=\"followingUser\""
], ],
"url": "https://bentbox.co/{username}", "url": "https://bentbox.co/{username}",
"usernameClaimed": "uehkon", "usernameClaimed": "uehkon",
@@ -4269,7 +4278,7 @@
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"https://www.donationalerts.com/img/404.svg" "/img/404.svg"
], ],
"alexaRank": 19188, "alexaRank": 19188,
"urlMain": "https://www.donationalerts.com/", "urlMain": "https://www.donationalerts.com/",
@@ -6928,6 +6937,7 @@
}, },
"Gothic": { "Gothic": {
"urlSubpath": "/forum", "urlSubpath": "/forum",
"disabled": true,
"tags": [ "tags": [
"forum", "forum",
"ru" "ru"
@@ -14260,6 +14270,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Shikimori": { "Shikimori": {
"disabled": true,
"tags": [ "tags": [
"ru" "ru"
], ],
@@ -15554,7 +15565,7 @@
"regexCheck": "^[A-Za-z0-9_-]{3,16}$", "regexCheck": "^[A-Za-z0-9_-]{3,16}$",
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d" "404 \u2014 Not found"
], ],
"urlMain": "https://trashbox.ru/", "urlMain": "https://trashbox.ru/",
"url": "https://trashbox.ru/users/{username}", "url": "https://trashbox.ru/users/{username}",
@@ -17077,7 +17088,7 @@
"video" "video"
], ],
"headers": { "headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2NjI4NDkxODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.DAmAaaKixSd_WQ9-7PiTZDmyK61SHEYluYC-qdcJtkE" "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2ODE2NjIxODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.T_MPgWQBmH-KaXWYmfRPS8XdgmCHn7kTOtzDlCOBlQg"
}, },
"activation": { "activation": {
"url": "https://vimeo.com/_rv/viewer", "url": "https://vimeo.com/_rv/viewer",
@@ -29405,6 +29416,7 @@
"alexaRank": 153717 "alexaRank": 153717
}, },
"forums.imore.com": { "forums.imore.com": {
"disabled": true,
"urlMain": "https://forums.imore.com", "urlMain": "https://forums.imore.com",
"engine": "vBulletin", "engine": "vBulletin",
"usernameClaimed": "alex", "usernameClaimed": "alex",
@@ -31918,6 +31930,7 @@
"url": "https://discuss.kubernetes.io/u/{username}" "url": "https://discuss.kubernetes.io/u/{username}"
}, },
"discuss.newrelic.com": { "discuss.newrelic.com": {
"disabled": true,
"checkType": "status_code", "checkType": "status_code",
"usernameClaimed": "adam", "usernameClaimed": "adam",
"usernameUnclaimed": "noonewouldeverusethis7", "usernameUnclaimed": "noonewouldeverusethis7",
@@ -33913,7 +33926,7 @@
] ]
}, },
"lyricsTraining": { "lyricsTraining": {
"taggs":[ "tags": [
"music" "music"
], ],
"checkType": "message", "checkType": "message",
@@ -33928,7 +33941,7 @@
"usernameUnclaimed": "noonewouldeverusethis12" "usernameUnclaimed": "noonewouldeverusethis12"
}, },
"expoForum": { "expoForum": {
"taggs":[ "tags": [
"forum", "forum",
"coding" "coding"
], ],
@@ -33938,7 +33951,7 @@
"usernameUnclaimed": "noonewouldeverusethis12" "usernameUnclaimed": "noonewouldeverusethis12"
}, },
"rawg.io": { "rawg.io": {
"taggs":[ "tags": [
"gaming" "gaming"
], ],
"checkType": "status_code", "checkType": "status_code",
@@ -33947,7 +33960,7 @@
"usernameUnclaimed": "noonewouldeverusethis12" "usernameUnclaimed": "noonewouldeverusethis12"
}, },
"SchemeColor": { "SchemeColor": {
"taggs":[ "tags": [
"art", "art",
"design" "design"
], ],
@@ -33957,7 +33970,7 @@
"usernameUnclaimed": "noonewouldeverusethis12" "usernameUnclaimed": "noonewouldeverusethis12"
}, },
"aetherhub": { "aetherhub": {
"taggs":[ "tags": [
"gaming" "gaming"
], ],
"checkType": "status_code", "checkType": "status_code",
@@ -33966,7 +33979,7 @@
"usernameUnclaimed": "noonewouldeverusethis12" "usernameUnclaimed": "noonewouldeverusethis12"
}, },
"bugbounty": { "bugbounty": {
"taggs":[ "tags": [
"hacking" "hacking"
], ],
"checkType": "status_code", "checkType": "status_code",
@@ -33975,7 +33988,7 @@
"usernameUnclaimed": "noonewouldeverusethis12" "usernameUnclaimed": "noonewouldeverusethis12"
}, },
"universocraft": { "universocraft": {
"taggs":[ "tags": [
"gaming" "gaming"
], ],
"checkType": "message", "checkType": "message",
+21 -9
View File
@@ -455,31 +455,43 @@ class MaigretDatabase:
for tag in filter(lambda x: not is_country_tag(x), site.tags): for tag in filter(lambda x: not is_country_tag(x), site.tags):
tags[tag] = tags.get(tag, 0) + 1 tags[tag] = tags.get(tag, 0) + 1
enabled_count = total_count-disabled_count enabled_count = total_count - disabled_count
enabled_perc = round(100*enabled_count/total_count, 2) enabled_perc = round(100 * enabled_count / total_count, 2)
output += f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n" output += (
f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n"
)
checks_perc = round(100*message_checks_one_factor/enabled_count, 2) checks_perc = round(100 * message_checks_one_factor / enabled_count, 2)
output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n" output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n"
status_checks_perc = round(100*status_checks/enabled_count, 2) status_checks_perc = round(100 * status_checks / enabled_count, 2)
output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n" output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n"
output += f"False positive risk (total): {checks_perc+status_checks_perc:.2f}%\n\n" output += (
f"False positive risk (total): {checks_perc+status_checks_perc:.2f}%\n\n"
)
top_urls_count = 20 top_urls_count = 20
output += f"Top {top_urls_count} profile URLs:\n" output += f"Top {top_urls_count} profile URLs:\n"
for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[:top_urls_count]: for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[
:top_urls_count
]:
if count == 1: if count == 1:
break break
output += f"- ({count})\t`{url}`\n" if is_markdown else f"{count}\t{url}\n" output += f"- ({count})\t`{url}`\n" if is_markdown else f"{count}\t{url}\n"
top_tags_count = 20 top_tags_count = 20
output += f"\nTop {top_tags_count} tags:\n" output += f"\nTop {top_tags_count} tags:\n"
for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[:top_tags_count]: for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[
:top_tags_count
]:
mark = "" mark = ""
if tag not in self._tags: if tag not in self._tags:
mark = " (non-standard)" mark = " (non-standard)"
output += f"- ({count})\t`{tag}`{mark}\n" if is_markdown else f"{count}\t{tag}{mark}\n" output += (
f"- ({count})\t`{tag}`{mark}\n"
if is_markdown
else f"{count}\t{tag}{mark}\n"
)
return output return output
+15 -5
View File
@@ -36,6 +36,7 @@ class CloudflareSession:
async def close(self): async def close(self):
pass pass
class Submitter: class Submitter:
HEADERS = { HEADERS = {
"User-Agent": get_random_user_agent(), "User-Agent": get_random_user_agent(),
@@ -54,6 +55,7 @@ class Submitter:
self.logger = logger self.logger = logger
from aiohttp_socks import ProxyConnector from aiohttp_socks import ProxyConnector
proxy = self.args.proxy proxy = self.args.proxy
cookie_jar = None cookie_jar = None
if args.cookie_file: if args.cookie_file:
@@ -163,7 +165,9 @@ class Submitter:
fields['urlSubpath'] = f'/{subpath}' fields['urlSubpath'] = f'/{subpath}'
return fields return fields
async def detect_known_engine(self, url_exists, url_mainpage) -> [List[MaigretSite], str]: async def detect_known_engine(
self, url_exists, url_mainpage
) -> [List[MaigretSite], str]:
resp_text = '' resp_text = ''
try: try:
r = await self.session.get(url_mainpage) r = await self.session.get(url_mainpage)
@@ -366,9 +370,10 @@ class Submitter:
except KeyboardInterrupt: except KeyboardInterrupt:
print('Engine detect process is interrupted.') print('Engine detect process is interrupted.')
if 'cloudflare' in text.lower(): if 'cloudflare' in text.lower():
print('Cloudflare protection detected. I will use cloudscraper for futher work') print(
'Cloudflare protection detected. I will use cloudscraper for futher work'
)
# self.session = CloudflareSession() # self.session = CloudflareSession()
if not sites: if not sites:
@@ -376,11 +381,16 @@ class Submitter:
redirects = False redirects = False
if self.args.verbose: if self.args.verbose:
redirects = 'y' in input('Should we do redirects automatically? [yN] ').lower() redirects = (
'y' in input('Should we do redirects automatically? [yN] ').lower()
)
sites = [ sites = [
await self.check_features_manually( await self.check_features_manually(
url_exists, url_mainpage, cookie_file, redirects, url_exists,
url_mainpage,
cookie_file,
redirects,
) )
] ]
+1 -1
View File
@@ -33,7 +33,7 @@ webencodings==0.5.1
xhtml2pdf==0.2.8 xhtml2pdf==0.2.8
XMind==1.2.0 XMind==1.2.0
yarl==1.8.1 yarl==1.8.1
networkx==2.6 networkx==2.6.3
pyvis==0.2.1 pyvis==0.2.1
reportlab==3.6.12 reportlab==3.6.12
cloudscraper==1.2.66 cloudscraper==1.2.66