Accelerated start time & fixed some false positives

This commit is contained in:
Soxoj
2021-10-31 18:25:01 +03:00
parent aad862b2ed
commit 6480eebbdf
7 changed files with 57 additions and 24 deletions
+4
View File
@@ -2,6 +2,10 @@
## [Unreleased] ## [Unreleased]
## [0.3.1] - 2021-10-31
* fixed false positives
* made Maigret start 3 times faster
## [0.3.0] - 2021-06-02 ## [0.3.0] - 2021-06-02
* added support of Tor and I2P sites * added support of Tor and I2P sites
* added experimental DNS checking feature * added experimental DNS checking feature
+1 -1
View File
@@ -1,3 +1,3 @@
"""Maigret version file""" """Maigret version file"""
__version__ = '0.3.0' __version__ = '0.3.1'
+11 -6
View File
@@ -13,12 +13,11 @@ import tqdm
from typing import Tuple, Optional, Dict, List from typing import Tuple, Optional, Dict, List
from urllib.parse import quote from urllib.parse import quote
import aiohttp
import aiodns import aiodns
import tqdm.asyncio import tqdm.asyncio
from aiohttp_socks import ProxyConnector
from python_socks import _errors as proxy_errors from python_socks import _errors as proxy_errors
from socid_extractor import extract from socid_extractor import extract
from aiohttp import TCPConnector, ClientSession, http_exceptions
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
from .activation import ParsingActivator, import_aiohttp_cookies from .activation import ParsingActivator, import_aiohttp_cookies
@@ -60,12 +59,15 @@ class SimpleAiohttpChecker(CheckerBase):
cookie_jar = kwargs.get('cookie_jar') cookie_jar = kwargs.get('cookie_jar')
self.logger = kwargs.get('logger', Mock()) self.logger = kwargs.get('logger', Mock())
# moved here to speed up the launch of Maigret
from aiohttp_socks import ProxyConnector
# make http client session # make http client session
connector = ( connector = (
ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False) ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
) )
connector.verify_ssl = False connector.verify_ssl = False
self.session = aiohttp.ClientSession( self.session = ClientSession(
connector=connector, trust_env=True, cookie_jar=cookie_jar connector=connector, trust_env=True, cookie_jar=cookie_jar
) )
@@ -113,7 +115,7 @@ class SimpleAiohttpChecker(CheckerBase):
error = CheckError("Connecting failure", str(e)) error = CheckError("Connecting failure", str(e))
except ServerDisconnectedError as e: except ServerDisconnectedError as e:
error = CheckError("Server disconnected", str(e)) error = CheckError("Server disconnected", str(e))
except aiohttp.http_exceptions.BadHttpMessage as e: except http_exceptions.BadHttpMessage as e:
error = CheckError("HTTP", str(e)) error = CheckError("HTTP", str(e))
except proxy_errors.ProxyError as e: except proxy_errors.ProxyError as e:
error = CheckError("Proxy", str(e)) error = CheckError("Proxy", str(e))
@@ -139,9 +141,12 @@ class ProxiedAiohttpChecker(SimpleAiohttpChecker):
cookie_jar = kwargs.get('cookie_jar') cookie_jar = kwargs.get('cookie_jar')
self.logger = kwargs.get('logger', Mock()) self.logger = kwargs.get('logger', Mock())
# moved here to speed up the launch of Maigret
from aiohttp_socks import ProxyConnector
connector = ProxyConnector.from_url(proxy) connector = ProxyConnector.from_url(proxy)
connector.verify_ssl = False connector.verify_ssl = False
self.session = aiohttp.ClientSession( self.session = ClientSession(
connector=connector, trust_env=True, cookie_jar=cookie_jar connector=connector, trust_env=True, cookie_jar=cookie_jar
) )
+7 -5
View File
@@ -1,7 +1,6 @@
""" """
Maigret main module Maigret main module
""" """
import aiohttp
import asyncio import asyncio
import logging import logging
import os import os
@@ -10,8 +9,7 @@ import platform
from argparse import ArgumentParser, RawDescriptionHelpFormatter from argparse import ArgumentParser, RawDescriptionHelpFormatter
from typing import List, Tuple from typing import List, Tuple
import requests from socid_extractor import extract, parse
from socid_extractor import extract, parse, __version__ as socid_version
from .__version__ import __version__ from .__version__ import __version__
from .checking import ( from .checking import (
@@ -114,12 +112,16 @@ def extract_ids_from_results(results: QueryResultWrapper, db: MaigretDatabase) -
def setup_arguments_parser(): def setup_arguments_parser():
from aiohttp import __version__ as aiohttp_version
from requests import __version__ as requests_version
from socid_extractor import __version__ as socid_version
version_string = '\n'.join( version_string = '\n'.join(
[ [
f'%(prog)s {__version__}', f'%(prog)s {__version__}',
f'Socid-extractor: {socid_version}', f'Socid-extractor: {socid_version}',
f'Aiohttp: {aiohttp.__version__}', f'Aiohttp: {aiohttp_version}',
f'Requests: {requests.__version__}', f'Requests: {requests_version}',
f'Python: {platform.python_version()}', f'Python: {platform.python_version()}',
] ]
) )
+13 -4
View File
@@ -7,13 +7,9 @@ import os
from datetime import datetime from datetime import datetime
from typing import Dict, Any from typing import Dict, Any
import pycountry
import xmind import xmind
from dateutil.parser import parse as parse_datetime_str from dateutil.parser import parse as parse_datetime_str
from jinja2 import Template from jinja2 import Template
from xhtml2pdf import pisa
from pyvis.network import Network
import networkx as nx
from .checking import SUPPORTED_IDS from .checking import SUPPORTED_IDS
from .result import QueryStatus from .result import QueryStatus
@@ -78,6 +74,10 @@ def save_html_report(filename: str, context: dict):
def save_pdf_report(filename: str, context: dict): def save_pdf_report(filename: str, context: dict):
template, css = generate_report_template(is_pdf=True) template, css = generate_report_template(is_pdf=True)
filled_template = template.render(**context) filled_template = template.render(**context)
# moved here to speed up the launch of Maigret
from xhtml2pdf import pisa
with open(filename, "w+b") as f: with open(filename, "w+b") as f:
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css) pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
@@ -117,6 +117,9 @@ class MaigretGraph:
def save_graph_report(filename: str, username_results: list, db: MaigretDatabase): def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
# moved here to speed up the launch of Maigret
import networkx as nx
G = nx.Graph() G = nx.Graph()
graph = MaigretGraph(G) graph = MaigretGraph(G)
@@ -201,6 +204,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
[G.remove_node(node) for node in nodes_to_remove] [G.remove_node(node) for node in nodes_to_remove]
# moved here to speed up the launch of Maigret
from pyvis.network import Network
nt = Network(notebook=True, height="750px", width="100%") nt = Network(notebook=True, height="750px", width="100%")
nt.from_nx(G) nt.from_nx(G)
nt.show(filename) nt.show(filename)
@@ -254,6 +260,9 @@ def generate_report_context(username_results: list):
first_seen = None first_seen = None
# moved here to speed up the launch of Maigret
import pycountry
for username, id_type, results in username_results: for username, id_type, results in username_results:
found_accounts = 0 found_accounts = 0
new_ids = [] new_ids = []
+20 -7
View File
@@ -1159,7 +1159,8 @@
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"does not exist" "does not exist",
"This user has not filled out their profile page yet."
], ],
"alexaRank": 80, "alexaRank": 80,
"urlMain": "https://armchairgm.fandom.com/", "urlMain": "https://armchairgm.fandom.com/",
@@ -2039,7 +2040,11 @@
"ru", "ru",
"wiki" "wiki"
], ],
"checkType": "status_code", "checkType": "message",
"absenceStrs": [
"does not exist",
"\u042d\u0442\u043e\u0442 \u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a \u043f\u043e\u043a\u0430 \u043d\u0435 \u0437\u0430\u043f\u043e\u043b\u043d\u0438\u043b \u0441\u0432\u043e\u0439 \u043f\u0440\u043e\u0444\u0438\u043b\u044c."
],
"alexaRank": 80, "alexaRank": 80,
"urlMain": "https://bleach.fandom.com/ru", "urlMain": "https://bleach.fandom.com/ru",
"url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}", "url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}",
@@ -4459,7 +4464,8 @@
], ],
"checkType": "message", "checkType": "message",
"absenceStrs": [ "absenceStrs": [
"does not exist" "does not exist",
"This user has not filled out their profile page yet."
], ],
"alexaRank": 80, "alexaRank": 80,
"urlMain": "https://community.fandom.com", "urlMain": "https://community.fandom.com",
@@ -8720,6 +8726,7 @@
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"Metacafe": { "Metacafe": {
"disabled": true,
"tags": [ "tags": [
"in", "in",
"us" "us"
@@ -13071,7 +13078,7 @@
"us" "us"
], ],
"headers": { "headers": {
"authorization": "Bearer BQBtoXAQab7ErdN63dPUer1RqjDLcX2v54xE9AfGPwYnncjU8HS5PlFW5mJE2cgDEDImvT07Xcpjb_ggsww" "authorization": "Bearer BQB8QPkkvz_PhWGy4sSY4ijssYjumEHJgJJBFu3VX2Sm4XIoT9jp0eFZrYL3TayY4QZGHmMiz3BCPLcAth4"
}, },
"errors": { "errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn" "Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
@@ -14906,7 +14913,7 @@
"video" "video"
], ],
"headers": { "headers": {
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjU0MjEzMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.-F7S6fx7mold8Qhve4N3GjIv2Ue8RIaej4kXQUMBxpE" "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzU2OTI0NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.KZHo96wUe5__rTqZQqAWiJKPKOy2-sjyxRjhOuuhyEc"
}, },
"activation": { "activation": {
"url": "https://vimeo.com/_rv/viewer", "url": "https://vimeo.com/_rv/viewer",
@@ -19212,11 +19219,17 @@
"tags": [ "tags": [
"cn" "cn"
], ],
"checkType": "status_code", "checkType": "message",
"absenceStrs": [
"message\":\"Not Found\""
],
"presenseStrs": [
"- SegmentFault \u601d\u5426</title>"
],
"alexaRank": 2697, "alexaRank": 2697,
"urlMain": "https://segmentfault.com/", "urlMain": "https://segmentfault.com/",
"url": "https://segmentfault.com/u/{username}", "url": "https://segmentfault.com/u/{username}",
"usernameClaimed": "bule", "usernameClaimed": "john",
"usernameUnclaimed": "noonewouldeverusethis7" "usernameUnclaimed": "noonewouldeverusethis7"
}, },
"shadow-belgorod.ucoz.ru": { "shadow-belgorod.ucoz.ru": {
+1 -1
View File
@@ -11,7 +11,7 @@ with open('requirements.txt') as rf:
requires = rf.read().splitlines() requires = rf.read().splitlines()
setup(name='maigret', setup(name='maigret',
version='0.3.0', version='0.3.1',
description='Collect a dossier on a person by username from a huge number of sites', description='Collect a dossier on a person by username from a huge number of sites',
long_description=long_description, long_description=long_description,
long_description_content_type="text/markdown", long_description_content_type="text/markdown",