mirror of
https://github.com/soxoj/maigret.git
synced 2026-05-07 06:24:35 +00:00
Accelerated start time & fixed some false positives
This commit is contained in:
@@ -2,6 +2,10 @@
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.3.1] - 2021-10-31
|
||||||
|
* fixed false positives
|
||||||
|
* made maigret start 3 times faster
|
||||||
|
|
||||||
## [0.3.0] - 2021-06-02
|
## [0.3.0] - 2021-06-02
|
||||||
* added support of Tor and I2P sites
|
* added support of Tor and I2P sites
|
||||||
* added experimental DNS checking feature
|
* added experimental DNS checking feature
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
"""Maigret version file"""
|
"""Maigret version file"""
|
||||||
|
|
||||||
__version__ = '0.3.0'
|
__version__ = '0.3.1'
|
||||||
|
|||||||
+11
-6
@@ -13,12 +13,11 @@ import tqdm
|
|||||||
from typing import Tuple, Optional, Dict, List
|
from typing import Tuple, Optional, Dict, List
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
import aiodns
|
import aiodns
|
||||||
import tqdm.asyncio
|
import tqdm.asyncio
|
||||||
from aiohttp_socks import ProxyConnector
|
|
||||||
from python_socks import _errors as proxy_errors
|
from python_socks import _errors as proxy_errors
|
||||||
from socid_extractor import extract
|
from socid_extractor import extract
|
||||||
|
from aiohttp import TCPConnector, ClientSession, http_exceptions
|
||||||
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
|
from aiohttp.client_exceptions import ServerDisconnectedError, ClientConnectorError
|
||||||
|
|
||||||
from .activation import ParsingActivator, import_aiohttp_cookies
|
from .activation import ParsingActivator, import_aiohttp_cookies
|
||||||
@@ -60,12 +59,15 @@ class SimpleAiohttpChecker(CheckerBase):
|
|||||||
cookie_jar = kwargs.get('cookie_jar')
|
cookie_jar = kwargs.get('cookie_jar')
|
||||||
self.logger = kwargs.get('logger', Mock())
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
|
||||||
|
# moved here to speed up the launch of Maigret
|
||||||
|
from aiohttp_socks import ProxyConnector
|
||||||
|
|
||||||
# make http client session
|
# make http client session
|
||||||
connector = (
|
connector = (
|
||||||
ProxyConnector.from_url(proxy) if proxy else aiohttp.TCPConnector(ssl=False)
|
ProxyConnector.from_url(proxy) if proxy else TCPConnector(ssl=False)
|
||||||
)
|
)
|
||||||
connector.verify_ssl = False
|
connector.verify_ssl = False
|
||||||
self.session = aiohttp.ClientSession(
|
self.session = ClientSession(
|
||||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -113,7 +115,7 @@ class SimpleAiohttpChecker(CheckerBase):
|
|||||||
error = CheckError("Connecting failure", str(e))
|
error = CheckError("Connecting failure", str(e))
|
||||||
except ServerDisconnectedError as e:
|
except ServerDisconnectedError as e:
|
||||||
error = CheckError("Server disconnected", str(e))
|
error = CheckError("Server disconnected", str(e))
|
||||||
except aiohttp.http_exceptions.BadHttpMessage as e:
|
except http_exceptions.BadHttpMessage as e:
|
||||||
error = CheckError("HTTP", str(e))
|
error = CheckError("HTTP", str(e))
|
||||||
except proxy_errors.ProxyError as e:
|
except proxy_errors.ProxyError as e:
|
||||||
error = CheckError("Proxy", str(e))
|
error = CheckError("Proxy", str(e))
|
||||||
@@ -139,9 +141,12 @@ class ProxiedAiohttpChecker(SimpleAiohttpChecker):
|
|||||||
cookie_jar = kwargs.get('cookie_jar')
|
cookie_jar = kwargs.get('cookie_jar')
|
||||||
self.logger = kwargs.get('logger', Mock())
|
self.logger = kwargs.get('logger', Mock())
|
||||||
|
|
||||||
|
# moved here to speed up the launch of Maigret
|
||||||
|
from aiohttp_socks import ProxyConnector
|
||||||
|
|
||||||
connector = ProxyConnector.from_url(proxy)
|
connector = ProxyConnector.from_url(proxy)
|
||||||
connector.verify_ssl = False
|
connector.verify_ssl = False
|
||||||
self.session = aiohttp.ClientSession(
|
self.session = ClientSession(
|
||||||
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
connector=connector, trust_env=True, cookie_jar=cookie_jar
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
+7
-5
@@ -1,7 +1,6 @@
|
|||||||
"""
|
"""
|
||||||
Maigret main module
|
Maigret main module
|
||||||
"""
|
"""
|
||||||
import aiohttp
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -10,8 +9,7 @@ import platform
|
|||||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
import requests
|
from socid_extractor import extract, parse
|
||||||
from socid_extractor import extract, parse, __version__ as socid_version
|
|
||||||
|
|
||||||
from .__version__ import __version__
|
from .__version__ import __version__
|
||||||
from .checking import (
|
from .checking import (
|
||||||
@@ -114,12 +112,16 @@ def extract_ids_from_results(results: QueryResultWrapper, db: MaigretDatabase) -
|
|||||||
|
|
||||||
|
|
||||||
def setup_arguments_parser():
|
def setup_arguments_parser():
|
||||||
|
from aiohttp import __version__ as aiohttp_version
|
||||||
|
from requests import __version__ as requests_version
|
||||||
|
from socid_extractor import __version__ as socid_version
|
||||||
|
|
||||||
version_string = '\n'.join(
|
version_string = '\n'.join(
|
||||||
[
|
[
|
||||||
f'%(prog)s {__version__}',
|
f'%(prog)s {__version__}',
|
||||||
f'Socid-extractor: {socid_version}',
|
f'Socid-extractor: {socid_version}',
|
||||||
f'Aiohttp: {aiohttp.__version__}',
|
f'Aiohttp: {aiohttp_version}',
|
||||||
f'Requests: {requests.__version__}',
|
f'Requests: {requests_version}',
|
||||||
f'Python: {platform.python_version()}',
|
f'Python: {platform.python_version()}',
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|||||||
+13
-4
@@ -7,13 +7,9 @@ import os
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
import pycountry
|
|
||||||
import xmind
|
import xmind
|
||||||
from dateutil.parser import parse as parse_datetime_str
|
from dateutil.parser import parse as parse_datetime_str
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
from xhtml2pdf import pisa
|
|
||||||
from pyvis.network import Network
|
|
||||||
import networkx as nx
|
|
||||||
|
|
||||||
from .checking import SUPPORTED_IDS
|
from .checking import SUPPORTED_IDS
|
||||||
from .result import QueryStatus
|
from .result import QueryStatus
|
||||||
@@ -78,6 +74,10 @@ def save_html_report(filename: str, context: dict):
|
|||||||
def save_pdf_report(filename: str, context: dict):
|
def save_pdf_report(filename: str, context: dict):
|
||||||
template, css = generate_report_template(is_pdf=True)
|
template, css = generate_report_template(is_pdf=True)
|
||||||
filled_template = template.render(**context)
|
filled_template = template.render(**context)
|
||||||
|
|
||||||
|
# moved here to speed up the launch of Maigret
|
||||||
|
from xhtml2pdf import pisa
|
||||||
|
|
||||||
with open(filename, "w+b") as f:
|
with open(filename, "w+b") as f:
|
||||||
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
|
pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css)
|
||||||
|
|
||||||
@@ -117,6 +117,9 @@ class MaigretGraph:
|
|||||||
|
|
||||||
|
|
||||||
def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
|
def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
|
||||||
|
# moved here to speed up the launch of Maigret
|
||||||
|
import networkx as nx
|
||||||
|
|
||||||
G = nx.Graph()
|
G = nx.Graph()
|
||||||
graph = MaigretGraph(G)
|
graph = MaigretGraph(G)
|
||||||
|
|
||||||
@@ -201,6 +204,9 @@ def save_graph_report(filename: str, username_results: list, db: MaigretDatabase
|
|||||||
|
|
||||||
[G.remove_node(node) for node in nodes_to_remove]
|
[G.remove_node(node) for node in nodes_to_remove]
|
||||||
|
|
||||||
|
# moved here to speed up the launch of Maigret
|
||||||
|
from pyvis.network import Network
|
||||||
|
|
||||||
nt = Network(notebook=True, height="750px", width="100%")
|
nt = Network(notebook=True, height="750px", width="100%")
|
||||||
nt.from_nx(G)
|
nt.from_nx(G)
|
||||||
nt.show(filename)
|
nt.show(filename)
|
||||||
@@ -254,6 +260,9 @@ def generate_report_context(username_results: list):
|
|||||||
|
|
||||||
first_seen = None
|
first_seen = None
|
||||||
|
|
||||||
|
# moved here to speed up the launch of Maigret
|
||||||
|
import pycountry
|
||||||
|
|
||||||
for username, id_type, results in username_results:
|
for username, id_type, results in username_results:
|
||||||
found_accounts = 0
|
found_accounts = 0
|
||||||
new_ids = []
|
new_ids = []
|
||||||
|
|||||||
@@ -1159,7 +1159,8 @@
|
|||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
"does not exist"
|
"does not exist",
|
||||||
|
"This user has not filled out their profile page yet."
|
||||||
],
|
],
|
||||||
"alexaRank": 80,
|
"alexaRank": 80,
|
||||||
"urlMain": "https://armchairgm.fandom.com/",
|
"urlMain": "https://armchairgm.fandom.com/",
|
||||||
@@ -2039,7 +2040,11 @@
|
|||||||
"ru",
|
"ru",
|
||||||
"wiki"
|
"wiki"
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"checkType": "message",
|
||||||
|
"absenceStrs": [
|
||||||
|
"does not exist",
|
||||||
|
"\u042d\u0442\u043e\u0442 \u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a \u043f\u043e\u043a\u0430 \u043d\u0435 \u0437\u0430\u043f\u043e\u043b\u043d\u0438\u043b \u0441\u0432\u043e\u0439 \u043f\u0440\u043e\u0444\u0438\u043b\u044c."
|
||||||
|
],
|
||||||
"alexaRank": 80,
|
"alexaRank": 80,
|
||||||
"urlMain": "https://bleach.fandom.com/ru",
|
"urlMain": "https://bleach.fandom.com/ru",
|
||||||
"url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}",
|
"url": "https://bleach.fandom.com/ru/wiki/%D0%A3%D1%87%D0%B0%D1%81%D1%82%D0%BD%D0%B8%D0%BA:{username}",
|
||||||
@@ -4459,7 +4464,8 @@
|
|||||||
],
|
],
|
||||||
"checkType": "message",
|
"checkType": "message",
|
||||||
"absenceStrs": [
|
"absenceStrs": [
|
||||||
"does not exist"
|
"does not exist",
|
||||||
|
"This user has not filled out their profile page yet."
|
||||||
],
|
],
|
||||||
"alexaRank": 80,
|
"alexaRank": 80,
|
||||||
"urlMain": "https://community.fandom.com",
|
"urlMain": "https://community.fandom.com",
|
||||||
@@ -8720,6 +8726,7 @@
|
|||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"Metacafe": {
|
"Metacafe": {
|
||||||
|
"disabled": true,
|
||||||
"tags": [
|
"tags": [
|
||||||
"in",
|
"in",
|
||||||
"us"
|
"us"
|
||||||
@@ -13071,7 +13078,7 @@
|
|||||||
"us"
|
"us"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"authorization": "Bearer BQBtoXAQab7ErdN63dPUer1RqjDLcX2v54xE9AfGPwYnncjU8HS5PlFW5mJE2cgDEDImvT07Xcpjb_ggsww"
|
"authorization": "Bearer BQB8QPkkvz_PhWGy4sSY4ijssYjumEHJgJJBFu3VX2Sm4XIoT9jp0eFZrYL3TayY4QZGHmMiz3BCPLcAth4"
|
||||||
},
|
},
|
||||||
"errors": {
|
"errors": {
|
||||||
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
"Spotify is currently not available in your country.": "Access denied in your country, use proxy/vpn"
|
||||||
@@ -14906,7 +14913,7 @@
|
|||||||
"video"
|
"video"
|
||||||
],
|
],
|
||||||
"headers": {
|
"headers": {
|
||||||
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MjU0MjEzMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.-F7S6fx7mold8Qhve4N3GjIv2Ue8RIaej4kXQUMBxpE"
|
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE2MzU2OTI0NjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbH0.KZHo96wUe5__rTqZQqAWiJKPKOy2-sjyxRjhOuuhyEc"
|
||||||
},
|
},
|
||||||
"activation": {
|
"activation": {
|
||||||
"url": "https://vimeo.com/_rv/viewer",
|
"url": "https://vimeo.com/_rv/viewer",
|
||||||
@@ -19212,11 +19219,17 @@
|
|||||||
"tags": [
|
"tags": [
|
||||||
"cn"
|
"cn"
|
||||||
],
|
],
|
||||||
"checkType": "status_code",
|
"checkType": "message",
|
||||||
|
"absenceStrs": [
|
||||||
|
"message\":\"Not Found\""
|
||||||
|
],
|
||||||
|
"presenseStrs": [
|
||||||
|
"- SegmentFault \u601d\u5426</title>"
|
||||||
|
],
|
||||||
"alexaRank": 2697,
|
"alexaRank": 2697,
|
||||||
"urlMain": "https://segmentfault.com/",
|
"urlMain": "https://segmentfault.com/",
|
||||||
"url": "https://segmentfault.com/u/{username}",
|
"url": "https://segmentfault.com/u/{username}",
|
||||||
"usernameClaimed": "bule",
|
"usernameClaimed": "john",
|
||||||
"usernameUnclaimed": "noonewouldeverusethis7"
|
"usernameUnclaimed": "noonewouldeverusethis7"
|
||||||
},
|
},
|
||||||
"shadow-belgorod.ucoz.ru": {
|
"shadow-belgorod.ucoz.ru": {
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ with open('requirements.txt') as rf:
|
|||||||
requires = rf.read().splitlines()
|
requires = rf.read().splitlines()
|
||||||
|
|
||||||
setup(name='maigret',
|
setup(name='maigret',
|
||||||
version='0.3.0',
|
version='0.3.1',
|
||||||
description='Collect a dossier on a person by username from a huge number of sites',
|
description='Collect a dossier on a person by username from a huge number of sites',
|
||||||
long_description=long_description,
|
long_description=long_description,
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
|
|||||||
Reference in New Issue
Block a user