mirror of
https://github.com/EasyTier/EasyTier.git
synced 2026-05-07 10:14:35 +00:00
add stats metrics (#1207)
Support new CLI command `easytier-cli stats`. It's useful for finding out which components are consuming bandwidth.
This commit is contained in:
@@ -302,3 +302,26 @@ service PortForwardManageRpc {
|
||||
rpc RemovePortForward(RemovePortForwardRequest) returns (RemovePortForwardResponse);
|
||||
rpc ListPortForward(ListPortForwardRequest) returns (ListPortForwardResponse);
|
||||
}
|
||||
|
||||
// A single metric reading: the metric's name, an unsigned 64-bit value, and a
// set of string key/value labels attached to it.
message MetricSnapshot {
|
||||
string name = 1;
|
||||
uint64 value = 2;
|
||||
map<string, string> labels = 3;
|
||||
}
|
||||
|
||||
// Request message for StatsRpc.GetStats; it carries no fields.
message GetStatsRequest {}
|
||||
|
||||
// Response message for StatsRpc.GetStats: a list of metric snapshots.
message GetStatsResponse {
|
||||
repeated MetricSnapshot metrics = 1;
|
||||
}
|
||||
|
||||
// Request message for StatsRpc.GetPrometheusStats; it carries no fields.
message GetPrometheusStatsRequest {}
|
||||
|
||||
// Response message for StatsRpc.GetPrometheusStats: one string field.
// NOTE(review): presumably the metrics rendered in the Prometheus text
// exposition format -- confirm against the server-side implementation.
message GetPrometheusStatsResponse {
|
||||
string prometheus_text = 1;
|
||||
}
|
||||
|
||||
// RPC service for reading metrics. GetStats returns them as structured
// MetricSnapshot entries; GetPrometheusStats returns them as a single string.
service StatsRpc {
|
||||
rpc GetStats(GetStatsRequest) returns (GetStatsResponse);
|
||||
rpc GetPrometheusStats(GetPrometheusStatsRequest) returns (GetPrometheusStatsResponse);
|
||||
}
|
||||
|
||||
@@ -9,7 +9,8 @@ use crate::{
|
||||
tunnel::{packet_def::PacketType, ring::create_ring_tunnel_pair, Tunnel},
|
||||
};
|
||||
|
||||
use super::{client::Client, server::Server};
|
||||
use super::{client::Client, server::Server, service_registry::ServiceRegistry};
|
||||
use crate::common::stats_manager::StatsManager;
|
||||
|
||||
pub struct BidirectRpcManager {
|
||||
rpc_client: Client,
|
||||
@@ -38,6 +39,20 @@ impl BidirectRpcManager {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_stats_manager(stats_manager: Arc<StatsManager>) -> Self {
|
||||
Self {
|
||||
rpc_client: Client::new_with_stats_manager(stats_manager.clone()),
|
||||
rpc_server: Server::new_with_registry_and_stats_manager(Arc::new(ServiceRegistry::new()), stats_manager),
|
||||
|
||||
rx_timeout: None,
|
||||
error: Arc::new(Mutex::new(None)),
|
||||
tunnel: Mutex::new(None),
|
||||
running: Arc::new(AtomicBool::new(false)),
|
||||
|
||||
tasks: Mutex::new(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_rx_timeout(mut self, timeout: Option<std::time::Duration>) -> Self {
|
||||
self.rx_timeout = timeout;
|
||||
self
|
||||
|
||||
@@ -10,7 +10,10 @@ use tokio::task::JoinSet;
|
||||
use tokio::time::timeout;
|
||||
use tokio_stream::StreamExt;
|
||||
|
||||
use crate::common::PeerId;
|
||||
use crate::common::{
|
||||
stats_manager::{LabelSet, LabelType, MetricName, StatsManager},
|
||||
PeerId,
|
||||
};
|
||||
use crate::defer;
|
||||
use crate::proto::common::{
|
||||
CompressionAlgoPb, RpcCompressionInfo, RpcDescriptor, RpcPacket, RpcRequest, RpcResponse,
|
||||
@@ -66,6 +69,7 @@ pub struct Client {
|
||||
inflight_requests: InflightRequestTable,
|
||||
peer_info: PeerInfoTable,
|
||||
tasks: Mutex<JoinSet<()>>,
|
||||
stats_manager: Option<Arc<StatsManager>>,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
@@ -77,6 +81,19 @@ impl Client {
|
||||
inflight_requests: Arc::new(DashMap::new()),
|
||||
peer_info: Arc::new(DashMap::new()),
|
||||
tasks: Mutex::new(JoinSet::new()),
|
||||
stats_manager: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_stats_manager(stats_manager: Arc<StatsManager>) -> Self {
|
||||
let (ring_a, ring_b) = create_ring_tunnel_pair();
|
||||
Self {
|
||||
mpsc: Mutex::new(MpscTunnel::new(ring_a, None)),
|
||||
transport: Mutex::new(MpscTunnel::new(ring_b, None)),
|
||||
inflight_requests: Arc::new(DashMap::new()),
|
||||
peer_info: Arc::new(DashMap::new()),
|
||||
tasks: Mutex::new(JoinSet::new()),
|
||||
stats_manager: Some(stats_manager),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -168,6 +185,7 @@ impl Client {
|
||||
zc_packet_sender: MpscTunnelSender,
|
||||
inflight_requests: InflightRequestTable,
|
||||
peer_info: PeerInfoTable,
|
||||
stats_manager: Option<Arc<StatsManager>>,
|
||||
_phan: PhantomData<F>,
|
||||
}
|
||||
|
||||
@@ -196,6 +214,7 @@ impl Client {
|
||||
method: <Self::Descriptor as ServiceDescriptor>::Method,
|
||||
input: bytes::Bytes,
|
||||
) -> Result<bytes::Bytes> {
|
||||
let start_time = std::time::Instant::now();
|
||||
let transaction_id = CUR_TID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
let (tx, mut rx) = mpsc::unbounded_channel();
|
||||
let key = InflightRequestKey {
|
||||
@@ -203,6 +222,13 @@ impl Client {
|
||||
to_peer_id: self.to_peer_id,
|
||||
transaction_id,
|
||||
};
|
||||
let desc = self.service_descriptor();
|
||||
let labels = LabelSet::new()
|
||||
.with_label_type(LabelType::NetworkName(self.domain_name.to_string()))
|
||||
.with_label_type(LabelType::SrcPeerId(self.from_peer_id))
|
||||
.with_label_type(LabelType::DstPeerId(self.to_peer_id))
|
||||
.with_label_type(LabelType::ServiceName(desc.name().to_string()))
|
||||
.with_label_type(LabelType::MethodName(method.name().to_string()));
|
||||
|
||||
defer!(self.inflight_requests.remove(&key););
|
||||
self.inflight_requests.insert(
|
||||
@@ -210,11 +236,16 @@ impl Client {
|
||||
InflightRequest {
|
||||
sender: tx,
|
||||
merger: PacketMerger::new(),
|
||||
start_time: std::time::Instant::now(),
|
||||
start_time,
|
||||
},
|
||||
);
|
||||
|
||||
let desc = self.service_descriptor();
|
||||
// Record RPC client TX stats
|
||||
if let Some(ref stats_manager) = self.stats_manager {
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcClientTx, labels.clone())
|
||||
.inc();
|
||||
}
|
||||
|
||||
let rpc_desc = RpcDescriptor {
|
||||
domain_name: self.domain_name.clone(),
|
||||
@@ -281,12 +312,44 @@ impl Client {
|
||||
let rpc_resp = RpcResponse::decode(Bytes::from(rpc_packet.body))?;
|
||||
|
||||
if let Some(err) = &rpc_resp.error {
|
||||
// Record RPC error stats
|
||||
if let Some(ref stats_manager) = self.stats_manager {
|
||||
let labels = labels
|
||||
.clone()
|
||||
.with_label_type(LabelType::ErrorType(format!("{:?}", err.error_kind)))
|
||||
.with_label_type(LabelType::Status("error".to_string()));
|
||||
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcErrors, labels.clone())
|
||||
.inc();
|
||||
|
||||
let duration_ms = start_time.elapsed().as_millis() as u64;
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcDuration, labels)
|
||||
.add(duration_ms);
|
||||
}
|
||||
return Err(err.into());
|
||||
}
|
||||
|
||||
let raw_output = Bytes::from(rpc_resp.response.clone());
|
||||
ctrl.set_raw_output(raw_output.clone());
|
||||
|
||||
// Record RPC client RX and duration stats
|
||||
if let Some(ref stats_manager) = self.stats_manager {
|
||||
let labels = labels
|
||||
.clone()
|
||||
.with_label_type(LabelType::Status("success".to_string()));
|
||||
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcClientRx, labels.clone())
|
||||
.inc();
|
||||
|
||||
let duration_ms = start_time.elapsed().as_millis() as u64;
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcDuration, labels)
|
||||
.add(duration_ms);
|
||||
}
|
||||
|
||||
Ok(raw_output)
|
||||
}
|
||||
}
|
||||
@@ -298,6 +361,7 @@ impl Client {
|
||||
zc_packet_sender: self.mpsc.lock().unwrap().get_sink(),
|
||||
inflight_requests: self.inflight_requests.clone(),
|
||||
peer_info: self.peer_info.clone(),
|
||||
stats_manager: self.stats_manager.clone(),
|
||||
_phan: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -10,7 +10,11 @@ use tokio::{task::JoinSet, time::timeout};
|
||||
use tokio_stream::StreamExt;
|
||||
|
||||
use crate::{
|
||||
common::{join_joinset_background, PeerId},
|
||||
common::{
|
||||
join_joinset_background,
|
||||
stats_manager::{LabelSet, LabelType, MetricName, StatsManager},
|
||||
PeerId,
|
||||
},
|
||||
proto::{
|
||||
common::{
|
||||
self, CompressionAlgoPb, RpcCompressionInfo, RpcPacket, RpcRequest, RpcResponse,
|
||||
@@ -46,6 +50,7 @@ pub struct Server {
|
||||
|
||||
tasks: Arc<Mutex<JoinSet<()>>>,
|
||||
packet_mergers: Arc<DashMap<PacketMergerKey, PacketMerger>>,
|
||||
stats_manager: Option<Arc<StatsManager>>,
|
||||
}
|
||||
|
||||
impl Server {
|
||||
@@ -62,6 +67,23 @@ impl Server {
|
||||
transport: Mutex::new(MpscTunnel::new(ring_b, None)),
|
||||
tasks: Arc::new(Mutex::new(JoinSet::new())),
|
||||
packet_mergers: Arc::new(DashMap::new()),
|
||||
stats_manager: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_registry_and_stats_manager(
|
||||
registry: Arc<ServiceRegistry>,
|
||||
stats_manager: Arc<StatsManager>,
|
||||
) -> Self {
|
||||
let (ring_a, ring_b) = create_ring_tunnel_pair();
|
||||
|
||||
Self {
|
||||
registry,
|
||||
mpsc: Mutex::new(Some(MpscTunnel::new(ring_a, None))),
|
||||
transport: Mutex::new(MpscTunnel::new(ring_b, None)),
|
||||
tasks: Arc::new(Mutex::new(JoinSet::new())),
|
||||
packet_mergers: Arc::new(DashMap::new()),
|
||||
stats_manager: Some(stats_manager),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -85,6 +107,7 @@ impl Server {
|
||||
|
||||
let packet_merges = self.packet_mergers.clone();
|
||||
let reg = self.registry.clone();
|
||||
let stats_manager = self.stats_manager.clone();
|
||||
let t = Arc::downgrade(&tasks);
|
||||
let tunnel_info = mpsc.tunnel_info();
|
||||
tasks.lock().unwrap().spawn(async move {
|
||||
@@ -133,6 +156,7 @@ impl Server {
|
||||
packet,
|
||||
reg.clone(),
|
||||
tunnel_info.clone(),
|
||||
stats_manager.clone(),
|
||||
));
|
||||
}
|
||||
Ok(None) => {}
|
||||
@@ -189,12 +213,27 @@ impl Server {
|
||||
packet: RpcPacket,
|
||||
reg: Arc<ServiceRegistry>,
|
||||
tunnel_info: Option<TunnelInfo>,
|
||||
stats_manager: Option<Arc<StatsManager>>,
|
||||
) {
|
||||
let from_peer = packet.from_peer;
|
||||
let to_peer = packet.to_peer;
|
||||
let transaction_id = packet.transaction_id;
|
||||
let trace_id = packet.trace_id;
|
||||
let desc = packet.descriptor.clone().unwrap();
|
||||
let method_name = reg.get_method_name(&desc).unwrap_or("<Nil>".to_owned());
|
||||
let labels = LabelSet::new()
|
||||
.with_label_type(LabelType::NetworkName(desc.domain_name.to_string()))
|
||||
.with_label_type(LabelType::SrcPeerId(from_peer))
|
||||
.with_label_type(LabelType::DstPeerId(to_peer))
|
||||
.with_label_type(LabelType::ServiceName(desc.service_name.to_string()))
|
||||
.with_label_type(LabelType::MethodName(method_name));
|
||||
|
||||
// Record RPC server RX stats
|
||||
if let Some(ref stats_manager) = stats_manager {
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcServerRx, labels.clone())
|
||||
.inc();
|
||||
}
|
||||
|
||||
let mut resp_msg = RpcResponse::default();
|
||||
let now = std::time::Instant::now();
|
||||
@@ -205,9 +244,41 @@ impl Server {
|
||||
match &resp_bytes {
|
||||
Ok(r) => {
|
||||
resp_msg.response = r.clone().into();
|
||||
|
||||
// Record successful RPC server TX and duration stats
|
||||
if let Some(ref stats_manager) = stats_manager {
|
||||
let labels = labels
|
||||
.clone()
|
||||
.with_label_type(LabelType::Status("success".to_string()));
|
||||
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcServerTx, labels.clone())
|
||||
.inc();
|
||||
|
||||
let duration_ms = now.elapsed().as_millis() as u64;
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcDuration, labels)
|
||||
.add(duration_ms);
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
resp_msg.error = Some(err.into());
|
||||
|
||||
// Record RPC server error stats
|
||||
if let Some(ref stats_manager) = stats_manager {
|
||||
let labels = labels
|
||||
.clone()
|
||||
.with_label_type(LabelType::Status("error".to_string()));
|
||||
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcErrors, labels.clone())
|
||||
.inc();
|
||||
|
||||
let duration_ms = now.elapsed().as_millis() as u64;
|
||||
stats_manager
|
||||
.get_counter(MetricName::PeerRpcDuration, labels)
|
||||
.add(duration_ms);
|
||||
}
|
||||
}
|
||||
};
|
||||
resp_msg.runtime_us = now.elapsed().as_micros() as u64;
|
||||
|
||||
@@ -78,6 +78,14 @@ impl ServiceRegistry {
|
||||
self.table.insert(key, entry);
|
||||
}
|
||||
|
||||
pub fn get_method_name(&self, rpc_desc: &RpcDescriptor) -> Option<String> {
|
||||
let service_key = ServiceKey::from(rpc_desc);
|
||||
let entry = self.table.get(&service_key)?;
|
||||
let method_index = rpc_desc.method_index as u8;
|
||||
let method_name = entry.service.get_method_name(method_index).ok()?;
|
||||
Some(method_name)
|
||||
}
|
||||
|
||||
pub fn unregister<H: Handler<Controller = RpcController>>(
|
||||
&self,
|
||||
h: H,
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
//! Traits for defining generic RPC handlers.
|
||||
use crate::proto::rpc_types::descriptor::MethodDescriptor;
|
||||
|
||||
use super::{
|
||||
controller::Controller,
|
||||
descriptor::{self, ServiceDescriptor},
|
||||
@@ -49,6 +51,8 @@ pub trait HandlerExt: Send + Sync + 'static {
|
||||
method_index: u8,
|
||||
input: bytes::Bytes,
|
||||
) -> super::error::Result<bytes::Bytes>;
|
||||
|
||||
/// Returns the name of the method identified by `method_index`, or an error
/// if the index cannot be resolved to a method.
fn get_method_name(&self, method_index: u8) -> super::error::Result<String>;
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -64,4 +68,10 @@ impl<C: Controller, T: Handler<Controller = C>> HandlerExt for T {
|
||||
let method = self.get_method_from_index(method_index)?;
|
||||
self.call(ctrl, method, input).await
|
||||
}
|
||||
|
||||
/// Resolves `method_index` to a method and returns that method's name as an
/// owned string. A failed index lookup is propagated as the error.
fn get_method_name(&self, method_index: u8) -> super::error::Result<String> {
    Ok(self.get_method_from_index(method_index)?.name().to_string())
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user