add stats metrics (#1207)

Support a new CLI command, `easytier-cli stats`.

The command is useful for finding out which components are consuming bandwidth.
This commit is contained in:
Sijie.Sun
2025-08-09 00:06:35 +08:00
committed by GitHub
parent efa17a7c10
commit 8cdb27d43d
15 changed files with 1442 additions and 19 deletions
@@ -25,6 +25,7 @@ use crate::{
error::Error,
global_ctx::{ArcGlobalCtx, GlobalCtx, GlobalCtxEvent, NetworkIdentity},
join_joinset_background,
stats_manager::{LabelSet, LabelType, MetricName, StatsManager},
token_bucket::TokenBucket,
PeerId,
},
@@ -75,6 +76,8 @@ struct ForeignNetworkEntry {
peer_center: Arc<PeerCenterInstance>,
stats_mgr: Arc<StatsManager>,
tasks: Mutex<JoinSet<()>>,
pub lock: Mutex<()>,
@@ -89,6 +92,7 @@ impl ForeignNetworkEntry {
relay_data: bool,
pm_packet_sender: PacketRecvChan,
) -> Self {
let stats_mgr = global_ctx.stats_manager().clone();
let foreign_global_ctx = Self::build_foreign_global_ctx(&network, global_ctx.clone());
let (packet_sender, packet_recv) = create_packet_recv_chan();
@@ -141,6 +145,8 @@ impl ForeignNetworkEntry {
bps_limiter,
stats_mgr,
tasks: Mutex::new(JoinSet::new()),
peer_center,
@@ -297,8 +303,24 @@ impl ForeignNetworkEntry {
let network_name = self.network.network_name.clone();
let bps_limiter = self.bps_limiter.clone();
let label_set =
LabelSet::new().with_label_type(LabelType::NetworkName(network_name.clone()));
let forward_bytes = self
.stats_mgr
.get_counter(MetricName::TrafficBytesForwarded, label_set.clone());
let forward_packets = self
.stats_mgr
.get_counter(MetricName::TrafficPacketsForwarded, label_set.clone());
// Counters bumped for packets that are handed to the RPC channel below.
// Both use the `*SelfRx` metrics so that the byte count and the packet count
// describe the same traffic class; `TrafficPacketsRx` would pair self-RX
// bytes with a packet counter from a different class.
let rx_bytes = self
    .stats_mgr
    .get_counter(MetricName::TrafficBytesSelfRx, label_set.clone());
let rx_packets = self
    .stats_mgr
    .get_counter(MetricName::TrafficPacketsSelfRx, label_set.clone());
self.tasks.lock().await.spawn(async move {
while let Ok(zc_packet) = recv_packet_from_chan(&mut recv).await {
let buf_len = zc_packet.buf_len();
let Some(hdr) = zc_packet.peer_manager_header() else {
tracing::warn!("invalid packet, skip");
continue;
@@ -310,6 +332,8 @@ impl ForeignNetworkEntry {
|| hdr.packet_type == PacketType::RpcReq as u8
|| hdr.packet_type == PacketType::RpcResp as u8
{
rx_bytes.add(buf_len as u64);
rx_packets.inc();
rpc_sender.send(zc_packet).unwrap();
continue;
}
@@ -327,6 +351,9 @@ impl ForeignNetworkEntry {
}
}
forward_bytes.add(buf_len as u64);
forward_packets.inc();
let gateway_peer_id = peer_map
.get_gateway_peer_id(to_peer_id, NextHopPolicy::LeastHop)
.await;
+38
View File
@@ -8,6 +8,7 @@ use std::{
},
};
use arc_swap::ArcSwapOption;
use futures::{StreamExt, TryFutureExt};
use prost::Message;
@@ -27,6 +28,7 @@ use crate::{
defer,
error::Error,
global_ctx::ArcGlobalCtx,
stats_manager::{CounterHandle, LabelSet, LabelType, MetricName},
PeerId,
},
proto::{
@@ -85,6 +87,13 @@ impl PeerConnCloseNotify {
}
}
/// Traffic counter handles used by a `PeerConn`, grouped by direction.
///
/// The handles are obtained from the stats manager with a label set carrying
/// the connection's network name.
struct PeerConnCounter {
    /// Bytes of messages passed to `send_msg`.
    traffic_tx_bytes: CounterHandle,
    /// Number of messages passed to `send_msg`.
    traffic_tx_packets: CounterHandle,
    /// Bytes of packets read by the connection's receive task.
    traffic_rx_bytes: CounterHandle,
    /// Number of packets read by the connection's receive task.
    traffic_rx_packets: CounterHandle,
}
pub struct PeerConn {
conn_id: PeerConnId,
@@ -111,6 +120,8 @@ pub struct PeerConn {
latency_stats: Arc<WindowLatency>,
throughput: Arc<Throughput>,
loss_rate_stats: Arc<AtomicU32>,
counters: ArcSwapOption<PeerConnCounter>,
}
impl Debug for PeerConn {
@@ -164,6 +175,8 @@ impl PeerConn {
latency_stats: Arc::new(WindowLatency::new(15)),
throughput,
loss_rate_stats: Arc::new(AtomicU32::new(0)),
counters: ArcSwapOption::new(None),
}
}
@@ -362,6 +375,22 @@ impl PeerConn {
let ctrl_sender = self.ctrl_resp_sender.clone();
let conn_info_for_instrument = self.get_conn_info();
let stats_mgr = self.global_ctx.stats_manager();
let label_set = LabelSet::new().with_label_type(LabelType::NetworkName(
conn_info_for_instrument.network_name.clone(),
));
// Build the per-connection traffic counters once and share the same `Arc`
// between `self.counters` (read by `send_msg`) and the receive task spawned
// below, so nothing has to be loaded back out of the `ArcSwapOption`.
let counters = Arc::new(PeerConnCounter {
    traffic_tx_bytes: stats_mgr.get_counter(MetricName::TrafficBytesTx, label_set.clone()),
    traffic_rx_bytes: stats_mgr.get_counter(MetricName::TrafficBytesRx, label_set.clone()),
    traffic_tx_packets: stats_mgr
        .get_counter(MetricName::TrafficPacketsTx, label_set.clone()),
    traffic_rx_packets: stats_mgr
        .get_counter(MetricName::TrafficPacketsRx, label_set.clone()),
});
self.counters.store(Some(Arc::clone(&counters)));
self.tasks.spawn(
async move {
tracing::info!("start recving peer conn packet");
@@ -374,6 +403,10 @@ impl PeerConn {
}
let mut zc_packet = ret.unwrap();
counters.traffic_rx_bytes.add(zc_packet.buf_len() as u64);
counters.traffic_rx_packets.inc();
let Some(peer_mgr_hdr) = zc_packet.mut_peer_manager_header() else {
tracing::error!(
"unexpected packet: {:?}, cannot decode peer manager hdr",
@@ -436,6 +469,11 @@ impl PeerConn {
}
pub async fn send_msg(&self, msg: ZCPacket) -> Result<(), Error> {
let counters = self.counters.load();
if let Some(ref counters) = *counters {
counters.traffic_tx_bytes.add(msg.buf_len() as u64);
counters.traffic_tx_packets.inc();
}
Ok(self.sink.send(msg).await?)
}
+124 -8
View File
@@ -24,6 +24,7 @@ use crate::{
constants::EASYTIER_VERSION,
error::Error,
global_ctx::{ArcGlobalCtx, NetworkIdentity},
stats_manager::{CounterHandle, LabelSet, LabelType, MetricName},
stun::StunInfoCollectorTrait,
PeerId,
},
@@ -116,6 +117,13 @@ enum RouteAlgoInst {
None,
}
/// Counter handles for traffic that this node itself transmits.
///
/// All handles are labelled with this node's network name.
struct SelfTxCounters {
    /// Bytes of self-originated packets, measured after the compress/encrypt step.
    self_tx_bytes: CounterHandle,
    /// Number of self-originated packets.
    self_tx_packets: CounterHandle,
    /// Packet buffer length recorded before `try_compress_and_encrypt` runs.
    compress_tx_bytes_before: CounterHandle,
    /// Packet buffer length recorded after `try_compress_and_encrypt` runs.
    compress_tx_bytes_after: CounterHandle,
}
pub struct PeerManager {
my_peer_id: PeerId,
@@ -147,6 +155,8 @@ pub struct PeerManager {
reserved_my_peer_id_map: DashMap<String, PeerId>,
allow_loopback_tunnel: AtomicBool,
self_tx_counters: SelfTxCounters,
}
impl Debug for PeerManager {
@@ -214,7 +224,10 @@ impl PeerManager {
peer_rpc_tspt_sender,
encryptor: encryptor.clone(),
});
let peer_rpc_mgr = Arc::new(PeerRpcManager::new(rpc_tspt.clone()));
let peer_rpc_mgr = Arc::new(PeerRpcManager::new_with_stats_manager(
rpc_tspt.clone(),
global_ctx.stats_manager().clone(),
));
let route_algo_inst = match route_algo {
RouteAlgoType::Ospf => RouteAlgoInst::Ospf(PeerRoute::new(
@@ -246,6 +259,30 @@ impl PeerManager {
let exit_nodes = global_ctx.config.get_exit_nodes();
let stats_manager = global_ctx.stats_manager();
// Every self-TX counter shares one label set keyed by this node's network
// name, so build it once and clone it per counter.
let self_tx_labels =
    LabelSet::new().with_label_type(LabelType::NetworkName(global_ctx.get_network_name()));
let self_tx_counters = SelfTxCounters {
    self_tx_packets: stats_manager
        .get_counter(MetricName::TrafficPacketsSelfTx, self_tx_labels.clone()),
    self_tx_bytes: stats_manager
        .get_counter(MetricName::TrafficBytesSelfTx, self_tx_labels.clone()),
    compress_tx_bytes_before: stats_manager
        .get_counter(MetricName::CompressionBytesTxBefore, self_tx_labels.clone()),
    compress_tx_bytes_after: stats_manager
        .get_counter(MetricName::CompressionBytesTxAfter, self_tx_labels),
};
PeerManager {
my_peer_id,
@@ -277,6 +314,8 @@ impl PeerManager {
reserved_my_peer_id_map: DashMap::new(),
allow_loopback_tunnel: AtomicBool::new(true),
self_tx_counters,
}
}
@@ -507,9 +546,24 @@ impl PeerManager {
let foreign_network_my_peer_id =
foreign_network_mgr.get_network_peer_id(&foreign_network_name);
let buf_len = packet.buf_len();
let stats_manager = peer_map.get_global_ctx().stats_manager().clone();
let label_set =
LabelSet::new().with_label_type(LabelType::NetworkName(foreign_network_name.clone()));
let add_counter = move |bytes_metric, packets_metric| {
stats_manager
.get_counter(bytes_metric, label_set.clone())
.add(buf_len as u64);
stats_manager.get_counter(packets_metric, label_set).inc();
};
// NOTICE: the to peer id is modified by the src from foreign network my peer id to the origin my peer id
if to_peer_id == my_peer_id {
// packet sent from other peer to me, extract the inner packet and forward it
add_counter(
MetricName::TrafficBytesForeignForwardRx,
MetricName::TrafficPacketsForeignForwardRx,
);
if let Err(e) = foreign_network_mgr
.send_msg_to_peer(
&foreign_network_name,
@@ -540,6 +594,11 @@ impl PeerManager {
return Err(packet);
};
add_counter(
MetricName::TrafficBytesForeignForwardTx,
MetricName::TrafficPacketsForeignForwardTx,
);
// modify the to_peer id from foreign network my peer id to the origin my peer id
packet
.mut_peer_manager_header()
@@ -558,10 +617,13 @@ impl PeerManager {
"send_msg_directly failed when forward local generated foreign network packet"
);
}
Ok(())
} else {
// target is not me, forward it. try get origin peer id
add_counter(
MetricName::TrafficBytesForeignForwardForwarded,
MetricName::TrafficPacketsForeignForwardForwarded,
);
Err(packet)
}
}
@@ -577,6 +639,29 @@ impl PeerManager {
let compress_algo = self.data_compress_algo;
let acl_filter = self.global_ctx.get_acl_filter().clone();
let global_ctx = self.global_ctx.clone();
let stats_mgr = self.global_ctx.stats_manager().clone();
let label_set =
LabelSet::new().with_label_type(LabelType::NetworkName(global_ctx.get_network_name()));
let self_tx_bytes = self.self_tx_counters.self_tx_bytes.clone();
let self_tx_packets = self.self_tx_counters.self_tx_packets.clone();
let self_rx_bytes =
stats_mgr.get_counter(MetricName::TrafficBytesSelfRx, label_set.clone());
let self_rx_packets =
stats_mgr.get_counter(MetricName::TrafficPacketsSelfRx, label_set.clone());
let forward_tx_bytes =
stats_mgr.get_counter(MetricName::TrafficBytesForwarded, label_set.clone());
let forward_tx_packets =
stats_mgr.get_counter(MetricName::TrafficPacketsForwarded, label_set.clone());
let compress_tx_bytes_before = self.self_tx_counters.compress_tx_bytes_before.clone();
let compress_tx_bytes_after = self.self_tx_counters.compress_tx_bytes_after.clone();
let compress_rx_bytes_before =
stats_mgr.get_counter(MetricName::CompressionBytesRxBefore, label_set.clone());
let compress_rx_bytes_after =
stats_mgr.get_counter(MetricName::CompressionBytesRxAfter, label_set.clone());
self.tasks.lock().await.spawn(async move {
tracing::trace!("start_peer_recv");
while let Ok(ret) = recv_packet_from_chan(&mut recv).await {
@@ -587,6 +672,7 @@ impl PeerManager {
continue;
};
let buf_len = ret.buf_len();
let Some(hdr) = ret.mut_peer_manager_header() else {
tracing::warn!(?ret, "invalid packet, skip");
continue;
@@ -608,13 +694,24 @@ impl PeerManager {
hdr.forward_counter += 1;
if from_peer_id == my_peer_id
&& (hdr.packet_type == PacketType::Data as u8
if from_peer_id == my_peer_id {
compress_tx_bytes_before.add(buf_len as u64);
if hdr.packet_type == PacketType::Data as u8
|| hdr.packet_type == PacketType::KcpSrc as u8
|| hdr.packet_type == PacketType::KcpDst as u8)
{
let _ = Self::try_compress_and_encrypt(compress_algo, &encryptor, &mut ret)
.await;
|| hdr.packet_type == PacketType::KcpDst as u8
{
let _ =
Self::try_compress_and_encrypt(compress_algo, &encryptor, &mut ret)
.await;
}
compress_tx_bytes_after.add(ret.buf_len() as u64);
self_tx_bytes.add(ret.buf_len() as u64);
self_tx_packets.inc();
} else {
forward_tx_bytes.add(buf_len as u64);
forward_tx_packets.inc();
}
tracing::trace!(?to_peer_id, ?my_peer_id, "need forward");
@@ -629,12 +726,18 @@ impl PeerManager {
continue;
}
self_rx_bytes.add(buf_len as u64);
self_rx_packets.inc();
compress_rx_bytes_before.add(buf_len as u64);
let compressor = DefaultCompressor {};
if let Err(e) = compressor.decompress(&mut ret).await {
tracing::error!(?e, "decompress failed");
continue;
}
compress_rx_bytes_after.add(ret.buf_len() as u64);
if !acl_filter.process_packet_with_acl(
&ret,
true,
@@ -1053,8 +1156,16 @@ impl PeerManager {
return Ok(());
}
self.self_tx_counters
.compress_tx_bytes_before
.add(msg.buf_len() as u64);
Self::try_compress_and_encrypt(self.data_compress_algo, &self.encryptor, &mut msg).await?;
self.self_tx_counters
.compress_tx_bytes_after
.add(msg.buf_len() as u64);
let is_latency_first = self.global_ctx.get_flags().latency_first;
msg.mut_peer_manager_header()
.unwrap()
@@ -1077,6 +1188,11 @@ impl PeerManager {
.to_peer_id
.set(*peer_id);
self.self_tx_counters
.self_tx_bytes
.add(msg.buf_len() as u64);
self.self_tx_counters.self_tx_packets.inc();
if let Err(e) =
Self::send_msg_internal(&self.peers, &self.foreign_network_client, msg, *peer_id)
.await
+10 -1
View File
@@ -4,7 +4,7 @@ use futures::{SinkExt as _, StreamExt};
use tokio::task::JoinSet;
use crate::{
common::{error::Error, PeerId},
common::{error::Error, PeerId, stats_manager::StatsManager},
proto::rpc_impl::{self, bidirect::BidirectRpcManager},
tunnel::packet_def::ZCPacket,
};
@@ -47,6 +47,15 @@ impl PeerRpcManager {
}
}
/// Creates a `PeerRpcManager` on top of the transport `tspt`.
///
/// The bidirectional RPC manager is constructed with `stats_manager`; the
/// task set starts out empty.
pub fn new_with_stats_manager(
    tspt: impl PeerRpcManagerTransport,
    stats_manager: Arc<StatsManager>,
) -> Self {
    let bidirect_rpc = BidirectRpcManager::new_with_stats_manager(stats_manager);
    let tasks = Mutex::new(JoinSet::new());
    Self {
        // Kept inline so the boxed transport is typed by the field it initializes.
        tspt: Arc::new(Box::new(tspt)),
        bidirect_rpc,
        tasks,
    }
}
pub fn run(&self) {
let ret = self.bidirect_rpc.run_and_create_tunnel();
let (mut rx, mut tx) = ret.split();