use workspace, prepare for config server and gui (#48)

Sijie.Sun
2024-04-04 10:33:53 +08:00
committed by GitHub
parent bb4ae71869
commit 4eb7efe5fc
77 changed files with 162 additions and 195 deletions
@@ -0,0 +1,382 @@
use std::{
collections::hash_map::DefaultHasher,
hash::{Hash, Hasher},
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
time::{Duration, SystemTime},
};
use crossbeam::atomic::AtomicCell;
use futures::Future;
use tokio::{
sync::{Mutex, RwLock},
task::JoinSet,
};
use tracing::Instrument;
use crate::{
common::PeerId,
peers::{peer_manager::PeerManager, rpc_service::PeerManagerRpcService},
rpc::{GetGlobalPeerMapRequest, GetGlobalPeerMapResponse},
};
use super::{
server::PeerCenterServer,
service::{GlobalPeerMap, PeerCenterService, PeerCenterServiceClient, PeerInfoForGlobalMap},
Digest, Error,
};
struct PeerCenterBase {
peer_mgr: Arc<PeerManager>,
tasks: Arc<Mutex<JoinSet<()>>>,
lock: Arc<Mutex<()>>,
}
static SERVICE_ID: u32 = 5;
struct PeriodicJobCtx<T> {
peer_mgr: Arc<PeerManager>,
center_peer: AtomicCell<PeerId>,
job_ctx: T,
}
impl PeerCenterBase {
pub async fn init(&self) -> Result<(), Error> {
self.peer_mgr.get_peer_rpc_mgr().run_service(
SERVICE_ID,
PeerCenterServer::new(self.peer_mgr.my_peer_id()).serve(),
);
Ok(())
}
async fn select_center_peer(peer_mgr: &Arc<PeerManager>) -> Option<PeerId> {
let peers = peer_mgr.list_routes().await;
if peers.is_empty() {
return None;
}
        // select the peer with the numerically smallest id as the center.
let mut min_peer = peer_mgr.my_peer_id();
for peer in peers.iter() {
let peer_id = peer.peer_id;
if peer_id < min_peer {
min_peer = peer_id;
}
}
Some(min_peer)
}
async fn init_periodic_job<
T: Send + Sync + 'static + Clone,
Fut: Future<Output = Result<u32, tarpc::client::RpcError>> + Send + 'static,
>(
&self,
job_ctx: T,
        job_fn: (impl Fn(PeerCenterServiceClient, Arc<PeriodicJobCtx<T>>) -> Fut + Send + Sync + 'static),
) -> () {
let my_peer_id = self.peer_mgr.my_peer_id();
let peer_mgr = self.peer_mgr.clone();
let lock = self.lock.clone();
self.tasks.lock().await.spawn(
async move {
                let ctx = Arc::new(PeriodicJobCtx {
peer_mgr: peer_mgr.clone(),
center_peer: AtomicCell::new(PeerId::default()),
job_ctx,
});
loop {
let Some(center_peer) = Self::select_center_peer(&peer_mgr).await else {
tracing::trace!("no center peer found, sleep 1 second");
tokio::time::sleep(Duration::from_secs(1)).await;
continue;
};
ctx.center_peer.store(center_peer.clone());
tracing::trace!(?center_peer, "run periodic job");
let rpc_mgr = peer_mgr.get_peer_rpc_mgr();
let _g = lock.lock().await;
let ret = rpc_mgr
.do_client_rpc_scoped(SERVICE_ID, center_peer, |c| async {
let client =
PeerCenterServiceClient::new(tarpc::client::Config::default(), c)
.spawn();
job_fn(client, ctx.clone()).await
})
.await;
drop(_g);
let Ok(sleep_time_ms) = ret else {
tracing::error!("periodic job to center server rpc failed: {:?}", ret);
tokio::time::sleep(Duration::from_secs(3)).await;
continue;
};
if sleep_time_ms > 0 {
tokio::time::sleep(Duration::from_millis(sleep_time_ms as u64)).await;
}
}
}
.instrument(tracing::info_span!("periodic_job", ?my_peer_id)),
);
}
pub fn new(peer_mgr: Arc<PeerManager>) -> Self {
PeerCenterBase {
peer_mgr,
tasks: Arc::new(Mutex::new(JoinSet::new())),
lock: Arc::new(Mutex::new(())),
}
}
}
pub struct PeerCenterInstanceService {
global_peer_map: Arc<RwLock<GlobalPeerMap>>,
global_peer_map_digest: Arc<RwLock<Digest>>,
}
#[tonic::async_trait]
impl crate::rpc::cli::peer_center_rpc_server::PeerCenterRpc for PeerCenterInstanceService {
async fn get_global_peer_map(
&self,
_request: tonic::Request<GetGlobalPeerMapRequest>,
) -> Result<tonic::Response<GetGlobalPeerMapResponse>, tonic::Status> {
let global_peer_map = self.global_peer_map.read().await.clone();
Ok(tonic::Response::new(GetGlobalPeerMapResponse {
            global_peer_map: global_peer_map.map.into_iter().collect(),
}))
}
}
pub struct PeerCenterInstance {
peer_mgr: Arc<PeerManager>,
client: Arc<PeerCenterBase>,
global_peer_map: Arc<RwLock<GlobalPeerMap>>,
global_peer_map_digest: Arc<RwLock<Digest>>,
}
impl PeerCenterInstance {
pub fn new(peer_mgr: Arc<PeerManager>) -> Self {
PeerCenterInstance {
peer_mgr: peer_mgr.clone(),
client: Arc::new(PeerCenterBase::new(peer_mgr.clone())),
global_peer_map: Arc::new(RwLock::new(GlobalPeerMap::new())),
global_peer_map_digest: Arc::new(RwLock::new(Digest::default())),
}
}
pub async fn init(&self) {
self.client.init().await.unwrap();
self.init_get_global_info_job().await;
self.init_report_peers_job().await;
}
async fn init_get_global_info_job(&self) {
struct Ctx {
global_peer_map: Arc<RwLock<GlobalPeerMap>>,
global_peer_map_digest: Arc<RwLock<Digest>>,
}
let ctx = Arc::new(Ctx {
global_peer_map: self.global_peer_map.clone(),
global_peer_map_digest: self.global_peer_map_digest.clone(),
});
self.client
.init_periodic_job(ctx, |client, ctx| async move {
let mut rpc_ctx = tarpc::context::current();
rpc_ctx.deadline = SystemTime::now() + Duration::from_secs(3);
let ret = client
.get_global_peer_map(
rpc_ctx,
ctx.job_ctx.global_peer_map_digest.read().await.clone(),
)
.await?;
let Ok(resp) = ret else {
tracing::error!(
"get global info from center server got error result: {:?}",
ret
);
return Ok(1000);
};
let Some(resp) = resp else {
return Ok(5000);
};
tracing::info!(
"get global info from center server: {:?}, digest: {:?}",
resp.global_peer_map,
resp.digest
);
*ctx.job_ctx.global_peer_map.write().await = resp.global_peer_map;
*ctx.job_ctx.global_peer_map_digest.write().await = resp.digest;
Ok(10000)
})
.await;
}
async fn init_report_peers_job(&self) {
struct Ctx {
service: PeerManagerRpcService,
need_send_peers: AtomicBool,
last_report_peers: Mutex<PeerInfoForGlobalMap>,
last_center_peer: AtomicCell<PeerId>,
}
let ctx = Arc::new(Ctx {
service: PeerManagerRpcService::new(self.peer_mgr.clone()),
need_send_peers: AtomicBool::new(true),
last_report_peers: Mutex::new(PeerInfoForGlobalMap::default()),
last_center_peer: AtomicCell::new(PeerId::default()),
});
self.client
.init_periodic_job(ctx, |client, ctx| async move {
let my_node_id = ctx.peer_mgr.my_peer_id();
                // report only when the local peer list keeps differing from the
                // last report across several 2-second checks (roughly 18 seconds)
let mut peers = PeerInfoForGlobalMap::default();
for _ in 1..10 {
peers = ctx.job_ctx.service.list_peers().await.into();
if ctx.center_peer.load() != ctx.job_ctx.last_center_peer.load() {
// if center peer changed, report peers immediately
break;
}
if peers == *ctx.job_ctx.last_report_peers.lock().await {
return Ok(3000);
}
tokio::time::sleep(Duration::from_secs(2)).await;
}
*ctx.job_ctx.last_report_peers.lock().await = peers.clone();
let mut hasher = DefaultHasher::new();
peers.hash(&mut hasher);
let peers = if ctx.job_ctx.need_send_peers.load(Ordering::Relaxed) {
Some(peers)
} else {
None
};
let mut rpc_ctx = tarpc::context::current();
rpc_ctx.deadline = SystemTime::now() + Duration::from_secs(3);
let ret = client
.report_peers(
rpc_ctx,
my_node_id.clone(),
peers,
hasher.finish() as Digest,
)
.await?;
if matches!(ret.as_ref().err(), Some(Error::DigestMismatch)) {
ctx.job_ctx.need_send_peers.store(true, Ordering::Relaxed);
return Ok(0);
} else if ret.is_err() {
tracing::error!("report peers to center server got error result: {:?}", ret);
return Ok(500);
}
ctx.job_ctx.last_center_peer.store(ctx.center_peer.load());
ctx.job_ctx.need_send_peers.store(false, Ordering::Relaxed);
Ok(3000)
})
.await;
}
pub fn get_rpc_service(&self) -> PeerCenterInstanceService {
PeerCenterInstanceService {
global_peer_map: self.global_peer_map.clone(),
global_peer_map_digest: self.global_peer_map_digest.clone(),
}
}
}
#[cfg(test)]
mod tests {
use std::ops::Deref;
use crate::{
peer_center::server::get_global_data,
peers::tests::{connect_peer_manager, create_mock_peer_manager, wait_route_appear},
};
use super::*;
#[tokio::test]
async fn test_peer_center_instance() {
let peer_mgr_a = create_mock_peer_manager().await;
let peer_mgr_b = create_mock_peer_manager().await;
let peer_mgr_c = create_mock_peer_manager().await;
let peer_center_a = PeerCenterInstance::new(peer_mgr_a.clone());
let peer_center_b = PeerCenterInstance::new(peer_mgr_b.clone());
let peer_center_c = PeerCenterInstance::new(peer_mgr_c.clone());
let peer_centers = vec![&peer_center_a, &peer_center_b, &peer_center_c];
for pc in peer_centers.iter() {
pc.init().await;
}
connect_peer_manager(peer_mgr_a.clone(), peer_mgr_b.clone()).await;
connect_peer_manager(peer_mgr_b.clone(), peer_mgr_c.clone()).await;
wait_route_appear(peer_mgr_a.clone(), peer_mgr_c.clone())
.await
.unwrap();
let center_peer = PeerCenterBase::select_center_peer(&peer_mgr_a)
.await
.unwrap();
let center_data = get_global_data(center_peer);
        // wait up to 60 seconds for center_data to hold records for all 3 peers
let now = std::time::Instant::now();
loop {
if center_data.read().await.global_peer_map.map.len() == 3 {
println!(
"center data ready, {:#?}",
center_data.read().await.global_peer_map
);
break;
}
if now.elapsed().as_secs() > 60 {
panic!("center data not ready");
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
let mut digest = None;
for pc in peer_centers.iter() {
let rpc_service = pc.get_rpc_service();
let now = std::time::Instant::now();
while now.elapsed().as_secs() < 10 {
if rpc_service.global_peer_map.read().await.map.len() == 3 {
break;
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
assert_eq!(rpc_service.global_peer_map.read().await.map.len(), 3);
println!("rpc service ready, {:#?}", rpc_service.global_peer_map);
if digest.is_none() {
digest = Some(rpc_service.global_peer_map_digest.read().await.clone());
} else {
let v = rpc_service.global_peer_map_digest.read().await;
assert_eq!(digest.as_ref().unwrap(), v.deref());
}
}
let global_digest = get_global_data(center_peer).read().await.digest.clone();
assert_eq!(digest.as_ref().unwrap(), &global_digest);
}
}
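The test above doubles as usage documentation: construct a PeerCenterInstance around a PeerManager, call init(), and hand the result of get_rpc_service() to a gRPC server. A minimal wiring sketch, assuming the usual tonic codegen layout (the PeerCenterRpcServer wrapper, the bind address, and the anyhow error handling are illustrative assumptions, not code from this commit):

// Hypothetical wiring, assuming tonic's generated PeerCenterRpcServer wrapper.
async fn serve_peer_center(peer_mgr: Arc<PeerManager>) -> anyhow::Result<()> {
    let peer_center = PeerCenterInstance::new(peer_mgr);
    // Registers the tarpc service and spawns the two periodic jobs.
    peer_center.init().await;
    // Expose the collected global peer map over gRPC for the CLI/GUI.
    let svc = crate::rpc::cli::peer_center_rpc_server::PeerCenterRpcServer::new(
        peer_center.get_rpc_service(),
    );
    tonic::transport::Server::builder()
        .add_service(svc)
        .serve("127.0.0.1:15888".parse()?)
        .await?;
    Ok(())
}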
@@ -0,0 +1,20 @@
// peer_center collects peer info onto a single peer node (the center).
// the center node is selected with the following rules:
// 1. it has the smallest peer id
// 2. TODO: it has the allow_to_be_center peer feature
// the peer center is not guaranteed to be stable and can change when peers enter or leave.
// it is used to reduce the cost of exchanging info between peers.
pub mod instance;
mod server;
mod service;
#[derive(thiserror::Error, Debug, serde::Deserialize, serde::Serialize)]
pub enum Error {
#[error("Digest not match, need provide full peer info to center server.")]
DigestMismatch,
#[error("Not center server")]
NotCenterServer,
}
pub type Digest = u64;
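Digest is the backbone of the exchange: both sides hash their view of the peer map with DefaultHasher and only ship the full map when the hashes differ. A self-contained sketch of that idea (the PeerMap alias is a simplified stand-in for GlobalPeerMap, used here only for illustration):

use std::collections::{hash_map::DefaultHasher, BTreeMap};
use std::hash::{Hash, Hasher};

type Digest = u64;
// Stand-in for the real map types; BTreeMap iterates in key order,
// so two equal maps always produce the same digest.
type PeerMap = BTreeMap<u32, String>;

fn digest_of(map: &PeerMap) -> Digest {
    let mut hasher = DefaultHasher::new();
    map.hash(&mut hasher);
    hasher.finish()
}

fn main() {
    let mut local = PeerMap::new();
    local.insert(1, "peer-a".into());
    let remote = local.clone();
    // Equal views: the center server can answer `None` instead of re-sending the map.
    assert_eq!(digest_of(&local), digest_of(&remote));
    local.insert(2, "peer-b".into());
    // Diverged views: a full transfer is required.
    assert_ne!(digest_of(&local), digest_of(&remote));
}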
@@ -0,0 +1,152 @@
use std::{
hash::{Hash, Hasher},
sync::Arc,
};
use dashmap::DashMap;
use once_cell::sync::Lazy;
use tokio::{sync::RwLock, task::JoinSet};
use crate::common::PeerId;
use super::{
service::{GetGlobalPeerMapResponse, GlobalPeerMap, PeerCenterService, PeerInfoForGlobalMap},
Digest, Error,
};
pub(crate) struct PeerCenterServerGlobalData {
pub global_peer_map: GlobalPeerMap,
pub digest: Digest,
pub update_time: std::time::Instant,
pub peer_update_time: DashMap<PeerId, std::time::Instant>,
}
impl PeerCenterServerGlobalData {
fn new() -> Self {
PeerCenterServerGlobalData {
global_peer_map: GlobalPeerMap::new(),
digest: Digest::default(),
update_time: std::time::Instant::now(),
peer_update_time: DashMap::new(),
}
}
}
// a globally unique registry, keyed by node id so multiple PeerCenterServers can coexist in one process (e.g. in tests)
pub(crate) static GLOBAL_DATA: Lazy<DashMap<PeerId, Arc<RwLock<PeerCenterServerGlobalData>>>> =
Lazy::new(DashMap::new);
pub(crate) fn get_global_data(node_id: PeerId) -> Arc<RwLock<PeerCenterServerGlobalData>> {
GLOBAL_DATA
.entry(node_id)
.or_insert_with(|| Arc::new(RwLock::new(PeerCenterServerGlobalData::new())))
.value()
.clone()
}
#[derive(Clone, Debug)]
pub struct PeerCenterServer {
    // every peer has its own server, so a per-struct DashMap is fine.
my_node_id: PeerId,
digest_map: DashMap<PeerId, Digest>,
tasks: Arc<JoinSet<()>>,
}
impl PeerCenterServer {
pub fn new(my_node_id: PeerId) -> Self {
let mut tasks = JoinSet::new();
tasks.spawn(async move {
loop {
tokio::time::sleep(std::time::Duration::from_secs(10)).await;
PeerCenterServer::clean_outdated_peer(my_node_id).await;
}
});
PeerCenterServer {
my_node_id,
digest_map: DashMap::new(),
tasks: Arc::new(tasks),
}
}
async fn clean_outdated_peer(my_node_id: PeerId) {
let data = get_global_data(my_node_id);
let mut locked_data = data.write().await;
let now = std::time::Instant::now();
let mut to_remove = Vec::new();
for kv in locked_data.peer_update_time.iter() {
if now.duration_since(*kv.value()).as_secs() > 20 {
to_remove.push(*kv.key());
}
}
for peer_id in to_remove {
locked_data.global_peer_map.map.remove(&peer_id);
locked_data.peer_update_time.remove(&peer_id);
}
}
}
#[tarpc::server]
impl PeerCenterService for PeerCenterServer {
#[tracing::instrument()]
async fn report_peers(
self,
_: tarpc::context::Context,
my_peer_id: PeerId,
peers: Option<PeerInfoForGlobalMap>,
digest: Digest,
) -> Result<(), Error> {
tracing::trace!("receive report_peers");
let data = get_global_data(self.my_node_id);
let mut locked_data = data.write().await;
locked_data
.peer_update_time
.insert(my_peer_id, std::time::Instant::now());
let old_digest = self.digest_map.get(&my_peer_id);
        // if the digest matches, there is nothing to update
if let Some(old_digest) = old_digest {
if *old_digest == digest {
return Ok(());
}
}
if peers.is_none() {
return Err(Error::DigestMismatch);
}
self.digest_map.insert(my_peer_id, digest);
locked_data
.global_peer_map
.map
.insert(my_peer_id, peers.unwrap());
let mut hasher = std::collections::hash_map::DefaultHasher::new();
locked_data.global_peer_map.map.hash(&mut hasher);
locked_data.digest = hasher.finish() as Digest;
locked_data.update_time = std::time::Instant::now();
Ok(())
}
async fn get_global_peer_map(
self,
_: tarpc::context::Context,
digest: Digest,
) -> Result<Option<GetGlobalPeerMapResponse>, Error> {
        let data = get_global_data(self.my_node_id);
        let locked_data = data.read().await;
        if digest == locked_data.digest {
            return Ok(None);
        }
Ok(Some(GetGlobalPeerMapResponse {
global_peer_map: locked_data.global_peer_map.clone(),
digest: locked_data.digest,
}))
}
}
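One detail of report_peers deserves emphasis: a client may send peers: None with only a digest, and the server answers Err(Error::DigestMismatch) whenever it has no matching digest on record, forcing the client to retry with the full map (which is what need_send_peers in instance.rs reacts to). A condensed, dependency-free replay of that handshake (the types here are simplified stand-ins; the real exchange goes through tarpc):

// Simplified stand-ins for the report_peers request shapes.
enum Report {
    DigestOnly(u64),
    Full(u64, Vec<u32>),
}

fn handle_report(known_digest: Option<u64>, req: &Report) -> Result<(), &'static str> {
    match req {
        // Digest matches the server's record: cheap heartbeat, nothing to do.
        Report::DigestOnly(d) if known_digest == Some(*d) => Ok(()),
        // Unknown digest without the map: ask the client for a full report.
        Report::DigestOnly(_) => Err("DigestMismatch"),
        // A full map is always accepted and its digest recorded.
        Report::Full(_, _) => Ok(()),
    }
}

fn main() {
    // First contact: a digest-only report is rejected...
    assert!(handle_report(None, &Report::DigestOnly(42)).is_err());
    // ...so the client immediately retries with the full peer list.
    assert!(handle_report(None, &Report::Full(42, vec![1, 2, 3])).is_ok());
    // Later heartbeats with an unchanged digest stay cheap.
    assert!(handle_report(Some(42), &Report::DigestOnly(42)).is_ok());
}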
@@ -0,0 +1,84 @@
use std::collections::BTreeMap;
use crate::{common::PeerId, rpc::DirectConnectedPeerInfo};
use super::{Digest, Error};
use crate::rpc::PeerInfo;
pub type LatencyLevel = crate::rpc::cli::LatencyLevel;
impl LatencyLevel {
pub const fn from_latency_ms(lat_ms: u32) -> Self {
if lat_ms < 10 {
LatencyLevel::VeryLow
} else if lat_ms < 50 {
LatencyLevel::Low
} else if lat_ms < 100 {
LatencyLevel::Normal
} else if lat_ms < 200 {
LatencyLevel::High
} else {
LatencyLevel::VeryHigh
}
}
}
pub type PeerInfoForGlobalMap = crate::rpc::cli::PeerInfoForGlobalMap;
impl From<Vec<PeerInfo>> for PeerInfoForGlobalMap {
fn from(peers: Vec<PeerInfo>) -> Self {
let mut peer_map = BTreeMap::new();
for peer in peers {
let min_lat = peer
.conns
.iter()
.map(|conn| conn.stats.as_ref().unwrap().latency_us)
.min()
.unwrap_or(0);
let dp_info = DirectConnectedPeerInfo {
latency_level: LatencyLevel::from_latency_ms(min_lat as u32 / 1000) as i32,
};
            // BTreeMap keeps entries ordered by peer id, so the hash result is stable
peer_map.insert(peer.peer_id, dp_info);
}
PeerInfoForGlobalMap {
direct_peers: peer_map,
}
}
}
// a global peer topology map; peers can use it to find the optimal path to other peers
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GlobalPeerMap {
pub map: BTreeMap<PeerId, PeerInfoForGlobalMap>,
}
impl GlobalPeerMap {
pub fn new() -> Self {
GlobalPeerMap {
map: BTreeMap::new(),
}
}
}
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub struct GetGlobalPeerMapResponse {
pub global_peer_map: GlobalPeerMap,
pub digest: Digest,
}
#[tarpc::service]
pub trait PeerCenterService {
    // report to the center server which peers are directly connected to me.
    // digest is a hash of the current peer map; if it does not match the server's record, the whole map must be transferred.
async fn report_peers(
my_peer_id: PeerId,
peers: Option<PeerInfoForGlobalMap>,
digest: Digest,
) -> Result<(), Error>;
async fn get_global_peer_map(digest: Digest)
-> Result<Option<GetGlobalPeerMapResponse>, Error>;
}