release dashmap memory (#1485)

2026-05-07 10:14:35 +00:00 · 2025-10-18 12:48:04 +08:00
parent 8f1786fa23
commit cc8f35787e
29 changed files with 339 additions and 187 deletions
@@ -3,7 +3,7 @@ use std::{
    net::{IpAddr, SocketAddr},
    str::FromStr as _,
    sync::Arc,
-    time::{Duration, SystemTime, UNIX_EPOCH},
+    time::{Duration, Instant, SystemTime, UNIX_EPOCH},
 };

 use crate::common::{config::ConfigLoader, global_ctx::ArcGlobalCtx, token_bucket::TokenBucket};
@@ -28,6 +28,12 @@ impl RateLimitKey {
    }
 }

+/// Value wrapper for rate limiters with last update timestamp
+pub struct RateLimitValue {
+    pub token_bucket: Arc<TokenBucket>,
+    pub last_update: Instant,
+}
+
 // Performance-optimized rule identifier to avoid string allocations
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum RuleId {
@@ -104,7 +110,7 @@ impl AclCacheKey {
 pub struct AclCacheEntry {
    pub action: Action,
    pub matched_rule: RuleId,
-    pub last_access: u64,
+    pub last_access: std::time::Instant,
    // New fields to track rule characteristics for proper cache behavior
    pub conn_track_key: Option<String>,
    pub rate_limit_keys: Vec<RateLimitKey>,
@@ -188,7 +194,7 @@ impl AclLogContext {

 pub type SharedState = (
    Arc<DashMap<String, ConnTrackEntry>>,
-    Arc<DashMap<RateLimitKey, Arc<TokenBucket>>>,
+    Arc<DashMap<RateLimitKey, RateLimitValue>>,
    Arc<DashMap<AclStatKey, u64>>,
 );

@@ -209,7 +215,7 @@ pub struct AclProcessor {
    conn_track: Arc<DashMap<String, ConnTrackEntry>>,

    // Rate limiting buckets per rule using TokenBucket with optimized keys
-    rate_limiters: Arc<DashMap<RateLimitKey, Arc<TokenBucket>>>,
+    rate_limiters: Arc<DashMap<RateLimitKey, RateLimitValue>>,

    // Rule lookup cache with LRU cleanup
    rule_cache: Arc<DashMap<AclCacheKey, AclCacheEntry>>,
@@ -234,7 +240,7 @@ impl AclProcessor {
    pub fn new_with_shared_state(
        acl_config: Acl,
        conn_track: Option<Arc<DashMap<String, ConnTrackEntry>>>,
-        rate_limiters: Option<Arc<DashMap<RateLimitKey, Arc<TokenBucket>>>>,
+        rate_limiters: Option<Arc<DashMap<RateLimitKey, RateLimitValue>>>,
        stats: Option<Arc<DashMap<AclStatKey, u64>>>,
    ) -> Self {
        let (inbound_rules, outbound_rules, forward_rules) = Self::build_rules(&acl_config);
@@ -261,7 +267,7 @@ impl AclProcessor {
            conn_track: conn_track.unwrap_or_else(|| Arc::new(DashMap::new())),
            rate_limiters: rate_limiters.unwrap_or_else(|| Arc::new(DashMap::new())),
            rule_cache: Arc::new(DashMap::new()), // Always start with fresh cache
-            cache_max_size: 10000,                // Limit cache to 10k entries
+            cache_max_size: 1024,                 // Limit cache to 1k entries
            cache_cleanup_interval: Duration::from_secs(20), // Cleanup every 5 minutes
            stats: stats.unwrap_or_else(|| Arc::new(DashMap::new())),
            tasks,
@@ -362,6 +368,7 @@ impl AclProcessor {

    /// Start periodic cache cleanup task
    fn start_cache_cleanup_task(&mut self) {
+        let rate_limiters = self.rate_limiters.clone();
        let rule_cache = self.rule_cache.clone();
        let cache_max_size = self.cache_max_size;
        let cleanup_interval = self.cache_cleanup_interval;
@@ -371,6 +378,10 @@ impl AclProcessor {
            loop {
                interval.tick().await;
                Self::cleanup_cache(&rule_cache, cache_max_size);
+                rule_cache.shrink_to_fit();
+
+                rate_limiters.retain(|_, v| v.last_update.elapsed() < cleanup_interval);
+                rate_limiters.shrink_to_fit();
            }
        });

@@ -380,19 +391,26 @@ impl AclProcessor {
            loop {
                interval.tick().await;
                Self::cleanup_expired_connections(conn_track.clone(), 60);
+                conn_track.shrink_to_fit();
            }
        });
    }

    /// Clean up cache using LRU strategy
    fn cleanup_cache(cache: &DashMap<AclCacheKey, AclCacheEntry>, max_size: usize) {
+        // remove cache not be used in last 15 second
+        let expired_timepoint = Instant::now()
+            .checked_sub(Duration::from_secs(15))
+            .unwrap_or(Instant::now());
+        cache.retain(|_, entry| entry.last_access > expired_timepoint);
+
        let current_size = cache.len();
        if current_size <= max_size {
            return;
        }

        // Remove oldest entries (LRU cleanup)
-        let mut entries: Vec<(AclCacheKey, u64)> = cache
+        let mut entries: Vec<(AclCacheKey, std::time::Instant)> = cache
            .iter()
            .map(|entry| (entry.key().clone(), entry.value().last_access))
            .collect();
@@ -472,10 +490,7 @@ impl AclProcessor {
        // If cache hit and can skip checks, return cached result
        if let Some(mut cached) = self.rule_cache.get_mut(&cache_key) {
            // Update last access time for LRU
-            cached.last_access = SystemTime::now()
-                .duration_since(UNIX_EPOCH)
-                .unwrap()
-                .as_secs();
+            cached.last_access = Instant::now();

            self.increment_stat(AclStatKey::CacheHits);
            return self.process_packet_with_cache_entry(packet_info, &cached);
@@ -499,10 +514,7 @@ impl AclProcessor {
        let mut cache_entry = AclCacheEntry {
            action: Action::Allow,
            matched_rule: RuleId::Default,
-            last_access: SystemTime::now()
-                .duration_since(UNIX_EPOCH)
-                .unwrap()
-                .as_secs(),
+            last_access: Instant::now(),
            conn_track_key: None,
            rate_limit_keys: vec![],
            chain_type,
@@ -774,19 +786,26 @@ impl AclProcessor {
            return true; // No rate limiting
        }

-        let bucket = self
+        let mut rate_limiter = self
            .rate_limiters
            .entry(rule_key.clone())
            .or_insert_with(|| {
                if !allow_create {
                    panic!("Rate limit bucket not found");
                }
-                TokenBucket::new(burst as u64, rate as u64, Duration::from_millis(10))
-            })
-            .clone();
+                RateLimitValue {
+                    token_bucket: TokenBucket::new(
+                        burst as u64,
+                        rate as u64,
+                        Duration::from_millis(10),
+                    ),
+                    last_update: Instant::now(),
+                }
+            });

        // Try to consume 1 token (1 packet)
-        bucket.try_consume(1)
+        rate_limiter.last_update = Instant::now();
+        rate_limiter.token_bucket.try_consume(1)
    }

    /// Convert proto Rule to FastLookupRule
@@ -156,6 +156,16 @@ pub fn get_machine_id() -> uuid::Uuid {
    gen_mid
 }

+pub fn shrink_dashmap<K: Eq + std::hash::Hash, V>(
+    map: &dashmap::DashMap<K, V>,
+    threshold: Option<usize>,
+) {
+    let threshold = threshold.unwrap_or(16);
+    if map.capacity() - map.len() > threshold {
+        map.shrink_to_fit();
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -527,6 +527,7 @@ impl StatsManager {
                counters.retain(|_, metric_data: &mut Arc<MetricData>| unsafe {
                    metric_data.get_last_updated() > cutoff_time
                });
+                counters.shrink_to_fit();
            }
        });

@@ -178,6 +178,7 @@ impl TokenBucketManager {
            loop {
                // Retain only buckets that are still in use
                buckets_clone.retain(|_, bucket| Arc::<TokenBucket>::strong_count(bucket) > 1);
+                buckets_clone.shrink_to_fit();
                // Sleep for a while before next retention check
                tokio::time::sleep(Duration::from_secs(5)).await;
            }