zero copy tunnel (#55)

make tunnel zero copy, for better performance. remove most of the locks in io path.
introduce quic tunnel
prepare for encryption
This commit is contained in:
Sijie.Sun
2024-04-24 23:12:46 +08:00
committed by GitHub
parent 39021d7b1b
commit 3467890270
44 changed files with 6504 additions and 688 deletions
+71 -67
View File
@@ -16,12 +16,13 @@ use tokio::{
sync::{mpsc::UnboundedSender, Mutex},
task::JoinSet,
};
use tokio_util::bytes::Bytes;
use tracing::Instrument;
use crate::{
common::{error::Error, global_ctx::ArcGlobalCtx, PeerId},
peers::{packet, peer_manager::PeerManager, PeerPacketFilter},
peers::{peer_manager::PeerManager, PeerPacketFilter},
tunnel::packet_def::{PacketType, ZCPacket},
};
use super::CidrSet;
@@ -78,11 +79,7 @@ fn socket_recv(socket: &Socket, buf: &mut [MaybeUninit<u8>]) -> Result<(usize, I
Ok((size, addr))
}
fn socket_recv_loop(
socket: Socket,
nat_table: IcmpNatTable,
sender: UnboundedSender<packet::Packet>,
) {
fn socket_recv_loop(socket: Socket, nat_table: IcmpNatTable, sender: UnboundedSender<ZCPacket>) {
let mut buf = [0u8; 4096];
let data: &mut [MaybeUninit<u8>] = unsafe { std::mem::transmute(&mut buf[12..]) };
@@ -126,13 +123,14 @@ fn socket_recv_loop(
ipv4_packet.set_destination(dest_ip);
ipv4_packet.set_checksum(ipv4::checksum(&ipv4_packet.to_immutable()));
let peer_packet = packet::Packet::new_data_packet(
v.my_peer_id,
v.src_peer_id,
&ipv4_packet.to_immutable().packet(),
let mut p = ZCPacket::new_with_payload(ipv4_packet.packet());
p.fill_peer_manager_hdr(
v.my_peer_id.into(),
v.src_peer_id.into(),
PacketType::Data as u8,
);
if let Err(e) = sender.send(peer_packet) {
if let Err(e) = sender.send(p) {
tracing::error!("send icmp packet to peer failed: {:?}, may exiting..", e);
break;
}
@@ -141,61 +139,12 @@ fn socket_recv_loop(
#[async_trait::async_trait]
impl PeerPacketFilter for IcmpProxy {
async fn try_process_packet_from_peer(
&self,
packet: &packet::ArchivedPacket,
_: &Bytes,
) -> Option<()> {
let _ = self.global_ctx.get_ipv4()?;
if packet.packet_type != packet::PacketType::Data {
return None;
};
let ipv4 = Ipv4Packet::new(&packet.payload.as_bytes())?;
if ipv4.get_version() != 4 || ipv4.get_next_level_protocol() != IpNextHeaderProtocols::Icmp
{
async fn try_process_packet_from_peer(&self, packet: ZCPacket) -> Option<ZCPacket> {
if let Some(_) = self.try_handle_peer_packet(&packet).await {
return None;
} else {
return Some(packet);
}
if !self.cidr_set.contains_v4(ipv4.get_destination()) {
return None;
}
let icmp_packet = icmp::echo_request::EchoRequestPacket::new(&ipv4.payload())?;
if icmp_packet.get_icmp_type() != IcmpTypes::EchoRequest {
// drop it because we do not support other icmp types
tracing::trace!("unsupported icmp type: {:?}", icmp_packet.get_icmp_type());
return Some(());
}
let icmp_id = icmp_packet.get_identifier();
let icmp_seq = icmp_packet.get_sequence_number();
let key = IcmpNatKey {
dst_ip: ipv4.get_destination().into(),
icmp_id,
icmp_seq,
};
let value = IcmpNatEntry::new(
packet.from_peer.into(),
packet.to_peer.into(),
ipv4.get_source().into(),
)
.ok()?;
if let Some(old) = self.nat_table.insert(key, value) {
tracing::info!("icmp nat table entry replaced: {:?}", old);
}
if let Err(e) = self.send_icmp_packet(ipv4.get_destination(), &icmp_packet) {
tracing::error!("send icmp packet failed: {:?}", e);
}
Some(())
}
}
@@ -262,8 +211,9 @@ impl IcmpProxy {
self.tasks.lock().await.spawn(
async move {
while let Some(msg) = receiver.recv().await {
let to_peer_id = msg.to_peer.into();
let ret = peer_manager.send_msg(msg.into(), to_peer_id).await;
let hdr = msg.peer_manager_header().unwrap();
let to_peer_id = hdr.to_peer_id.into();
let ret = peer_manager.send_msg(msg, to_peer_id).await;
if ret.is_err() {
tracing::error!("send icmp packet to peer failed: {:?}", ret);
}
@@ -290,4 +240,58 @@ impl IcmpProxy {
Ok(())
}
async fn try_handle_peer_packet(&self, packet: &ZCPacket) -> Option<()> {
let _ = self.global_ctx.get_ipv4()?;
let hdr = packet.peer_manager_header().unwrap();
if hdr.packet_type != PacketType::Data as u8 {
return None;
};
let ipv4 = Ipv4Packet::new(&packet.payload())?;
if ipv4.get_version() != 4 || ipv4.get_next_level_protocol() != IpNextHeaderProtocols::Icmp
{
return None;
}
if !self.cidr_set.contains_v4(ipv4.get_destination()) {
return None;
}
let icmp_packet = icmp::echo_request::EchoRequestPacket::new(&ipv4.payload())?;
if icmp_packet.get_icmp_type() != IcmpTypes::EchoRequest {
// drop it because we do not support other icmp types
tracing::trace!("unsupported icmp type: {:?}", icmp_packet.get_icmp_type());
return Some(());
}
let icmp_id = icmp_packet.get_identifier();
let icmp_seq = icmp_packet.get_sequence_number();
let key = IcmpNatKey {
dst_ip: ipv4.get_destination().into(),
icmp_id,
icmp_seq,
};
let value = IcmpNatEntry::new(
hdr.from_peer_id.into(),
hdr.to_peer_id.into(),
ipv4.get_source().into(),
)
.ok()?;
if let Some(old) = self.nat_table.insert(key, value) {
tracing::info!("icmp nat table entry replaced: {:?}", old);
}
if let Err(e) = self.send_icmp_packet(ipv4.get_destination(), &icmp_packet) {
tracing::error!("send icmp packet failed: {:?}", e);
}
Some(())
}
}
+98 -92
View File
@@ -2,7 +2,9 @@ use crossbeam::atomic::AtomicCell;
use dashmap::DashMap;
use pnet::packet::ip::IpNextHeaderProtocols;
use pnet::packet::ipv4::{Ipv4Packet, MutableIpv4Packet};
use pnet::packet::tcp::{ipv4_checksum, MutableTcpPacket};
use pnet::packet::tcp::{ipv4_checksum, MutableTcpPacket, TcpPacket};
use pnet::packet::MutablePacket;
use pnet::packet::Packet;
use std::net::{IpAddr, Ipv4Addr, SocketAddr, SocketAddrV4};
use std::sync::atomic::AtomicU16;
use std::sync::Arc;
@@ -11,16 +13,16 @@ use tokio::io::copy_bidirectional;
use tokio::net::{TcpListener, TcpSocket, TcpStream};
use tokio::sync::Mutex;
use tokio::task::JoinSet;
use tokio_util::bytes::{Bytes, BytesMut};
use tracing::Instrument;
use crate::common::error::Result;
use crate::common::global_ctx::GlobalCtx;
use crate::common::join_joinset_background;
use crate::common::netns::NetNS;
use crate::peers::packet::{self, ArchivedPacket};
use crate::peers::peer_manager::PeerManager;
use crate::peers::{NicPacketFilter, PeerPacketFilter};
use crate::tunnel::packet_def::{PacketType, ZCPacket};
use super::CidrSet;
@@ -83,98 +85,37 @@ pub struct TcpProxy {
#[async_trait::async_trait]
impl PeerPacketFilter for TcpProxy {
async fn try_process_packet_from_peer(&self, packet: &ArchivedPacket, _: &Bytes) -> Option<()> {
let ipv4_addr = self.global_ctx.get_ipv4()?;
if packet.packet_type != packet::PacketType::Data {
return None;
};
let payload_bytes = packet.payload.as_bytes();
let ipv4 = Ipv4Packet::new(payload_bytes)?;
if ipv4.get_version() != 4 || ipv4.get_next_level_protocol() != IpNextHeaderProtocols::Tcp {
async fn try_process_packet_from_peer(&self, mut packet: ZCPacket) -> Option<ZCPacket> {
if let Some(_) = self.try_handle_peer_packet(&mut packet).await {
if let Err(e) = self.peer_manager.get_nic_channel().send(packet).await {
tracing::error!("send to nic failed: {:?}", e);
}
return None;
} else {
Some(packet)
}
if !self.cidr_set.contains_v4(ipv4.get_destination()) {
return None;
}
tracing::trace!(ipv4 = ?ipv4, cidr_set = ?self.cidr_set, "proxy tcp packet received");
let mut packet_buffer = BytesMut::with_capacity(payload_bytes.len());
packet_buffer.extend_from_slice(&payload_bytes.to_vec());
let (ip_buffer, tcp_buffer) =
packet_buffer.split_at_mut(ipv4.get_header_length() as usize * 4);
let mut ip_packet = MutableIpv4Packet::new(ip_buffer).unwrap();
let mut tcp_packet = MutableTcpPacket::new(tcp_buffer).unwrap();
let is_tcp_syn = tcp_packet.get_flags() & pnet::packet::tcp::TcpFlags::SYN != 0;
if is_tcp_syn {
let source_ip = ip_packet.get_source();
let source_port = tcp_packet.get_source();
let src = SocketAddr::V4(SocketAddrV4::new(source_ip, source_port));
let dest_ip = ip_packet.get_destination();
let dest_port = tcp_packet.get_destination();
let dst = SocketAddr::V4(SocketAddrV4::new(dest_ip, dest_port));
let old_val = self
.syn_map
.insert(src, Arc::new(NatDstEntry::new(src, dst)));
tracing::trace!(src = ?src, dst = ?dst, old_entry = ?old_val, "tcp syn received");
}
ip_packet.set_destination(ipv4_addr);
tcp_packet.set_destination(self.get_local_port());
Self::update_ipv4_packet_checksum(&mut ip_packet, &mut tcp_packet);
tracing::trace!(ip_packet = ?ip_packet, tcp_packet = ?tcp_packet, "tcp packet forwarded");
if let Err(e) = self
.peer_manager
.get_nic_channel()
.send(packet_buffer.freeze())
.await
{
tracing::error!("send to nic failed: {:?}", e);
}
Some(())
}
}
#[async_trait::async_trait]
impl NicPacketFilter for TcpProxy {
async fn try_process_packet_from_nic(&self, mut data: BytesMut) -> BytesMut {
async fn try_process_packet_from_nic(&self, zc_packet: &mut ZCPacket) {
let Some(my_ipv4) = self.global_ctx.get_ipv4() else {
return data;
return;
};
let header_len = {
let Some(ipv4) = &Ipv4Packet::new(&data[..]) else {
return data;
};
if ipv4.get_version() != 4
|| ipv4.get_source() != my_ipv4
|| ipv4.get_next_level_protocol() != IpNextHeaderProtocols::Tcp
{
return data;
}
ipv4.get_header_length() as usize * 4
};
let (ip_buffer, tcp_buffer) = data.split_at_mut(header_len);
let mut ip_packet = MutableIpv4Packet::new(ip_buffer).unwrap();
let mut tcp_packet = MutableTcpPacket::new(tcp_buffer).unwrap();
let data = zc_packet.payload();
let ip_packet = Ipv4Packet::new(data).unwrap();
if ip_packet.get_version() != 4
|| ip_packet.get_source() != my_ipv4
|| ip_packet.get_next_level_protocol() != IpNextHeaderProtocols::Tcp
{
return;
}
let tcp_packet = TcpPacket::new(ip_packet.payload()).unwrap();
if tcp_packet.get_source() != self.get_local_port() {
return data;
return;
}
let dst_addr = SocketAddr::V4(SocketAddrV4::new(
@@ -187,7 +128,7 @@ impl NicPacketFilter for TcpProxy {
entry
} else {
let Some(syn_entry) = self.syn_map.get(&dst_addr) else {
return data;
return;
};
syn_entry
};
@@ -199,13 +140,18 @@ impl NicPacketFilter for TcpProxy {
panic!("v4 nat entry src ip is not v4");
};
let mut ip_packet = MutableIpv4Packet::new(zc_packet.mut_payload()).unwrap();
ip_packet.set_source(ip);
let dst = ip_packet.get_destination();
let mut tcp_packet = MutableTcpPacket::new(ip_packet.payload_mut()).unwrap();
tcp_packet.set_source(nat_entry.dst.port());
Self::update_ipv4_packet_checksum(&mut ip_packet, &mut tcp_packet);
Self::update_tcp_packet_checksum(&mut tcp_packet, &ip, &dst);
drop(tcp_packet);
Self::update_ip_packet_checksum(&mut ip_packet);
tracing::trace!(dst_addr = ?dst_addr, nat_entry = ?nat_entry, packet = ?ip_packet, "tcp packet after modified");
data
}
}
@@ -226,17 +172,20 @@ impl TcpProxy {
})
}
fn update_ipv4_packet_checksum(
ipv4_packet: &mut MutableIpv4Packet,
fn update_tcp_packet_checksum(
tcp_packet: &mut MutableTcpPacket,
ipv4_src: &Ipv4Addr,
ipv4_dst: &Ipv4Addr,
) {
tcp_packet.set_checksum(ipv4_checksum(
&tcp_packet.to_immutable(),
&ipv4_packet.get_source(),
&ipv4_packet.get_destination(),
ipv4_src,
ipv4_dst,
));
}
ipv4_packet.set_checksum(pnet::packet::ipv4::checksum(&ipv4_packet.to_immutable()));
fn update_ip_packet_checksum(ip_packet: &mut MutableIpv4Packet) {
ip_packet.set_checksum(pnet::packet::ipv4::checksum(&ip_packet.to_immutable()));
}
pub async fn start(self: &Arc<Self>) -> Result<()> {
@@ -302,6 +251,7 @@ impl TcpProxy {
tracing::error!("tcp connection from unknown source: {:?}", socket_addr);
continue;
};
tracing::info!(?socket_addr, "tcp connection accepted for proxy");
assert_eq!(entry.state.load(), NatDstEntryState::SynReceived);
let entry_clone = entry.clone();
@@ -404,4 +354,60 @@ impl TcpProxy {
pub fn get_local_port(&self) -> u16 {
self.local_port.load(std::sync::atomic::Ordering::Relaxed)
}
async fn try_handle_peer_packet(&self, packet: &mut ZCPacket) -> Option<()> {
let ipv4_addr = self.global_ctx.get_ipv4()?;
let hdr = packet.peer_manager_header().unwrap();
if hdr.packet_type != PacketType::Data as u8 {
return None;
};
let payload_bytes = packet.mut_payload();
let ipv4 = Ipv4Packet::new(payload_bytes)?;
if ipv4.get_version() != 4 || ipv4.get_next_level_protocol() != IpNextHeaderProtocols::Tcp {
return None;
}
if !self.cidr_set.contains_v4(ipv4.get_destination()) {
return None;
}
tracing::info!(ipv4 = ?ipv4, cidr_set = ?self.cidr_set, "proxy tcp packet received");
let ip_packet = Ipv4Packet::new(payload_bytes).unwrap();
let tcp_packet = TcpPacket::new(ip_packet.payload()).unwrap();
let is_tcp_syn = tcp_packet.get_flags() & pnet::packet::tcp::TcpFlags::SYN != 0;
if is_tcp_syn {
let source_ip = ip_packet.get_source();
let source_port = tcp_packet.get_source();
let src = SocketAddr::V4(SocketAddrV4::new(source_ip, source_port));
let dest_ip = ip_packet.get_destination();
let dest_port = tcp_packet.get_destination();
let dst = SocketAddr::V4(SocketAddrV4::new(dest_ip, dest_port));
let old_val = self
.syn_map
.insert(src, Arc::new(NatDstEntry::new(src, dst)));
tracing::trace!(src = ?src, dst = ?dst, old_entry = ?old_val, "tcp syn received");
}
let mut ip_packet = MutableIpv4Packet::new(payload_bytes).unwrap();
ip_packet.set_destination(ipv4_addr);
let source = ip_packet.get_source();
let mut tcp_packet = MutableTcpPacket::new(ip_packet.payload_mut()).unwrap();
tcp_packet.set_destination(self.get_local_port());
Self::update_tcp_packet_checksum(&mut tcp_packet, &source, &ipv4_addr);
drop(tcp_packet);
Self::update_ip_packet_checksum(&mut ip_packet);
tracing::info!(?source, ?ipv4_addr, ?packet, "tcp packet after modified");
Some(())
}
}
+29 -27
View File
@@ -21,12 +21,12 @@ use tokio::{
time::timeout,
};
use tokio_util::bytes::Bytes;
use tracing::Level;
use crate::{
common::{error::Error, global_ctx::ArcGlobalCtx, PeerId},
peers::{packet, peer_manager::PeerManager, PeerPacketFilter},
peers::{peer_manager::PeerManager, PeerPacketFilter},
tunnel::packet_def::{PacketType, ZCPacket},
tunnels::common::setup_sokcet2,
};
@@ -79,7 +79,7 @@ impl UdpNatEntry {
async fn compose_ipv4_packet(
self: &Arc<Self>,
packet_sender: &mut UnboundedSender<packet::Packet>,
packet_sender: &mut UnboundedSender<ZCPacket>,
buf: &mut [u8],
src_v4: &SocketAddrV4,
payload_len: usize,
@@ -140,13 +140,10 @@ impl UdpNatEntry {
tracing::trace!(?ipv4_packet, "udp nat packet response send");
let peer_packet = packet::Packet::new_data_packet(
self.my_peer_id,
self.src_peer_id,
&ipv4_packet.to_immutable().packet(),
);
let mut p = ZCPacket::new_with_payload(ipv4_packet.packet());
p.fill_peer_manager_hdr(self.my_peer_id, self.src_peer_id, PacketType::Data as u8);
if let Err(e) = packet_sender.send(peer_packet) {
if let Err(e) = packet_sender.send(p) {
tracing::error!("send icmp packet to peer failed: {:?}, may exiting..", e);
return Err(Error::AnyhowError(e.into()));
}
@@ -158,7 +155,7 @@ impl UdpNatEntry {
Ok(())
}
async fn forward_task(self: Arc<Self>, mut packet_sender: UnboundedSender<packet::Packet>) {
async fn forward_task(self: Arc<Self>, mut packet_sender: UnboundedSender<ZCPacket>) {
let mut buf = [0u8; 8192];
let mut udp_body: &mut [u8] = unsafe { std::mem::transmute(&mut buf[20 + 8..]) };
let mut ip_id = 1;
@@ -220,31 +217,25 @@ pub struct UdpProxy {
nat_table: Arc<DashMap<UdpNatKey, Arc<UdpNatEntry>>>,
sender: UnboundedSender<packet::Packet>,
receiver: Mutex<Option<UnboundedReceiver<packet::Packet>>>,
sender: UnboundedSender<ZCPacket>,
receiver: Mutex<Option<UnboundedReceiver<ZCPacket>>>,
tasks: Mutex<JoinSet<()>>,
}
#[async_trait::async_trait]
impl PeerPacketFilter for UdpProxy {
async fn try_process_packet_from_peer(
&self,
packet: &packet::ArchivedPacket,
_: &Bytes,
) -> Option<()> {
impl UdpProxy {
async fn try_handle_packet(&self, packet: &ZCPacket) -> Option<()> {
if self.cidr_set.is_empty() {
return None;
}
let _ = self.global_ctx.get_ipv4()?;
if packet.packet_type != packet::PacketType::Data {
let hdr = packet.peer_manager_header().unwrap();
if hdr.packet_type != PacketType::Data as u8 {
return None;
};
let ipv4 = Ipv4Packet::new(packet.payload.as_bytes())?;
let ipv4 = Ipv4Packet::new(packet.payload())?;
if ipv4.get_version() != 4 || ipv4.get_next_level_protocol() != IpNextHeaderProtocols::Udp {
return None;
}
@@ -272,8 +263,8 @@ impl PeerPacketFilter for UdpProxy {
tracing::info!(?packet, ?ipv4, ?udp_packet, "udp nat table entry created");
let _g = self.global_ctx.net_ns.guard();
Ok(Arc::new(UdpNatEntry::new(
packet.from_peer.into(),
packet.to_peer.into(),
hdr.from_peer_id.get(),
hdr.to_peer_id.get(),
nat_key.src_socket,
)?))
})
@@ -316,6 +307,17 @@ impl PeerPacketFilter for UdpProxy {
}
}
#[async_trait::async_trait]
impl PeerPacketFilter for UdpProxy {
async fn try_process_packet_from_peer(&self, packet: ZCPacket) -> Option<ZCPacket> {
if let Some(_) = self.try_handle_packet(&packet).await {
return None;
} else {
return Some(packet);
}
}
}
impl UdpProxy {
pub fn new(
global_ctx: ArcGlobalCtx,
@@ -362,9 +364,9 @@ impl UdpProxy {
let peer_manager = self.peer_manager.clone();
self.tasks.lock().await.spawn(async move {
while let Some(msg) = receiver.recv().await {
let to_peer_id: PeerId = msg.to_peer.into();
let to_peer_id: PeerId = msg.peer_manager_header().unwrap().to_peer_id.get();
tracing::trace!(?msg, ?to_peer_id, "udp nat packet response send");
let ret = peer_manager.send_msg(msg.into(), to_peer_id).await;
let ret = peer_manager.send_msg(msg, to_peer_id).await;
if ret.is_err() {
tracing::error!("send icmp packet to peer failed: {:?}", ret);
}