mirror of
https://github.com/dndx/phantun.git
synced 2025-09-16 04:04:29 +08:00
Compare commits
12 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
99bff568f6 | ||
|
91ad2c03a1 | ||
|
581d80d08c | ||
|
55da4d6a62 | ||
|
bb859be6b6 | ||
|
8d315ea4e7 | ||
|
21eabe8b82 | ||
|
8a74b31c6e | ||
|
ca14ba457f | ||
|
33a0cfe567 | ||
|
95dfd8ab54 | ||
|
1c35635091 |
42
README.md
42
README.md
@@ -2,6 +2,9 @@
|
||||
|
||||
A lightweight and fast UDP to TCP obfuscator.
|
||||
|
||||

|
||||

|
||||
|
||||
Table of Contents
|
||||
=================
|
||||
|
||||
@@ -32,7 +35,7 @@ Table of Contents
|
||||
|
||||
# Latest release
|
||||
|
||||
[v0.2.5](https://github.com/dndx/phantun/releases/tag/v0.2.5)
|
||||
[v0.3.1](https://github.com/dndx/phantun/releases/tag/v0.3.1)
|
||||
|
||||
# Overview
|
||||
|
||||
@@ -52,6 +55,10 @@ connection from the perspective of firewalls/NAT devices.
|
||||
Phantun means Phantom TUN, as it is an obfuscator for UDP traffic that does just enough work
|
||||
to make it pass through stateful firewall/NATs as TCP packets.
|
||||
|
||||
Phantun is written in 100% safe Rust. It has been optimized extensively to scale well on multi-core
|
||||
systems and has no issue saturating all available CPU resources on a fast connection.
|
||||
See the [Performance](#performance) section for benchmarking results.
|
||||
|
||||

|
||||
|
||||
# Usage
|
||||
@@ -218,18 +225,20 @@ RUST_LOG=info /usr/local/bin/phantun_client --local 127.0.0.1:1234 --remote exam
|
||||
Phantun aims to keep tunneling overhead to the minimum. The overhead compared to a plain UDP packet
|
||||
is the following:
|
||||
|
||||
Standard UDP packet: 20 byte IP header + 8 byte UDP header = 28 bytes
|
||||
**Standard UDP packet:** `20 byte IP header + 8 byte UDP header = 28 bytes`
|
||||
|
||||
Phantun obfuscated UDP packet: 20 byte IP header + 20 byte TCP header = 40 bytes
|
||||
**Obfuscated packet:** `20 byte IP header + 20 byte TCP header = 40 bytes`
|
||||
|
||||
|
||||
Note that Phantun does not add any additional header other than IP and TCP headers in order to pass through
|
||||
stateful packet inspection!
|
||||
|
||||
Phantun's additional overhead: 12 bytes. In other words, when using Phantun, the usable payload for
|
||||
Phantun's additional overhead: `12 bytes`. In other words, when using Phantun, the usable payload for
|
||||
UDP packet is reduced by 12 bytes. This is the minimum overhead possible when doing this kind
|
||||
of obfuscation.
|
||||
|
||||

|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
## MTU calculation for WireGuard
|
||||
@@ -237,14 +246,20 @@ of obfuscation.
|
||||
For people who use Phantun to tunnel [WireGuard®](https://www.wireguard.com) UDP packets, here are some guidelines on figuring
|
||||
out the correct MTU to use for your WireGuard interface.
|
||||
|
||||
```
|
||||
WireGuard MTU = Interface MTU - IP header (20 bytes) - TCP header (20 bytes) - WireGuard overhead (32 bytes)
|
||||
```
|
||||
|
||||
For example, for an Ethernet interface with a 1500-byte MTU, the WireGuard interface MTU should be set as:
|
||||
|
||||
```
|
||||
1500 - 20 - 20 - 32 = 1428 bytes
|
||||
```
|
||||
|
||||
The resulting Phantun TCP data packet will be 1500 bytes, which does not exceed the
|
||||
interface MTU of 1500.
|
||||
interface MTU of 1500. Please note it is strongly recommended to use the same interface
|
||||
MTU for both ends of a WireGuard tunnel, or unexpected packet loss may occur, and these issues are
|
||||
generally very hard to troubleshoot.
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
@@ -264,13 +279,16 @@ For users who wish to use `fake-tcp` library inside their own project, refer to
|
||||
|
||||
# Performance
|
||||
|
||||
Performance was tested on AWS t3.xlarge instance with 4 vCPUs and 5 Gb/s NIC. WireGuard was used
|
||||
for tunneling TCP/UDP traffic between two test instances and MTU has been tuned to avoid fragmentation.
|
||||
Performance was tested on 2 AWS `t4g.xlarge` instances with 4 vCPUs and 5 Gb/s NIC over LAN. `nftables` was used to redirect
|
||||
UDP stream of `iperf3` to go through the Phantun/udp2raw tunnel between two test instances and MTU has been tuned to avoid fragmentation.
|
||||
|
||||
| | WireGuard | WireGuard + Phantun | WireGuard + udp2raw (cipher-mode=none auth-mode=none disable-anti-replay) |
|
||||
|-----------------|-------------|---------------------|---------------------------------------------------------------------------|
|
||||
| iperf3 -c IP -R | 1.56 Gbit/s | 540 Mbit/s | 369 Mbit/s |
|
||||
| iperf3 -c IP | 1.71 Gbit/s | 519 Mbit/s | 312 Mbit/s |
|
||||
Test command: `iperf3 -c <IP> -p <PORT> -R -u -l 1400 -b 1000m -t 30 -P 5`
|
||||
|
||||
| Mode | Speed | Overall CPU Usage |
|
||||
|---------------------------------------------------------------|----------------|--------------------------|
|
||||
| Direct connection | 3.35 Gbits/sec | 25% (1 core at 100%) |
|
||||
| Phantun | 2.03 Gbits/sec | 95% (all cores utilized) |
|
||||
| udp2raw (cipher-mode=none auth-mode=none disable-anti-replay) | 876 Mbits/sec | 50% (2 cores at 100%) |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
@@ -300,7 +318,7 @@ Here is a quick overview of comparison between those two to help you choose:
|
||||
| UDP over UDP obfuscation | ❌ | ✅ |
|
||||
| Multi-threaded | ✅ | ❌ |
|
||||
| Throughput | Better | Good |
|
||||
| Raw IP mode | TUN interface | Raw sockets + BPF |
|
||||
| L4 IP mode | TUN interface | Raw sockets + BPF |
|
||||
| Tunneling MTU overhead | 12 bytes | 44 bytes |
|
||||
| Separate TCP connections for each UDP connection | Client/Server | Server only |
|
||||
| Anti-replay, encryption | ❌ | ✅ |
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "fake-tcp"
|
||||
version = "0.2.4"
|
||||
version = "0.3.1"
|
||||
edition = "2021"
|
||||
authors = ["Datong Sun <dndx@idndx.com>"]
|
||||
license = "MIT OR Apache-2.0"
|
||||
@@ -22,3 +22,4 @@ rand = { version = "0.8", features = ["small_rng"] }
|
||||
log = "0.4"
|
||||
internet-checksum = "0.2"
|
||||
tokio-tun = "0.5"
|
||||
flume = "0.10"
|
||||
|
@@ -53,14 +53,15 @@ use std::net::{Ipv4Addr, SocketAddrV4};
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::sync::mpsc::{self, Receiver, Sender};
|
||||
use tokio::sync::Mutex as AsyncMutex;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time;
|
||||
use tokio_tun::Tun;
|
||||
|
||||
const TIMEOUT: time::Duration = time::Duration::from_secs(1);
|
||||
const RETRIES: usize = 6;
|
||||
const MPSC_BUFFER_LEN: usize = 512;
|
||||
const MPMC_BUFFER_LEN: usize = 512;
|
||||
const MPSC_BUFFER_LEN: usize = 128;
|
||||
const MAX_UNACKED_LEN: u32 = 128 * 1024 * 1024; // 128MB
|
||||
|
||||
#[derive(Hash, Eq, PartialEq, Clone, Debug)]
|
||||
struct AddrTuple {
|
||||
@@ -78,17 +79,17 @@ impl AddrTuple {
|
||||
}
|
||||
|
||||
struct Shared {
|
||||
tuples: RwLock<HashMap<AddrTuple, Sender<Bytes>>>,
|
||||
tuples: RwLock<HashMap<AddrTuple, flume::Sender<Bytes>>>,
|
||||
listening: RwLock<HashSet<u16>>,
|
||||
tun: Vec<Arc<Tun>>,
|
||||
ready: Sender<Socket>,
|
||||
ready: mpsc::Sender<Socket>,
|
||||
tuples_purge: broadcast::Sender<AddrTuple>,
|
||||
}
|
||||
|
||||
pub struct Stack {
|
||||
shared: Arc<Shared>,
|
||||
local_ip: Ipv4Addr,
|
||||
ready: Receiver<Socket>,
|
||||
ready: mpsc::Receiver<Socket>,
|
||||
}
|
||||
|
||||
pub enum State {
|
||||
@@ -101,11 +102,12 @@ pub enum State {
|
||||
pub struct Socket {
|
||||
shared: Arc<Shared>,
|
||||
tun: Arc<Tun>,
|
||||
incoming: AsyncMutex<Receiver<Bytes>>,
|
||||
incoming: flume::Receiver<Bytes>,
|
||||
local_addr: SocketAddrV4,
|
||||
remote_addr: SocketAddrV4,
|
||||
seq: AtomicU32,
|
||||
ack: AtomicU32,
|
||||
last_ack: AtomicU32,
|
||||
state: State,
|
||||
}
|
||||
|
||||
@@ -124,18 +126,19 @@ impl Socket {
|
||||
remote_addr: SocketAddrV4,
|
||||
ack: Option<u32>,
|
||||
state: State,
|
||||
) -> (Socket, Sender<Bytes>) {
|
||||
let (incoming_tx, incoming_rx) = mpsc::channel(MPSC_BUFFER_LEN);
|
||||
) -> (Socket, flume::Sender<Bytes>) {
|
||||
let (incoming_tx, incoming_rx) = flume::bounded(MPMC_BUFFER_LEN);
|
||||
|
||||
(
|
||||
Socket {
|
||||
shared,
|
||||
tun,
|
||||
incoming: AsyncMutex::new(incoming_rx),
|
||||
incoming: incoming_rx,
|
||||
local_addr,
|
||||
remote_addr,
|
||||
seq: AtomicU32::new(0),
|
||||
ack: AtomicU32::new(ack.unwrap_or(0)),
|
||||
last_ack: AtomicU32::new(ack.unwrap_or(0)),
|
||||
state,
|
||||
},
|
||||
incoming_tx,
|
||||
@@ -143,11 +146,14 @@ impl Socket {
|
||||
}
|
||||
|
||||
fn build_tcp_packet(&self, flags: u16, payload: Option<&[u8]>) -> Bytes {
|
||||
let ack = self.ack.load(Ordering::Relaxed);
|
||||
self.last_ack.store(ack, Ordering::Relaxed);
|
||||
|
||||
build_tcp_packet(
|
||||
self.local_addr,
|
||||
self.remote_addr,
|
||||
self.seq.load(Ordering::Relaxed),
|
||||
self.ack.load(Ordering::Relaxed),
|
||||
ack,
|
||||
flags,
|
||||
payload,
|
||||
)
|
||||
@@ -165,12 +171,7 @@ impl Socket {
|
||||
State::Established => {
|
||||
let buf = self.build_tcp_packet(tcp::TcpFlags::ACK, Some(payload));
|
||||
self.seq.fetch_add(payload.len() as u32, Ordering::Relaxed);
|
||||
|
||||
tokio::select! {
|
||||
res = self.tun.send(&buf) => {
|
||||
res.ok().and(Some(()))
|
||||
},
|
||||
}
|
||||
self.tun.send(&buf).await.ok().and(Some(()))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
@@ -186,8 +187,7 @@ impl Socket {
|
||||
pub async fn recv(&self, buf: &mut [u8]) -> Option<usize> {
|
||||
match self.state {
|
||||
State::Established => {
|
||||
let mut incoming = self.incoming.lock().await;
|
||||
incoming.recv().await.and_then(|raw_buf| {
|
||||
self.incoming.recv_async().await.ok().and_then(|raw_buf| {
|
||||
let (_v4_packet, tcp_packet) = parse_ipv4_packet(&raw_buf);
|
||||
|
||||
if (tcp_packet.get_flags() & tcp::TcpFlags::RST) != 0 {
|
||||
@@ -197,8 +197,18 @@ impl Socket {
|
||||
|
||||
let payload = tcp_packet.payload();
|
||||
|
||||
self.ack
|
||||
.store(tcp_packet.get_sequence().wrapping_add(1), Ordering::Relaxed);
|
||||
let new_ack = tcp_packet.get_sequence().wrapping_add(payload.len() as u32);
|
||||
let last_ask = self.last_ack.load(Ordering::Relaxed);
|
||||
self.ack.store(new_ack, Ordering::Relaxed);
|
||||
|
||||
if new_ack.overflowing_sub(last_ask).0 > MAX_UNACKED_LEN {
|
||||
let buf = self.build_tcp_packet(tcp::TcpFlags::ACK, None);
|
||||
if let Err(e) = self.tun.try_send(&buf) {
|
||||
// This should not really happen as we have not sent anything for
|
||||
// quite some time...
|
||||
info!("Connection {} unable to send idling ACK back: {}", self, e)
|
||||
}
|
||||
}
|
||||
|
||||
buf[..payload.len()].copy_from_slice(payload);
|
||||
|
||||
@@ -220,7 +230,7 @@ impl Socket {
|
||||
info!("Sent SYN + ACK to client");
|
||||
}
|
||||
State::SynReceived => {
|
||||
let res = time::timeout(TIMEOUT, self.incoming.lock().await.recv()).await;
|
||||
let res = time::timeout(TIMEOUT, self.incoming.recv_async()).await;
|
||||
if let Ok(buf) = res {
|
||||
let buf = buf.unwrap();
|
||||
let (_v4_packet, tcp_packet) = parse_ipv4_packet(&buf);
|
||||
@@ -264,7 +274,7 @@ impl Socket {
|
||||
info!("Sent SYN to server");
|
||||
}
|
||||
State::SynSent => {
|
||||
match time::timeout(TIMEOUT, self.incoming.lock().await.recv()).await {
|
||||
match time::timeout(TIMEOUT, self.incoming.recv_async()).await {
|
||||
Ok(buf) => {
|
||||
let buf = buf.unwrap();
|
||||
let (_v4_packet, tcp_packet) = parse_ipv4_packet(&buf);
|
||||
@@ -315,7 +325,14 @@ impl Drop for Socket {
|
||||
// purge cache
|
||||
self.shared.tuples_purge.send(tuple).unwrap();
|
||||
|
||||
let buf = self.build_tcp_packet(tcp::TcpFlags::RST, None);
|
||||
let buf = build_tcp_packet(
|
||||
self.local_addr,
|
||||
self.remote_addr,
|
||||
self.seq.load(Ordering::Relaxed),
|
||||
0,
|
||||
tcp::TcpFlags::RST,
|
||||
None,
|
||||
);
|
||||
if let Err(e) = self.tun.try_send(&buf) {
|
||||
warn!("Unable to send RST to remote end: {}", e);
|
||||
}
|
||||
@@ -408,7 +425,7 @@ impl Stack {
|
||||
shared: Arc<Shared>,
|
||||
mut tuples_purge: broadcast::Receiver<AddrTuple>,
|
||||
) {
|
||||
let mut tuples: HashMap<AddrTuple, Sender<Bytes>> = HashMap::new();
|
||||
let mut tuples: HashMap<AddrTuple, flume::Sender<Bytes>> = HashMap::new();
|
||||
|
||||
loop {
|
||||
let mut buf = BytesMut::with_capacity(MAX_PACKET_LEN);
|
||||
@@ -432,7 +449,7 @@ impl Stack {
|
||||
|
||||
let tuple = AddrTuple::new(local_addr, remote_addr);
|
||||
if let Some(c) = tuples.get(&tuple) {
|
||||
if c.send(buf).await.is_err() {
|
||||
if c.send_async(buf).await.is_err() {
|
||||
trace!("Cache hit, but receiver already closed, dropping packet");
|
||||
}
|
||||
|
||||
@@ -451,7 +468,7 @@ impl Stack {
|
||||
if let Some(c) = sender {
|
||||
trace!("Storing connection information into local tuples");
|
||||
tuples.insert(tuple, c.clone());
|
||||
c.send(buf).await.unwrap();
|
||||
c.send_async(buf).await.unwrap();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -486,8 +503,8 @@ impl Stack {
|
||||
local_addr,
|
||||
remote_addr,
|
||||
0,
|
||||
tcp_packet.get_sequence() + 1,
|
||||
tcp::TcpFlags::RST,
|
||||
tcp_packet.get_sequence() + tcp_packet.payload().len() as u32 + 1, // +1 because of SYN flag set
|
||||
tcp::TcpFlags::RST | tcp::TcpFlags::ACK,
|
||||
None,
|
||||
);
|
||||
shared.tun[0].try_send(&buf).unwrap();
|
||||
@@ -498,8 +515,8 @@ impl Stack {
|
||||
local_addr,
|
||||
remote_addr,
|
||||
tcp_packet.get_acknowledgement(),
|
||||
0,
|
||||
tcp::TcpFlags::RST,
|
||||
tcp_packet.get_sequence() + tcp_packet.payload().len() as u32,
|
||||
tcp::TcpFlags::RST | tcp::TcpFlags::ACK,
|
||||
None,
|
||||
);
|
||||
shared.tun[0].try_send(&buf).unwrap();
|
||||
@@ -508,7 +525,7 @@ impl Stack {
|
||||
tuple = tuples_purge.recv() => {
|
||||
let tuple = tuple.unwrap();
|
||||
tuples.remove(&tuple);
|
||||
trace!("Removed cached tuple");
|
||||
trace!("Removed cached tuple: {:?}", tuple);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
BIN
images/packet-headers.png
Normal file
BIN
images/packet-headers.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 30 KiB |
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "phantun"
|
||||
version = "0.3.0"
|
||||
version = "0.3.2"
|
||||
edition = "2021"
|
||||
authors = ["Datong Sun <dndx@idndx.com>"]
|
||||
license = "MIT OR Apache-2.0"
|
||||
@@ -13,7 +13,7 @@ Layer 3 & Layer 4 (NAPT) firewalls/NATs.
|
||||
[dependencies]
|
||||
clap = { version = "3.0", features = ["cargo"] }
|
||||
socket2 = { version = "0.4", features = ["all"] }
|
||||
fake-tcp = { path = "../fake-tcp", version = "0.2" }
|
||||
fake-tcp = { path = "../fake-tcp", version = "0.3" }
|
||||
tokio = { version = "1.14", features = ["full"] }
|
||||
tokio-util = "0.7"
|
||||
log = "0.4"
|
||||
|
@@ -99,6 +99,7 @@ async fn main() {
|
||||
.expect("bad peer address for Tun interface");
|
||||
|
||||
let num_cpus = num_cpus::get();
|
||||
info!("{} cores available", num_cpus);
|
||||
|
||||
let tun = TunBuilder::new()
|
||||
.name(matches.value_of("tun").unwrap()) // if name is empty, then it is set by kernel.
|
||||
@@ -157,7 +158,7 @@ async fn main() {
|
||||
|
||||
for i in 0..num_cpus {
|
||||
let sock = sock.clone();
|
||||
let quit = quit.child_token();
|
||||
let quit = quit.clone();
|
||||
let packet_received = packet_received.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
|
@@ -94,6 +94,7 @@ async fn main() {
|
||||
.expect("bad peer address for Tun interface");
|
||||
|
||||
let num_cpus = num_cpus::get();
|
||||
info!("{} cores available", num_cpus);
|
||||
|
||||
let tun = TunBuilder::new()
|
||||
.name(matches.value_of("tun").unwrap()) // if name is empty, then it is set by kernel.
|
||||
@@ -134,7 +135,7 @@ async fn main() {
|
||||
|
||||
for i in 0..num_cpus {
|
||||
let sock = sock.clone();
|
||||
let quit = quit.child_token();
|
||||
let quit = quit.clone();
|
||||
let packet_received = packet_received.clone();
|
||||
let udp_sock = new_udp_reuseport(local_addr);
|
||||
|
||||
|
Reference in New Issue
Block a user