Explorar o código

Support multiple UDP source ports (multiport)

The goal of this work is to send packets between two hosts using more than one
5-tuple. When running on networks like AWS where the underlying network driver
and overlay fabric makes routing, load balancing, and failover decisions based
on the flow hash, this enables more than one flow between pairs of hosts.

Multiport spreads outgoing UDP packets across multiple UDP send ports,
which allows nebula to work around any issues on the underlay network.
Some example issues this could work around:

- UDP rate limits on a per flow basis.
- Partial underlay network failure in which some flows work and some don't

Agreement is done during the handshake to decide if multiport mode will
be used for a given tunnel (one side must have tx_enabled set, the other
side must have rx_enabled set)

NOTE: you cannot use multiport on a host if you are relying on UDP hole
punching to get through a NAT or firewall.

NOTE: Linux only (uses raw sockets to send). Also currently only works
with IPv4 underlay network remotes.

This is implemented by opening a raw socket and sending packets with
a source port that is based on a hash of the overlay source/destiation
port. For ICMP and Nebula metadata packets, we use a random source port.

Example configuration:

    multiport:
      # This host support sending via multiple UDP ports.
      tx_enabled: false

      # This host supports receiving packets sent from multiple UDP ports.
      rx_enabled: false

      # How many UDP ports to use when sending. The lowest source port will be
      # listen.port and go up to (but not including) listen.port + tx_ports.
      tx_ports: 100

      # NOTE: All of your hosts must be running a version of Nebula that supports
      # multiport if you want to enable this feature. Older versions of Nebula
      # will be confused by these multiport handshakes.
      #
      # If handshakes are not getting a response, attempt to transmit handshakes
      # using random UDP source ports (to get around partial underlay network
      # failures).
      tx_handshake: false

      # How many unresponded handshakes we should send before we attempt to
      # send multiport handshakes.
      tx_handshake_delay: 2
Wade Simmons %!s(int64=2) %!d(string=hai) anos
pai
achega
326fc8758d

+ 8 - 0
.github/workflows/smoke.yml

@@ -53,4 +53,12 @@ jobs:
       working-directory: ./.github/workflows/smoke
       run: ./smoke-relay.sh
 
+    - name: setup docker image for multiport
+      working-directory: ./.github/workflows/smoke
+      run: NAME="smoke-multiport" MULTIPORT_TX=true MULTIPORT_RX=true MULTIPORT_HANDSHAKE=true ./build.sh
+
+    - name: run smoke
+      working-directory: ./.github/workflows/smoke
+      run: ./smoke.sh
+
     timeout-minutes: 10

+ 1 - 1
.github/workflows/smoke/build.sh

@@ -36,4 +36,4 @@ mkdir ./build
     ../../../../nebula-cert sign -name "host4" -groups "host,host4" -ip "192.168.100.4/24"
 )
 
-sudo docker build -t nebula:smoke .
+sudo docker build -t "nebula:${NAME:-smoke}" .

+ 4 - 0
.github/workflows/smoke/genconfig.sh

@@ -48,6 +48,10 @@ listen:
 
 tun:
   dev: ${TUN_DEV:-nebula1}
+  multiport:
+    tx_enabled: ${MULTIPORT_TX:-false}
+    rx_enabled: ${MULTIPORT_RX:-false}
+    tx_handshake: ${MULTIPORT_HANDSHAKE:-false}
 
 firewall:
   outbound: ${OUTBOUND:-$FIREWALL_ALL}

+ 10 - 8
.github/workflows/smoke/smoke.sh

@@ -20,18 +20,20 @@ cleanup() {
 
 trap cleanup EXIT
 
-sudo docker run --name lighthouse1 --rm nebula:smoke -config lighthouse1.yml -test
-sudo docker run --name host2 --rm nebula:smoke -config host2.yml -test
-sudo docker run --name host3 --rm nebula:smoke -config host3.yml -test
-sudo docker run --name host4 --rm nebula:smoke -config host4.yml -test
+CONTAINER="nebula:${NAME:-smoke}"
 
-sudo docker run --name lighthouse1 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config lighthouse1.yml 2>&1 | tee logs/lighthouse1 | sed -u 's/^/  [lighthouse1]  /' &
+sudo docker run --name lighthouse1 --rm "$CONTAINER" -config lighthouse1.yml -test
+sudo docker run --name host2 --rm "$CONTAINER" -config host2.yml -test
+sudo docker run --name host3 --rm "$CONTAINER" -config host3.yml -test
+sudo docker run --name host4 --rm "$CONTAINER" -config host4.yml -test
+
+sudo docker run --name lighthouse1 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm "$CONTAINER" -config lighthouse1.yml 2>&1 | tee logs/lighthouse1 | sed -u 's/^/  [lighthouse1]  /' &
 sleep 1
-sudo docker run --name host2 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host2.yml 2>&1 | tee logs/host2 | sed -u 's/^/  [host2]  /' &
+sudo docker run --name host2 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm "$CONTAINER" -config host2.yml 2>&1 | tee logs/host2 | sed -u 's/^/  [host2]  /' &
 sleep 1
-sudo docker run --name host3 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host3.yml 2>&1 | tee logs/host3 | sed -u 's/^/  [host3]  /' &
+sudo docker run --name host3 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm "$CONTAINER" -config host3.yml 2>&1 | tee logs/host3 | sed -u 's/^/  [host3]  /' &
 sleep 1
-sudo docker run --name host4 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host4.yml 2>&1 | tee logs/host4 | sed -u 's/^/  [host4]  /' &
+sudo docker run --name host4 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm "$CONTAINER" -config host4.yml 2>&1 | tee logs/host4 | sed -u 's/^/  [host4]  /' &
 sleep 1
 
 set +x

+ 4 - 0
Makefile

@@ -175,6 +175,10 @@ smoke-relay-docker: bin-docker
 	cd .github/workflows/smoke/ && ./build-relay.sh
 	cd .github/workflows/smoke/ && ./smoke-relay.sh
 
+smoke-multiport-docker: bin-docker
+	cd .github/workflows/smoke/ && NAME="smoke-multiport" MULTIPORT_TX=true MULTIPORT_RX=true MULTIPORT_HANDSHAKE=true ./build.sh
+	cd .github/workflows/smoke/ && NAME="smoke-multiport" ./smoke.sh
+
 smoke-docker-race: BUILD_ARGS = -race
 smoke-docker-race: smoke-docker
 

+ 40 - 0
examples/config.yml

@@ -202,6 +202,46 @@ tun:
     #  mtu: 1300
     #  metric: 100
 
+  # EXPERIMENTAL: This option may change or disappear in the future.
+  # Multiport spreads outgoing UDP packets across multiple UDP send ports,
+  # which allows nebula to work around any issues on the underlay network.
+  # Some example issues this could work around:
+  #  - UDP rate limits on a per flow basis.
+  #  - Partial underlay network failure in which some flows work and some don't
+  # Agreement is done during the handshake to decide if multiport mode will
+  # be used for a given tunnel (one side must have tx_enabled set, the other
+  # side must have rx_enabled set)
+  #
+  # NOTE: you cannot use multiport on a host if you are relying on UDP hole
+  # punching to get through a NAT or firewall.
+  #
+  # NOTE: Linux only (uses raw sockets to send). Also currently only works
+  # with IPv4 underlay network remotes.
+  #
+  # The default values are listed below:
+  #multiport:
+    # This host support sending via multiple UDP ports.
+    #tx_enabled: false
+    #
+    # This host supports receiving packets sent from multiple UDP ports.
+    #rx_enabled: false
+    #
+    # How many UDP ports to use when sending. The lowest source port will be
+    # listen.port and go up to (but not including) listen.port + tx_ports.
+    #tx_ports: 100
+    #
+    # NOTE: All of your hosts must be running a version of Nebula that supports
+    # multiport if you want to enable this feature. Older versions of Nebula
+    # will be confused by these multiport handshakes.
+    #
+    # If handshakes are not getting a response, attempt to transmit handshakes
+    # using random UDP source ports (to get around partial underlay network
+    # failures).
+    #tx_handshake: false
+    #
+    # How many unresponded handshakes we should send before we attempt to
+    # send multiport handshakes.
+    #tx_handshake_delay: 2
 
 # TODO
 # Configure logging level

+ 28 - 0
firewall/packet.go

@@ -3,6 +3,7 @@ package firewall
 import (
 	"encoding/json"
 	"fmt"
+	mathrand "math/rand"
 
 	"github.com/slackhq/nebula/iputil"
 )
@@ -60,3 +61,30 @@ func (fp Packet) MarshalJSON() ([]byte, error) {
 		"Fragment":   fp.Fragment,
 	})
 }
+
+// UDPSendPort calculates the UDP port to send from when using multiport mode.
+// The result will be from [0, numBuckets)
+func (fp Packet) UDPSendPort(numBuckets int) uint16 {
+	if numBuckets <= 1 {
+		return 0
+	}
+
+	// If there is no port (like an ICMP packet), pick a random UDP send port
+	if fp.LocalPort == 0 {
+		return uint16(mathrand.Intn(numBuckets))
+	}
+
+	// A decent enough 32bit hash function
+	// Prospecting for Hash Functions
+	// - https://nullprogram.com/blog/2018/07/31/
+	// - https://github.com/skeeto/hash-prospector
+	//   [16 21f0aaad 15 d35a2d97 15] = 0.10760229515479501
+	x := (uint32(fp.LocalPort) << 16) | uint32(fp.RemotePort)
+	x ^= x >> 16
+	x *= 0x21f0aaad
+	x ^= x >> 15
+	x *= 0xd35a2d97
+	x ^= x >> 15
+
+	return uint16(x) % uint16(numBuckets)
+}

+ 71 - 2
handshake_ix.go

@@ -37,6 +37,15 @@ func ixHandshakeStage0(f *Interface, vpnIp iputil.VpnIp, hostinfo *HostInfo) {
 		Cert:           ci.certState.rawCertificateNoKey,
 	}
 
+	if f.multiPort.Tx || f.multiPort.Rx {
+		hsProto.InitiatorMultiPort = &MultiPortDetails{
+			RxSupported: f.multiPort.Rx,
+			TxSupported: f.multiPort.Tx,
+			BasePort:    uint32(f.multiPort.TxBasePort),
+			TotalPorts:  uint32(f.multiPort.TxPorts),
+		}
+	}
+
 	hsBytes := []byte{}
 
 	hs := &NebulaHandshake{
@@ -130,6 +139,29 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, via interface{}, packet []b
 		return
 	}
 
+	var multiportTx, multiportRx bool
+	if f.multiPort.Rx || f.multiPort.Tx {
+		if hs.Details.InitiatorMultiPort != nil {
+			multiportTx = hs.Details.InitiatorMultiPort.RxSupported && f.multiPort.Tx
+			multiportRx = hs.Details.InitiatorMultiPort.TxSupported && f.multiPort.Rx
+		}
+
+		hs.Details.ResponderMultiPort = &MultiPortDetails{
+			TxSupported: f.multiPort.Tx,
+			RxSupported: f.multiPort.Rx,
+			BasePort:    uint32(f.multiPort.TxBasePort),
+			TotalPorts:  uint32(f.multiPort.TxPorts),
+		}
+	}
+	if hs.Details.InitiatorMultiPort != nil && hs.Details.InitiatorMultiPort.BasePort != uint32(addr.Port) {
+		// The other side sent us a handshake from a different port, make sure
+		// we send responses back to the BasePort
+		addr = &udp.Addr{
+			IP:   addr.IP,
+			Port: uint16(hs.Details.InitiatorMultiPort.BasePort),
+		}
+	}
+
 	hostinfo := &HostInfo{
 		ConnectionState:   ci,
 		localIndexId:      myIndex,
@@ -137,6 +169,8 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, via interface{}, packet []b
 		vpnIp:             vpnIp,
 		HandshakePacket:   make(map[uint8][]byte, 0),
 		lastHandshakeTime: hs.Details.Time,
+		multiportTx:       multiportTx,
+		multiportRx:       multiportRx,
 		relayState: RelayState{
 			relays:        map[iputil.VpnIp]struct{}{},
 			relayForByIp:  map[iputil.VpnIp]*Relay{},
@@ -153,6 +187,7 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, via interface{}, packet []b
 		WithField("issuer", issuer).
 		WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
 		WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
+		WithField("multiportTx", multiportTx).WithField("multiportRx", multiportRx).
 		Info("Handshake message received")
 
 	hs.Details.ResponderIndex = myIndex
@@ -231,7 +266,14 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, via interface{}, packet []b
 			msg = existing.HandshakePacket[2]
 			f.messageMetrics.Tx(header.Handshake, header.MessageSubType(msg[1]), 1)
 			if addr != nil {
-				err := f.outside.WriteTo(msg, addr)
+				if multiportTx {
+					// TODO remove alloc here
+					raw := make([]byte, len(msg)+udp.RawOverhead)
+					copy(raw[udp.RawOverhead:], msg)
+					err = f.udpRaw.WriteTo(raw, udp.RandomSendPort.UDPSendPort(f.multiPort.TxPorts), addr)
+				} else {
+					err = f.outside.WriteTo(msg, addr)
+				}
 				if err != nil {
 					f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
 						WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
@@ -308,7 +350,14 @@ func ixHandshakeStage1(f *Interface, addr *udp.Addr, via interface{}, packet []b
 	// Do the send
 	f.messageMetrics.Tx(header.Handshake, header.MessageSubType(msg[1]), 1)
 	if addr != nil {
-		err = f.outside.WriteTo(msg, addr)
+		if multiportTx {
+			// TODO remove alloc here
+			raw := make([]byte, len(msg)+udp.RawOverhead)
+			copy(raw[udp.RawOverhead:], msg)
+			err = f.udpRaw.WriteTo(raw, udp.RandomSendPort.UDPSendPort(f.multiPort.TxPorts), addr)
+		} else {
+			err = f.outside.WriteTo(msg, addr)
+		}
 		if err != nil {
 			f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
 				WithField("certName", certName).
@@ -368,6 +417,11 @@ func ixHandshakeStage2(f *Interface, addr *udp.Addr, via interface{}, hostinfo *
 
 	ci := hostinfo.ConnectionState
 	if ci.ready {
+		if hostinfo.multiportRx {
+			// The other host is sending to us with multiport, so only grab the IP
+			addr.Port = hostinfo.remote.Port
+		}
+
 		f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
 			WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("header", h).
 			Info("Handshake is already complete")
@@ -413,6 +467,20 @@ func ixHandshakeStage2(f *Interface, addr *udp.Addr, via interface{}, hostinfo *
 		return true
 	}
 
+	if (f.multiPort.Tx || f.multiPort.Rx) && hs.Details.ResponderMultiPort != nil {
+		hostinfo.multiportTx = hs.Details.ResponderMultiPort.RxSupported && f.multiPort.Tx
+		hostinfo.multiportRx = hs.Details.ResponderMultiPort.TxSupported && f.multiPort.Rx
+	}
+
+	if hs.Details.ResponderMultiPort != nil && hs.Details.ResponderMultiPort.BasePort != uint32(addr.Port) {
+		// The other side sent us a handshake from a different port, make sure
+		// we send responses back to the BasePort
+		addr = &udp.Addr{
+			IP:   addr.IP,
+			Port: uint16(hs.Details.ResponderMultiPort.BasePort),
+		}
+	}
+
 	remoteCert, err := RecombineCertAndValidate(ci.H, hs.Details.Cert, f.caPool)
 	if err != nil {
 		f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
@@ -480,6 +548,7 @@ func ixHandshakeStage2(f *Interface, addr *udp.Addr, via interface{}, hostinfo *
 		WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
 		WithField("durationNs", duration).
 		WithField("sentCachedPackets", len(hostinfo.packetStore)).
+		WithField("multiportTx", hostinfo.multiportTx).WithField("multiportRx", hostinfo.multiportRx).
 		Info("Handshake message received")
 
 	hostinfo.remoteIndexId = hs.Details.ResponderIndex

+ 26 - 0
handshake_manager.go

@@ -53,6 +53,9 @@ type HandshakeManager struct {
 	metricTimedOut         metrics.Counter
 	l                      *logrus.Logger
 
+	multiPort MultiPortConfig
+	udpRaw    *udp.RawConn
+
 	// can be used to trigger outbound handshake for the given vpnIp
 	trigger chan iputil.VpnIp
 }
@@ -163,6 +166,7 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, f udp.EncWriter, l
 
 	// Send a the handshake to all known ips, stage 2 takes care of assigning the hostinfo.remote based on the first to reply
 	var sentTo []*udp.Addr
+	var sentMultiport bool
 	hostinfo.remotes.ForEach(c.pendingHostMap.preferredRanges, func(addr *udp.Addr, _ bool) {
 		c.messageMetrics.Tx(header.Handshake, header.MessageSubType(hostinfo.HandshakePacket[0][1]), 1)
 		err = c.outside.WriteTo(hostinfo.HandshakePacket[0], addr)
@@ -175,6 +179,27 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, f udp.EncWriter, l
 		} else {
 			sentTo = append(sentTo, addr)
 		}
+
+		// Attempt a multiport handshake if we are past the TxHandshakeDelay attempts
+		if c.multiPort.TxHandshake && c.udpRaw != nil && hostinfo.HandshakeCounter >= c.multiPort.TxHandshakeDelay {
+			sentMultiport = true
+			// We need to re-allocate with 8 bytes at the start of SOCK_RAW
+			raw := hostinfo.HandshakePacket[0x80]
+			if raw == nil {
+				raw = make([]byte, len(hostinfo.HandshakePacket[0])+udp.RawOverhead)
+				copy(raw[udp.RawOverhead:], hostinfo.HandshakePacket[0])
+				hostinfo.HandshakePacket[0x80] = raw
+			}
+
+			c.messageMetrics.Tx(header.Handshake, header.MessageSubType(hostinfo.HandshakePacket[0][1]), 1)
+			err = c.udpRaw.WriteTo(raw, udp.RandomSendPort.UDPSendPort(c.multiPort.TxPorts), addr)
+			if err != nil {
+				hostinfo.logger(c.l).WithField("udpAddr", addr).
+					WithField("initiatorIndex", hostinfo.localIndexId).
+					WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
+					WithError(err).Error("Failed to send handshake message")
+			}
+		}
 	})
 
 	// Don't be too noisy or confusing if we fail to send a handshake - if we don't get through we'll eventually log a timeout
@@ -182,6 +207,7 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, f udp.EncWriter, l
 		hostinfo.logger(c.l).WithField("udpAddrs", sentTo).
 			WithField("initiatorIndex", hostinfo.localIndexId).
 			WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
+			WithField("multiportHandshake", sentMultiport).
 			Info("Handshake message sent")
 	}
 

+ 2 - 0
hostmap.go

@@ -154,6 +154,8 @@ type HostInfo struct {
 	remote            *udp.Addr
 	remotes           *RemoteList
 	promoteCounter    uint32
+	multiportTx       bool
+	multiportRx       bool
 	ConnectionState   *ConnectionState
 	handshakeStart    time.Time        //todo: this an entry in the handshake manager
 	HandshakeReady    bool             //todo: being in the manager means you are ready

+ 36 - 7
inside.go

@@ -70,7 +70,7 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet
 
 	dropReason := f.firewall.Drop(packet, *fwPacket, false, hostinfo, f.caPool, localCache)
 	if dropReason == nil {
-		f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, packet, nb, out, q)
+		f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, packet, nb, out, q, fwPacket)
 
 	} else if f.l.Level >= logrus.DebugLevel {
 		hostinfo.logger(f.l).
@@ -161,7 +161,7 @@ func (f *Interface) sendMessageNow(t header.MessageType, st header.MessageSubTyp
 		return
 	}
 
-	f.sendNoMetrics(header.Message, st, hostInfo.ConnectionState, hostInfo, nil, p, nb, out, 0)
+	f.sendNoMetrics(header.Message, st, hostInfo.ConnectionState, hostInfo, nil, p, nb, out, 0, nil)
 }
 
 // SendMessageToVpnIp handles real ip:port lookup and sends to the current best known address for vpnIp
@@ -197,12 +197,12 @@ func (f *Interface) sendMessageToVpnIp(t header.MessageType, st header.MessageSu
 
 func (f *Interface) send(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, p, nb, out []byte) {
 	f.messageMetrics.Tx(t, st, 1)
-	f.sendNoMetrics(t, st, ci, hostinfo, nil, p, nb, out, 0)
+	f.sendNoMetrics(t, st, ci, hostinfo, nil, p, nb, out, 0, nil)
 }
 
 func (f *Interface) sendTo(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote *udp.Addr, p, nb, out []byte) {
 	f.messageMetrics.Tx(t, st, 1)
-	f.sendNoMetrics(t, st, ci, hostinfo, remote, p, nb, out, 0)
+	f.sendNoMetrics(t, st, ci, hostinfo, remote, p, nb, out, 0, nil)
 }
 
 // sendVia sends a payload through a Relay tunnel. No authentication or encryption is done
@@ -261,11 +261,28 @@ func (f *Interface) SendVia(viaIfc interface{},
 	}
 }
 
-func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote *udp.Addr, p, nb, out []byte, q int) {
+func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote *udp.Addr, p, nb, out []byte, q int, udpPortGetter udp.SendPortGetter) {
 	if ci.eKey == nil {
 		//TODO: log warning
 		return
 	}
+
+	multiport := f.multiPort.Tx && hostinfo.multiportTx
+	rawOut := out
+	if multiport {
+		if len(out) < udp.RawOverhead {
+			// NOTE: This is because some spots in the code send us `out[:0]`, so
+			// we need to expand the slice back out to get our 8 bytes back.
+			out = out[:udp.RawOverhead]
+		}
+		// Preserve bytes needed for the raw socket
+		out = out[udp.RawOverhead:]
+
+		if udpPortGetter == nil {
+			udpPortGetter = udp.RandomSendPort
+		}
+	}
+
 	useRelay := remote == nil && hostinfo.remote == nil
 	fullOut := out
 
@@ -312,13 +329,25 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
 	}
 
 	if remote != nil {
-		err = f.writers[q].WriteTo(out, remote)
+		if multiport {
+			rawOut = rawOut[:len(out)+udp.RawOverhead]
+			port := udpPortGetter.UDPSendPort(f.multiPort.TxPorts)
+			err = f.udpRaw.WriteTo(rawOut, port, remote)
+		} else {
+			err = f.writers[q].WriteTo(out, remote)
+		}
 		if err != nil {
 			hostinfo.logger(f.l).WithError(err).
 				WithField("udpAddr", remote).Error("Failed to write outgoing packet")
 		}
 	} else if hostinfo.remote != nil {
-		err = f.writers[q].WriteTo(out, hostinfo.remote)
+		if multiport {
+			rawOut = rawOut[:len(out)+udp.RawOverhead]
+			port := udpPortGetter.UDPSendPort(f.multiPort.TxPorts)
+			err = f.udpRaw.WriteTo(rawOut, port, hostinfo.remote)
+		} else {
+			err = f.writers[q].WriteTo(out, hostinfo.remote)
+		}
 		if err != nil {
 			hostinfo.logger(f.l).WithError(err).
 				WithField("udpAddr", remote).Error("Failed to write outgoing packet")

+ 22 - 0
interface.go

@@ -80,6 +80,9 @@ type Interface struct {
 
 	writers []*udp.Conn
 	readers []io.ReadWriteCloser
+	udpRaw  *udp.RawConn
+
+	multiPort MultiPortConfig
 
 	metricHandshakes    metrics.Histogram
 	messageMetrics      *MessageMetrics
@@ -88,6 +91,15 @@ type Interface struct {
 	l *logrus.Logger
 }
 
+type MultiPortConfig struct {
+	Tx               bool
+	Rx               bool
+	TxBasePort       uint16
+	TxPorts          int
+	TxHandshake      bool
+	TxHandshakeDelay int
+}
+
 type sendRecvErrorConfig uint8
 
 const (
@@ -194,6 +206,8 @@ func (f *Interface) activate() {
 
 	metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
 
+	metrics.GetOrRegisterGauge("multiport.tx_ports", nil).Update(int64(f.multiPort.TxPorts))
+
 	// Prepare n tun queues
 	var reader io.ReadWriteCloser = f.inside
 	for i := 0; i < f.routines; i++ {
@@ -378,6 +392,8 @@ func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
 
 	udpStats := udp.NewUDPStatsEmitter(f.writers)
 
+	var rawStats func()
+
 	for {
 		select {
 		case <-ctx.Done():
@@ -386,6 +402,12 @@ func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
 			f.firewall.EmitStats()
 			f.handshakeManager.EmitStats()
 			udpStats()
+			if f.udpRaw != nil {
+				if rawStats == nil {
+					rawStats = udp.NewRawStatsEmitter(f.udpRaw)
+				}
+				rawStats()
+			}
 		}
 	}
 }

+ 33 - 0
main.go

@@ -304,6 +304,39 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
 		// I don't want to make this initial commit too far-reaching though
 		ifce.writers = udpConns
 
+		loadMultiPortConfig := func(c *config.C) {
+			ifce.multiPort.Rx = c.GetBool("tun.multiport.rx_enabled", false)
+
+			tx := c.GetBool("tun.multiport.tx_enabled", false)
+
+			if tx && ifce.udpRaw == nil {
+				ifce.udpRaw, err = udp.NewRawConn(l, c.GetString("listen.host", "0.0.0.0"), port, uint16(port))
+				if err != nil {
+					l.WithError(err).Error("Failed to get raw socket for tun.multiport.tx_enabled")
+					ifce.udpRaw = nil
+					tx = false
+				}
+			}
+
+			if tx {
+				ifce.multiPort.TxBasePort = uint16(port)
+				ifce.multiPort.TxPorts = c.GetInt("tun.multiport.tx_ports", 100)
+				ifce.multiPort.TxHandshake = c.GetBool("tun.multiport.tx_handshake", false)
+				ifce.multiPort.TxHandshakeDelay = c.GetInt("tun.multiport.tx_handshake_delay", 2)
+				ifce.udpRaw.ReloadConfig(c)
+			}
+			ifce.multiPort.Tx = tx
+
+			// TODO: if we upstream this, make this cleaner
+			handshakeManager.udpRaw = ifce.udpRaw
+			handshakeManager.multiPort = ifce.multiPort
+
+			l.WithField("multiPort", ifce.multiPort).Info("Multiport configured")
+		}
+
+		loadMultiPortConfig(c)
+		c.RegisterReloadCallback(loadMultiPortConfig)
+
 		ifce.RegisterConfigChangeCallbacks(c)
 
 		ifce.reloadSendRecvError(c)

+ 449 - 53
nebula.pb.go

@@ -121,7 +121,7 @@ func (x NebulaControl_MessageType) String() string {
 }
 
 func (NebulaControl_MessageType) EnumDescriptor() ([]byte, []int) {
-	return fileDescriptor_2d65afa7693df5ef, []int{7, 0}
+	return fileDescriptor_2d65afa7693df5ef, []int{8, 0}
 }
 
 type NebulaMeta struct {
@@ -468,19 +468,89 @@ func (m *NebulaHandshake) GetHmac() []byte {
 	return nil
 }
 
+type MultiPortDetails struct {
+	RxSupported bool   `protobuf:"varint,1,opt,name=RxSupported,proto3" json:"RxSupported,omitempty"`
+	TxSupported bool   `protobuf:"varint,2,opt,name=TxSupported,proto3" json:"TxSupported,omitempty"`
+	BasePort    uint32 `protobuf:"varint,3,opt,name=BasePort,proto3" json:"BasePort,omitempty"`
+	TotalPorts  uint32 `protobuf:"varint,4,opt,name=TotalPorts,proto3" json:"TotalPorts,omitempty"`
+}
+
+func (m *MultiPortDetails) Reset()         { *m = MultiPortDetails{} }
+func (m *MultiPortDetails) String() string { return proto.CompactTextString(m) }
+func (*MultiPortDetails) ProtoMessage()    {}
+func (*MultiPortDetails) Descriptor() ([]byte, []int) {
+	return fileDescriptor_2d65afa7693df5ef, []int{6}
+}
+func (m *MultiPortDetails) XXX_Unmarshal(b []byte) error {
+	return m.Unmarshal(b)
+}
+func (m *MultiPortDetails) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	if deterministic {
+		return xxx_messageInfo_MultiPortDetails.Marshal(b, m, deterministic)
+	} else {
+		b = b[:cap(b)]
+		n, err := m.MarshalToSizedBuffer(b)
+		if err != nil {
+			return nil, err
+		}
+		return b[:n], nil
+	}
+}
+func (m *MultiPortDetails) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_MultiPortDetails.Merge(m, src)
+}
+func (m *MultiPortDetails) XXX_Size() int {
+	return m.Size()
+}
+func (m *MultiPortDetails) XXX_DiscardUnknown() {
+	xxx_messageInfo_MultiPortDetails.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_MultiPortDetails proto.InternalMessageInfo
+
+func (m *MultiPortDetails) GetRxSupported() bool {
+	if m != nil {
+		return m.RxSupported
+	}
+	return false
+}
+
+func (m *MultiPortDetails) GetTxSupported() bool {
+	if m != nil {
+		return m.TxSupported
+	}
+	return false
+}
+
+func (m *MultiPortDetails) GetBasePort() uint32 {
+	if m != nil {
+		return m.BasePort
+	}
+	return 0
+}
+
+func (m *MultiPortDetails) GetTotalPorts() uint32 {
+	if m != nil {
+		return m.TotalPorts
+	}
+	return 0
+}
+
 type NebulaHandshakeDetails struct {
-	Cert           []byte `protobuf:"bytes,1,opt,name=Cert,proto3" json:"Cert,omitempty"`
-	InitiatorIndex uint32 `protobuf:"varint,2,opt,name=InitiatorIndex,proto3" json:"InitiatorIndex,omitempty"`
-	ResponderIndex uint32 `protobuf:"varint,3,opt,name=ResponderIndex,proto3" json:"ResponderIndex,omitempty"`
-	Cookie         uint64 `protobuf:"varint,4,opt,name=Cookie,proto3" json:"Cookie,omitempty"`
-	Time           uint64 `protobuf:"varint,5,opt,name=Time,proto3" json:"Time,omitempty"`
+	Cert               []byte            `protobuf:"bytes,1,opt,name=Cert,proto3" json:"Cert,omitempty"`
+	InitiatorIndex     uint32            `protobuf:"varint,2,opt,name=InitiatorIndex,proto3" json:"InitiatorIndex,omitempty"`
+	ResponderIndex     uint32            `protobuf:"varint,3,opt,name=ResponderIndex,proto3" json:"ResponderIndex,omitempty"`
+	Cookie             uint64            `protobuf:"varint,4,opt,name=Cookie,proto3" json:"Cookie,omitempty"`
+	Time               uint64            `protobuf:"varint,5,opt,name=Time,proto3" json:"Time,omitempty"`
+	InitiatorMultiPort *MultiPortDetails `protobuf:"bytes,6,opt,name=InitiatorMultiPort,proto3" json:"InitiatorMultiPort,omitempty"`
+	ResponderMultiPort *MultiPortDetails `protobuf:"bytes,7,opt,name=ResponderMultiPort,proto3" json:"ResponderMultiPort,omitempty"`
 }
 
 func (m *NebulaHandshakeDetails) Reset()         { *m = NebulaHandshakeDetails{} }
 func (m *NebulaHandshakeDetails) String() string { return proto.CompactTextString(m) }
 func (*NebulaHandshakeDetails) ProtoMessage()    {}
 func (*NebulaHandshakeDetails) Descriptor() ([]byte, []int) {
-	return fileDescriptor_2d65afa7693df5ef, []int{6}
+	return fileDescriptor_2d65afa7693df5ef, []int{7}
 }
 func (m *NebulaHandshakeDetails) XXX_Unmarshal(b []byte) error {
 	return m.Unmarshal(b)
@@ -544,6 +614,20 @@ func (m *NebulaHandshakeDetails) GetTime() uint64 {
 	return 0
 }
 
+func (m *NebulaHandshakeDetails) GetInitiatorMultiPort() *MultiPortDetails {
+	if m != nil {
+		return m.InitiatorMultiPort
+	}
+	return nil
+}
+
+func (m *NebulaHandshakeDetails) GetResponderMultiPort() *MultiPortDetails {
+	if m != nil {
+		return m.ResponderMultiPort
+	}
+	return nil
+}
+
 type NebulaControl struct {
 	Type                NebulaControl_MessageType `protobuf:"varint,1,opt,name=Type,proto3,enum=nebula.NebulaControl_MessageType" json:"Type,omitempty"`
 	InitiatorRelayIndex uint32                    `protobuf:"varint,2,opt,name=InitiatorRelayIndex,proto3" json:"InitiatorRelayIndex,omitempty"`
@@ -556,7 +640,7 @@ func (m *NebulaControl) Reset()         { *m = NebulaControl{} }
 func (m *NebulaControl) String() string { return proto.CompactTextString(m) }
 func (*NebulaControl) ProtoMessage()    {}
 func (*NebulaControl) Descriptor() ([]byte, []int) {
-	return fileDescriptor_2d65afa7693df5ef, []int{7}
+	return fileDescriptor_2d65afa7693df5ef, []int{8}
 }
 func (m *NebulaControl) XXX_Unmarshal(b []byte) error {
 	return m.Unmarshal(b)
@@ -630,6 +714,7 @@ func init() {
 	proto.RegisterType((*Ip6AndPort)(nil), "nebula.Ip6AndPort")
 	proto.RegisterType((*NebulaPing)(nil), "nebula.NebulaPing")
 	proto.RegisterType((*NebulaHandshake)(nil), "nebula.NebulaHandshake")
+	proto.RegisterType((*MultiPortDetails)(nil), "nebula.MultiPortDetails")
 	proto.RegisterType((*NebulaHandshakeDetails)(nil), "nebula.NebulaHandshakeDetails")
 	proto.RegisterType((*NebulaControl)(nil), "nebula.NebulaControl")
 }
@@ -637,51 +722,56 @@ func init() {
 func init() { proto.RegisterFile("nebula.proto", fileDescriptor_2d65afa7693df5ef) }
 
 var fileDescriptor_2d65afa7693df5ef = []byte{
-	// 696 bytes of a gzipped FileDescriptorProto
-	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x7c, 0x54, 0xcd, 0x6e, 0xd3, 0x4a,
-	0x14, 0x8e, 0x1d, 0xe7, 0xef, 0xa4, 0x49, 0x7d, 0x4f, 0xef, 0xcd, 0x4d, 0xaf, 0xae, 0xac, 0xe0,
-	0x05, 0xca, 0x2a, 0xad, 0xd2, 0x52, 0xb1, 0x04, 0x82, 0x50, 0x52, 0xb5, 0x55, 0x18, 0x15, 0x90,
-	0xd8, 0xa0, 0x69, 0x32, 0xd4, 0x56, 0x12, 0x8f, 0x6b, 0x4f, 0x50, 0xf3, 0x16, 0x3c, 0x4c, 0x1f,
-	0x82, 0x05, 0x12, 0x5d, 0xb0, 0x60, 0x89, 0xda, 0x17, 0x41, 0x33, 0x76, 0x6c, 0x27, 0x0d, 0xec,
-	0xce, 0xcf, 0xf7, 0xcd, 0x7c, 0xe7, 0x9b, 0x63, 0xc3, 0x96, 0xc7, 0x2e, 0xe6, 0x53, 0xda, 0xf1,
-	0x03, 0x2e, 0x38, 0x16, 0xa3, 0xcc, 0xfe, 0xaa, 0x03, 0x9c, 0xa9, 0xf0, 0x94, 0x09, 0x8a, 0x5d,
-	0x30, 0xce, 0x17, 0x3e, 0x6b, 0x6a, 0x2d, 0xad, 0x5d, 0xef, 0x5a, 0x9d, 0x98, 0x93, 0x22, 0x3a,
-	0xa7, 0x2c, 0x0c, 0xe9, 0x25, 0x93, 0x28, 0xa2, 0xb0, 0x78, 0x00, 0xa5, 0x97, 0x4c, 0x50, 0x77,
-	0x1a, 0x36, 0xf5, 0x96, 0xd6, 0xae, 0x76, 0x77, 0x1f, 0xd2, 0x62, 0x00, 0x59, 0x22, 0xed, 0xef,
-	0x1a, 0x54, 0x33, 0x47, 0x61, 0x19, 0x8c, 0x33, 0xee, 0x31, 0x33, 0x87, 0x35, 0xa8, 0xf4, 0x79,
-	0x28, 0x5e, 0xcf, 0x59, 0xb0, 0x30, 0x35, 0x44, 0xa8, 0x27, 0x29, 0x61, 0xfe, 0x74, 0x61, 0xea,
-	0xf8, 0x1f, 0x34, 0x64, 0xed, 0x8d, 0x3f, 0xa6, 0x82, 0x9d, 0x71, 0xe1, 0x7e, 0x74, 0x47, 0x54,
-	0xb8, 0xdc, 0x33, 0xf3, 0xb8, 0x0b, 0xff, 0xc8, 0xde, 0x29, 0xff, 0xc4, 0xc6, 0x2b, 0x2d, 0x63,
-	0xd9, 0x1a, 0xce, 0xbd, 0x91, 0xb3, 0xd2, 0x2a, 0x60, 0x1d, 0x40, 0xb6, 0xde, 0x39, 0x9c, 0xce,
-	0x5c, 0xb3, 0x88, 0x3b, 0xb0, 0x9d, 0xe6, 0xd1, 0xb5, 0x25, 0xa9, 0x6c, 0x48, 0x85, 0xd3, 0x73,
-	0xd8, 0x68, 0x62, 0x96, 0xa5, 0xb2, 0x24, 0x8d, 0x20, 0x15, 0xfb, 0x9b, 0x06, 0x7f, 0x3d, 0x98,
-	0x1a, 0xff, 0x86, 0xc2, 0x5b, 0xdf, 0x1b, 0xf8, 0xca, 0xd6, 0x1a, 0x89, 0x12, 0x3c, 0x84, 0xea,
-	0xc0, 0x3f, 0x7c, 0xee, 0x8d, 0x87, 0x3c, 0x10, 0xd2, 0xbb, 0x7c, 0xbb, 0xda, 0xc5, 0xa5, 0x77,
-	0x69, 0x8b, 0x64, 0x61, 0x11, 0xeb, 0x28, 0x61, 0x19, 0xeb, 0xac, 0xa3, 0x0c, 0x2b, 0x81, 0xa1,
-	0x05, 0x40, 0xd8, 0x94, 0x2e, 0x22, 0x19, 0x85, 0x56, 0xbe, 0x5d, 0x23, 0x99, 0x0a, 0x36, 0xa1,
-	0x34, 0xe2, 0x73, 0x4f, 0xb0, 0xa0, 0x99, 0x57, 0x1a, 0x97, 0xa9, 0xbd, 0x0f, 0x90, 0x5e, 0x8f,
-	0x75, 0xd0, 0x93, 0x31, 0xf4, 0x81, 0x8f, 0x08, 0x86, 0xac, 0xab, 0x87, 0xaf, 0x11, 0x15, 0xdb,
-	0xcf, 0x24, 0xe3, 0x28, 0xc3, 0xe8, 0xbb, 0x8a, 0x61, 0x10, 0xbd, 0xef, 0xca, 0xfc, 0x84, 0x2b,
-	0xbc, 0x41, 0xf4, 0x13, 0x9e, 0x9c, 0x90, 0xcf, 0x9c, 0x70, 0xbd, 0xdc, 0xc9, 0xa1, 0xeb, 0x5d,
-	0xfe, 0x79, 0x27, 0x25, 0x62, 0xc3, 0x4e, 0x22, 0x18, 0xe7, 0xee, 0x8c, 0xc5, 0xf7, 0xa8, 0xd8,
-	0xb6, 0x1f, 0x6c, 0x9c, 0x24, 0x9b, 0x39, 0xac, 0x40, 0x21, 0x7a, 0x3f, 0xcd, 0xfe, 0x00, 0xdb,
-	0xd1, 0xb9, 0x7d, 0xea, 0x8d, 0x43, 0x87, 0x4e, 0x18, 0x3e, 0x4d, 0xd7, 0x5b, 0x53, 0xeb, 0xbd,
-	0xa6, 0x20, 0x41, 0xae, 0xef, 0xb8, 0x14, 0xd1, 0x9f, 0xd1, 0x91, 0x12, 0xb1, 0x45, 0x54, 0x6c,
-	0xdf, 0x68, 0xd0, 0xd8, 0xcc, 0x93, 0xf0, 0x1e, 0x0b, 0x84, 0xba, 0x65, 0x8b, 0xa8, 0x18, 0x1f,
-	0x43, 0x7d, 0xe0, 0xb9, 0xc2, 0xa5, 0x82, 0x07, 0x03, 0x6f, 0xcc, 0xae, 0x63, 0xa7, 0xd7, 0xaa,
-	0x12, 0x47, 0x58, 0xe8, 0x73, 0x6f, 0xcc, 0x62, 0x5c, 0xe4, 0xe7, 0x5a, 0x15, 0x1b, 0x50, 0xec,
-	0x71, 0x3e, 0x71, 0x59, 0xd3, 0x50, 0xce, 0xc4, 0x59, 0xe2, 0x57, 0x21, 0xf5, 0xeb, 0xd8, 0x28,
-	0x17, 0xcd, 0xd2, 0xb1, 0x51, 0x2e, 0x99, 0x65, 0xfb, 0x46, 0x87, 0x5a, 0x24, 0xbb, 0xc7, 0x3d,
-	0x11, 0xf0, 0x29, 0x3e, 0x59, 0x79, 0x95, 0x47, 0xab, 0x9e, 0xc4, 0xa0, 0x0d, 0x0f, 0xb3, 0x0f,
-	0x3b, 0x89, 0x74, 0xb5, 0x7f, 0xd9, 0xa9, 0x36, 0xb5, 0x24, 0x23, 0x19, 0x22, 0xc3, 0x88, 0xe6,
-	0xdb, 0xd4, 0xc2, 0xff, 0xa1, 0xa2, 0xb2, 0x73, 0x3e, 0xf0, 0xd5, 0x9c, 0x35, 0x92, 0x16, 0xb0,
-	0x05, 0x55, 0x95, 0xbc, 0x0a, 0xf8, 0x4c, 0x7d, 0x0b, 0xb2, 0x9f, 0x2d, 0xd9, 0xfd, 0xdf, 0xfd,
-	0x9a, 0x1a, 0x80, 0xbd, 0x80, 0x51, 0xc1, 0x14, 0x9a, 0xb0, 0xab, 0x39, 0x0b, 0x85, 0xa9, 0xe1,
-	0xbf, 0xb0, 0xb3, 0x52, 0x97, 0x92, 0x42, 0x66, 0xea, 0x2f, 0x0e, 0xbe, 0xdc, 0x59, 0xda, 0xed,
-	0x9d, 0xa5, 0xfd, 0xbc, 0xb3, 0xb4, 0xcf, 0xf7, 0x56, 0xee, 0xf6, 0xde, 0xca, 0xfd, 0xb8, 0xb7,
-	0x72, 0xef, 0x77, 0x2f, 0x5d, 0xe1, 0xcc, 0x2f, 0x3a, 0x23, 0x3e, 0xdb, 0x0b, 0xa7, 0x74, 0x34,
-	0x71, 0xae, 0xf6, 0x22, 0x0b, 0x2f, 0x8a, 0xea, 0x0f, 0x7d, 0xf0, 0x2b, 0x00, 0x00, 0xff, 0xff,
-	0xcd, 0xd7, 0xbe, 0xd5, 0xb1, 0x05, 0x00, 0x00,
+	// 775 bytes of a gzipped FileDescriptorProto
+	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x55, 0xcd, 0x6e, 0xd3, 0x4a,
+	0x14, 0x8e, 0x1d, 0xe7, 0xef, 0xa4, 0x49, 0x7d, 0xa7, 0xf7, 0xe6, 0xa6, 0xd5, 0x95, 0x95, 0xeb,
+	0xc5, 0x55, 0x56, 0x69, 0x95, 0xf6, 0x56, 0x2c, 0xa1, 0x41, 0x28, 0x91, 0x9a, 0x2a, 0x0c, 0x01,
+	0x24, 0x36, 0x68, 0x9a, 0x0c, 0x8d, 0x15, 0xc7, 0xe3, 0xda, 0x63, 0xd4, 0xbc, 0x05, 0xe2, 0x59,
+	0x58, 0xf2, 0x00, 0x2c, 0x90, 0xe8, 0x82, 0x05, 0x4b, 0xd4, 0xbe, 0x08, 0x9a, 0xf1, 0x6f, 0x7e,
+	0x80, 0xdd, 0x9c, 0x73, 0xbe, 0xef, 0xcc, 0x37, 0xe7, 0x7c, 0x71, 0x60, 0xc7, 0xa1, 0x97, 0x81,
+	0x4d, 0x3a, 0xae, 0xc7, 0x38, 0x43, 0xc5, 0x30, 0x32, 0x3f, 0xab, 0x00, 0x17, 0xf2, 0x38, 0xa4,
+	0x9c, 0xa0, 0x2e, 0x68, 0xe3, 0xa5, 0x4b, 0x9b, 0x4a, 0x4b, 0x69, 0xd7, 0xbb, 0x46, 0x27, 0xe2,
+	0xa4, 0x88, 0xce, 0x90, 0xfa, 0x3e, 0xb9, 0xa2, 0x02, 0x85, 0x25, 0x16, 0x1d, 0x43, 0xe9, 0x31,
+	0xe5, 0xc4, 0xb2, 0xfd, 0xa6, 0xda, 0x52, 0xda, 0xd5, 0xee, 0xfe, 0x26, 0x2d, 0x02, 0xe0, 0x18,
+	0x69, 0x7e, 0x55, 0xa0, 0x9a, 0x69, 0x85, 0xca, 0xa0, 0x5d, 0x30, 0x87, 0xea, 0x39, 0x54, 0x83,
+	0x4a, 0x9f, 0xf9, 0xfc, 0x69, 0x40, 0xbd, 0xa5, 0xae, 0x20, 0x04, 0xf5, 0x24, 0xc4, 0xd4, 0xb5,
+	0x97, 0xba, 0x8a, 0x0e, 0xa0, 0x21, 0x72, 0xcf, 0xdd, 0x29, 0xe1, 0xf4, 0x82, 0x71, 0xeb, 0x8d,
+	0x35, 0x21, 0xdc, 0x62, 0x8e, 0x9e, 0x47, 0xfb, 0xf0, 0x97, 0xa8, 0x0d, 0xd9, 0x5b, 0x3a, 0x5d,
+	0x29, 0x69, 0x71, 0x69, 0x14, 0x38, 0x93, 0xd9, 0x4a, 0xa9, 0x80, 0xea, 0x00, 0xa2, 0xf4, 0x72,
+	0xc6, 0xc8, 0xc2, 0xd2, 0x8b, 0x68, 0x0f, 0x76, 0xd3, 0x38, 0xbc, 0xb6, 0x24, 0x94, 0x8d, 0x08,
+	0x9f, 0xf5, 0x66, 0x74, 0x32, 0xd7, 0xcb, 0x42, 0x59, 0x12, 0x86, 0x90, 0x8a, 0xf9, 0x45, 0x81,
+	0x3f, 0x36, 0x5e, 0x8d, 0xfe, 0x84, 0xc2, 0x0b, 0xd7, 0x19, 0xb8, 0x72, 0xac, 0x35, 0x1c, 0x06,
+	0xe8, 0x04, 0xaa, 0x03, 0xf7, 0xe4, 0x91, 0x33, 0x1d, 0x31, 0x8f, 0x8b, 0xd9, 0xe5, 0xdb, 0xd5,
+	0x2e, 0x8a, 0x67, 0x97, 0x96, 0x70, 0x16, 0x16, 0xb2, 0x4e, 0x13, 0x96, 0xb6, 0xce, 0x3a, 0xcd,
+	0xb0, 0x12, 0x18, 0x32, 0x00, 0x30, 0xb5, 0xc9, 0x32, 0x94, 0x51, 0x68, 0xe5, 0xdb, 0x35, 0x9c,
+	0xc9, 0xa0, 0x26, 0x94, 0x26, 0x2c, 0x70, 0x38, 0xf5, 0x9a, 0x79, 0xa9, 0x31, 0x0e, 0xcd, 0x23,
+	0x80, 0xf4, 0x7a, 0x54, 0x07, 0x35, 0x79, 0x86, 0x3a, 0x70, 0x11, 0x02, 0x4d, 0xe4, 0xe5, 0xe2,
+	0x6b, 0x58, 0x9e, 0xcd, 0x87, 0x82, 0x71, 0x9a, 0x61, 0xf4, 0x2d, 0xc9, 0xd0, 0xb0, 0xda, 0xb7,
+	0x44, 0x7c, 0xce, 0x24, 0x5e, 0xc3, 0xea, 0x39, 0x4b, 0x3a, 0xe4, 0x33, 0x1d, 0x6e, 0x62, 0x4f,
+	0x8e, 0x2c, 0xe7, 0xea, 0xd7, 0x9e, 0x14, 0x88, 0x2d, 0x9e, 0x44, 0xa0, 0x8d, 0xad, 0x05, 0x8d,
+	0xee, 0x91, 0x67, 0xd3, 0xdc, 0x70, 0x9c, 0x20, 0xeb, 0x39, 0x54, 0x81, 0x42, 0xb8, 0x3f, 0xc5,
+	0x7c, 0x0d, 0xbb, 0x61, 0xdf, 0x3e, 0x71, 0xa6, 0xfe, 0x8c, 0xcc, 0x29, 0x7a, 0x90, 0xda, 0x5b,
+	0x91, 0xf6, 0x5e, 0x53, 0x90, 0x20, 0xd7, 0x3d, 0x2e, 0x44, 0xf4, 0x17, 0x64, 0x22, 0x45, 0xec,
+	0x60, 0x79, 0x36, 0xdf, 0x2b, 0xa0, 0x0f, 0x03, 0x9b, 0x5b, 0xe2, 0xa1, 0x31, 0xb0, 0x05, 0x55,
+	0x7c, 0xf3, 0x2c, 0x70, 0x5d, 0xe6, 0x71, 0x3a, 0x95, 0xd7, 0x94, 0x71, 0x36, 0x25, 0x10, 0xe3,
+	0x0c, 0x42, 0x0d, 0x11, 0x99, 0x14, 0x3a, 0x80, 0xf2, 0x19, 0xf1, 0x69, 0x66, 0x96, 0x49, 0x2c,
+	0xb6, 0x3f, 0x66, 0x9c, 0xd8, 0xb1, 0x65, 0x44, 0x35, 0x93, 0x31, 0x3f, 0xaa, 0xd0, 0xd8, 0xfe,
+	0x18, 0xf1, 0x86, 0x1e, 0xf5, 0xb8, 0xd4, 0xb4, 0x83, 0xe5, 0x19, 0xfd, 0x07, 0xf5, 0x81, 0x63,
+	0x71, 0x8b, 0x70, 0xe6, 0x0d, 0x9c, 0x29, 0xbd, 0x89, 0xd6, 0xbf, 0x96, 0x15, 0x38, 0x4c, 0x7d,
+	0x97, 0x39, 0x53, 0x1a, 0xe1, 0x42, 0x61, 0x6b, 0x59, 0xd4, 0x80, 0x62, 0x8f, 0xb1, 0xb9, 0x45,
+	0xa5, 0x34, 0x0d, 0x47, 0x51, 0xb2, 0xc4, 0x42, 0xba, 0x44, 0xd4, 0x07, 0x94, 0xdc, 0x92, 0xcc,
+	0xb1, 0x59, 0x94, 0x8b, 0x69, 0xc6, 0x8b, 0x59, 0x1f, 0x30, 0xde, 0xc2, 0x11, 0x9d, 0x12, 0x1d,
+	0x69, 0xa7, 0xd2, 0xef, 0x3a, 0x6d, 0x72, 0xcc, 0x0f, 0x2a, 0xd4, 0xc2, 0xf1, 0xf5, 0x98, 0xc3,
+	0x3d, 0x66, 0xa3, 0xff, 0x57, 0x2c, 0xfb, 0xef, 0xaa, 0x61, 0x22, 0xd0, 0x16, 0xd7, 0x1e, 0xc1,
+	0x5e, 0x22, 0x54, 0xfe, 0x38, 0xb3, 0xd3, 0xdd, 0x56, 0x12, 0x8c, 0x44, 0x50, 0x86, 0x11, 0xce,
+	0x79, 0x5b, 0x09, 0xfd, 0x03, 0x15, 0x19, 0x8d, 0xd9, 0xc0, 0x8d, 0xac, 0x90, 0x26, 0xa4, 0x13,
+	0x45, 0xf0, 0xc4, 0x63, 0x0b, 0xf9, 0xa1, 0x10, 0xf5, 0x6c, 0xca, 0xec, 0xff, 0xec, 0xbb, 0xdd,
+	0x00, 0xd4, 0xf3, 0x28, 0xe1, 0x54, 0xa2, 0x31, 0xbd, 0x0e, 0xa8, 0xcf, 0x75, 0x05, 0xfd, 0x0d,
+	0x7b, 0x2b, 0x79, 0x21, 0xc9, 0xa7, 0xba, 0x7a, 0x76, 0xfc, 0xe9, 0xce, 0x50, 0x6e, 0xef, 0x0c,
+	0xe5, 0xfb, 0x9d, 0xa1, 0xbc, 0xbb, 0x37, 0x72, 0xb7, 0xf7, 0x46, 0xee, 0xdb, 0xbd, 0x91, 0x7b,
+	0xb5, 0x7f, 0x65, 0xf1, 0x59, 0x70, 0xd9, 0x99, 0xb0, 0xc5, 0xa1, 0x6f, 0x93, 0xc9, 0x7c, 0x76,
+	0x7d, 0x18, 0x8e, 0xf0, 0xb2, 0x28, 0xff, 0xbe, 0x8e, 0x7f, 0x04, 0x00, 0x00, 0xff, 0xff, 0xdc,
+	0x87, 0xe2, 0x33, 0xce, 0x06, 0x00, 0x00,
 }
 
 func (m *NebulaMeta) Marshal() (dAtA []byte, err error) {
@@ -949,6 +1039,59 @@ func (m *NebulaHandshake) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	return len(dAtA) - i, nil
 }
 
+func (m *MultiPortDetails) Marshal() (dAtA []byte, err error) {
+	size := m.Size()
+	dAtA = make([]byte, size)
+	n, err := m.MarshalToSizedBuffer(dAtA[:size])
+	if err != nil {
+		return nil, err
+	}
+	return dAtA[:n], nil
+}
+
+func (m *MultiPortDetails) MarshalTo(dAtA []byte) (int, error) {
+	size := m.Size()
+	return m.MarshalToSizedBuffer(dAtA[:size])
+}
+
+func (m *MultiPortDetails) MarshalToSizedBuffer(dAtA []byte) (int, error) {
+	i := len(dAtA)
+	_ = i
+	var l int
+	_ = l
+	if m.TotalPorts != 0 {
+		i = encodeVarintNebula(dAtA, i, uint64(m.TotalPorts))
+		i--
+		dAtA[i] = 0x20
+	}
+	if m.BasePort != 0 {
+		i = encodeVarintNebula(dAtA, i, uint64(m.BasePort))
+		i--
+		dAtA[i] = 0x18
+	}
+	if m.TxSupported {
+		i--
+		if m.TxSupported {
+			dAtA[i] = 1
+		} else {
+			dAtA[i] = 0
+		}
+		i--
+		dAtA[i] = 0x10
+	}
+	if m.RxSupported {
+		i--
+		if m.RxSupported {
+			dAtA[i] = 1
+		} else {
+			dAtA[i] = 0
+		}
+		i--
+		dAtA[i] = 0x8
+	}
+	return len(dAtA) - i, nil
+}
+
 func (m *NebulaHandshakeDetails) Marshal() (dAtA []byte, err error) {
 	size := m.Size()
 	dAtA = make([]byte, size)
@@ -969,6 +1112,30 @@ func (m *NebulaHandshakeDetails) MarshalToSizedBuffer(dAtA []byte) (int, error)
 	_ = i
 	var l int
 	_ = l
+	if m.ResponderMultiPort != nil {
+		{
+			size, err := m.ResponderMultiPort.MarshalToSizedBuffer(dAtA[:i])
+			if err != nil {
+				return 0, err
+			}
+			i -= size
+			i = encodeVarintNebula(dAtA, i, uint64(size))
+		}
+		i--
+		dAtA[i] = 0x3a
+	}
+	if m.InitiatorMultiPort != nil {
+		{
+			size, err := m.InitiatorMultiPort.MarshalToSizedBuffer(dAtA[:i])
+			if err != nil {
+				return 0, err
+			}
+			i -= size
+			i = encodeVarintNebula(dAtA, i, uint64(size))
+		}
+		i--
+		dAtA[i] = 0x32
+	}
 	if m.Time != 0 {
 		i = encodeVarintNebula(dAtA, i, uint64(m.Time))
 		i--
@@ -1173,6 +1340,27 @@ func (m *NebulaHandshake) Size() (n int) {
 	return n
 }
 
+func (m *MultiPortDetails) Size() (n int) {
+	if m == nil {
+		return 0
+	}
+	var l int
+	_ = l
+	if m.RxSupported {
+		n += 2
+	}
+	if m.TxSupported {
+		n += 2
+	}
+	if m.BasePort != 0 {
+		n += 1 + sovNebula(uint64(m.BasePort))
+	}
+	if m.TotalPorts != 0 {
+		n += 1 + sovNebula(uint64(m.TotalPorts))
+	}
+	return n
+}
+
 func (m *NebulaHandshakeDetails) Size() (n int) {
 	if m == nil {
 		return 0
@@ -1195,6 +1383,14 @@ func (m *NebulaHandshakeDetails) Size() (n int) {
 	if m.Time != 0 {
 		n += 1 + sovNebula(uint64(m.Time))
 	}
+	if m.InitiatorMultiPort != nil {
+		l = m.InitiatorMultiPort.Size()
+		n += 1 + l + sovNebula(uint64(l))
+	}
+	if m.ResponderMultiPort != nil {
+		l = m.ResponderMultiPort.Size()
+		n += 1 + l + sovNebula(uint64(l))
+	}
 	return n
 }
 
@@ -1968,6 +2164,134 @@ func (m *NebulaHandshake) Unmarshal(dAtA []byte) error {
 	}
 	return nil
 }
+func (m *MultiPortDetails) Unmarshal(dAtA []byte) error {
+	l := len(dAtA)
+	iNdEx := 0
+	for iNdEx < l {
+		preIndex := iNdEx
+		var wire uint64
+		for shift := uint(0); ; shift += 7 {
+			if shift >= 64 {
+				return ErrIntOverflowNebula
+			}
+			if iNdEx >= l {
+				return io.ErrUnexpectedEOF
+			}
+			b := dAtA[iNdEx]
+			iNdEx++
+			wire |= uint64(b&0x7F) << shift
+			if b < 0x80 {
+				break
+			}
+		}
+		fieldNum := int32(wire >> 3)
+		wireType := int(wire & 0x7)
+		if wireType == 4 {
+			return fmt.Errorf("proto: MultiPortDetails: wiretype end group for non-group")
+		}
+		if fieldNum <= 0 {
+			return fmt.Errorf("proto: MultiPortDetails: illegal tag %d (wire type %d)", fieldNum, wire)
+		}
+		switch fieldNum {
+		case 1:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field RxSupported", wireType)
+			}
+			var v int
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				v |= int(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			m.RxSupported = bool(v != 0)
+		case 2:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field TxSupported", wireType)
+			}
+			var v int
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				v |= int(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			m.TxSupported = bool(v != 0)
+		case 3:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field BasePort", wireType)
+			}
+			m.BasePort = 0
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				m.BasePort |= uint32(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+		case 4:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field TotalPorts", wireType)
+			}
+			m.TotalPorts = 0
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				m.TotalPorts |= uint32(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+		default:
+			iNdEx = preIndex
+			skippy, err := skipNebula(dAtA[iNdEx:])
+			if err != nil {
+				return err
+			}
+			if (skippy < 0) || (iNdEx+skippy) < 0 {
+				return ErrInvalidLengthNebula
+			}
+			if (iNdEx + skippy) > l {
+				return io.ErrUnexpectedEOF
+			}
+			iNdEx += skippy
+		}
+	}
+
+	if iNdEx > l {
+		return io.ErrUnexpectedEOF
+	}
+	return nil
+}
 func (m *NebulaHandshakeDetails) Unmarshal(dAtA []byte) error {
 	l := len(dAtA)
 	iNdEx := 0
@@ -2107,6 +2431,78 @@ func (m *NebulaHandshakeDetails) Unmarshal(dAtA []byte) error {
 					break
 				}
 			}
+		case 6:
+			if wireType != 2 {
+				return fmt.Errorf("proto: wrong wireType = %d for field InitiatorMultiPort", wireType)
+			}
+			var msglen int
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				msglen |= int(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			if msglen < 0 {
+				return ErrInvalidLengthNebula
+			}
+			postIndex := iNdEx + msglen
+			if postIndex < 0 {
+				return ErrInvalidLengthNebula
+			}
+			if postIndex > l {
+				return io.ErrUnexpectedEOF
+			}
+			if m.InitiatorMultiPort == nil {
+				m.InitiatorMultiPort = &MultiPortDetails{}
+			}
+			if err := m.InitiatorMultiPort.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+				return err
+			}
+			iNdEx = postIndex
+		case 7:
+			if wireType != 2 {
+				return fmt.Errorf("proto: wrong wireType = %d for field ResponderMultiPort", wireType)
+			}
+			var msglen int
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowNebula
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := dAtA[iNdEx]
+				iNdEx++
+				msglen |= int(b&0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+			if msglen < 0 {
+				return ErrInvalidLengthNebula
+			}
+			postIndex := iNdEx + msglen
+			if postIndex < 0 {
+				return ErrInvalidLengthNebula
+			}
+			if postIndex > l {
+				return io.ErrUnexpectedEOF
+			}
+			if m.ResponderMultiPort == nil {
+				m.ResponderMultiPort = &MultiPortDetails{}
+			}
+			if err := m.ResponderMultiPort.Unmarshal(dAtA[iNdEx:postIndex]); err != nil {
+				return err
+			}
+			iNdEx = postIndex
 		default:
 			iNdEx = preIndex
 			skippy, err := skipNebula(dAtA[iNdEx:])

+ 10 - 2
nebula.proto

@@ -55,14 +55,22 @@ message NebulaHandshake {
   bytes Hmac = 2;
 }
 
+message MultiPortDetails {
+  bool RxSupported = 1;
+  bool TxSupported = 2;
+  uint32 BasePort = 3;
+  uint32 TotalPorts = 4;
+}
+
 message NebulaHandshakeDetails {
   bytes Cert = 1;
   uint32 InitiatorIndex = 2;
   uint32 ResponderIndex = 3;
   uint64 Cookie = 4;
   uint64 Time = 5;
-  // reserved for WIP multiport
-  reserved 6, 7;
+
+  MultiPortDetails InitiatorMultiPort = 6;
+  MultiPortDetails ResponderMultiPort = 7;
 }
 
 message NebulaControl {

+ 13 - 0
outside.go

@@ -257,6 +257,19 @@ func (f *Interface) sendCloseTunnel(h *HostInfo) {
 
 func (f *Interface) handleHostRoaming(hostinfo *HostInfo, addr *udp.Addr) {
 	if addr != nil && !hostinfo.remote.Equals(addr) {
+		if hostinfo.multiportRx {
+			// If the remote is sending with multiport, we aren't roaming unless
+			// the IP has changed
+			if hostinfo.remote.IP.Equal(addr.IP) {
+				return
+			}
+			// Keep the port from the original hostinfo, because the remote is transmitting from multiport ports
+			addr = &udp.Addr{
+				IP:   addr.IP,
+				Port: hostinfo.remote.Port,
+			}
+		}
+
 		if !f.lightHouse.GetRemoteAllowList().Allow(hostinfo.vpnIp, addr.IP) {
 			hostinfo.logger(f.l).WithField("newAddr", addr).Debug("lighthouse.remote_allow_list denied roaming")
 			return

+ 16 - 0
udp/udp_raw.go

@@ -0,0 +1,16 @@
+package udp
+
+import mathrand "math/rand"
+
+type SendPortGetter interface {
+	// UDPSendPort returns the port to use
+	UDPSendPort(maxPort int) uint16
+}
+
+type randomSendPort struct{}
+
+func (randomSendPort) UDPSendPort(maxPort int) uint16 {
+	return uint16(mathrand.Intn(maxPort))
+}
+
+var RandomSendPort = randomSendPort{}

+ 190 - 0
udp/udp_raw_linux.go

@@ -0,0 +1,190 @@
+//go:build !e2e_testing
+// +build !e2e_testing
+
+package udp
+
+import (
+	"encoding/binary"
+	"fmt"
+	"net"
+	"syscall"
+	"unsafe"
+
+	"github.com/rcrowley/go-metrics"
+	"github.com/sirupsen/logrus"
+	"github.com/slackhq/nebula/config"
+	"golang.org/x/net/ipv4"
+	"golang.org/x/sys/unix"
+)
+
+// RawOverhead is the number of bytes that need to be reserved at the start of
+// the raw bytes passed to (*RawConn).WriteTo. This is used by WriteTo to prefix
+// the IP and UDP headers.
+const RawOverhead = 28
+
+type RawConn struct {
+	sysFd    int
+	basePort uint16
+	l        *logrus.Logger
+}
+
+func NewRawConn(l *logrus.Logger, ip string, port int, basePort uint16) (*RawConn, error) {
+	syscall.ForkLock.RLock()
+	// With IPPROTO_UDP, the linux kernel tries to deliver every UDP packet
+	// received in the system to our socket. This constantly overflows our
+	// buffer and marks our socket as having dropped packets. This makes the
+	// stats on the socket useless.
+	//
+	// In contrast, IPPROTO_RAW is not delivered any packets and thus our read
+	// buffer will not fill up and mark as having dropped packets. The only
+	// difference is that we have to assemble the IP header as well, but this
+	// is fairly easy since Linux does the checksum for us.
+	//
+	// TODO: How to get this working with Inet6 correctly? I was having issues
+	// with the source address when testing before, probably need to `bind(2)`?
+	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_RAW, unix.IPPROTO_RAW)
+	if err == nil {
+		unix.CloseOnExec(fd)
+	}
+	syscall.ForkLock.RUnlock()
+	if err != nil {
+		return nil, err
+	}
+
+	// We only want to send, not recv. This will hopefully help the kernel avoid
+	// wasting time on us
+	if err = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, 0); err != nil {
+		return nil, fmt.Errorf("unable to set SO_RCVBUF: %s", err)
+	}
+
+	var lip [16]byte
+	copy(lip[:], net.ParseIP(ip))
+
+	// TODO do we need to `bind(2)` so that we send from the correct address/interface?
+	if err = unix.Bind(fd, &unix.SockaddrInet6{Addr: lip, Port: port}); err != nil {
+		return nil, fmt.Errorf("unable to bind to socket: %s", err)
+	}
+
+	return &RawConn{
+		sysFd:    fd,
+		basePort: basePort,
+		l:        l,
+	}, nil
+}
+
+// WriteTo must be called with raw leaving the first `udp.RawOverhead` bytes empty,
+// for the IP/UDP headers.
+func (u *RawConn) WriteTo(raw []byte, fromPort uint16, addr *Addr) error {
+	var rsa unix.RawSockaddrInet4
+	rsa.Family = unix.AF_INET
+	copy(rsa.Addr[:], addr.IP.To4())
+
+	totalLen := len(raw)
+	udpLen := totalLen - ipv4.HeaderLen
+
+	// IP header
+	raw[0] = byte(ipv4.Version<<4 | (ipv4.HeaderLen >> 2 & 0x0f))
+	raw[1] = 0 // tos
+	binary.BigEndian.PutUint16(raw[2:4], uint16(totalLen))
+	binary.BigEndian.PutUint16(raw[4:6], 0)   // id (linux does it for us)
+	binary.BigEndian.PutUint16(raw[6:8], 0)   // frag options
+	raw[8] = byte(64)                         // ttl
+	raw[9] = byte(17)                         // protocol
+	binary.BigEndian.PutUint16(raw[10:12], 0) // checksum (linux does it for us)
+	binary.BigEndian.PutUint32(raw[12:16], 0) // src (linux does it for us)
+	copy(raw[16:20], rsa.Addr[:])             // dst
+
+	// UDP header
+	fromPort = u.basePort + fromPort
+	binary.BigEndian.PutUint16(raw[20:22], uint16(fromPort))  // src port
+	binary.BigEndian.PutUint16(raw[22:24], uint16(addr.Port)) // dst port
+	binary.BigEndian.PutUint16(raw[24:26], uint16(udpLen))    // UDP length
+	binary.BigEndian.PutUint16(raw[26:28], 0)                 // checksum (optional)
+
+	for {
+		_, _, err := unix.Syscall6(
+			unix.SYS_SENDTO,
+			uintptr(u.sysFd),
+			uintptr(unsafe.Pointer(&raw[0])),
+			uintptr(len(raw)),
+			uintptr(0),
+			uintptr(unsafe.Pointer(&rsa)),
+			uintptr(unix.SizeofSockaddrInet4),
+		)
+
+		if err != 0 {
+			return &net.OpError{Op: "sendto", Err: err}
+		}
+
+		//TODO: handle incomplete writes
+
+		return nil
+	}
+}
+
+func (u *RawConn) ReloadConfig(c *config.C) {
+	b := c.GetInt("listen.write_buffer", 0)
+	if b <= 0 {
+		return
+	}
+
+	if err := u.SetSendBuffer(b); err != nil {
+		u.l.WithError(err).Error("Failed to set listen.write_buffer")
+		return
+	}
+
+	s, err := u.GetSendBuffer()
+	if err != nil {
+		u.l.WithError(err).Warn("Failed to get listen.write_buffer")
+		return
+	}
+
+	u.l.WithField("size", s).Info("listen.write_buffer was set")
+}
+
+func (u *RawConn) SetSendBuffer(n int) error {
+	return unix.SetsockoptInt(u.sysFd, unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, n)
+}
+
+func (u *RawConn) GetSendBuffer() (int, error) {
+	return unix.GetsockoptInt(u.sysFd, unix.SOL_SOCKET, unix.SO_SNDBUF)
+}
+
+func (u *RawConn) getMemInfo(meminfo *_SK_MEMINFO) error {
+	var vallen uint32 = 4 * _SK_MEMINFO_VARS
+	_, _, err := unix.Syscall6(unix.SYS_GETSOCKOPT, uintptr(u.sysFd), uintptr(unix.SOL_SOCKET), uintptr(unix.SO_MEMINFO), uintptr(unsafe.Pointer(meminfo)), uintptr(unsafe.Pointer(&vallen)), 0)
+	if err != 0 {
+		return err
+	}
+	return nil
+}
+
+func NewRawStatsEmitter(rawConn *RawConn) func() {
+	// Check if our kernel supports SO_MEMINFO before registering the gauges
+	var gauges [_SK_MEMINFO_VARS]metrics.Gauge
+	var meminfo _SK_MEMINFO
+	if err := rawConn.getMemInfo(&meminfo); err == nil {
+		gauges = [_SK_MEMINFO_VARS]metrics.Gauge{
+			metrics.GetOrRegisterGauge("raw.rmem_alloc", nil),
+			metrics.GetOrRegisterGauge("raw.rcvbuf", nil),
+			metrics.GetOrRegisterGauge("raw.wmem_alloc", nil),
+			metrics.GetOrRegisterGauge("raw.sndbuf", nil),
+			metrics.GetOrRegisterGauge("raw.fwd_alloc", nil),
+			metrics.GetOrRegisterGauge("raw.wmem_queued", nil),
+			metrics.GetOrRegisterGauge("raw.optmem", nil),
+			metrics.GetOrRegisterGauge("raw.backlog", nil),
+			metrics.GetOrRegisterGauge("raw.drops", nil),
+		}
+	} else {
+		// return no-op because we don't support SO_MEMINFO
+		return func() {}
+	}
+
+	return func() {
+		if err := rawConn.getMemInfo(&meminfo); err == nil {
+			for j := 0; j < _SK_MEMINFO_VARS; j++ {
+				gauges[j].Update(int64(meminfo[j]))
+			}
+		}
+	}
+}

+ 28 - 0
udp/udp_raw_unsupported.go

@@ -0,0 +1,28 @@
+//go:build !linux || e2e_testing
+// +build !linux e2e_testing
+
+package udp
+
+import (
+	"fmt"
+	"runtime"
+
+	"github.com/sirupsen/logrus"
+	"github.com/slackhq/nebula/config"
+)
+
+const RawOverhead = 0
+
+type RawConn struct{}
+
+func NewRawConn(l *logrus.Logger, ip string, port int, basePort uint16) (*RawConn, error) {
+	return nil, fmt.Errorf("multiport tx is not supported on %s", runtime.GOOS)
+}
+
+func (u *RawConn) WriteTo(raw []byte, fromPort uint16, addr *Addr) error {
+	return fmt.Errorf("multiport tx is not supported on %s", runtime.GOOS)
+}
+
+func (u *RawConn) ReloadConfig(c *config.C) {}
+
+func NewRawStatsEmitter(rawConn *RawConn) func() { return func() {} }