
Add batched TUN/UDP packet pipelines with GSO/GRO offload (experimental; needs more validation)

Ryan 1 month ago
commit aa44f4c7c9

+ 164 - 0
batch_pipeline.go

@@ -0,0 +1,164 @@
+package nebula
+
+import (
+	"net/netip"
+
+	"github.com/slackhq/nebula/overlay"
+	"github.com/slackhq/nebula/udp"
+)
+
+// batchPipelines tracks whether the inside device can operate on packet batches
+// and, if so, holds the shared packet pool sized for the virtio headroom and
+// payload limits advertised by the device. It also owns the fan-in/fan-out
+// queues between the TUN readers, encrypt/decrypt workers, and the UDP writers.
+type batchPipelines struct {
+	enabled    bool
+	inside     overlay.BatchCapableDevice
+	headroom   int
+	payloadCap int
+	pool       *overlay.PacketPool
+	batchSize  int
+	routines   int
+	rxQueues   []chan *overlay.Packet
+	txQueues   []chan queuedDatagram
+	tunQueues  []chan *overlay.Packet
+}
+
+type queuedDatagram struct {
+	packet *overlay.Packet
+	addr   netip.AddrPort
+}
+
+func (bp *batchPipelines) init(device overlay.Device, routines int, queueDepth int, maxSegments int) {
+	if device == nil || routines <= 0 {
+		return
+	}
+	bcap, ok := device.(overlay.BatchCapableDevice)
+	if !ok {
+		return
+	}
+	headroom := bcap.BatchHeadroom()
+	payload := bcap.BatchPayloadCap()
+	if maxSegments < 1 {
+		maxSegments = 1
+	}
+	requiredPayload := udp.MTU * maxSegments
+	if payload < requiredPayload {
+		payload = requiredPayload
+	}
+	batchSize := bcap.BatchSize()
+	if headroom <= 0 || payload <= 0 || batchSize <= 0 {
+		return
+	}
+	bp.enabled = true
+	bp.inside = bcap
+	bp.headroom = headroom
+	bp.payloadCap = payload
+	bp.batchSize = batchSize
+	bp.routines = routines
+	bp.pool = overlay.NewPacketPool(headroom, payload)
+	queueCap := batchSize * defaultBatchQueueDepthFactor
+	if queueDepth > 0 {
+		queueCap = queueDepth
+	}
+	if queueCap < batchSize {
+		queueCap = batchSize
+	}
+	bp.rxQueues = make([]chan *overlay.Packet, routines)
+	bp.txQueues = make([]chan queuedDatagram, routines)
+	bp.tunQueues = make([]chan *overlay.Packet, routines)
+	for i := 0; i < routines; i++ {
+		bp.rxQueues[i] = make(chan *overlay.Packet, queueCap)
+		bp.txQueues[i] = make(chan queuedDatagram, queueCap)
+		bp.tunQueues[i] = make(chan *overlay.Packet, queueCap)
+	}
+}
+
+func (bp *batchPipelines) Pool() *overlay.PacketPool {
+	if bp == nil || !bp.enabled {
+		return nil
+	}
+	return bp.pool
+}
+
+func (bp *batchPipelines) Enabled() bool {
+	return bp != nil && bp.enabled
+}
+
+func (bp *batchPipelines) batchSizeHint() int {
+	if bp == nil || bp.batchSize <= 0 {
+		return 1
+	}
+	return bp.batchSize
+}
+
+func (bp *batchPipelines) rxQueue(i int) chan *overlay.Packet {
+	if bp == nil || !bp.enabled || i < 0 || i >= len(bp.rxQueues) {
+		return nil
+	}
+	return bp.rxQueues[i]
+}
+
+func (bp *batchPipelines) txQueue(i int) chan queuedDatagram {
+	if bp == nil || !bp.enabled || i < 0 || i >= len(bp.txQueues) {
+		return nil
+	}
+	return bp.txQueues[i]
+}
+
+func (bp *batchPipelines) tunQueue(i int) chan *overlay.Packet {
+	if bp == nil || !bp.enabled || i < 0 || i >= len(bp.tunQueues) {
+		return nil
+	}
+	return bp.tunQueues[i]
+}
+
+func (bp *batchPipelines) txQueueLen(i int) int {
+	q := bp.txQueue(i)
+	if q == nil {
+		return 0
+	}
+	return len(q)
+}
+
+func (bp *batchPipelines) tunQueueLen(i int) int {
+	q := bp.tunQueue(i)
+	if q == nil {
+		return 0
+	}
+	return len(q)
+}
+
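+// The enqueue helpers below block when their queue is full; they return false
+// only when batching is disabled (nil queue), in which case the caller keeps
+// ownership and must release the packet itself.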
+func (bp *batchPipelines) enqueueRx(i int, pkt *overlay.Packet) bool {
+	q := bp.rxQueue(i)
+	if q == nil {
+		return false
+	}
+	q <- pkt
+	return true
+}
+
+func (bp *batchPipelines) enqueueTx(i int, pkt *overlay.Packet, addr netip.AddrPort) bool {
+	q := bp.txQueue(i)
+	if q == nil {
+		return false
+	}
+	q <- queuedDatagram{packet: pkt, addr: addr}
+	return true
+}
+
+func (bp *batchPipelines) enqueueTun(i int, pkt *overlay.Packet) bool {
+	q := bp.tunQueue(i)
+	if q == nil {
+		return false
+	}
+	q <- pkt
+	return true
+}
+
+func (bp *batchPipelines) newPacket() *overlay.Packet {
+	if bp == nil || !bp.enabled || bp.pool == nil {
+		return nil
+	}
+	return bp.pool.Get()
+}
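
A minimal sketch of the intended flow, in package nebula (the demo helper and
its parameter values are hypothetical, not part of this commit):

    // demoBatchTx borrows a pooled buffer and hands it to writer queue 0.
    func demoBatchTx(dev overlay.Device, payload []byte, addr netip.AddrPort) {
        var bp batchPipelines
        bp.init(dev, 1, 0, defaultGSOMaxSegments) // stays disabled unless dev is batch-capable
        if !bp.Enabled() {
            return // callers fall back to the single-packet path
        }
        pkt := bp.newPacket()
        pkt.Len = copy(pkt.Buf[pkt.Offset:], payload)
        if !bp.enqueueTx(0, pkt, addr) {
            pkt.Release() // nothing consumed it; return the buffer to the pool
        }
    }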

+ 61 - 23
inside.go

@@ -8,6 +8,7 @@ import (
 	"github.com/slackhq/nebula/header"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/noiseutil"
+	"github.com/slackhq/nebula/overlay"
 	"github.com/slackhq/nebula/routing"
 )

@@ -335,9 +336,21 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
 	if ci.eKey == nil {
 		return
 	}
-	useRelay := !remote.IsValid() && !hostinfo.remote.IsValid()
+	target := remote
+	if !target.IsValid() {
+		target = hostinfo.remote
+	}
+	useRelay := !target.IsValid()
 	fullOut := out

+	var pkt *overlay.Packet
+	if !useRelay && f.batches.Enabled() {
+		pkt = f.batches.newPacket()
+		if pkt != nil {
+			out = pkt.Payload()[:0]
+		}
+	}
+
 	if useRelay {
 		if len(out) < header.Len {
 			// out always has a capacity of mtu, but not always a length greater than the header.Len.
@@ -376,36 +389,61 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
 		ci.writeLock.Unlock()
 	}
 	if err != nil {
+		if pkt != nil {
+			pkt.Release()
+		}
 		hostinfo.logger(f.l).WithError(err).
-			WithField("udpAddr", remote).WithField("counter", c).
+			WithField("udpAddr", target).WithField("counter", c).
 			WithField("attemptedCounter", c).
 			Error("Failed to encrypt outgoing packet")
 		return
 	}

-	if remote.IsValid() {
-		err = f.writers[q].WriteTo(out, remote)
-		if err != nil {
-			hostinfo.logger(f.l).WithError(err).
-				WithField("udpAddr", remote).Error("Failed to write outgoing packet")
+	if target.IsValid() {
+		if pkt != nil {
+			pkt.Len = len(out)
+			if f.l.Level >= logrus.DebugLevel {
+				f.l.WithFields(logrus.Fields{
+					"queue":        q,
+					"dest":         target,
+					"payload_len":  pkt.Len,
+					"use_batches":  true,
+					"remote_index": hostinfo.remoteIndexId,
+				}).Debug("enqueueing packet to UDP batch queue")
+			}
+			if f.tryQueuePacket(q, pkt, target) {
+				return
+			}
+			if f.l.Level >= logrus.DebugLevel {
+				f.l.WithFields(logrus.Fields{
+					"queue": q,
+					"dest":  target,
+				}).Debug("failed to enqueue packet; falling back to immediate send")
+			}
+			f.writeImmediatePacket(q, pkt, target, hostinfo)
+			return
 		}
-	} else if hostinfo.remote.IsValid() {
-		err = f.writers[q].WriteTo(out, hostinfo.remote)
-		if err != nil {
-			hostinfo.logger(f.l).WithError(err).
-				WithField("udpAddr", remote).Error("Failed to write outgoing packet")
+		if f.tryQueueDatagram(q, out, target) {
+			return
 		}
-	} else {
-		// Try to send via a relay
-		for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
-			relayHostInfo, relay, err := f.hostMap.QueryVpnAddrsRelayFor(hostinfo.vpnAddrs, relayIP)
-			if err != nil {
-				hostinfo.relayState.DeleteRelay(relayIP)
-				hostinfo.logger(f.l).WithField("relay", relayIP).WithError(err).Info("sendNoMetrics failed to find HostInfo")
-				continue
-			}
-			f.SendVia(relayHostInfo, relay, out, nb, fullOut[:header.Len+len(out)], true)
-			break
+		f.writeImmediate(q, out, target, hostinfo)
+		return
+	}
+
+	// fall back to relay path
+	if pkt != nil {
+		pkt.Release()
+	}
+
+	// Try to send via a relay
+	for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
+		relayHostInfo, relay, err := f.hostMap.QueryVpnAddrsRelayFor(hostinfo.vpnAddrs, relayIP)
+		if err != nil {
+			hostinfo.relayState.DeleteRelay(relayIP)
+			hostinfo.logger(f.l).WithField("relay", relayIP).WithError(err).Info("sendNoMetrics failed to find HostInfo")
+			continue
 		}
+		f.SendVia(relayHostInfo, relay, out, nb, fullOut[:header.Len+len(out)], true)
+		break
 	}
 }
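
The queue-then-fallback pattern above relies on a single-release invariant;
summarized as a hypothetical helper (not in the diff):

    // Exactly one side releases pkt: the runSendQueue worker after flushing,
    // or writeImmediatePacket after its synchronous send.
    func sendOrFallback(f *Interface, q int, pkt *overlay.Packet, addr netip.AddrPort, hi *HostInfo) {
        if f.tryQueuePacket(q, pkt, addr) {
            return // the queue worker now owns pkt
        }
        f.writeImmediatePacket(q, pkt, addr, hi) // writes, then releases pkt
    }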

+ 602 - 1
interface.go

@@ -21,7 +21,13 @@ import (
 	"github.com/slackhq/nebula/udp"
 )

-const mtu = 9001
+const (
+	mtu                          = 9001
+	defaultGSOFlushInterval      = 150 * time.Microsecond
+	defaultBatchQueueDepthFactor = 4
+	defaultGSOMaxSegments        = 8
+	maxKernelGSOSegments         = 64
+)

 type InterfaceConfig struct {
 	HostMap            *HostMap
@@ -36,6 +42,9 @@ type InterfaceConfig struct {
 	connectionManager  *connectionManager
 	DropLocalBroadcast bool
 	DropMulticast      bool
+	EnableGSO          bool
+	EnableGRO          bool
+	GSOMaxSegments     int
 	routines           int
 	MessageMetrics     *MessageMetrics
 	version            string
@@ -47,6 +56,8 @@ type InterfaceConfig struct {
 	reQueryWait     time.Duration

 	ConntrackCacheTimeout time.Duration
+	BatchFlushInterval    time.Duration
+	BatchQueueDepth       int
 	l                     *logrus.Logger
 }

@@ -84,9 +95,20 @@ type Interface struct {
 	version     string

 	conntrackCacheTimeout time.Duration
+	batchQueueDepth       int
+	enableGSO             bool
+	enableGRO             bool
+	gsoMaxSegments        int
+	batchUDPQueueGauge    metrics.Gauge
+	batchUDPFlushCounter  metrics.Counter
+	batchTunQueueGauge    metrics.Gauge
+	batchTunFlushCounter  metrics.Counter
+	batchFlushInterval    atomic.Int64
+	sendSem               chan struct{}

 	writers []udp.Conn
 	readers []io.ReadWriteCloser
+	batches batchPipelines

 	metricHandshakes    metrics.Histogram
 	messageMetrics      *MessageMetrics
@@ -161,6 +183,22 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
 		return nil, errors.New("no connection manager")
 	}

+	if c.GSOMaxSegments <= 0 {
+		c.GSOMaxSegments = defaultGSOMaxSegments
+	}
+	if c.GSOMaxSegments > maxKernelGSOSegments {
+		c.GSOMaxSegments = maxKernelGSOSegments
+	}
+	if c.BatchQueueDepth <= 0 {
+		c.BatchQueueDepth = c.routines * defaultBatchQueueDepthFactor
+	}
+	if c.BatchFlushInterval < 0 {
+		c.BatchFlushInterval = 0
+	}
+	if c.BatchFlushInterval == 0 && c.EnableGSO {
+		c.BatchFlushInterval = defaultGSOFlushInterval
+	}
+
 	cs := c.pki.getCertState()
 	ifce := &Interface{
 		pki:                   c.pki,
@@ -186,6 +224,10 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
 		relayManager:          c.relayManager,
 		connectionManager:     c.connectionManager,
 		conntrackCacheTimeout: c.ConntrackCacheTimeout,
+		batchQueueDepth:       c.BatchQueueDepth,
+		enableGSO:             c.EnableGSO,
+		enableGRO:             c.EnableGRO,
+		gsoMaxSegments:        c.GSOMaxSegments,

 		metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
 		messageMetrics:   c.MessageMetrics,
@@ -198,8 +240,25 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
 	}

 	ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
+	ifce.batchUDPQueueGauge = metrics.GetOrRegisterGauge("batch.udp.queue_depth", nil)
+	ifce.batchUDPFlushCounter = metrics.GetOrRegisterCounter("batch.udp.flushes", nil)
+	ifce.batchTunQueueGauge = metrics.GetOrRegisterGauge("batch.tun.queue_depth", nil)
+	ifce.batchTunFlushCounter = metrics.GetOrRegisterCounter("batch.tun.flushes", nil)
+	ifce.batchFlushInterval.Store(int64(c.BatchFlushInterval))
+	ifce.sendSem = make(chan struct{}, c.routines)
+	ifce.batches.init(c.Inside, c.routines, c.BatchQueueDepth, c.GSOMaxSegments)
 	ifce.reQueryEvery.Store(c.reQueryEvery)
 	ifce.reQueryWait.Store(int64(c.reQueryWait))
+	if c.l.Level >= logrus.DebugLevel {
+		c.l.WithFields(logrus.Fields{
+			"enableGSO":       c.EnableGSO,
+			"enableGRO":       c.EnableGRO,
+			"gsoMaxSegments":  c.GSOMaxSegments,
+			"batchQueueDepth": c.BatchQueueDepth,
+			"batchFlush":      c.BatchFlushInterval,
+			"batching":        ifce.batches.Enabled(),
+		}).Debug("initialized batch pipelines")
+	}

 	ifce.connectionManager.intf = ifce

@@ -248,6 +307,18 @@ func (f *Interface) run() {
 		go f.listenOut(i)
 	}

+	if f.l.Level >= logrus.DebugLevel {
+		f.l.WithField("batching", f.batches.Enabled()).Debug("starting interface run loops")
+	}
+
+	if f.batches.Enabled() {
+		for i := 0; i < f.routines; i++ {
+			go f.runInsideBatchWorker(i)
+			go f.runTunWriteQueue(i)
+			go f.runSendQueue(i)
+		}
+	}
+
 	// Launch n queues to read packets from tun dev
 	for i := 0; i < f.routines; i++ {
 		go f.listenIn(f.readers[i], i)
@@ -279,6 +350,17 @@ func (f *Interface) listenOut(i int) {
 func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
 	runtime.LockOSThread()

+	if f.batches.Enabled() {
+		if br, ok := reader.(overlay.BatchReader); ok {
+			f.listenInBatchLocked(reader, br, i)
+			return
+		}
+	}
+
+	f.listenInLegacyLocked(reader, i)
+}
+
+func (f *Interface) listenInLegacyLocked(reader io.ReadWriteCloser, i int) {
 	packet := make([]byte, mtu)
 	out := make([]byte, mtu)
 	fwPacket := &firewall.Packet{}
@@ -302,6 +384,489 @@ func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
 	}
 }

+func (f *Interface) listenInBatchLocked(raw io.ReadWriteCloser, reader overlay.BatchReader, i int) {
+	pool := f.batches.Pool()
+	if pool == nil {
+		f.l.Warn("batch pipeline enabled without an allocated pool; falling back to single-packet reads")
+		f.listenInLegacyLocked(raw, i)
+		return
+	}
+
+	for {
+		packets, err := reader.ReadIntoBatch(pool)
+		if err != nil {
+			if errors.Is(err, os.ErrClosed) && f.closed.Load() {
+				return
+			}
+
+			f.l.WithError(err).Error("Error while reading outbound packet batch")
+			os.Exit(2)
+		}
+
+		if len(packets) == 0 {
+			continue
+		}
+
+		for _, pkt := range packets {
+			if pkt == nil {
+				continue
+			}
+			if !f.batches.enqueueRx(i, pkt) {
+				pkt.Release()
+			}
+		}
+	}
+}
+
+func (f *Interface) runInsideBatchWorker(i int) {
+	queue := f.batches.rxQueue(i)
+	if queue == nil {
+		return
+	}
+
+	out := make([]byte, mtu)
+	fwPacket := &firewall.Packet{}
+	nb := make([]byte, 12, 12)
+	conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
+
+	for pkt := range queue {
+		if pkt == nil {
+			continue
+		}
+		f.consumeInsidePacket(pkt.Payload(), fwPacket, nb, out, i, conntrackCache.Get(f.l))
+		pkt.Release()
+	}
+}
+
+func (f *Interface) runSendQueue(i int) {
+	queue := f.batches.txQueue(i)
+	if queue == nil {
+		if f.l.Level >= logrus.DebugLevel {
+			f.l.WithField("queue", i).Debug("tx queue not initialized; batching disabled for writer")
+		}
+		return
+	}
+	writer := f.writerForIndex(i)
+	if writer == nil {
+		if f.l.Level >= logrus.DebugLevel {
+			f.l.WithField("queue", i).Debug("no UDP writer for batch queue")
+		}
+		return
+	}
+	if f.l.Level >= logrus.DebugLevel {
+		f.l.WithField("queue", i).Debug("send queue worker started")
+	}
+	defer func() {
+		if f.l.Level >= logrus.WarnLevel {
+			f.l.WithField("queue", i).Warn("send queue worker exited")
+		}
+	}()
+
+	batchCap := f.batches.batchSizeHint()
+	if batchCap <= 0 {
+		batchCap = 1
+	}
+	gsoLimit := f.effectiveGSOMaxSegments()
+	if gsoLimit > batchCap {
+		batchCap = gsoLimit
+	}
+	pending := make([]queuedDatagram, 0, batchCap)
+	var (
+		flushTimer *time.Timer
+		flushC     <-chan time.Time
+	)
+	dispatch := func(reason string, timerFired bool) {
+		if len(pending) == 0 {
+			return
+		}
+		batch := pending
+		f.flushAndReleaseBatch(i, writer, batch, reason)
+		for idx := range batch {
+			batch[idx] = queuedDatagram{}
+		}
+		pending = pending[:0]
+		if flushTimer != nil {
+			if !timerFired {
+				if !flushTimer.Stop() {
+					select {
+					case <-flushTimer.C:
+					default:
+					}
+				}
+			}
+			flushTimer = nil
+			flushC = nil
+		}
+	}
+	armTimer := func() {
+		delay := f.currentBatchFlushInterval()
+		if delay <= 0 {
+			dispatch("nogso", false)
+			return
+		}
+		if flushTimer == nil {
+			flushTimer = time.NewTimer(delay)
+			flushC = flushTimer.C
+		}
+	}
+
+	for {
+		select {
+		case d := <-queue:
+			if d.packet == nil {
+				continue
+			}
+			if f.l.Level >= logrus.DebugLevel {
+				f.l.WithFields(logrus.Fields{
+					"queue":       i,
+					"payload_len": d.packet.Len,
+					"dest":        d.addr,
+				}).Debug("send queue received packet")
+			}
+			pending = append(pending, d)
+			if gsoLimit > 0 && len(pending) >= gsoLimit {
+				dispatch("gso", false)
+				continue
+			}
+			if len(pending) >= cap(pending) {
+				dispatch("cap", false)
+				continue
+			}
+			armTimer()
+			f.observeUDPQueueLen(i)
+		case <-flushC:
+			dispatch("timer", true)
+		}
+	}
+}
+
+func (f *Interface) runTunWriteQueue(i int) {
+	queue := f.batches.tunQueue(i)
+	if queue == nil {
+		return
+	}
+	writer := f.batches.inside
+	if writer == nil {
+		return
+	}
+
+	batchCap := f.batches.batchSizeHint()
+	if batchCap <= 0 {
+		batchCap = 1
+	}
+	pending := make([]*overlay.Packet, 0, batchCap)
+	var (
+		flushTimer *time.Timer
+		flushC     <-chan time.Time
+	)
+	flush := func(reason string, timerFired bool) {
+		if len(pending) == 0 {
+			return
+		}
+		// WriteBatch takes ownership and releases every packet itself (see
+		// wireguardTunIO.WriteBatch), so do not release them again here: a
+		// double release can hand the same buffer to two owners.
+		if _, err := writer.WriteBatch(pending); err != nil {
+			f.l.WithError(err).
+				WithField("queue", i).
+				WithField("reason", reason).
+				Warn("Failed to write tun batch")
+		}
+		for idx := range pending {
+			pending[idx] = nil
+		}
+		pending = pending[:0]
+		if flushTimer != nil {
+			if !timerFired {
+				if !flushTimer.Stop() {
+					select {
+					case <-flushTimer.C:
+					default:
+					}
+				}
+			}
+			flushTimer = nil
+			flushC = nil
+		}
+	}
+	armTimer := func() {
+		delay := f.currentBatchFlushInterval()
+		if delay <= 0 {
+			return
+		}
+		if flushTimer == nil {
+			flushTimer = time.NewTimer(delay)
+			flushC = flushTimer.C
+		}
+	}
+
+	for {
+		select {
+		case pkt := <-queue:
+			if pkt == nil {
+				continue
+			}
+			pending = append(pending, pkt)
+			if len(pending) >= cap(pending) {
+				flush("cap", false)
+				continue
+			}
+			armTimer()
+			f.observeTunQueueLen(i)
+		case <-flushC:
+			flush("timer", true)
+		}
+	}
+}
+
+func (f *Interface) flushAndReleaseBatch(index int, writer udp.Conn, batch []queuedDatagram, reason string) {
+	if len(batch) == 0 {
+		return
+	}
+	f.flushDatagrams(index, writer, batch, reason)
+	for idx := range batch {
+		if batch[idx].packet != nil {
+			batch[idx].packet.Release()
+			batch[idx].packet = nil
+		}
+	}
+	if f.batchUDPFlushCounter != nil {
+		f.batchUDPFlushCounter.Inc(int64(len(batch)))
+	}
+}
+
+func (f *Interface) flushDatagrams(index int, writer udp.Conn, batch []queuedDatagram, reason string) {
+	if len(batch) == 0 {
+		return
+	}
+	if f.l.Level >= logrus.DebugLevel {
+		f.l.WithFields(logrus.Fields{
+			"writer":  index,
+			"reason":  reason,
+			"pending": len(batch),
+		}).Debug("udp batch flush summary")
+	}
+	maxSeg := f.effectiveGSOMaxSegments()
+	if bw, ok := writer.(udp.BatchConn); ok {
+		chunkCap := maxSeg
+		if chunkCap <= 0 {
+			chunkCap = len(batch)
+		}
+		chunk := make([]udp.Datagram, 0, chunkCap)
+		var (
+			currentAddr netip.AddrPort
+			segments    int
+		)
+		flushChunk := func() {
+			if len(chunk) == 0 {
+				return
+			}
+			if f.l.Level >= logrus.DebugLevel {
+				f.l.WithFields(logrus.Fields{
+					"writer":        index,
+					"segments":      len(chunk),
+					"dest":          chunk[0].Addr,
+					"reason":        reason,
+					"pending_total": len(batch),
+				}).Debug("flushing UDP batch")
+			}
+			if err := bw.WriteBatch(chunk); err != nil {
+				f.l.WithError(err).
+					WithField("writer", index).
+					WithField("reason", reason).
+					Warn("Failed to write UDP batch")
+			}
+			chunk = chunk[:0]
+			segments = 0
+		}
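+		// A chunk is flushed whenever the destination changes or maxSeg is
+		// reached: the batch writer can only coalesce a run of datagrams that
+		// share a single destination.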
+		for _, item := range batch {
+			if item.packet == nil || !item.addr.IsValid() {
+				continue
+			}
+			payload := item.packet.Payload()[:item.packet.Len]
+			if segments == 0 {
+				currentAddr = item.addr
+			}
+			if item.addr != currentAddr || (maxSeg > 0 && segments >= maxSeg) {
+				flushChunk()
+				currentAddr = item.addr
+			}
+			chunk = append(chunk, udp.Datagram{Payload: payload, Addr: item.addr})
+			segments++
+		}
+		flushChunk()
+		return
+	}
+	for _, item := range batch {
+		if item.packet == nil || !item.addr.IsValid() {
+			continue
+		}
+		if f.l.Level >= logrus.DebugLevel {
+			f.l.WithFields(logrus.Fields{
+				"writer":   index,
+				"reason":   reason,
+				"dest":     item.addr,
+				"segments": 1,
+			}).Debug("flushing UDP batch")
+		}
+		if err := writer.WriteTo(item.packet.Payload()[:item.packet.Len], item.addr); err != nil {
+			f.l.WithError(err).
+				WithField("writer", index).
+				WithField("udpAddr", item.addr).
+				WithField("reason", reason).
+				Warn("Failed to write UDP packet")
+		}
+	}
+}
+
+func (f *Interface) tryQueueDatagram(q int, buf []byte, addr netip.AddrPort) bool {
+	if !addr.IsValid() || !f.batches.Enabled() {
+		return false
+	}
+	pkt := f.batches.newPacket()
+	if pkt == nil {
+		return false
+	}
+	// A fresh packet has Len == 0, so Payload() is empty here; check the
+	// usable capacity behind the headroom instead.
+	if pkt.Capacity() < len(buf) {
+		pkt.Release()
+		return false
+	}
+	copy(pkt.Buf[pkt.Offset:], buf)
+	pkt.Len = len(buf)
+	if f.batches.enqueueTx(q, pkt, addr) {
+		f.observeUDPQueueLen(q)
+		return true
+	}
+	pkt.Release()
+	return false
+}
+
+func (f *Interface) writerForIndex(i int) udp.Conn {
+	if i < 0 || i >= len(f.writers) {
+		return nil
+	}
+	return f.writers[i]
+}
+
+func (f *Interface) writeImmediate(q int, buf []byte, addr netip.AddrPort, hostinfo *HostInfo) {
+	writer := f.writerForIndex(q)
+	if writer == nil {
+		f.l.WithField("udpAddr", addr).
+			WithField("writer", q).
+			Error("Failed to write outgoing packet: no writer available")
+		return
+	}
+	if err := writer.WriteTo(buf, addr); err != nil {
+		hostinfo.logger(f.l).
+			WithError(err).
+			WithField("udpAddr", addr).
+			Error("Failed to write outgoing packet")
+	}
+}
+
+func (f *Interface) tryQueuePacket(q int, pkt *overlay.Packet, addr netip.AddrPort) bool {
+	if pkt == nil || !addr.IsValid() || !f.batches.Enabled() {
+		return false
+	}
+	if f.batches.enqueueTx(q, pkt, addr) {
+		f.observeUDPQueueLen(q)
+		return true
+	}
+	return false
+}
+
+func (f *Interface) writeImmediatePacket(q int, pkt *overlay.Packet, addr netip.AddrPort, hostinfo *HostInfo) {
+	if pkt == nil {
+		return
+	}
+	writer := f.writerForIndex(q)
+	if writer == nil {
+		f.l.WithField("udpAddr", addr).
+			WithField("writer", q).
+			Error("Failed to write outgoing packet: no writer available")
+		pkt.Release()
+		return
+	}
+	if err := writer.WriteTo(pkt.Payload()[:pkt.Len], addr); err != nil {
+		hostinfo.logger(f.l).
+			WithError(err).
+			WithField("udpAddr", addr).
+			Error("Failed to write outgoing packet")
+	}
+	pkt.Release()
+}
+
+func (f *Interface) writePacketToTun(q int, pkt *overlay.Packet) {
+	if pkt == nil {
+		return
+	}
+	writer := f.readers[q]
+	if writer == nil {
+		pkt.Release()
+		return
+	}
+	if _, err := writer.Write(pkt.Payload()[:pkt.Len]); err != nil {
+		f.l.WithError(err).Error("Failed to write to tun")
+	}
+	pkt.Release()
+}
+
+func (f *Interface) observeUDPQueueLen(i int) {
+	if f.batchUDPQueueGauge == nil {
+		return
+	}
+	f.batchUDPQueueGauge.Update(int64(f.batches.txQueueLen(i)))
+}
+
+func (f *Interface) observeTunQueueLen(i int) {
+	if f.batchTunQueueGauge == nil {
+		return
+	}
+	f.batchTunQueueGauge.Update(int64(f.batches.tunQueueLen(i)))
+}
+
+func (f *Interface) currentBatchFlushInterval() time.Duration {
+	if v := f.batchFlushInterval.Load(); v > 0 {
+		return time.Duration(v)
+	}
+	return 0
+}
+
+func (f *Interface) effectiveGSOMaxSegments() int {
+	if !f.enableGSO {
+		return 1
+	}
+	segs := f.gsoMaxSegments
+	if segs <= 0 {
+		segs = defaultGSOMaxSegments
+	}
+	if segs > maxKernelGSOSegments {
+		segs = maxKernelGSOSegments
+	}
+	return segs
+}
+
+type udpOffloadConfigurator interface {
+	ConfigureOffload(enableGSO, enableGRO bool, maxSegments int)
+}
+
+func (f *Interface) applyOffloadConfig(enableGSO, enableGRO bool, maxSegments int) {
+	if maxSegments <= 0 {
+		maxSegments = defaultGSOMaxSegments
+	}
+	if maxSegments > maxKernelGSOSegments {
+		maxSegments = maxKernelGSOSegments
+	}
+	f.enableGSO = enableGSO
+	f.enableGRO = enableGRO
+	f.gsoMaxSegments = maxSegments
+	for _, writer := range f.writers {
+		if cfg, ok := writer.(udpOffloadConfigurator); ok {
+			cfg.ConfigureOffload(enableGSO, enableGRO, maxSegments)
+		}
+	}
+}
+
 func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
 	c.RegisterReloadCallback(f.reloadFirewall)
 	c.RegisterReloadCallback(f.reloadSendRecvError)
@@ -404,6 +969,42 @@ func (f *Interface) reloadMisc(c *config.C) {
 		f.reQueryWait.Store(int64(n))
 		f.l.Info("timers.requery_wait_duration has changed")
 	}
+
+	if c.HasChanged("listen.gso_flush_timeout") {
+		d := c.GetDuration("listen.gso_flush_timeout", defaultGSOFlushInterval)
+		if d < 0 {
+			d = 0
+		}
+		f.batchFlushInterval.Store(int64(d))
+		f.l.WithField("duration", d).Info("listen.gso_flush_timeout has changed")
+	} else if c.HasChanged("batch.flush_interval") {
+		d := c.GetDuration("batch.flush_interval", defaultGSOFlushInterval)
+		if d < 0 {
+			d = 0
+		}
+		f.batchFlushInterval.Store(int64(d))
+		f.l.WithField("duration", d).Warn("batch.flush_interval is deprecated; use listen.gso_flush_timeout")
+	}
+
+	if c.HasChanged("batch.queue_depth") {
+		n := c.GetInt("batch.queue_depth", f.batchQueueDepth)
+		if n != f.batchQueueDepth {
+			f.batchQueueDepth = n
+			f.l.Warn("batch.queue_depth changes require a restart to take effect")
+		}
+	}
+
+	if c.HasChanged("listen.enable_gso") || c.HasChanged("listen.enable_gro") || c.HasChanged("listen.gso_max_segments") {
+		enableGSO := c.GetBool("listen.enable_gso", f.enableGSO)
+		enableGRO := c.GetBool("listen.enable_gro", f.enableGRO)
+		maxSeg := c.GetInt("listen.gso_max_segments", f.gsoMaxSegments)
+		f.applyOffloadConfig(enableGSO, enableGRO, maxSeg)
+		f.l.WithFields(logrus.Fields{
+			"enableGSO":      enableGSO,
+			"enableGRO":      enableGRO,
+			"gsoMaxSegments": maxSeg,
+		}).Info("listen GSO/GRO configuration updated")
+	}
 }

 func (f *Interface) emitStats(ctx context.Context, i time.Duration) {

+ 25 - 0
main.go

@@ -144,6 +144,20 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
 	// set up our UDP listener
 	udpConns := make([]udp.Conn, routines)
 	port := c.GetInt("listen.port", 0)
+	enableGSO := c.GetBool("listen.enable_gso", true)
+	enableGRO := c.GetBool("listen.enable_gro", true)
+	gsoMaxSegments := c.GetInt("listen.gso_max_segments", defaultGSOMaxSegments)
+	if gsoMaxSegments <= 0 {
+		gsoMaxSegments = defaultGSOMaxSegments
+	}
+	if gsoMaxSegments > maxKernelGSOSegments {
+		gsoMaxSegments = maxKernelGSOSegments
+	}
+	gsoFlushTimeout := c.GetDuration("listen.gso_flush_timeout", defaultGSOFlushInterval)
+	if gsoFlushTimeout < 0 {
+		gsoFlushTimeout = 0
+	}
+	batchQueueDepth := c.GetInt("batch.queue_depth", 0)
 
 
 	if !configTest {
 	if !configTest {
 		rawListenHost := c.GetString("listen.host", "0.0.0.0")
 		rawListenHost := c.GetString("listen.host", "0.0.0.0")
@@ -179,6 +193,11 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
 				return nil, util.NewContextualError("Failed to open udp listener", m{"queue": i}, err)
 				return nil, util.NewContextualError("Failed to open udp listener", m{"queue": i}, err)
 			}
 			udpServer.ReloadConfig(c)
+				ConfigureOffload(bool, bool, int)
+			}); ok {
+				cfg.ConfigureOffload(enableGSO, enableGRO, gsoMaxSegments)
+			}
 			udpConns[i] = udpServer

 			// If port is dynamic, discover it before the next pass through the for loop
@@ -246,12 +265,17 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
 		reQueryWait:           c.GetDuration("timers.requery_wait_duration", defaultReQueryWait),
 		DropLocalBroadcast:    c.GetBool("tun.drop_local_broadcast", false),
 		DropMulticast:         c.GetBool("tun.drop_multicast", false),
+		EnableGSO:             enableGSO,
+		EnableGRO:             enableGRO,
+		GSOMaxSegments:        gsoMaxSegments,
 		routines:              routines,
 		MessageMetrics:        messageMetrics,
 		version:               buildVersion,
 		relayManager:          NewRelayManager(ctx, l, hostMap, c),
 		punchy:                punchy,
 		ConntrackCacheTimeout: conntrackCacheTimeout,
+		BatchFlushInterval:    gsoFlushTimeout,
+		BatchQueueDepth:       batchQueueDepth,
 		l:                     l,
 		l:                     l,
 	}
 
@@ -263,6 +287,7 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
 		}

 		ifce.writers = udpConns
+		ifce.applyOffloadConfig(enableGSO, enableGRO, gsoMaxSegments)
 		lightHouse.ifce = ifce

 		ifce.RegisterConfigChangeCallbacks(c)
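
The new keys read above would sit in the node config roughly like this (a
sketch; the values shown are the defaults the code applies):

    listen:
      enable_gso: true          # coalesce outgoing datagrams via UDP_SEGMENT
      enable_gro: true          # accept kernel-coalesced reads via UDP_GRO
      gso_max_segments: 8       # clamped to 64 (maxKernelGSOSegments)
      gso_flush_timeout: 150us  # 0 disables the batch flush timer
    batch:
      queue_depth: 0            # 0 sizes queues from the device batch size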

+ 35 - 3
outside.go

@@ -12,6 +12,7 @@ import (
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/firewall"
 	"github.com/slackhq/nebula/header"
+	"github.com/slackhq/nebula/overlay"
 	"golang.org/x/net/ipv4"
 )

@@ -466,22 +467,41 @@ func (f *Interface) decrypt(hostinfo *HostInfo, mc uint64, out []byte, packet []
 }

 func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out []byte, packet []byte, fwPacket *firewall.Packet, nb []byte, q int, localCache firewall.ConntrackCache) bool {
-	var err error
+	var (
+		err error
+		pkt *overlay.Packet
+	)
+
+	if f.batches.tunQueue(q) != nil {
+		pkt = f.batches.newPacket()
+		if pkt != nil {
+			out = pkt.Payload()[:0]
+		}
+	}

 	out, err = hostinfo.ConnectionState.dKey.DecryptDanger(out, packet[:header.Len], packet[header.Len:], messageCounter, nb)
 	if err != nil {
+		if pkt != nil {
+			pkt.Release()
+		}
 		hostinfo.logger(f.l).WithError(err).Error("Failed to decrypt packet")
 		return false
 	}

 	err = newPacket(out, true, fwPacket)
 	if err != nil {
+		if pkt != nil {
+			pkt.Release()
+		}
 		hostinfo.logger(f.l).WithError(err).WithField("packet", out).
 			Warnf("Error while validating inbound packet")
 		return false
 	}

 	if !hostinfo.ConnectionState.window.Update(f.l, messageCounter) {
+		if pkt != nil {
+			pkt.Release()
+		}
 		hostinfo.logger(f.l).WithField("fwPacket", fwPacket).
 			Debugln("dropping out of window packet")
 		return false
@@ -489,6 +509,9 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out

 	dropReason := f.firewall.Drop(*fwPacket, true, hostinfo, f.pki.GetCAPool(), localCache)
 	if dropReason != nil {
 		// NOTE: We give `packet` as the `out` here since we already decrypted from it and we don't need it anymore
 		// This gives us a buffer to build the reject packet in
 		f.rejectOutside(out, hostinfo.ConnectionState, hostinfo, nb, packet, q)
+		// Release only after rejectOutside: out aliases pkt's pooled buffer.
+		if pkt != nil {
+			pkt.Release()
+		}
@@ -501,8 +524,17 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out
 	}

 	f.connectionManager.In(hostinfo)
-	_, err = f.readers[q].Write(out)
-	if err != nil {
+	if pkt != nil {
+		pkt.Len = len(out)
+		if f.batches.enqueueTun(q, pkt) {
+			f.observeTunQueueLen(q)
+			return true
+		}
+		f.writePacketToTun(q, pkt)
+		return true
+	}
+
+	if _, err = f.readers[q].Write(out); err != nil {
 		f.l.WithError(err).Error("Failed to write to tun")
 	}
 	return true

+ 82 - 0
overlay/device.go

@@ -3,6 +3,7 @@ package overlay
 import (
 	"io"
 	"net/netip"
+	"sync"

 	"github.com/slackhq/nebula/routing"
 )
@@ -15,3 +16,84 @@ type Device interface {
 	RoutesFor(netip.Addr) routing.Gateways
 	NewMultiQueueReader() (io.ReadWriteCloser, error)
 }
+
+// Packet represents a single packet buffer with optional headroom to carry
+// metadata (for example virtio-net headers).
+type Packet struct {
+	Buf     []byte
+	Offset  int
+	Len     int
+	release func()
+}
+
+func (p *Packet) Payload() []byte {
+	return p.Buf[p.Offset : p.Offset+p.Len]
+}
+
+func (p *Packet) Reset() {
+	p.Len = 0
+	p.Offset = 0
+	p.release = nil
+}
+
+func (p *Packet) Release() {
+	if p.release != nil {
+		p.release()
+		p.release = nil
+	}
+}
+
+func (p *Packet) Capacity() int {
+	return len(p.Buf) - p.Offset
+}
+
+// PacketPool manages reusable buffers with headroom.
+type PacketPool struct {
+	headroom int
+	blksz    int
+	pool     sync.Pool
+}
+
+func NewPacketPool(headroom, payload int) *PacketPool {
+	p := &PacketPool{headroom: headroom, blksz: headroom + payload}
+	p.pool.New = func() any {
+		buf := make([]byte, p.blksz)
+		return &Packet{Buf: buf, Offset: headroom}
+	}
+	return p
+}
+
+func (p *PacketPool) Get() *Packet {
+	pkt := p.pool.Get().(*Packet)
+	pkt.Offset = p.headroom
+	pkt.Len = 0
+	pkt.release = func() { p.put(pkt) }
+	return pkt
+}
+
+func (p *PacketPool) put(pkt *Packet) {
+	pkt.Reset()
+	p.pool.Put(pkt)
+}
+
+// BatchReader allows reading multiple packets into a shared pool with
+// preallocated headroom (e.g. virtio-net headers).
+type BatchReader interface {
+	ReadIntoBatch(pool *PacketPool) ([]*Packet, error)
+}
+
+// BatchWriter writes a slice of packets that carry their own metadata.
+type BatchWriter interface {
+	WriteBatch(packets []*Packet) (int, error)
+}
+
+// BatchCapableDevice describes a device that can efficiently read and write
+// batches of packets with virtio headroom.
+type BatchCapableDevice interface {
+	Device
+	BatchReader
+	BatchWriter
+	BatchHeadroom() int
+	BatchPayloadCap() int
+	BatchSize() int
+}
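
The pool contract in one small sketch (hypothetical demo; 10 stands in for the
virtio header length):

    func demoPool() {
        pool := overlay.NewPacketPool(10, 1400)
        pkt := pool.Get()
        pkt.Len = copy(pkt.Buf[pkt.Offset:], []byte("hello"))
        _ = pkt.Payload() // pkt.Buf[pkt.Offset : pkt.Offset+pkt.Len]
        pkt.Release() // returns the buffer; a second Release is a no-op
    }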

+ 56 - 0
overlay/tun_linux_batch.go

@@ -0,0 +1,56 @@
+//go:build linux && !android && !e2e_testing
+
+package overlay
+
+import "fmt"
+
+func (t *tun) batchIO() (*wireguardTunIO, bool) {
+	io, ok := t.ReadWriteCloser.(*wireguardTunIO)
+	return io, ok
+}
+
+func (t *tun) ReadIntoBatch(pool *PacketPool) ([]*Packet, error) {
+	io, ok := t.batchIO()
+	if !ok {
+		return nil, fmt.Errorf("wireguard batch I/O not enabled")
+	}
+	return io.ReadIntoBatch(pool)
+}
+
+func (t *tun) WriteBatch(packets []*Packet) (int, error) {
+	io, ok := t.batchIO()
+	if ok {
+		return io.WriteBatch(packets)
+	}
+	for _, pkt := range packets {
+		if pkt == nil {
+			continue
+		}
+		if _, err := t.Write(pkt.Payload()); err != nil {
+			// Match wireguardTunIO.WriteBatch: this method owns the packets,
+			// so release them on the error path too.
+			releasePackets(packets)
+			return 0, err
+		}
+	}
+	releasePackets(packets)
+	return len(packets), nil
+}
+
+func (t *tun) BatchHeadroom() int {
+	if io, ok := t.batchIO(); ok {
+		return io.BatchHeadroom()
+	}
+	return 0
+}
+
+func (t *tun) BatchPayloadCap() int {
+	if io, ok := t.batchIO(); ok {
+		return io.BatchPayloadCap()
+	}
+	return 0
+}
+
+func (t *tun) BatchSize() int {
+	if io, ok := t.batchIO(); ok {
+		return io.BatchSize()
+	}
+	return 1
+}

+ 152 - 37
overlay/wireguard_tun_linux.go

@@ -14,15 +14,15 @@ type wireguardTunIO struct {
 	mtu       int
 	batchSize int

-	readMu   sync.Mutex
-	readBufs [][]byte
-	readLens []int
-	pending  [][]byte
-	pendIdx  int
-
-	writeMu   sync.Mutex
-	writeBuf  []byte
-	writeWrap [][]byte
+	readMu      sync.Mutex
+	readBuffers [][]byte
+	readLens    []int
+	legacyBuf   []byte
+
+	writeMu      sync.Mutex
+	writeBuf     []byte
+	writeWrap    [][]byte
+	writeBuffers [][]byte
 }

 func newWireguardTunIO(dev wgtun.Device, mtu int) *wireguardTunIO {
@@ -33,17 +33,12 @@ func newWireguardTunIO(dev wgtun.Device, mtu int) *wireguardTunIO {
 	if mtu <= 0 {
 		mtu = DefaultMTU
 	}
-	bufs := make([][]byte, batch)
-	for i := range bufs {
-		bufs[i] = make([]byte, wgtun.VirtioNetHdrLen+mtu)
-	}
 	return &wireguardTunIO{
 	return &wireguardTunIO{
 		dev:       dev,
 		mtu:       mtu,
 		batchSize: batch,
 		readLens:  make([]int, batch),
 		readLens:  make([]int, batch),
+		legacyBuf: make([]byte, wgtun.VirtioNetHdrLen+mtu),
 		writeBuf:  make([]byte, wgtun.VirtioNetHdrLen+mtu),
 		writeBuf:  make([]byte, wgtun.VirtioNetHdrLen+mtu),
 		writeWrap: make([][]byte, 1),
 	}
 	w.readMu.Lock()
 	w.readMu.Lock()
 	defer w.readMu.Unlock()

-		if w.pendIdx < len(w.pending) {
-			segment := w.pending[w.pendIdx]
-			w.pendIdx++
-			n := copy(p, segment)
-			return n, nil
-		}
-
-		n, err := w.dev.Read(w.readBufs, w.readLens, wgtun.VirtioNetHdrLen)
-		if err != nil {
-			return 0, err
-		}
-		w.pending = w.pending[:0]
-		w.pendIdx = 0
-		for i := 0; i < n; i++ {
-			length := w.readLens[i]
-			if length == 0 {
-				continue
-			}
-			segment := w.readBufs[i][wgtun.VirtioNetHdrLen : wgtun.VirtioNetHdrLen+length]
-			w.pending = append(w.pending, segment)
-		}
+	bufs := w.readBuffers
+	if len(bufs) == 0 {
+		bufs = [][]byte{w.legacyBuf}
+		w.readBuffers = bufs
 	}
 	}
+	n, err := w.dev.Read(bufs[:1], w.readLens[:1], wgtun.VirtioNetHdrLen)
+	if err != nil {
+		return 0, err
+	}
+	if n == 0 {
+		return 0, nil
+	}
+	length := w.readLens[0]
+	copy(p, w.legacyBuf[wgtun.VirtioNetHdrLen:wgtun.VirtioNetHdrLen+length])
+	return length, nil
 }
 }

 func (w *wireguardTunIO) Write(p []byte) (int, error) {
 	return len(p), nil
 	return len(p), nil
 }

+	if pool == nil {
+		return nil, fmt.Errorf("wireguard tun: packet pool is nil")
+	}
+
+	w.readMu.Lock()
+	defer w.readMu.Unlock()
+
+	if len(w.readBuffers) < w.batchSize {
+		w.readBuffers = make([][]byte, w.batchSize)
+	}
+	if len(w.readLens) < w.batchSize {
+		w.readLens = make([]int, w.batchSize)
+	}
+
+	packets := make([]*Packet, w.batchSize)
+	requiredHeadroom := w.BatchHeadroom()
+	requiredPayload := w.BatchPayloadCap()
+	headroom := 0
+	for i := 0; i < w.batchSize; i++ {
+		pkt := pool.Get()
+		if pkt == nil {
+			releasePackets(packets[:i])
+			return nil, fmt.Errorf("wireguard tun: packet pool returned nil packet")
+		}
+		if pkt.Capacity() < requiredPayload {
+			pkt.Release()
+			releasePackets(packets[:i])
+			return nil, fmt.Errorf("wireguard tun: packet capacity %d below required %d", pkt.Capacity(), requiredPayload)
+		}
+		if i == 0 {
+			headroom = pkt.Offset
+			if headroom < requiredHeadroom {
+				pkt.Release()
+				releasePackets(packets[:i])
+				return nil, fmt.Errorf("wireguard tun: packet headroom %d below virtio requirement %d", headroom, requiredHeadroom)
+			}
+		} else if pkt.Offset != headroom {
+			pkt.Release()
+			releasePackets(packets[:i])
+			return nil, fmt.Errorf("wireguard tun: inconsistent packet headroom (%d != %d)", pkt.Offset, headroom)
+		}
+		packets[i] = pkt
+		w.readBuffers[i] = pkt.Buf
+	}
+
+	n, err := w.dev.Read(w.readBuffers[:w.batchSize], w.readLens[:w.batchSize], headroom)
+	if err != nil {
+		releasePackets(packets)
+		return nil, err
+	}
+	if n == 0 {
+		releasePackets(packets)
+		return nil, nil
+	}
+	for i := 0; i < n; i++ {
+		packets[i].Len = w.readLens[i]
+	}
+	for i := n; i < w.batchSize; i++ {
+		packets[i].Release()
+		packets[i] = nil
+	}
+	return packets[:n], nil
+}
+
+func (w *wireguardTunIO) WriteBatch(packets []*Packet) (int, error) {
+	if len(packets) == 0 {
+		return 0, nil
+	}
+	requiredHeadroom := w.BatchHeadroom()
+	offset := packets[0].Offset
+	if offset < requiredHeadroom {
+		releasePackets(packets)
+		return 0, fmt.Errorf("wireguard tun: packet offset %d smaller than required headroom %d", offset, requiredHeadroom)
+	}
+	for _, pkt := range packets {
+		if pkt == nil {
+			continue
+		}
+		if pkt.Offset != offset {
+			releasePackets(packets)
+			return 0, fmt.Errorf("wireguard tun: mixed packet offsets not supported")
+		}
+		limit := pkt.Offset + pkt.Len
+		if limit > len(pkt.Buf) {
+			releasePackets(packets)
+			return 0, fmt.Errorf("wireguard tun: packet length %d exceeds buffer capacity %d", pkt.Len, len(pkt.Buf)-pkt.Offset)
+		}
+	}
+	w.writeMu.Lock()
+	defer w.writeMu.Unlock()
+
+	if len(w.writeBuffers) < len(packets) {
+		w.writeBuffers = make([][]byte, len(packets))
+	}
+	for i, pkt := range packets {
+		if pkt == nil {
+			w.writeBuffers[i] = nil
+			continue
+		}
+		limit := pkt.Offset + pkt.Len
+		w.writeBuffers[i] = pkt.Buf[:limit]
+	}
+	n, err := w.dev.Write(w.writeBuffers[:len(packets)], offset)
+	releasePackets(packets)
+	return n, err
+}
+
+func (w *wireguardTunIO) BatchHeadroom() int {
+	return wgtun.VirtioNetHdrLen
+}
+
+func (w *wireguardTunIO) BatchPayloadCap() int {
+	return w.mtu
+}
+
+func (w *wireguardTunIO) BatchSize() int {
+	return w.batchSize
+}
+
 func (w *wireguardTunIO) Close() error {
 	return nil
 }
+
+func releasePackets(pkts []*Packet) {
+	for _, pkt := range pkts {
+		if pkt != nil {
+			pkt.Release()
+		}
+	}
+}

+ 12 - 0
udp/conn.go

@@ -22,6 +22,18 @@ type Conn interface {
 	Close() error
 }

+// Datagram represents a UDP payload destined to a specific address.
+type Datagram struct {
+	Payload []byte
+	Addr    netip.AddrPort
+}
+
+// BatchConn can send multiple datagrams in one syscall.
+type BatchConn interface {
+	Conn
+	WriteBatch(pkts []Datagram) error
+}
+
 type NoopConn struct{}

 func (NoopConn) Rebind() error {
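
Callers are expected to feature-detect batch support the way interface.go's
flushDatagrams does; a minimal sketch (the helper is hypothetical):

    func writeAll(conn udp.Conn, grams []udp.Datagram) error {
        if bc, ok := conn.(udp.BatchConn); ok {
            return bc.WriteBatch(grams) // batches runs of same-address datagrams
        }
        for _, d := range grams {
            if err := conn.WriteTo(d.Payload, d.Addr); err != nil {
                return err
            }
        }
        return nil
    }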

+ 94 - 1
udp/wireguard_conn_linux.go

@@ -20,8 +20,12 @@ type WGConn struct {
 	bind      *wgconn.StdNetBind
 	recvers   []wgconn.ReceiveFunc
 	batch     int
+	reqBatch  int
 	localIP   netip.Addr
 	localPort uint16
+	enableGSO bool
+	enableGRO bool
+	gsoMaxSeg int
 	closed    atomic.Bool

 	closeOnce sync.Once
@@ -34,7 +38,9 @@ func NewWireguardListener(l *logrus.Logger, ip netip.Addr, port int, multi bool,
 	if err != nil {
 		return nil, err
 	}
-	if batch <= 0 || batch > bind.BatchSize() {
+	if batch <= 0 {
+		batch = bind.BatchSize()
+	} else if batch > bind.BatchSize() {
 		batch = bind.BatchSize()
 	}
 	return &WGConn{
@@ -42,6 +48,7 @@ func NewWireguardListener(l *logrus.Logger, ip netip.Addr, port int, multi bool,
 		bind:      bind,
 		recvers:   recvers,
 		batch:     batch,
+		reqBatch:  batch,
 		localIP:   ip,
 		localPort: actualPort,
 	}, nil
@@ -118,6 +125,92 @@ func (c *WGConn) WriteTo(b []byte, addr netip.AddrPort) error {
 	return c.bind.Send([][]byte{b}, ep)
 }

+func (c *WGConn) WriteBatch(datagrams []Datagram) error {
+	if len(datagrams) == 0 {
+		return nil
+	}
+	if c.closed.Load() {
+		return net.ErrClosed
+	}
+	max := c.batch
+	if max <= 0 {
+		// the early return above guarantees len(datagrams) > 0
+		max = len(datagrams)
+	}
+	bufs := make([][]byte, 0, max)
+	var (
+		current  netip.AddrPort
+		endpoint *wgconn.StdNetEndpoint
+		haveAddr bool
+	)
+	flush := func() error {
+		if len(bufs) == 0 || endpoint == nil {
+			bufs = bufs[:0]
+			return nil
+		}
+		err := c.bind.Send(bufs, endpoint)
+		bufs = bufs[:0]
+		return err
+	}
+
+	for _, d := range datagrams {
+		if len(d.Payload) == 0 || !d.Addr.IsValid() {
+			continue
+		}
+		if !haveAddr || d.Addr != current {
+			if err := flush(); err != nil {
+				return err
+			}
+			current = d.Addr
+			endpoint = &wgconn.StdNetEndpoint{AddrPort: current}
+			haveAddr = true
+		}
+		bufs = append(bufs, d.Payload)
+		if len(bufs) >= max {
+			if err := flush(); err != nil {
+				return err
+			}
+		}
+	}
+	return flush()
+}
+
+func (c *WGConn) ConfigureOffload(enableGSO, enableGRO bool, maxSegments int) {
+	c.enableGSO = enableGSO
+	c.enableGRO = enableGRO
+	if maxSegments <= 0 {
+		maxSegments = 1
+	} else if maxSegments > wgconn.IdealBatchSize {
+		maxSegments = wgconn.IdealBatchSize
+	}
+	c.gsoMaxSeg = maxSegments
+
+	effectiveBatch := c.reqBatch
+	if enableGSO && c.bind != nil {
+		bindBatch := c.bind.BatchSize()
+		if effectiveBatch < bindBatch {
+			if c.l != nil {
+				c.l.WithFields(logrus.Fields{
+					"requested": c.reqBatch,
+					"effective": bindBatch,
+				}).Warn("listen.batch below wireguard minimum; using bind batch size for UDP GSO support")
+			}
+			effectiveBatch = bindBatch
+		}
+	}
+	c.batch = effectiveBatch
+
+	if c.l != nil {
+		c.l.WithFields(logrus.Fields{
+			"enableGSO":      enableGSO,
+			"enableGRO":      enableGRO,
+			"gsoMaxSegments": maxSegments,
+		}).Debug("configured wireguard UDP offload")
+	}
+}
+
 func (c *WGConn) ReloadConfig(*config.C) {
 	// WireGuard bind currently does not expose runtime configuration knobs.
 }

+ 12 - 0
wgstack/conn/errors_default.go

@@ -0,0 +1,12 @@
+//go:build !linux
+
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
+ */
+
+package conn
+
+func errShouldDisableUDPGSO(err error) bool {
+	return false
+}

+ 26 - 0
wgstack/conn/errors_linux.go

@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
+ */
+
+package conn
+
+import (
+	"errors"
+	"os"
+
+	"golang.org/x/sys/unix"
+)
+
+func errShouldDisableUDPGSO(err error) bool {
+	var serr *os.SyscallError
+	if errors.As(err, &serr) {
+		// EIO is returned by udp_send_skb() if the device driver does not have
+		// tx checksumming enabled, which is a hard requirement of UDP_SEGMENT.
+		// See:
+		// https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/tree/man7/udp.7?id=806eabd74910447f21005160e90957bde4db0183#n228
+		// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/net/ipv4/udp.c?h=v6.2&id=c9c3395d5e3dcc6daee66c6908354d47bf98cb0c#n942
+		return serr.Err == unix.EIO
+	}
+	return false
+}

+ 15 - 0
wgstack/conn/features_default.go

@@ -0,0 +1,15 @@
+//go:build !linux
+// +build !linux
+
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
+ */
+
+package conn
+
+import "net"
+
+func supportsUDPOffload(conn *net.UDPConn) (txOffload, rxOffload bool) {
+	return
+}

+ 29 - 0
wgstack/conn/features_linux.go

@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
+ */
+
+package conn
+
+import (
+	"net"
+
+	"golang.org/x/sys/unix"
+)
+
+func supportsUDPOffload(conn *net.UDPConn) (txOffload, rxOffload bool) {
+	rc, err := conn.SyscallConn()
+	if err != nil {
+		return
+	}
+	err = rc.Control(func(fd uintptr) {
+		_, errSyscall := unix.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT)
+		txOffload = errSyscall == nil
+		opt, errSyscall := unix.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO)
+		rxOffload = errSyscall == nil && opt == 1
+	})
+	if err != nil {
+		return false, false
+	}
+	return txOffload, rxOffload
+}

+ 21 - 0
wgstack/conn/gso_default.go

@@ -0,0 +1,21 @@
+//go:build !linux
+
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
+ */
+
+package conn
+
+// getGSOSize parses control for UDP_GRO and if found returns its GSO size data.
+func getGSOSize(control []byte) (int, error) {
+	return 0, nil
+}
+
+// setGSOSize sets a UDP_SEGMENT in control based on gsoSize.
+func setGSOSize(control *[]byte, gsoSize uint16) {
+}
+
+// gsoControlSize returns the recommended buffer size for pooling UDP
+// offloading control data.
+const gsoControlSize = 0

+ 65 - 0
wgstack/conn/gso_linux.go

@@ -0,0 +1,65 @@
+//go:build linux
+
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
+ */
+
+package conn
+
+import (
+	"fmt"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+)
+
+const (
+	sizeOfGSOData = 2
+)
+
+// getGSOSize parses control for UDP_GRO and if found returns its GSO size data.
+func getGSOSize(control []byte) (int, error) {
+	var (
+		hdr  unix.Cmsghdr
+		data []byte
+		rem  = control
+		err  error
+	)
+
+	for len(rem) > unix.SizeofCmsghdr {
+		hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem)
+		if err != nil {
+			return 0, fmt.Errorf("error parsing socket control message: %w", err)
+		}
+		if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= sizeOfGSOData {
+			var gso uint16
+			copy(unsafe.Slice((*byte)(unsafe.Pointer(&gso)), sizeOfGSOData), data[:sizeOfGSOData])
+			return int(gso), nil
+		}
+	}
+	return 0, nil
+}
+
+// setGSOSize sets a UDP_SEGMENT in control based on gsoSize. It leaves existing
+// data in control untouched.
+func setGSOSize(control *[]byte, gsoSize uint16) {
+	existingLen := len(*control)
+	avail := cap(*control) - existingLen
+	space := unix.CmsgSpace(sizeOfGSOData)
+	if avail < space {
+		return
+	}
+	*control = (*control)[:cap(*control)]
+	gsoControl := (*control)[existingLen:]
+	hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(gsoControl)[0]))
+	hdr.Level = unix.SOL_UDP
+	hdr.Type = unix.UDP_SEGMENT
+	hdr.SetLen(unix.CmsgLen(sizeOfGSOData))
+	copy((gsoControl)[unix.CmsgLen(0):], unsafe.Slice((*byte)(unsafe.Pointer(&gsoSize)), sizeOfGSOData))
+	*control = (*control)[:existingLen+space]
+}
+
+// gsoControlSize returns the recommended buffer size for pooling UDP
+// offloading control data.
+var gsoControlSize = unix.CmsgSpace(sizeOfGSOData)
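
How the two helpers pair up in practice (hypothetical demo in package conn;
GSO is a send-side UDP_SEGMENT cmsg, while the GRO size arrives via a
receive-side UDP_GRO cmsg):

    // oob would come from a recvmsg on a socket with UDP_GRO enabled.
    func demoGSO(oob []byte) (int, error) {
        control := make([]byte, 0, gsoControlSize)
        setGSOSize(&control, 1280) // sendmsg oob: split writes into 1280-byte segments
        _ = control
        return getGSOSize(oob) // 0 when no UDP_GRO cmsg is present
    }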

+ 42 - 0
wgstack/conn/sticky_default.go

@@ -0,0 +1,42 @@
+//go:build !linux || android
+
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
+ */
+
+package conn
+
+import "net/netip"
+
+func (e *StdNetEndpoint) SrcIP() netip.Addr {
+	return netip.Addr{}
+}
+
+func (e *StdNetEndpoint) SrcIfidx() int32 {
+	return 0
+}
+
+func (e *StdNetEndpoint) SrcToString() string {
+	return ""
+}
+
+// TODO: macOS, FreeBSD and other BSDs likely do support the sticky sockets
+// {get,set}srcControl feature set, but use alternatively named flags and need
+// ports and require testing.
+
+// getSrcFromControl parses the control for PKTINFO and if found updates ep with
+// the source information found.
+func getSrcFromControl(control []byte, ep *StdNetEndpoint) {
+}
+
+// setSrcControl sets a PKTINFO control message from the source information
+// recorded in ep, if any (no-op on this platform).
+func setSrcControl(control *[]byte, ep *StdNetEndpoint) {
+}
+
+// stickyControlSize returns the recommended buffer size for pooling sticky
+// offloading control data.
+const stickyControlSize = 0
+
+const StdNetSupportsStickySockets = false