Browse Source

Unsafe route reload (#1083)

Nate Brown 1 year ago
parent
commit
bbb15f8cb1

+ 30 - 0
overlay/route.go

@@ -1,6 +1,7 @@
 package overlay
 package overlay
 
 
 import (
 import (
+	"bytes"
 	"fmt"
 	"fmt"
 	"math"
 	"math"
 	"net"
 	"net"
@@ -21,6 +22,35 @@ type Route struct {
 	Install bool
 	Install bool
 }
 }
 
 
+// Equal reports whether r and t match on Cidr (IP and mask), Metric, MTU, and Install.
+// Via is deliberately left out of the comparison since it is only consumed within nebula itself.
+func (r Route) Equal(t Route) bool {
+	if !r.Cidr.IP.Equal(t.Cidr.IP) { // assuming Cidr.IP is a net.IP: Equal treats an IPv4 address and its IPv6 form as the same address
+		return false
+	}
+	if !bytes.Equal(r.Cidr.Mask, t.Cidr.Mask) { // masks are compared byte-for-byte, so their lengths must match as well
+		return false
+	}
+	if r.Metric != t.Metric {
+		return false
+	}
+	if r.MTU != t.MTU {
+		return false
+	}
+	if r.Install != t.Install {
+		return false
+	}
+	return true
+}
+
+func (r Route) String() string { // String renders the route as its CIDR, followed by " metric: N" when Metric is non-zero
+	s := r.Cidr.String()
+	if r.Metric != 0 { // a zero metric is left out of the output
+		s += fmt.Sprintf(" metric: %v", r.Metric)
+	}
+	return s
+}
+
 func makeRouteTree(l *logrus.Logger, routes []Route, allowMTU bool) (*cidr.Tree4[iputil.VpnIp], error) {
 func makeRouteTree(l *logrus.Logger, routes []Route, allowMTU bool) (*cidr.Tree4[iputil.VpnIp], error) {
 	routeTree := cidr.NewTree4[iputil.VpnIp]()
 	routeTree := cidr.NewTree4[iputil.VpnIp]()
 	for _, r := range routes {
 	for _, r := range routes {

+ 41 - 38
overlay/tun.go

@@ -10,60 +10,63 @@ import (
 
 
 const DefaultMTU = 1300
 const DefaultMTU = 1300
 
 
+// TODO: We may be able to remove the routines parameter
 type DeviceFactory func(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error)
 type DeviceFactory func(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error)
 
 
 func NewDeviceFromConfig(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) {
 func NewDeviceFromConfig(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) {
-	routes, err := parseRoutes(c, tunCidr)
-	if err != nil {
-		return nil, util.NewContextualError("Could not parse tun.routes", nil, err)
-	}
-
-	unsafeRoutes, err := parseUnsafeRoutes(c, tunCidr)
-	if err != nil {
-		return nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err)
-	}
-	routes = append(routes, unsafeRoutes...)
-
 	switch {
 	switch {
 	case c.GetBool("tun.disabled", false):
 	case c.GetBool("tun.disabled", false):
 		tun := newDisabledTun(tunCidr, c.GetInt("tun.tx_queue", 500), c.GetBool("stats.message_metrics", false), l)
 		tun := newDisabledTun(tunCidr, c.GetInt("tun.tx_queue", 500), c.GetBool("stats.message_metrics", false), l)
 		return tun, nil
 		return tun, nil
 
 
 	default:
 	default:
-		return newTun(
-			l,
-			c.GetString("tun.dev", ""),
-			tunCidr,
-			c.GetInt("tun.mtu", DefaultMTU),
-			routes,
-			c.GetInt("tun.tx_queue", 500),
-			routines > 1,
-			c.GetBool("tun.use_system_route_table", false),
-		)
+		return newTun(c, l, tunCidr, routines > 1)
 	}
 	}
 }
 }
 
 
 func NewFdDeviceFromConfig(fd *int) DeviceFactory {
 func NewFdDeviceFromConfig(fd *int) DeviceFactory {
 	return func(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) {
 	return func(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) {
-		routes, err := parseRoutes(c, tunCidr)
-		if err != nil {
-			return nil, util.NewContextualError("Could not parse tun.routes", nil, err)
-		}
+		return newTunFromFd(c, l, *fd, tunCidr)
+	}
+}
+
+func getAllRoutesFromConfig(c *config.C, cidr *net.IPNet, initial bool) (bool, []Route, error) { // returns (config was re-read, tun.routes followed by tun.unsafe_routes, parse error)
+	if !initial && !c.HasChanged("tun.routes") && !c.HasChanged("tun.unsafe_routes") { // on a reload, skip parsing when neither key changed
+		return false, nil, nil
+	}
 
 
-		unsafeRoutes, err := parseUnsafeRoutes(c, tunCidr)
-		if err != nil {
-			return nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err)
+	routes, err := parseRoutes(c, cidr)
+	if err != nil {
+		return true, nil, util.NewContextualError("Could not parse tun.routes", nil, err) // the first result stays true on a parse failure
+	}
+
+	unsafeRoutes, err := parseUnsafeRoutes(c, cidr)
+	if err != nil {
+		return true, nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err)
+	}
+
+	routes = append(routes, unsafeRoutes...) // unsafe routes are placed after the regular routes
+	return true, routes, nil
+}
+
+// findRemovedRoutes returns every entry of oldRoutes that has no Equal counterpart in newRoutes.
+// Route.Equal does not compare Via, so a route whose only difference is its Via is not reported as removed.
+func findRemovedRoutes(newRoutes, oldRoutes []Route) []Route {
+	var removed []Route // stays nil when no old route is missing from newRoutes
+	has := func(entry Route) bool { // linear scan of newRoutes for a route Equal to entry
+		for _, check := range newRoutes {
+			if check.Equal(entry) {
+				return true
+			}
 		}
 		}
-		routes = append(routes, unsafeRoutes...)
-		return newTunFromFd(
-			l,
-			*fd,
-			tunCidr,
-			c.GetInt("tun.mtu", DefaultMTU),
-			routes,
-			c.GetInt("tun.tx_queue", 500),
-			c.GetBool("tun.use_system_route_table", false),
-		)
+		return false
+	}
 
 
+	for _, oldEntry := range oldRoutes { // entries are collected in oldRoutes order
+		if !has(oldEntry) {
+			removed = append(removed, oldEntry)
+		}
 	}
 	}
+
+	return removed
 }
 }

+ 45 - 12
overlay/tun_android.go

@@ -8,45 +8,57 @@ import (
 	"io"
 	"io"
 	"net"
 	"net"
 	"os"
 	"os"
+	"sync/atomic"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 )
 )
 
 
 type tun struct {
 type tun struct {
 	io.ReadWriteCloser
 	io.ReadWriteCloser
 	fd        int
 	fd        int
 	cidr      *net.IPNet
 	cidr      *net.IPNet
-	routeTree *cidr.Tree4[iputil.VpnIp]
+	Routes    atomic.Pointer[[]Route]
+	routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
 	l         *logrus.Logger
 	l         *logrus.Logger
 }
 }
 
 
-func newTunFromFd(l *logrus.Logger, deviceFd int, cidr *net.IPNet, _ int, routes []Route, _ int, _ bool) (*tun, error) {
-	routeTree, err := makeRouteTree(l, routes, false)
-	if err != nil {
-		return nil, err
-	}
-
+func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) {
 	// XXX Android returns an fd in non-blocking mode which is necessary for shutdown to work properly.
 	// XXX Android returns an fd in non-blocking mode which is necessary for shutdown to work properly.
 	// Be sure not to call file.Fd() as it will set the fd to blocking mode.
 	// Be sure not to call file.Fd() as it will set the fd to blocking mode.
 	file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
 	file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
 
 
-	return &tun{
+	t := &tun{
 		ReadWriteCloser: file,
 		ReadWriteCloser: file,
 		fd:              deviceFd,
 		fd:              deviceFd,
 		cidr:            cidr,
 		cidr:            cidr,
 		l:               l,
 		l:               l,
-		routeTree:       routeTree,
-	}, nil
+	}
+
+	err := t.reload(c, true)
+	if err != nil {
+		return nil, err
+	}
+
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
+	return t, nil
 }
 }
 
 
-func newTun(_ *logrus.Logger, _ string, _ *net.IPNet, _ int, _ []Route, _ int, _ bool, _ bool) (*tun, error) {
+func newTun(_ *config.C, _ *logrus.Logger, _ *net.IPNet, _ bool) (*tun, error) {
 	return nil, fmt.Errorf("newTun not supported in Android")
 	return nil, fmt.Errorf("newTun not supported in Android")
 }
 }
 
 
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
-	_, r := t.routeTree.MostSpecificContains(ip)
+	_, r := t.routeTree.Load().MostSpecificContains(ip)
 	return r
 	return r
 }
 }
 
 
@@ -54,6 +66,27 @@ func (t tun) Activate() error {
 	return nil
 	return nil
 }
 }
 
 
+func (t *tun) reload(c *config.C, initial bool) error { // reload re-reads the routes from config and stores the new route list and route tree
+	change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
+	if err != nil {
+		return err
+	}
+
+	if !initial && !change { // nothing to do on a reload when the route config did not change
+		return nil
+	}
+
+	routeTree, err := makeRouteTree(t.l, routes, false) // the third argument is makeRouteTree's allowMTU flag
+	if err != nil {
+		return err // returns before either Store below, so the previous routes and tree stay in place
+	}
+
+	// Teach nebula how to handle the routes; this function makes no other changes
+	t.Routes.Store(&routes)
+	t.routeTree.Store(routeTree)
+	return nil
+}
+
 func (t *tun) Cidr() *net.IPNet {
 func (t *tun) Cidr() *net.IPNet {
 	return t.cidr
 	return t.cidr
 }
 }

+ 162 - 30
overlay/tun_darwin.go

@@ -9,12 +9,15 @@ import (
 	"io"
 	"io"
 	"net"
 	"net"
 	"os"
 	"os"
+	"sync/atomic"
 	"syscall"
 	"syscall"
 	"unsafe"
 	"unsafe"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 	netroute "golang.org/x/net/route"
 	netroute "golang.org/x/net/route"
 	"golang.org/x/sys/unix"
 	"golang.org/x/sys/unix"
 )
 )
@@ -24,8 +27,9 @@ type tun struct {
 	Device     string
 	Device     string
 	cidr       *net.IPNet
 	cidr       *net.IPNet
 	DefaultMTU int
 	DefaultMTU int
-	Routes     []Route
-	routeTree  *cidr.Tree4[iputil.VpnIp]
+	Routes     atomic.Pointer[[]Route]
+	routeTree  atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
+	linkAddr   *netroute.LinkAddr
 	l          *logrus.Logger
 	l          *logrus.Logger
 
 
 	// cache out buffer since we need to prepend 4 bytes for tun metadata
 	// cache out buffer since we need to prepend 4 bytes for tun metadata
@@ -69,12 +73,8 @@ type ifreqMTU struct {
 	pad  [8]byte
 	pad  [8]byte
 }
 }
 
 
-func newTun(l *logrus.Logger, name string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) {
-	routeTree, err := makeRouteTree(l, routes, false)
-	if err != nil {
-		return nil, err
-	}
-
+func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) {
+	name := c.GetString("tun.dev", "")
 	ifIndex := -1
 	ifIndex := -1
 	if name != "" && name != "utun" {
 	if name != "" && name != "utun" {
 		_, err := fmt.Sscanf(name, "utun%d", &ifIndex)
 		_, err := fmt.Sscanf(name, "utun%d", &ifIndex)
@@ -142,17 +142,27 @@ func newTun(l *logrus.Logger, name string, cidr *net.IPNet, defaultMTU int, rout
 
 
 	file := os.NewFile(uintptr(fd), "")
 	file := os.NewFile(uintptr(fd), "")
 
 
-	tun := &tun{
+	t := &tun{
 		ReadWriteCloser: file,
 		ReadWriteCloser: file,
 		Device:          name,
 		Device:          name,
 		cidr:            cidr,
 		cidr:            cidr,
-		DefaultMTU:      defaultMTU,
-		Routes:          routes,
-		routeTree:       routeTree,
+		DefaultMTU:      c.GetInt("tun.mtu", DefaultMTU),
 		l:               l,
 		l:               l,
 	}
 	}
 
 
-	return tun, nil
+	err = t.reload(c, true)
+	if err != nil {
+		return nil, err
+	}
+
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
+	return t, nil
 }
 }
 
 
 func (t *tun) deviceBytes() (o [16]byte) {
 func (t *tun) deviceBytes() (o [16]byte) {
@@ -162,7 +172,7 @@ func (t *tun) deviceBytes() (o [16]byte) {
 	return
 	return
 }
 }
 
 
-func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) {
+func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) {
 	return nil, fmt.Errorf("newTunFromFd not supported in Darwin")
 	return nil, fmt.Errorf("newTunFromFd not supported in Darwin")
 }
 }
 
 
@@ -260,6 +270,7 @@ func (t *tun) Activate() error {
 	if linkAddr == nil {
 	if linkAddr == nil {
 		return fmt.Errorf("unable to discover link_addr for tun interface")
 		return fmt.Errorf("unable to discover link_addr for tun interface")
 	}
 	}
+	t.linkAddr = linkAddr
 
 
 	copy(routeAddr.IP[:], addr[:])
 	copy(routeAddr.IP[:], addr[:])
 	copy(maskAddr.IP[:], mask[:])
 	copy(maskAddr.IP[:], mask[:])
@@ -278,33 +289,48 @@ func (t *tun) Activate() error {
 	}
 	}
 
 
 	// Unsafe path routes
 	// Unsafe path routes
-	for _, r := range t.Routes {
-		if r.Via == nil || !r.Install {
-			// We don't allow route MTUs so only install routes with a via
-			continue
-		}
+	return t.addRoutes(false)
+}
 
 
-		copy(routeAddr.IP[:], r.Cidr.IP.To4())
-		copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4())
+func (t *tun) reload(c *config.C, initial bool) error { // reload re-reads the routes, swaps them in, and on a non-initial call also removes and re-adds routes
+	change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
+	if err != nil {
+		return err
+	}
+
+	if !initial && !change { // nothing to do on a reload when the route config did not change
+		return nil
+	}
 
 
-		err = addRoute(routeSock, routeAddr, maskAddr, linkAddr)
+	routeTree, err := makeRouteTree(t.l, routes, false) // the third argument is makeRouteTree's allowMTU flag
+	if err != nil {
+		return err // returns before the Swap below, so the previous routes and tree stay in place
+	}
+
+	// Teach nebula how to handle the routes before establishing them in the system table
+	oldRoutes := t.Routes.Swap(&routes) // Swap returns the previously stored pointer; it is only dereferenced on the non-initial path
+	t.routeTree.Store(routeTree)
+
+	if !initial { // the initial call leaves route installation to Activate, which calls addRoutes(false)
+		// Remove first, if the system removes a wanted route hopefully it will be re-added next
+		err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
 		if err != nil {
 		if err != nil {
-			if errors.Is(err, unix.EEXIST) {
-				t.l.WithField("route", r.Cidr).
-					Warnf("unable to add unsafe_route, identical route already exists")
-			} else {
-				return err
-			}
+			util.LogWithContextIfNeeded("Failed to remove routes", err, t.l) // logged only; reload continues and still returns nil
 		}
 		}
 
 
-		// TODO how to set metric
+		// Ensure any routes we actually want are installed
+		err = t.addRoutes(true) // true selects addRoutes' log-and-continue mode
+		if err != nil {
+			// Catch any stray logs
+			util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
+		}
 	}
 	}
 
 
 	return nil
 	return nil
 }
 }
 
 
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
-	ok, r := t.routeTree.MostSpecificContains(ip)
+	ok, r := t.routeTree.Load().MostSpecificContains(ip)
 	if ok {
 	if ok {
 		return r
 		return r
 	}
 	}
@@ -340,6 +366,88 @@ func getLinkAddr(name string) (*netroute.LinkAddr, error) {
 	return nil, nil
 	return nil, nil
 }
 }
 
 
+func (t *tun) addRoutes(logErrors bool) error { // addRoutes installs every current route that has a Via and Install set; logErrors selects log-and-continue over return-on-first-failure
+	routeSock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
+	if err != nil {
+		return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
+	}
+
+	defer func() {
+		unix.Shutdown(routeSock, unix.SHUT_RDWR) // the Shutdown result is ignored; only a Close failure is logged
+		err := unix.Close(routeSock)
+		if err != nil {
+			t.l.WithError(err).Error("failed to close AF_ROUTE socket")
+		}
+	}()
+
+	routeAddr := &netroute.Inet4Addr{} // both address structs are reused and overwritten on every iteration
+	maskAddr := &netroute.Inet4Addr{}
+	routes := *t.Routes.Load()
+	for _, r := range routes {
+		if r.Via == nil || !r.Install {
+			// We don't allow route MTUs so only install routes with a via
+			continue
+		}
+
+		copy(routeAddr.IP[:], r.Cidr.IP.To4()) // NOTE(review): To4 yields nil for a non-IPv4 address, which would leave the previous iteration's value in place; presumably routes are IPv4-only, confirm
+		copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4())
+
+		err := addRoute(routeSock, routeAddr, maskAddr, t.linkAddr) // t.linkAddr is assigned in Activate; NOTE(review): confirm this cannot run before Activate
+		if err != nil {
+			if errors.Is(err, unix.EEXIST) { // an identical existing route only produces a warning
+				t.l.WithField("route", r.Cidr).
+					Warnf("unable to add unsafe_route, identical route already exists")
+			} else {
+				retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err)
+				if logErrors {
+					retErr.Log(t.l) // keep going so the remaining routes are still attempted
+				} else {
+					return retErr // abort; later routes are not attempted
+				}
+			}
+		} else {
+			t.l.WithField("route", r).Info("Added route")
+		}
+	}
+
+	return nil
+}
+
+func (t *tun) removeRoutes(routes []Route) error { // removeRoutes issues a delete for each given route with Install set; only a socket creation failure is returned
+	routeSock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
+	if err != nil {
+		return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
+	}
+
+	defer func() {
+		unix.Shutdown(routeSock, unix.SHUT_RDWR) // the Shutdown result is ignored; only a Close failure is logged
+		err := unix.Close(routeSock)
+		if err != nil {
+			t.l.WithError(err).Error("failed to close AF_ROUTE socket")
+		}
+	}()
+
+	routeAddr := &netroute.Inet4Addr{} // both address structs are reused and overwritten on every iteration
+	maskAddr := &netroute.Inet4Addr{}
+
+	for _, r := range routes {
+		if !r.Install { // NOTE(review): addRoutes also skips routes with a nil Via, this does not, so a delete may be attempted for a route that was never installed
+			continue
+		}
+
+		copy(routeAddr.IP[:], r.Cidr.IP.To4())
+		copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4())
+
+		err := delRoute(routeSock, routeAddr, maskAddr, t.linkAddr)
+		if err != nil {
+			t.l.WithError(err).WithField("route", r).Error("Failed to remove route") // logged and skipped; the loop moves on to the next route
+		} else {
+			t.l.WithField("route", r).Info("Removed route")
+		}
+	}
+	return nil
+}
+
 func addRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) error {
 func addRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) error {
 	r := netroute.RouteMessage{
 	r := netroute.RouteMessage{
 		Version: unix.RTM_VERSION,
 		Version: unix.RTM_VERSION,
@@ -365,6 +473,30 @@ func addRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr)
 	return nil
 	return nil
 }
 }
 
 
+func delRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) error { // delRoute writes an RTM_DELETE route message for addr/mask via link to sock
+	r := netroute.RouteMessage{
+		Version: unix.RTM_VERSION,
+		Type:    unix.RTM_DELETE,
+		Seq:     1,
+		Addrs: []netroute.Addr{ // indexed literal: each address sits at its RTAX_* slot
+			unix.RTAX_DST:     addr,
+			unix.RTAX_GATEWAY: link,
+			unix.RTAX_NETMASK: mask,
+		},
+	}
+
+	data, err := r.Marshal()
+	if err != nil {
+		return fmt.Errorf("failed to create route.RouteMessage: %w", err)
+	}
+	_, err = unix.Write(sock, data[:]) // the byte count is discarded; only the write error is checked
+	if err != nil {
+		return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err)
+	}
+
+	return nil
+}
+
 func (t *tun) Read(to []byte) (int, error) {
 func (t *tun) Read(to []byte) (int, error) {
 
 
 	buf := make([]byte, len(to)+4)
 	buf := make([]byte, len(to)+4)

+ 110 - 28
overlay/tun_freebsd.go

@@ -13,12 +13,15 @@ import (
 	"os"
 	"os"
 	"os/exec"
 	"os/exec"
 	"strconv"
 	"strconv"
+	"sync/atomic"
 	"syscall"
 	"syscall"
 	"unsafe"
 	"unsafe"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 )
 )
 
 
 const (
 const (
@@ -47,8 +50,8 @@ type tun struct {
 	Device    string
 	Device    string
 	cidr      *net.IPNet
 	cidr      *net.IPNet
 	MTU       int
 	MTU       int
-	Routes    []Route
-	routeTree *cidr.Tree4[iputil.VpnIp]
+	Routes    atomic.Pointer[[]Route]
+	routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
 	l         *logrus.Logger
 	l         *logrus.Logger
 
 
 	io.ReadWriteCloser
 	io.ReadWriteCloser
@@ -76,14 +79,15 @@ func (t *tun) Close() error {
 	return nil
 	return nil
 }
 }
 
 
-func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) {
+func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) {
 	return nil, fmt.Errorf("newTunFromFd not supported in FreeBSD")
 	return nil, fmt.Errorf("newTunFromFd not supported in FreeBSD")
 }
 }
 
 
-func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) {
+func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) {
 	// Try to open existing tun device
 	// Try to open existing tun device
 	var file *os.File
 	var file *os.File
 	var err error
 	var err error
+	deviceName := c.GetString("tun.dev", "")
 	if deviceName != "" {
 	if deviceName != "" {
 		file, err = os.OpenFile("/dev/"+deviceName, os.O_RDWR, 0)
 		file, err = os.OpenFile("/dev/"+deviceName, os.O_RDWR, 0)
 	}
 	}
@@ -144,47 +148,85 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int
 		ioctl(fd, syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&ifrr)))
 		ioctl(fd, syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&ifrr)))
 	}
 	}
 
 
-	routeTree, err := makeRouteTree(l, routes, false)
-	if err != nil {
-		return nil, err
-	}
-
-	return &tun{
+	t := &tun{
 		ReadWriteCloser: file,
 		ReadWriteCloser: file,
 		Device:          deviceName,
 		Device:          deviceName,
 		cidr:            cidr,
 		cidr:            cidr,
-		MTU:             defaultMTU,
-		Routes:          routes,
-		routeTree:       routeTree,
+		MTU:             c.GetInt("tun.mtu", DefaultMTU),
 		l:               l,
 		l:               l,
-	}, nil
+	}
+
+	err = t.reload(c, true)
+	if err != nil {
+		return nil, err
+	}
+
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
+	return t, nil
 }
 }
 
 
 func (t *tun) Activate() error {
 func (t *tun) Activate() error {
 	var err error
 	var err error
 	// TODO use syscalls instead of exec.Command
 	// TODO use syscalls instead of exec.Command
-	t.l.Debug("command: ifconfig", t.Device, t.cidr.String(), t.cidr.IP.String())
-	if err = exec.Command("/sbin/ifconfig", t.Device, t.cidr.String(), t.cidr.IP.String()).Run(); err != nil {
+	cmd := exec.Command("/sbin/ifconfig", t.Device, t.cidr.String(), t.cidr.IP.String())
+	t.l.Debug("command: ", cmd.String())
+	if err = cmd.Run(); err != nil {
 		return fmt.Errorf("failed to run 'ifconfig': %s", err)
 		return fmt.Errorf("failed to run 'ifconfig': %s", err)
 	}
 	}
-	t.l.Debug("command: route", "-n", "add", "-net", t.cidr.String(), "-interface", t.Device)
-	if err = exec.Command("/sbin/route", "-n", "add", "-net", t.cidr.String(), "-interface", t.Device).Run(); err != nil {
+
+	cmd = exec.Command("/sbin/route", "-n", "add", "-net", t.cidr.String(), "-interface", t.Device)
+	t.l.Debug("command: ", cmd.String())
+	if err = cmd.Run(); err != nil {
 		return fmt.Errorf("failed to run 'route add': %s", err)
 		return fmt.Errorf("failed to run 'route add': %s", err)
 	}
 	}
-	t.l.Debug("command: ifconfig", t.Device, "mtu", strconv.Itoa(t.MTU))
-	if err = exec.Command("/sbin/ifconfig", t.Device, "mtu", strconv.Itoa(t.MTU)).Run(); err != nil {
+
+	cmd = exec.Command("/sbin/ifconfig", t.Device, "mtu", strconv.Itoa(t.MTU))
+	t.l.Debug("command: ", cmd.String())
+	if err = cmd.Run(); err != nil {
 		return fmt.Errorf("failed to run 'ifconfig': %s", err)
 		return fmt.Errorf("failed to run 'ifconfig': %s", err)
 	}
 	}
+
 	// Unsafe path routes
 	// Unsafe path routes
-	for _, r := range t.Routes {
-		if r.Via == nil || !r.Install {
-			// We don't allow route MTUs so only install routes with a via
-			continue
+	return t.addRoutes(false)
+}
+
+func (t *tun) reload(c *config.C, initial bool) error {
+	change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
+	if err != nil {
+		return err
+	}
+
+	if !initial && !change {
+		return nil
+	}
+
+	routeTree, err := makeRouteTree(t.l, routes, false)
+	if err != nil {
+		return err
+	}
+
+	// Teach nebula how to handle the routes before establishing them in the system table
+	oldRoutes := t.Routes.Swap(&routes)
+	t.routeTree.Store(routeTree)
+
+	if !initial {
+		// Remove first, if the system removes a wanted route hopefully it will be re-added next
+		err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
+		if err != nil {
+			util.LogWithContextIfNeeded("Failed to remove routes", err, t.l)
 		}
 		}
 
 
-		t.l.Debug("command: route", "-n", "add", "-net", r.Cidr.String(), "-interface", t.Device)
-		if err = exec.Command("/sbin/route", "-n", "add", "-net", r.Cidr.String(), "-interface", t.Device).Run(); err != nil {
-			return fmt.Errorf("failed to run 'route add' for unsafe_route %s: %s", r.Cidr.String(), err)
+		// Ensure any routes we actually want are installed
+		err = t.addRoutes(true)
+		if err != nil {
+			// Catch any stray logs
+			util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
 		}
 		}
 	}
 	}
 
 
@@ -192,7 +234,7 @@ func (t *tun) Activate() error {
 }
 }
 
 
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
-	_, r := t.routeTree.MostSpecificContains(ip)
+	_, r := t.routeTree.Load().MostSpecificContains(ip)
 	return r
 	return r
 }
 }
 
 
@@ -208,6 +250,46 @@ func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
 	return nil, fmt.Errorf("TODO: multiqueue not implemented for freebsd")
 	return nil, fmt.Errorf("TODO: multiqueue not implemented for freebsd")
 }
 }
 
 
+func (t *tun) addRoutes(logErrors bool) error { // addRoutes runs /sbin/route add for every current route that has a Via and Install set; logErrors selects log-and-continue over return-on-first-failure
+	routes := *t.Routes.Load()
+	for _, r := range routes {
+		if r.Via == nil || !r.Install {
+			// We don't allow route MTUs so only install routes with a via
+			continue
+		}
+
+		cmd := exec.Command("/sbin/route", "-n", "add", "-net", r.Cidr.String(), "-interface", t.Device)
+		t.l.Debug("command: ", cmd.String())
+		if err := cmd.Run(); err != nil { // any command failure takes this path; there is no special case for a route that already exists
+			retErr := util.NewContextualError("failed to run 'route add' for unsafe_route", map[string]interface{}{"route": r}, err)
+			if logErrors {
+				retErr.Log(t.l) // keep going so the remaining routes are still attempted
+			} else {
+				return retErr // abort; later routes are not attempted
+			}
+		}
+	}
+
+	return nil
+}
+
+func (t *tun) removeRoutes(routes []Route) error { // removeRoutes runs /sbin/route delete for each given route with Install set; it always returns nil
+	for _, r := range routes {
+		if !r.Install { // NOTE(review): addRoutes also skips routes with a nil Via, this does not, so a delete may be attempted for a route that was never installed
+			continue
+		}
+
+		cmd := exec.Command("/sbin/route", "-n", "delete", "-net", r.Cidr.String(), "-interface", t.Device)
+		t.l.Debug("command: ", cmd.String())
+		if err := cmd.Run(); err != nil {
+			t.l.WithError(err).WithField("route", r).Error("Failed to remove route") // logged and skipped; the loop moves on to the next route
+		} else {
+			t.l.WithField("route", r).Info("Removed route")
+		}
+	}
+	return nil
+}
+
 func (t *tun) deviceBytes() (o [16]byte) {
 func (t *tun) deviceBytes() (o [16]byte) {
 	for i, c := range t.Device {
 	for i, c := range t.Device {
 		o[i] = byte(c)
 		o[i] = byte(c)

+ 46 - 11
overlay/tun_ios.go

@@ -10,43 +10,78 @@ import (
 	"net"
 	"net"
 	"os"
 	"os"
 	"sync"
 	"sync"
+	"sync/atomic"
 	"syscall"
 	"syscall"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 )
 )
 
 
 type tun struct {
 type tun struct {
 	io.ReadWriteCloser
 	io.ReadWriteCloser
 	cidr      *net.IPNet
 	cidr      *net.IPNet
-	routeTree *cidr.Tree4[iputil.VpnIp]
+	Routes    atomic.Pointer[[]Route]
+	routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
+	l         *logrus.Logger
 }
 }
 
 
-func newTun(_ *logrus.Logger, _ string, _ *net.IPNet, _ int, _ []Route, _ int, _ bool, _ bool) (*tun, error) {
+func newTun(_ *config.C, _ *logrus.Logger, _ *net.IPNet, _ bool) (*tun, error) {
 	return nil, fmt.Errorf("newTun not supported in iOS")
 	return nil, fmt.Errorf("newTun not supported in iOS")
 }
 }
 
 
-func newTunFromFd(l *logrus.Logger, deviceFd int, cidr *net.IPNet, _ int, routes []Route, _ int, _ bool) (*tun, error) {
-	routeTree, err := makeRouteTree(l, routes, false)
+func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) {
+	file := os.NewFile(uintptr(deviceFd), "/dev/tun")
+	t := &tun{
+		cidr:            cidr,
+		ReadWriteCloser: &tunReadCloser{f: file},
+		l:               l,
+	}
+
+	err := t.reload(c, true)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	file := os.NewFile(uintptr(deviceFd), "/dev/tun")
-	return &tun{
-		cidr:            cidr,
-		ReadWriteCloser: &tunReadCloser{f: file},
-		routeTree:       routeTree,
-	}, nil
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
+	return t, nil
 }
 }
 
 
 func (t *tun) Activate() error {
 func (t *tun) Activate() error {
 	return nil
 	return nil
 }
 }
 
 
+func (t *tun) reload(c *config.C, initial bool) error { // reload re-reads the routes from config and stores the new route list and route tree
+	change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
+	if err != nil {
+		return err
+	}
+
+	if !initial && !change { // nothing to do on a reload when the route config did not change
+		return nil
+	}
+
+	routeTree, err := makeRouteTree(t.l, routes, false) // the third argument is makeRouteTree's allowMTU flag
+	if err != nil {
+		return err // returns before either Store below, so the previous routes and tree stay in place
+	}
+
+	// Teach nebula how to handle the routes; this function makes no other changes
+	t.Routes.Store(&routes)
+	t.routeTree.Store(routeTree)
+	return nil
+}
+
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
-	_, r := t.routeTree.MostSpecificContains(ip)
+	_, r := t.routeTree.Load().MostSpecificContains(ip)
 	return r
 	return r
 }
 }
 
 

+ 192 - 70
overlay/tun_linux.go

@@ -15,21 +15,25 @@ import (
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 	"github.com/vishvananda/netlink"
 	"github.com/vishvananda/netlink"
 	"golang.org/x/sys/unix"
 	"golang.org/x/sys/unix"
 )
 )
 
 
 type tun struct {
 type tun struct {
 	io.ReadWriteCloser
 	io.ReadWriteCloser
-	fd         int
-	Device     string
-	cidr       *net.IPNet
-	MaxMTU     int
-	DefaultMTU int
-	TXQueueLen int
-
-	Routes          []Route
+	fd          int
+	Device      string
+	cidr        *net.IPNet
+	MaxMTU      int
+	DefaultMTU  int
+	TXQueueLen  int
+	deviceIndex int
+	ioctlFd     uintptr
+
+	Routes          atomic.Pointer[[]Route]
 	routeTree       atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
 	routeTree       atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
 	routeChan       chan struct{}
 	routeChan       chan struct{}
 	useSystemRoutes bool
 	useSystemRoutes bool
@@ -61,30 +65,20 @@ type ifreqQLEN struct {
 	pad   [8]byte
 	pad   [8]byte
 }
 }
 
 
-func newTunFromFd(l *logrus.Logger, deviceFd int, cidr *net.IPNet, defaultMTU int, routes []Route, txQueueLen int, useSystemRoutes bool) (*tun, error) {
-	routeTree, err := makeRouteTree(l, routes, true)
+func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) {
+	file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
+
+	t, err := newTunGeneric(c, l, file, cidr)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
+	t.Device = "tun0"
 
 
-	t := &tun{
-		ReadWriteCloser: file,
-		fd:              int(file.Fd()),
-		Device:          "tun0",
-		cidr:            cidr,
-		DefaultMTU:      defaultMTU,
-		TXQueueLen:      txQueueLen,
-		Routes:          routes,
-		useSystemRoutes: useSystemRoutes,
-		l:               l,
-	}
-	t.routeTree.Store(routeTree)
 	return t, nil
 	return t, nil
 }
 }
 
 
-func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, txQueueLen int, multiqueue bool, useSystemRoutes bool) (*tun, error) {
+func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, multiqueue bool) (*tun, error) {
 	fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
 	fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
@@ -95,46 +89,113 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int
 	if multiqueue {
 	if multiqueue {
 		req.Flags |= unix.IFF_MULTI_QUEUE
 		req.Flags |= unix.IFF_MULTI_QUEUE
 	}
 	}
-	copy(req.Name[:], deviceName)
+	copy(req.Name[:], c.GetString("tun.dev", ""))
 	if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
 	if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 	name := strings.Trim(string(req.Name[:]), "\x00")
 	name := strings.Trim(string(req.Name[:]), "\x00")
 
 
 	file := os.NewFile(uintptr(fd), "/dev/net/tun")
 	file := os.NewFile(uintptr(fd), "/dev/net/tun")
-
-	maxMTU := defaultMTU
-	for _, r := range routes {
-		if r.MTU == 0 {
-			r.MTU = defaultMTU
-		}
-
-		if r.MTU > maxMTU {
-			maxMTU = r.MTU
-		}
-	}
-
-	routeTree, err := makeRouteTree(l, routes, true)
+	t, err := newTunGeneric(c, l, file, cidr)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
+	t.Device = name
+
+	return t, nil
+}
+
+func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr *net.IPNet) (*tun, error) {
 	t := &tun{
 	t := &tun{
 		ReadWriteCloser: file,
 		ReadWriteCloser: file,
 		fd:              int(file.Fd()),
 		fd:              int(file.Fd()),
-		Device:          name,
 		cidr:            cidr,
 		cidr:            cidr,
-		MaxMTU:          maxMTU,
-		DefaultMTU:      defaultMTU,
-		TXQueueLen:      txQueueLen,
-		Routes:          routes,
-		useSystemRoutes: useSystemRoutes,
+		TXQueueLen:      c.GetInt("tun.tx_queue", 500),
+		useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
 		l:               l,
 		l:               l,
 	}
 	}
-	t.routeTree.Store(routeTree)
+
+	err := t.reload(c, true)
+	if err != nil {
+		return nil, err
+	}
+
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
 	return t, nil
 	return t, nil
 }
 }
 
 
+// reload applies the route and MTU settings found in c to this tun device.
+//
+// When initial is true the parsed routes, route tree and MTU values are only recorded on t; nothing is pushed to
+// the system here. When initial is false the call returns early unless getAllRoutesFromConfig reports a route
+// change or tun.mtu has changed; otherwise the device MTU, the default route and the system routes are brought in
+// line with the new config. Failures while touching the system during a reload are logged, not returned. An error
+// is returned only when the routes cannot be read from the config or the route tree cannot be built.
+func (t *tun) reload(c *config.C, initial bool) error {
+	routeChange, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
+	if err != nil {
+		return err
+	}
+
+	if !initial && !routeChange && !c.HasChanged("tun.mtu") {
+		return nil
+	}
+
+	routeTree, err := makeRouteTree(t.l, routes, true)
+	if err != nil {
+		return err
+	}
+
+	oldDefaultMTU := t.DefaultMTU
+	oldMaxMTU := t.MaxMTU
+	newDefaultMTU := c.GetInt("tun.mtu", DefaultMTU)
+	newMaxMTU := newDefaultMTU
+	for i, r := range routes {
+		if r.MTU == 0 {
+			// Routes without an explicit MTU inherit the default
+			routes[i].MTU = newDefaultMTU
+		}
+
+		// Compare against the running maximum, not the previous t.MaxMTU. Otherwise a route MTU smaller than the
+		// default could lower the device MTU, and the last matching route would win instead of the largest one.
+		if r.MTU > newMaxMTU {
+			newMaxMTU = r.MTU
+		}
+	}
+
+	t.MaxMTU = newMaxMTU
+	t.DefaultMTU = newDefaultMTU
+
+	// Teach nebula how to handle the routes before establishing them in the system table
+	oldRoutes := t.Routes.Swap(&routes)
+	t.routeTree.Store(routeTree)
+
+	if !initial {
+		if oldMaxMTU != newMaxMTU {
+			t.setMTU()
+			t.l.Infof("Set max MTU to %v was %v", t.MaxMTU, oldMaxMTU)
+		}
+
+		if oldDefaultMTU != newDefaultMTU {
+			err := t.setDefaultRoute()
+			if err != nil {
+				t.l.Warn(err)
+			} else {
+				t.l.Infof("Set default MTU to %v was %v", t.DefaultMTU, oldDefaultMTU)
+			}
+		}
+
+		// Remove first, if the system removes a wanted route hopefully it will be re-added next
+		t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
+
+		// Ensure any routes we actually want are installed
+		err = t.addRoutes(true)
+		if err != nil {
+			// This should never be called since addRoutes should log its own errors in a reload condition
+			util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l)
+		}
+	}
+
+	return nil
+}
+
 func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
 func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
 	fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
 	fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
 	if err != nil {
 	if err != nil {
@@ -208,7 +269,7 @@ func (t *tun) Activate() error {
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
-	fd := uintptr(s)
+	t.ioctlFd = uintptr(s)
 
 
 	ifra := ifreqAddr{
 	ifra := ifreqAddr{
 		Name: devName,
 		Name: devName,
@@ -219,52 +280,76 @@ func (t *tun) Activate() error {
 	}
 	}
 
 
 	// Set the device ip address
 	// Set the device ip address
-	if err = ioctl(fd, unix.SIOCSIFADDR, uintptr(unsafe.Pointer(&ifra))); err != nil {
+	if err = ioctl(t.ioctlFd, unix.SIOCSIFADDR, uintptr(unsafe.Pointer(&ifra))); err != nil {
 		return fmt.Errorf("failed to set tun address: %s", err)
 		return fmt.Errorf("failed to set tun address: %s", err)
 	}
 	}
 
 
 	// Set the device network
 	// Set the device network
 	ifra.Addr.Addr = mask
 	ifra.Addr.Addr = mask
-	if err = ioctl(fd, unix.SIOCSIFNETMASK, uintptr(unsafe.Pointer(&ifra))); err != nil {
+	if err = ioctl(t.ioctlFd, unix.SIOCSIFNETMASK, uintptr(unsafe.Pointer(&ifra))); err != nil {
 		return fmt.Errorf("failed to set tun netmask: %s", err)
 		return fmt.Errorf("failed to set tun netmask: %s", err)
 	}
 	}
 
 
 	// Set the device name
 	// Set the device name
 	ifrf := ifReq{Name: devName}
 	ifrf := ifReq{Name: devName}
-	if err = ioctl(fd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
+	if err = ioctl(t.ioctlFd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
 		return fmt.Errorf("failed to set tun device name: %s", err)
 		return fmt.Errorf("failed to set tun device name: %s", err)
 	}
 	}
 
 
-	// Set the MTU on the device
-	ifm := ifreqMTU{Name: devName, MTU: int32(t.MaxMTU)}
-	if err = ioctl(fd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
-		// This is currently a non fatal condition because the route table must have the MTU set appropriately as well
-		t.l.WithError(err).Error("Failed to set tun mtu")
-	}
+	// Setup our default MTU
+	t.setMTU()
 
 
 	// Set the transmit queue length
 	// Set the transmit queue length
 	ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)}
 	ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)}
-	if err = ioctl(fd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
+	if err = ioctl(t.ioctlFd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
 		// If we can't set the queue length nebula will still work but it may lead to packet loss
 		// If we can't set the queue length nebula will still work but it may lead to packet loss
 		t.l.WithError(err).Error("Failed to set tun tx queue length")
 		t.l.WithError(err).Error("Failed to set tun tx queue length")
 	}
 	}
 
 
 	// Bring up the interface
 	// Bring up the interface
 	ifrf.Flags = ifrf.Flags | unix.IFF_UP
 	ifrf.Flags = ifrf.Flags | unix.IFF_UP
-	if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
+	if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
 		return fmt.Errorf("failed to bring the tun device up: %s", err)
 		return fmt.Errorf("failed to bring the tun device up: %s", err)
 	}
 	}
 
 
-	// Set the routes
 	link, err := netlink.LinkByName(t.Device)
 	link, err := netlink.LinkByName(t.Device)
 	if err != nil {
 	if err != nil {
 		return fmt.Errorf("failed to get tun device link: %s", err)
 		return fmt.Errorf("failed to get tun device link: %s", err)
 	}
 	}
+	t.deviceIndex = link.Attrs().Index
+
+	if err = t.setDefaultRoute(); err != nil {
+		return err
+	}
+
+	// Set the routes
+	if err = t.addRoutes(false); err != nil {
+		return err
+	}
 
 
+	// Run the interface
+	ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
+	if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
+		return fmt.Errorf("failed to run tun device: %s", err)
+	}
+
+	return nil
+}
+
+// setMTU issues a SIOCSIFMTU ioctl on t.ioctlFd to set the MTU of the device named by t.deviceBytes() to t.MaxMTU.
+// A failure is logged and not returned to the caller.
+func (t *tun) setMTU() {
+	// Set the MTU on the device
+	ifm := ifreqMTU{Name: t.deviceBytes(), MTU: int32(t.MaxMTU)}
+	if err := ioctl(t.ioctlFd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
+		// This is currently a non fatal condition because the route table must have the MTU set appropriately as well
+		t.l.WithError(err).Error("Failed to set tun mtu")
+	}
+}
+
+func (t *tun) setDefaultRoute() error {
 	// Default route
 	// Default route
 	dr := &net.IPNet{IP: t.cidr.IP.Mask(t.cidr.Mask), Mask: t.cidr.Mask}
 	dr := &net.IPNet{IP: t.cidr.IP.Mask(t.cidr.Mask), Mask: t.cidr.Mask}
 	nr := netlink.Route{
 	nr := netlink.Route{
-		LinkIndex: link.Attrs().Index,
+		LinkIndex: t.deviceIndex,
 		Dst:       dr,
 		Dst:       dr,
 		MTU:       t.DefaultMTU,
 		MTU:       t.DefaultMTU,
 		AdvMSS:    t.advMSS(Route{}),
 		AdvMSS:    t.advMSS(Route{}),
@@ -274,19 +359,24 @@ func (t *tun) Activate() error {
 		Table:     unix.RT_TABLE_MAIN,
 		Table:     unix.RT_TABLE_MAIN,
 		Type:      unix.RTN_UNICAST,
 		Type:      unix.RTN_UNICAST,
 	}
 	}
-	err = netlink.RouteReplace(&nr)
+	err := netlink.RouteReplace(&nr)
 	if err != nil {
 	if err != nil {
 		return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
 		return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
 	}
 	}
 
 
+	return nil
+}
+
+func (t *tun) addRoutes(logErrors bool) error {
 	// Path routes
 	// Path routes
-	for _, r := range t.Routes {
+	routes := *t.Routes.Load()
+	for _, r := range routes {
 		if !r.Install {
 		if !r.Install {
 			continue
 			continue
 		}
 		}
 
 
 		nr := netlink.Route{
 		nr := netlink.Route{
-			LinkIndex: link.Attrs().Index,
+			LinkIndex: t.deviceIndex,
 			Dst:       r.Cidr,
 			Dst:       r.Cidr,
 			MTU:       r.MTU,
 			MTU:       r.MTU,
 			AdvMSS:    t.advMSS(r),
 			AdvMSS:    t.advMSS(r),
@@ -297,21 +387,49 @@ func (t *tun) Activate() error {
 			nr.Priority = r.Metric
 			nr.Priority = r.Metric
 		}
 		}
 
 
-		err = netlink.RouteAdd(&nr)
+		err := netlink.RouteReplace(&nr)
 		if err != nil {
 		if err != nil {
-			return fmt.Errorf("failed to set mtu %v on route %v; %v", r.MTU, r.Cidr, err)
+			retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err)
+			if logErrors {
+				retErr.Log(t.l)
+			} else {
+				return retErr
+			}
+		} else {
+			t.l.WithField("route", r).Info("Added route")
 		}
 		}
 	}
 	}
 
 
-	// Run the interface
-	ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
-	if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
-		return fmt.Errorf("failed to run tun device: %s", err)
-	}
-
 	return nil
 	return nil
 }
 }
 
 
+// removeRoutes asks netlink to delete each of the given routes that has Install set, using this device's link
+// index. Every attempt is logged, success or failure, and a failure does not stop the remaining routes from being
+// processed.
+func (t *tun) removeRoutes(routes []Route) {
+	for _, r := range routes {
+		if !r.Install {
+			// Routes that are not marked for install are skipped
+			continue
+		}
+
+		// Describe the route with the same fields addRoutes sets when installing it
+		nr := netlink.Route{
+			LinkIndex: t.deviceIndex,
+			Dst:       r.Cidr,
+			MTU:       r.MTU,
+			AdvMSS:    t.advMSS(r),
+			Scope:     unix.RT_SCOPE_LINK,
+		}
+
+		if r.Metric > 0 {
+			nr.Priority = r.Metric
+		}
+
+		err := netlink.RouteDel(&nr)
+		if err != nil {
+			t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
+		} else {
+			t.l.WithField("route", r).Info("Removed route")
+		}
+	}
+}
+
 func (t *tun) Cidr() *net.IPNet {
 func (t *tun) Cidr() *net.IPNet {
 	return t.cidr
 	return t.cidr
 }
 }
@@ -410,5 +528,9 @@ func (t *tun) Close() error {
 		t.ReadWriteCloser.Close()
 		t.ReadWriteCloser.Close()
 	}
 	}
 
 
+	if t.ioctlFd > 0 {
+		os.NewFile(t.ioctlFd, "ioctlFd").Close()
+	}
+
 	return nil
 	return nil
 }
 }

+ 99 - 24
overlay/tun_netbsd.go

@@ -11,12 +11,15 @@ import (
 	"os/exec"
 	"os/exec"
 	"regexp"
 	"regexp"
 	"strconv"
 	"strconv"
+	"sync/atomic"
 	"syscall"
 	"syscall"
 	"unsafe"
 	"unsafe"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 )
 )
 
 
 type ifreqDestroy struct {
 type ifreqDestroy struct {
@@ -28,8 +31,8 @@ type tun struct {
 	Device    string
 	Device    string
 	cidr      *net.IPNet
 	cidr      *net.IPNet
 	MTU       int
 	MTU       int
-	Routes    []Route
-	routeTree *cidr.Tree4[iputil.VpnIp]
+	Routes    atomic.Pointer[[]Route]
+	routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
 	l         *logrus.Logger
 	l         *logrus.Logger
 
 
 	io.ReadWriteCloser
 	io.ReadWriteCloser
@@ -56,43 +59,50 @@ func (t *tun) Close() error {
 	return nil
 	return nil
 }
 }
 
 
-func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) {
+func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) {
 	return nil, fmt.Errorf("newTunFromFd not supported in NetBSD")
 	return nil, fmt.Errorf("newTunFromFd not supported in NetBSD")
 }
 }
 
 
 var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`)
 var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`)
 
 
-func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) {
+func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) {
 	// Try to open tun device
 	// Try to open tun device
 	var file *os.File
 	var file *os.File
 	var err error
 	var err error
+	deviceName := c.GetString("tun.dev", "")
 	if deviceName == "" {
 	if deviceName == "" {
 		return nil, fmt.Errorf("a device name in the format of /dev/tunN must be specified")
 		return nil, fmt.Errorf("a device name in the format of /dev/tunN must be specified")
 	}
 	}
 	if !deviceNameRE.MatchString(deviceName) {
 	if !deviceNameRE.MatchString(deviceName) {
 		return nil, fmt.Errorf("a device name in the format of /dev/tunN must be specified")
 		return nil, fmt.Errorf("a device name in the format of /dev/tunN must be specified")
 	}
 	}
-	file, err = os.OpenFile("/dev/"+deviceName, os.O_RDWR, 0)
 
 
+	file, err = os.OpenFile("/dev/"+deviceName, os.O_RDWR, 0)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	routeTree, err := makeRouteTree(l, routes, false)
+	t := &tun{
+		ReadWriteCloser: file,
+		Device:          deviceName,
+		cidr:            cidr,
+		MTU:             c.GetInt("tun.mtu", DefaultMTU),
+		l:               l,
+	}
 
 
+	err = t.reload(c, true)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	return &tun{
-		ReadWriteCloser: file,
-		Device:          deviceName,
-		cidr:            cidr,
-		MTU:             defaultMTU,
-		Routes:          routes,
-		routeTree:       routeTree,
-		l:               l,
-	}, nil
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
+	return t, nil
 }
 }
 
 
 func (t *tun) Activate() error {
 func (t *tun) Activate() error {
@@ -116,17 +126,42 @@ func (t *tun) Activate() error {
 	if err = cmd.Run(); err != nil {
 	if err = cmd.Run(); err != nil {
 		return fmt.Errorf("failed to run 'ifconfig': %s", err)
 		return fmt.Errorf("failed to run 'ifconfig': %s", err)
 	}
 	}
+
 	// Unsafe path routes
 	// Unsafe path routes
-	for _, r := range t.Routes {
-		if r.Via == nil || !r.Install {
-			// We don't allow route MTUs so only install routes with a via
-			continue
+	return t.addRoutes(false)
+}
+
+func (t *tun) reload(c *config.C, initial bool) error {
+	change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
+	if err != nil {
+		return err
+	}
+
+	if !initial && !change {
+		return nil
+	}
+
+	routeTree, err := makeRouteTree(t.l, routes, false)
+	if err != nil {
+		return err
+	}
+
+	// Teach nebula how to handle the routes before establishing them in the system table
+	oldRoutes := t.Routes.Swap(&routes)
+	t.routeTree.Store(routeTree)
+
+	if !initial {
+		// Remove first, if the system removes a wanted route hopefully it will be re-added next
+		err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
+		if err != nil {
+			util.LogWithContextIfNeeded("Failed to remove routes", err, t.l)
 		}
 		}
 
 
-		cmd = exec.Command("/sbin/route", "-n", "add", "-net", r.Cidr.String(), t.cidr.IP.String())
-		t.l.Debug("command: ", cmd.String())
-		if err = cmd.Run(); err != nil {
-			return fmt.Errorf("failed to run 'route add' for unsafe_route %s: %s", r.Cidr.String(), err)
+		// Ensure any routes we actually want are installed
+		err = t.addRoutes(true)
+		if err != nil {
+			// Catch any stray logs
+			util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
 		}
 		}
 	}
 	}
 
 
@@ -134,7 +169,7 @@ func (t *tun) Activate() error {
 }
 }
 
 
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
 func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
-	_, r := t.routeTree.MostSpecificContains(ip)
+	_, r := t.routeTree.Load().MostSpecificContains(ip)
 	return r
 	return r
 }
 }
 
 
@@ -150,6 +185,46 @@ func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
 	return nil, fmt.Errorf("TODO: multiqueue not implemented for netbsd")
 	return nil, fmt.Errorf("TODO: multiqueue not implemented for netbsd")
 }
 }
 
 
+// addRoutes runs `/sbin/route -n add -net` for every currently loaded route that has both a Via and Install set,
+// passing t.cidr.IP as the final argument.
+//
+// When logErrors is true a failed command is logged and the loop continues; when it is false the first failure is
+// returned as a contextual error and the remaining routes are not attempted.
+func (t *tun) addRoutes(logErrors bool) error {
+	routes := *t.Routes.Load()
+	for _, r := range routes {
+		if r.Via == nil || !r.Install {
+			// We don't allow route MTUs so only install routes with a via
+			continue
+		}
+
+		cmd := exec.Command("/sbin/route", "-n", "add", "-net", r.Cidr.String(), t.cidr.IP.String())
+		t.l.Debug("command: ", cmd.String())
+		if err := cmd.Run(); err != nil {
+			retErr := util.NewContextualError("failed to run 'route add' for unsafe_route", map[string]interface{}{"route": r}, err)
+			if logErrors {
+				retErr.Log(t.l)
+			} else {
+				return retErr
+			}
+		}
+	}
+
+	return nil
+}
+
+// removeRoutes runs `/sbin/route -n delete -net` for each of the given routes that addRoutes would have installed,
+// i.e. those with both a Via and Install set. Every attempt is logged, success or failure, and a failure does not
+// stop the remaining routes from being processed. The returned error is always nil.
+func (t *tun) removeRoutes(routes []Route) error {
+	for _, r := range routes {
+		if r.Via == nil || !r.Install {
+			// addRoutes never installs a route without a via, so there is nothing in the system table to delete
+			continue
+		}
+
+		cmd := exec.Command("/sbin/route", "-n", "delete", "-net", r.Cidr.String(), t.cidr.IP.String())
+		t.l.Debug("command: ", cmd.String())
+		if err := cmd.Run(); err != nil {
+			t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
+		} else {
+			t.l.WithField("route", r).Info("Removed route")
+		}
+	}
+	return nil
+}
+
 func (t *tun) deviceBytes() (o [16]byte) {
 func (t *tun) deviceBytes() (o [16]byte) {
 	for i, c := range t.Device {
 	for i, c := range t.Device {
 		o[i] = byte(c)
 		o[i] = byte(c)

+ 96 - 21
overlay/tun_openbsd.go

@@ -11,19 +11,22 @@ import (
 	"os/exec"
 	"os/exec"
 	"regexp"
 	"regexp"
 	"strconv"
 	"strconv"
+	"sync/atomic"
 	"syscall"
 	"syscall"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 )
 )
 
 
 type tun struct {
 type tun struct {
 	Device    string
 	Device    string
 	cidr      *net.IPNet
 	cidr      *net.IPNet
 	MTU       int
 	MTU       int
-	Routes    []Route
-	routeTree *cidr.Tree4[iputil.VpnIp]
+	Routes    atomic.Pointer[[]Route]
+	routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
 	l         *logrus.Logger
 	l         *logrus.Logger
 
 
 	io.ReadWriteCloser
 	io.ReadWriteCloser
@@ -40,13 +43,14 @@ func (t *tun) Close() error {
 	return nil
 	return nil
 }
 }
 
 
-func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) {
+func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) {
 	return nil, fmt.Errorf("newTunFromFd not supported in OpenBSD")
 	return nil, fmt.Errorf("newTunFromFd not supported in OpenBSD")
 }
 }
 
 
 var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`)
 var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`)
 
 
-func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) {
+func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) {
+	deviceName := c.GetString("tun.dev", "")
 	if deviceName == "" {
 	if deviceName == "" {
 		return nil, fmt.Errorf("a device name in the format of tunN must be specified")
 		return nil, fmt.Errorf("a device name in the format of tunN must be specified")
 	}
 	}
@@ -60,20 +64,64 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	routeTree, err := makeRouteTree(l, routes, false)
-	if err != nil {
-		return nil, err
-	}
-
-	return &tun{
+	t := &tun{
 		ReadWriteCloser: file,
 		ReadWriteCloser: file,
 		Device:          deviceName,
 		Device:          deviceName,
 		cidr:            cidr,
 		cidr:            cidr,
-		MTU:             defaultMTU,
-		Routes:          routes,
-		routeTree:       routeTree,
+		MTU:             c.GetInt("tun.mtu", DefaultMTU),
 		l:               l,
 		l:               l,
-	}, nil
+	}
+
+	err = t.reload(c, true)
+	if err != nil {
+		return nil, err
+	}
+
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
+	return t, nil
+}
+
+func (t *tun) reload(c *config.C, initial bool) error {
+	change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
+	if err != nil {
+		return err
+	}
+
+	if !initial && !change {
+		return nil
+	}
+
+	routeTree, err := makeRouteTree(t.l, routes, false)
+	if err != nil {
+		return err
+	}
+
+	// Teach nebula how to handle the routes before establishing them in the system table
+	oldRoutes := t.Routes.Swap(&routes)
+	t.routeTree.Store(routeTree)
+
+	if !initial {
+		// Remove first, if the system removes a wanted route hopefully it will be re-added next
+		err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
+		if err != nil {
+			util.LogWithContextIfNeeded("Failed to remove routes", err, t.l)
+		}
+
+		// Ensure any routes we actually want are installed
+		err = t.addRoutes(true)
+		if err != nil {
+			// Catch any stray logs
+			util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
+		}
+	}
+
+	return nil
 }
 }
 
 
 func (t *tun) Activate() error {
 func (t *tun) Activate() error {
@@ -98,25 +146,52 @@ func (t *tun) Activate() error {
 	}
 	}
 
 
 	// Unsafe path routes
 	// Unsafe path routes
-	for _, r := range t.Routes {
+	return t.addRoutes(false)
+}
+
+// RouteFor looks ip up in the currently loaded route tree and returns the value MostSpecificContains reports
+// for it. The tree is read through an atomic load, so a reload can swap in a new tree between calls.
+func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
+	_, r := t.routeTree.Load().MostSpecificContains(ip)
+	return r
+}
+
+func (t *tun) addRoutes(logErrors bool) error {
+	routes := *t.Routes.Load()
+	for _, r := range routes {
 		if r.Via == nil || !r.Install {
 		if r.Via == nil || !r.Install {
 			// We don't allow route MTUs so only install routes with a via
 			// We don't allow route MTUs so only install routes with a via
 			continue
 			continue
 		}
 		}
 
 
-		cmd = exec.Command("/sbin/route", "-n", "add", "-inet", r.Cidr.String(), t.cidr.IP.String())
+		cmd := exec.Command("/sbin/route", "-n", "add", "-inet", r.Cidr.String(), t.cidr.IP.String())
 		t.l.Debug("command: ", cmd.String())
 		t.l.Debug("command: ", cmd.String())
-		if err = cmd.Run(); err != nil {
-			return fmt.Errorf("failed to run 'route add' for unsafe_route %s: %s", r.Cidr.String(), err)
+		if err := cmd.Run(); err != nil {
+			retErr := util.NewContextualError("failed to run 'route add' for unsafe_route", map[string]interface{}{"route": r}, err)
+			if logErrors {
+				retErr.Log(t.l)
+			} else {
+				return retErr
+			}
 		}
 		}
 	}
 	}
 
 
 	return nil
 	return nil
 }
 }
 
 
-func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
-	_, r := t.routeTree.MostSpecificContains(ip)
-	return r
+func (t *tun) removeRoutes(routes []Route) error {
+	for _, r := range routes {
+		if !r.Install {
+			continue
+		}
+
+		cmd := exec.Command("/sbin/route", "-n", "delete", "-inet", r.Cidr.String(), t.cidr.IP.String())
+		t.l.Debug("command: ", cmd.String())
+		if err := cmd.Run(); err != nil {
+			t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
+		} else {
+			t.l.WithField("route", r).Info("Removed route")
+		}
+	}
+	return nil
 }
 }
 
 
 func (t *tun) Cidr() *net.IPNet {
 func (t *tun) Cidr() *net.IPNet {

+ 8 - 3
overlay/tun_tester.go

@@ -12,6 +12,7 @@ import (
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
 )
 )
 
 
@@ -27,14 +28,18 @@ type TestTun struct {
 	TxPackets chan []byte // Packets transmitted outside by nebula
 	TxPackets chan []byte // Packets transmitted outside by nebula
 }
 }
 
 
-func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, _ int, routes []Route, _ int, _ bool, _ bool) (*TestTun, error) {
+func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*TestTun, error) {
+	_, routes, err := getAllRoutesFromConfig(c, cidr, true)
+	if err != nil {
+		return nil, err
+	}
 	routeTree, err := makeRouteTree(l, routes, false)
 	routeTree, err := makeRouteTree(l, routes, false)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
 	return &TestTun{
 	return &TestTun{
-		Device:    deviceName,
+		Device:    c.GetString("tun.dev", ""),
 		cidr:      cidr,
 		cidr:      cidr,
 		Routes:    routes,
 		Routes:    routes,
 		routeTree: routeTree,
 		routeTree: routeTree,
@@ -44,7 +49,7 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, _ int, routes
 	}, nil
 	}, nil
 }
 }
 
 
-func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*TestTun, error) {
+func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*TestTun, error) {
 	return nil, fmt.Errorf("newTunFromFd not supported")
 	return nil, fmt.Errorf("newTunFromFd not supported")
 }
 }
 
 

+ 104 - 18
overlay/tun_water_windows.go

@@ -6,10 +6,13 @@ import (
 	"net"
 	"net"
 	"os/exec"
 	"os/exec"
 	"strconv"
 	"strconv"
+	"sync/atomic"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 	"github.com/songgao/water"
 	"github.com/songgao/water"
 )
 )
 
 
@@ -17,25 +20,34 @@ type waterTun struct {
 	Device    string
 	Device    string
 	cidr      *net.IPNet
 	cidr      *net.IPNet
 	MTU       int
 	MTU       int
-	Routes    []Route
-	routeTree *cidr.Tree4[iputil.VpnIp]
-
+	Routes    atomic.Pointer[[]Route]
+	routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
+	l         *logrus.Logger
+	f         *net.Interface
 	*water.Interface
 	*water.Interface
 }
 }
 
 
-func newWaterTun(l *logrus.Logger, cidr *net.IPNet, defaultMTU int, routes []Route) (*waterTun, error) {
-	routeTree, err := makeRouteTree(l, routes, false)
+func newWaterTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*waterTun, error) {
+	// NOTE: You cannot set the deviceName under Windows, so you must check tun.Device after calling .Activate()
+	t := &waterTun{
+		cidr: cidr,
+		MTU:  c.GetInt("tun.mtu", DefaultMTU),
+		l:    l,
+	}
+
+	err := t.reload(c, true)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	// NOTE: You cannot set the deviceName under Windows, so you must check tun.Device after calling .Activate()
-	return &waterTun{
-		cidr:      cidr,
-		MTU:       defaultMTU,
-		Routes:    routes,
-		routeTree: routeTree,
-	}, nil
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
+	return t, nil
 }
 }
 
 
 func (t *waterTun) Activate() error {
 func (t *waterTun) Activate() error {
@@ -74,30 +86,104 @@ func (t *waterTun) Activate() error {
 		return fmt.Errorf("failed to run 'netsh' to set MTU: %s", err)
 		return fmt.Errorf("failed to run 'netsh' to set MTU: %s", err)
 	}
 	}
 
 
-	iface, err := net.InterfaceByName(t.Device)
+	t.f, err = net.InterfaceByName(t.Device)
 	if err != nil {
 	if err != nil {
 		return fmt.Errorf("failed to find interface named %s: %v", t.Device, err)
 		return fmt.Errorf("failed to find interface named %s: %v", t.Device, err)
 	}
 	}
 
 
-	for _, r := range t.Routes {
+	err = t.addRoutes(false)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// reload applies the routes found in c to this device.
+//
+// When initial is true the parsed routes and route tree are only recorded on t. When initial is false the call
+// returns early unless getAllRoutesFromConfig reports a change; otherwise routes that are no longer configured are
+// removed from the system and the current set is (re)added. An error is returned only when the routes cannot be
+// read from the config or the route tree cannot be built.
+func (t *waterTun) reload(c *config.C, initial bool) error {
+	change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
+	if err != nil {
+		return err
+	}
+
+	if !initial && !change {
+		return nil
+	}
+
+	routeTree, err := makeRouteTree(t.l, routes, false)
+	if err != nil {
+		return err
+	}
+
+	// Teach nebula how to handle the routes before establishing them in the system table
+	oldRoutes := t.Routes.Swap(&routes)
+	t.routeTree.Store(routeTree)
+
+	if !initial {
+		// Remove first, if the system removes a wanted route hopefully it will be re-added next
+		t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
+
+		// Ensure any routes we actually want are installed
+		err = t.addRoutes(true)
+		if err != nil {
+			// Catch any stray logs
+			util.LogWithContextIfNeeded("Failed to set routes", err, t.l)
+		} else {
+			// NOTE(review): removeRoutes already logs the outcome of each removal it attempts, so this loop looks
+			// like it reports "Removed route" a second time, including for routes whose removal failed or was
+			// skipped - confirm whether this is intended
+			for _, r := range findRemovedRoutes(routes, *oldRoutes) {
+				t.l.WithField("route", r).Info("Removed route")
+			}
+		}
+	}
+
+	return nil
+}
+
+func (t *waterTun) addRoutes(logErrors bool) error {
+	// Path routes
+	routes := *t.Routes.Load()
+	for _, r := range routes {
 		if r.Via == nil || !r.Install {
 		if r.Via == nil || !r.Install {
 			// We don't allow route MTUs so only install routes with a via
 			// We don't allow route MTUs so only install routes with a via
 			continue
 			continue
 		}
 		}
 
 
-		err = exec.Command(
-			"C:\\Windows\\System32\\route.exe", "add", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(iface.Index), "METRIC", strconv.Itoa(r.Metric),
+		err := exec.Command(
+			"C:\\Windows\\System32\\route.exe", "add", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(t.f.Index), "METRIC", strconv.Itoa(r.Metric),
 		).Run()
 		).Run()
+
 		if err != nil {
 		if err != nil {
-			return fmt.Errorf("failed to add the unsafe_route %s: %v", r.Cidr.String(), err)
+			retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err)
+			if logErrors {
+				retErr.Log(t.l)
+			} else {
+				return retErr
+			}
+		} else {
+			t.l.WithField("route", r).Info("Added route")
 		}
 		}
 	}
 	}
 
 
 	return nil
 	return nil
 }
 }
 
 
+// removeRoutes runs `route.exe delete` for each of the given routes that addRoutes would have installed, i.e.
+// those with both a Via and Install set. Every attempt is logged, success or failure, and a failure does not stop
+// the remaining routes from being processed.
+func (t *waterTun) removeRoutes(routes []Route) {
+	for _, r := range routes {
+		if r.Via == nil || !r.Install {
+			// addRoutes never installs a route without a via, so there is nothing to delete, and r.Via must not be
+			// dereferenced below when it is nil
+			continue
+		}
+
+		err := exec.Command(
+			"C:\\Windows\\System32\\route.exe", "delete", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(t.f.Index), "METRIC", strconv.Itoa(r.Metric),
+		).Run()
+		if err != nil {
+			t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
+		} else {
+			t.l.WithField("route", r).Info("Removed route")
+		}
+	}
+}
+
 func (t *waterTun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
 func (t *waterTun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
-	_, r := t.routeTree.MostSpecificContains(ip)
+	_, r := t.routeTree.Load().MostSpecificContains(ip)
 	return r
 	return r
 }
 }
 
 

+ 5 - 4
overlay/tun_windows.go

@@ -12,13 +12,14 @@ import (
 	"syscall"
 	"syscall"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
+	"github.com/slackhq/nebula/config"
 )
 )
 
 
-func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (Device, error) {
+func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (Device, error) {
 	return nil, fmt.Errorf("newTunFromFd not supported in Windows")
 	return nil, fmt.Errorf("newTunFromFd not supported in Windows")
 }
 }
 
 
-func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (Device, error) {
+func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, multiqueue bool) (Device, error) {
 	useWintun := true
 	useWintun := true
 	if err := checkWinTunExists(); err != nil {
 	if err := checkWinTunExists(); err != nil {
 		l.WithError(err).Warn("Check Wintun driver failed, fallback to wintap driver")
 		l.WithError(err).Warn("Check Wintun driver failed, fallback to wintap driver")
@@ -26,14 +27,14 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int
 	}
 	}
 
 
 	if useWintun {
 	if useWintun {
-		device, err := newWinTun(l, deviceName, cidr, defaultMTU, routes)
+		device, err := newWinTun(c, l, cidr, multiqueue)
 		if err != nil {
 		if err != nil {
 			return nil, fmt.Errorf("create Wintun interface failed, %w", err)
 			return nil, fmt.Errorf("create Wintun interface failed, %w", err)
 		}
 		}
 		return device, nil
 		return device, nil
 	}
 	}
 
 
-	device, err := newWaterTun(l, cidr, defaultMTU, routes)
+	device, err := newWaterTun(c, l, cidr, multiqueue)
 	if err != nil {
 	if err != nil {
 		return nil, fmt.Errorf("create wintap driver failed, %w", err)
 		return nil, fmt.Errorf("create wintap driver failed, %w", err)
 	}
 	}

+ 129 - 37
overlay/tun_wintun_windows.go

@@ -6,11 +6,14 @@ import (
 	"io"
 	"io"
 	"net"
 	"net"
 	"net/netip"
 	"net/netip"
+	"sync/atomic"
 	"unsafe"
 	"unsafe"
 
 
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 	"github.com/slackhq/nebula/cidr"
 	"github.com/slackhq/nebula/cidr"
+	"github.com/slackhq/nebula/config"
 	"github.com/slackhq/nebula/iputil"
 	"github.com/slackhq/nebula/iputil"
+	"github.com/slackhq/nebula/util"
 	"github.com/slackhq/nebula/wintun"
 	"github.com/slackhq/nebula/wintun"
 	"golang.org/x/sys/windows"
 	"golang.org/x/sys/windows"
 	"golang.zx2c4.com/wireguard/windows/tunnel/winipcfg"
 	"golang.zx2c4.com/wireguard/windows/tunnel/winipcfg"
@@ -23,8 +26,9 @@ type winTun struct {
 	cidr      *net.IPNet
 	cidr      *net.IPNet
 	prefix    netip.Prefix
 	prefix    netip.Prefix
 	MTU       int
 	MTU       int
-	Routes    []Route
-	routeTree *cidr.Tree4[iputil.VpnIp]
+	Routes    atomic.Pointer[[]Route]
+	routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
+	l         *logrus.Logger
 
 
 	tun *wintun.NativeTun
 	tun *wintun.NativeTun
 }
 }
@@ -48,83 +52,148 @@ func generateGUIDByDeviceName(name string) (*windows.GUID, error) {
 	return (*windows.GUID)(unsafe.Pointer(&sum[0])), nil
 	return (*windows.GUID)(unsafe.Pointer(&sum[0])), nil
 }
 }
 
 
-func newWinTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route) (*winTun, error) {
+func newWinTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*winTun, error) {
+	deviceName := c.GetString("tun.dev", "")
 	guid, err := generateGUIDByDeviceName(deviceName)
 	guid, err := generateGUIDByDeviceName(deviceName)
 	if err != nil {
 	if err != nil {
 		return nil, fmt.Errorf("generate GUID failed: %w", err)
 		return nil, fmt.Errorf("generate GUID failed: %w", err)
 	}
 	}
 
 
+	prefix, err := iputil.ToNetIpPrefix(*cidr)
+	if err != nil {
+		return nil, err
+	}
+
+	t := &winTun{
+		Device: deviceName,
+		cidr:   cidr,
+		prefix: prefix,
+		MTU:    c.GetInt("tun.mtu", DefaultMTU),
+		l:      l,
+	}
+
+	err = t.reload(c, true)
+	if err != nil {
+		return nil, err
+	}
+
 	var tunDevice wintun.Device
 	var tunDevice wintun.Device
-	tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, defaultMTU)
+	tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, t.MTU)
 	if err != nil {
 	if err != nil {
 		// Windows 10 has an issue with unclean shutdowns not fully cleaning up the wintun device.
 		// Windows 10 has an issue with unclean shutdowns not fully cleaning up the wintun device.
 		// Trying a second time resolves the issue.
 		// Trying a second time resolves the issue.
 		l.WithError(err).Debug("Failed to create wintun device, retrying")
 		l.WithError(err).Debug("Failed to create wintun device, retrying")
-		tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, defaultMTU)
+		tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, t.MTU)
 		if err != nil {
 		if err != nil {
 			return nil, fmt.Errorf("create TUN device failed: %w", err)
 			return nil, fmt.Errorf("create TUN device failed: %w", err)
 		}
 		}
 	}
 	}
+	t.tun = tunDevice.(*wintun.NativeTun)
+
+	c.RegisterReloadCallback(func(c *config.C) {
+		err := t.reload(c, false)
+		if err != nil {
+			util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
+		}
+	})
+
+	return t, nil
+}
 
 
-	routeTree, err := makeRouteTree(l, routes, false)
+func (t *winTun) reload(c *config.C, initial bool) error {
+	change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
 	if err != nil {
 	if err != nil {
-		return nil, err
+		return err
 	}
 	}
 
 
-	prefix, err := iputil.ToNetIpPrefix(*cidr)
+	if !initial && !change {
+		return nil
+	}
+
+	routeTree, err := makeRouteTree(t.l, routes, false)
 	if err != nil {
 	if err != nil {
-		return nil, err
+		return err
 	}
 	}
 
 
-	return &winTun{
-		Device:    deviceName,
-		cidr:      cidr,
-		prefix:    prefix,
-		MTU:       defaultMTU,
-		Routes:    routes,
-		routeTree: routeTree,
+	// Teach nebula how to handle the routes before establishing them in the system table
+	oldRoutes := t.Routes.Swap(&routes)
+	t.routeTree.Store(routeTree)
 
 
-		tun: tunDevice.(*wintun.NativeTun),
-	}, nil
+	if !initial {
+		// Remove first: if the system drops a route we still want during removal, the add pass below should re-install it
+		err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
+		if err != nil {
+			util.LogWithContextIfNeeded("Failed to remove routes", err, t.l)
+		}
+
+		// Ensure any routes we actually want are installed
+		err = t.addRoutes(true)
+		if err != nil {
+			// Per-route failures were already logged by addRoutes; log whatever error it still returned
+			util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
+		}
+	}
+
+	return nil
 }
 }
 
 
 func (t *winTun) Activate() error {
 func (t *winTun) Activate() error {
 	luid := winipcfg.LUID(t.tun.LUID())
 	luid := winipcfg.LUID(t.tun.LUID())
 
 
-	if err := luid.SetIPAddresses([]netip.Prefix{t.prefix}); err != nil {
+	err := luid.SetIPAddresses([]netip.Prefix{t.prefix})
+	if err != nil {
 		return fmt.Errorf("failed to set address: %w", err)
 		return fmt.Errorf("failed to set address: %w", err)
 	}
 	}
 
 
+	err = t.addRoutes(false)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (t *winTun) addRoutes(logErrors bool) error {
+	luid := winipcfg.LUID(t.tun.LUID())
+	routes := *t.Routes.Load()
 	foundDefault4 := false
 	foundDefault4 := false
-	routes := make([]*winipcfg.RouteData, 0, len(t.Routes)+1)
 
 
-	for _, r := range t.Routes {
+	for _, r := range routes {
 		if r.Via == nil || !r.Install {
 		if r.Via == nil || !r.Install {
 			// We don't allow route MTUs so only install routes with a via
 			// We don't allow route MTUs so only install routes with a via
 			continue
 			continue
 		}
 		}
 
 
-		if !foundDefault4 {
-			if ones, bits := r.Cidr.Mask.Size(); ones == 0 && bits != 0 {
-				foundDefault4 = true
-			}
-		}
-
 		prefix, err := iputil.ToNetIpPrefix(*r.Cidr)
 		prefix, err := iputil.ToNetIpPrefix(*r.Cidr)
 		if err != nil {
 		if err != nil {
-			return err
+			retErr := util.NewContextualError("Failed to parse cidr to netip prefix, ignoring route", map[string]interface{}{"route": r}, err)
+			if logErrors {
+				retErr.Log(t.l)
+				continue
+			} else {
+				return retErr
+			}
 		}
 		}
 
 
 		// Add our unsafe route
 		// Add our unsafe route
-		routes = append(routes, &winipcfg.RouteData{
-			Destination: prefix,
-			NextHop:     r.Via.ToNetIpAddr(),
-			Metric:      uint32(r.Metric),
-		})
-	}
+		err = luid.AddRoute(prefix, r.Via.ToNetIpAddr(), uint32(r.Metric))
+		if err != nil {
+			retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err)
+			if logErrors {
+				retErr.Log(t.l)
+				continue
+			} else {
+				return retErr
+			}
+		} else {
+			t.l.WithField("route", r).Info("Added route")
+		}
 
 
-	if err := luid.AddRoutes(routes); err != nil {
-		return fmt.Errorf("failed to add routes: %w", err)
+		if !foundDefault4 {
+			if ones, bits := r.Cidr.Mask.Size(); ones == 0 && bits != 0 {
+				foundDefault4 = true
+			}
+		}
 	}
 	}
 
 
 	ipif, err := luid.IPInterface(windows.AF_INET)
 	ipif, err := luid.IPInterface(windows.AF_INET)
@@ -141,12 +210,35 @@ func (t *winTun) Activate() error {
 	if err := ipif.Set(); err != nil {
 	if err := ipif.Set(); err != nil {
 		return fmt.Errorf("failed to set ip interface: %w", err)
 		return fmt.Errorf("failed to set ip interface: %w", err)
 	}
 	}
+	return nil
+}
+
+// removeRoutes deletes the given routes from the system route table through the
+// wintun interface LUID. Routes without a via or with install disabled are
+// skipped, mirroring the filter used by addRoutes. Removal is best effort:
+// individual failures are logged and the loop continues, so the returned error
+// is always nil.
+func (t *winTun) removeRoutes(routes []Route) error {
+	luid := winipcfg.LUID(t.tun.LUID())
+
+	for _, r := range routes {
+		if r.Via == nil || !r.Install {
+			// addRoutes never installs these, and r.Via.ToNetIpAddr() below
+			// would dereference a nil pointer for a route without a via.
+			continue
+		}
 
 
+		prefix, err := iputil.ToNetIpPrefix(*r.Cidr)
+		if err != nil {
+			t.l.WithError(err).WithField("route", r).Info("Failed to convert cidr to netip prefix")
+			continue
+		}
+
+		err = luid.DeleteRoute(prefix, r.Via.ToNetIpAddr())
+		if err != nil {
+			t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
+		} else {
+			t.l.WithField("route", r).Info("Removed route")
+		}
+	}
 	return nil
 	return nil
 }
 }
 
 
 func (t *winTun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
 func (t *winTun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
-	_, r := t.routeTree.MostSpecificContains(ip)
+	_, r := t.routeTree.Load().MostSpecificContains(ip)
 	return r
 	return r
 }
 }