123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616 |
- //go:build !android && !e2e_testing
- // +build !android,!e2e_testing
- package overlay
- import (
- "fmt"
- "io"
- "net"
- "net/netip"
- "os"
- "strings"
- "sync/atomic"
- "time"
- "unsafe"
- "github.com/gaissmai/bart"
- "github.com/sirupsen/logrus"
- "github.com/slackhq/nebula/config"
- "github.com/slackhq/nebula/util"
- "github.com/vishvananda/netlink"
- "golang.org/x/sys/unix"
- )
- type tun struct {
- io.ReadWriteCloser
- fd int
- Device string
- vpnNetworks []netip.Prefix
- MaxMTU int
- DefaultMTU int
- TXQueueLen int
- deviceIndex int
- ioctlFd uintptr
- Routes atomic.Pointer[[]Route]
- routeTree atomic.Pointer[bart.Table[netip.Addr]]
- routeChan chan struct{}
- useSystemRoutes bool
- l *logrus.Logger
- }
- func (t *tun) Networks() []netip.Prefix {
- return t.vpnNetworks
- }
// ifReq is the request structure handed to the TUNSETIFF, SIOCGIFFLAGS and
// SIOCSIFFLAGS ioctls. Field order and sizes must not change.
type ifReq struct {
	// Name is the NUL padded interface name.
	Name [16]byte
	// Flags carries the interface or tun flags for the request.
	Flags uint16
	// pad reserves trailing space in the request.
	pad [8]byte
}
// ifreqMTU is the request structure handed to the SIOCSIFMTU ioctl. Field
// order and sizes must not change.
type ifreqMTU struct {
	// Name is the NUL padded interface name.
	Name [16]byte
	// MTU is the MTU to apply to the interface.
	MTU int32
	// pad reserves trailing space in the request.
	pad [8]byte
}
// ifreqQLEN is the request structure handed to the SIOCSIFTXQLEN ioctl. Field
// order and sizes must not change.
type ifreqQLEN struct {
	// Name is the NUL padded interface name.
	Name [16]byte
	// Value is the transmit queue length to apply.
	Value int32
	// pad reserves trailing space in the request.
	pad [8]byte
}
- func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, vpnNetworks []netip.Prefix) (*tun, error) {
- file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
- t, err := newTunGeneric(c, l, file, vpnNetworks)
- if err != nil {
- return nil, err
- }
- t.Device = "tun0"
- return t, nil
- }
- func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, multiqueue bool) (*tun, error) {
- fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
- if err != nil {
- // If /dev/net/tun doesn't exist, try to create it (will happen in docker)
- if os.IsNotExist(err) {
- err = os.MkdirAll("/dev/net", 0755)
- if err != nil {
- return nil, fmt.Errorf("/dev/net/tun doesn't exist, failed to mkdir -p /dev/net: %w", err)
- }
- err = unix.Mknod("/dev/net/tun", unix.S_IFCHR|0600, int(unix.Mkdev(10, 200)))
- if err != nil {
- return nil, fmt.Errorf("failed to create /dev/net/tun: %w", err)
- }
- fd, err = unix.Open("/dev/net/tun", os.O_RDWR, 0)
- if err != nil {
- return nil, fmt.Errorf("created /dev/net/tun, but still failed: %w", err)
- }
- } else {
- return nil, err
- }
- }
- var req ifReq
- req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI)
- if multiqueue {
- req.Flags |= unix.IFF_MULTI_QUEUE
- }
- copy(req.Name[:], c.GetString("tun.dev", ""))
- if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
- return nil, err
- }
- name := strings.Trim(string(req.Name[:]), "\x00")
- file := os.NewFile(uintptr(fd), "/dev/net/tun")
- t, err := newTunGeneric(c, l, file, vpnNetworks)
- if err != nil {
- return nil, err
- }
- t.Device = name
- return t, nil
- }
- func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, vpnNetworks []netip.Prefix) (*tun, error) {
- t := &tun{
- ReadWriteCloser: file,
- fd: int(file.Fd()),
- vpnNetworks: vpnNetworks,
- TXQueueLen: c.GetInt("tun.tx_queue", 500),
- useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
- l: l,
- }
- err := t.reload(c, true)
- if err != nil {
- return nil, err
- }
- c.RegisterReloadCallback(func(c *config.C) {
- err := t.reload(c, false)
- if err != nil {
- util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
- }
- })
- return t, nil
- }
- func (t *tun) reload(c *config.C, initial bool) error {
- routeChange, routes, err := getAllRoutesFromConfig(c, t.vpnNetworks, initial)
- if err != nil {
- return err
- }
- if !initial && !routeChange && !c.HasChanged("tun.mtu") {
- return nil
- }
- routeTree, err := makeRouteTree(t.l, routes, true)
- if err != nil {
- return err
- }
- oldDefaultMTU := t.DefaultMTU
- oldMaxMTU := t.MaxMTU
- newDefaultMTU := c.GetInt("tun.mtu", DefaultMTU)
- newMaxMTU := newDefaultMTU
- for i, r := range routes {
- if r.MTU == 0 {
- routes[i].MTU = newDefaultMTU
- }
- if r.MTU > t.MaxMTU {
- newMaxMTU = r.MTU
- }
- }
- t.MaxMTU = newMaxMTU
- t.DefaultMTU = newDefaultMTU
- // Teach nebula how to handle the routes before establishing them in the system table
- oldRoutes := t.Routes.Swap(&routes)
- t.routeTree.Store(routeTree)
- if !initial {
- if oldMaxMTU != newMaxMTU {
- t.setMTU()
- t.l.Infof("Set max MTU to %v was %v", t.MaxMTU, oldMaxMTU)
- }
- if oldDefaultMTU != newDefaultMTU {
- for i := range t.vpnNetworks {
- err := t.setDefaultRoute(t.vpnNetworks[i])
- if err != nil {
- t.l.Warn(err)
- } else {
- t.l.Infof("Set default MTU to %v was %v", t.DefaultMTU, oldDefaultMTU)
- }
- }
- }
- // Remove first, if the system removes a wanted route hopefully it will be re-added next
- t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
- // Ensure any routes we actually want are installed
- err = t.addRoutes(true)
- if err != nil {
- // This should never be called since addRoutes should log its own errors in a reload condition
- util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l)
- }
- }
- return nil
- }
- func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
- fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
- if err != nil {
- return nil, err
- }
- var req ifReq
- req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_MULTI_QUEUE)
- copy(req.Name[:], t.Device)
- if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
- return nil, err
- }
- file := os.NewFile(uintptr(fd), "/dev/net/tun")
- return file, nil
- }
- func (t *tun) RouteFor(ip netip.Addr) netip.Addr {
- r, _ := t.routeTree.Load().Lookup(ip)
- return r
- }
- func (t *tun) Write(b []byte) (int, error) {
- var nn int
- maximum := len(b)
- for {
- n, err := unix.Write(t.fd, b[nn:maximum])
- if n > 0 {
- nn += n
- }
- if nn == len(b) {
- return nn, err
- }
- if err != nil {
- return nn, err
- }
- if n == 0 {
- return nn, io.ErrUnexpectedEOF
- }
- }
- }
- func (t *tun) deviceBytes() (o [16]byte) {
- for i, c := range t.Device {
- o[i] = byte(c)
- }
- return
- }
- func hasNetlinkAddr(al []*netlink.Addr, x netlink.Addr) bool {
- for i := range al {
- if al[i].Equal(x) {
- return true
- }
- }
- return false
- }
- // addIPs uses netlink to add all addresses that don't exist, then it removes ones that should not be there
- func (t *tun) addIPs(link netlink.Link) error {
- newAddrs := make([]*netlink.Addr, len(t.vpnNetworks))
- for i := range t.vpnNetworks {
- newAddrs[i] = &netlink.Addr{
- IPNet: &net.IPNet{
- IP: t.vpnNetworks[i].Addr().AsSlice(),
- Mask: net.CIDRMask(t.vpnNetworks[i].Bits(), t.vpnNetworks[i].Addr().BitLen()),
- },
- Label: t.vpnNetworks[i].Addr().Zone(),
- }
- }
- //add all new addresses
- for i := range newAddrs {
- //TODO: CERT-V2 do we want to stack errors and try as many ops as possible?
- //AddrReplace still adds new IPs, but if their properties change it will change them as well
- if err := netlink.AddrReplace(link, newAddrs[i]); err != nil {
- return err
- }
- }
- //iterate over remainder, remove whoever shouldn't be there
- al, err := netlink.AddrList(link, netlink.FAMILY_ALL)
- if err != nil {
- return fmt.Errorf("failed to get tun address list: %s", err)
- }
- for i := range al {
- if hasNetlinkAddr(newAddrs, al[i]) {
- continue
- }
- err = netlink.AddrDel(link, &al[i])
- if err != nil {
- t.l.WithError(err).Error("failed to remove address from tun address list")
- } else {
- t.l.WithField("removed", al[i].String()).Info("removed address not listed in cert(s)")
- }
- }
- return nil
- }
- func (t *tun) Activate() error {
- devName := t.deviceBytes()
- if t.useSystemRoutes {
- t.watchRoutes()
- }
- s, err := unix.Socket(
- unix.AF_INET, //because everything we use t.ioctlFd for is address family independent, this is fine
- unix.SOCK_DGRAM,
- unix.IPPROTO_IP,
- )
- if err != nil {
- return err
- }
- t.ioctlFd = uintptr(s)
- // Set the device name
- ifrf := ifReq{Name: devName}
- if err = ioctl(t.ioctlFd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
- return fmt.Errorf("failed to set tun device name: %s", err)
- }
- link, err := netlink.LinkByName(t.Device)
- if err != nil {
- return fmt.Errorf("failed to get tun device link: %s", err)
- }
- t.deviceIndex = link.Attrs().Index
- // Setup our default MTU
- t.setMTU()
- // Set the transmit queue length
- ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)}
- if err = ioctl(t.ioctlFd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
- // If we can't set the queue length nebula will still work but it may lead to packet loss
- t.l.WithError(err).Error("Failed to set tun tx queue length")
- }
- if err = t.addIPs(link); err != nil {
- return err
- }
- // Bring up the interface
- ifrf.Flags = ifrf.Flags | unix.IFF_UP
- if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
- return fmt.Errorf("failed to bring the tun device up: %s", err)
- }
- //set route MTU
- for i := range t.vpnNetworks {
- if err = t.setDefaultRoute(t.vpnNetworks[i]); err != nil {
- return fmt.Errorf("failed to set default route MTU: %w", err)
- }
- }
- // Set the routes
- if err = t.addRoutes(false); err != nil {
- return err
- }
- // Run the interface
- ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
- if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
- return fmt.Errorf("failed to run tun device: %s", err)
- }
- return nil
- }
- func (t *tun) setMTU() {
- // Set the MTU on the device
- ifm := ifreqMTU{Name: t.deviceBytes(), MTU: int32(t.MaxMTU)}
- if err := ioctl(t.ioctlFd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
- // This is currently a non fatal condition because the route table must have the MTU set appropriately as well
- t.l.WithError(err).Error("Failed to set tun mtu")
- }
- }
- func (t *tun) setDefaultRoute(cidr netip.Prefix) error {
- dr := &net.IPNet{
- IP: cidr.Masked().Addr().AsSlice(),
- Mask: net.CIDRMask(cidr.Bits(), cidr.Addr().BitLen()),
- }
- nr := netlink.Route{
- LinkIndex: t.deviceIndex,
- Dst: dr,
- MTU: t.DefaultMTU,
- AdvMSS: t.advMSS(Route{}),
- Scope: unix.RT_SCOPE_LINK,
- Src: net.IP(cidr.Addr().AsSlice()),
- Protocol: unix.RTPROT_KERNEL,
- Table: unix.RT_TABLE_MAIN,
- Type: unix.RTN_UNICAST,
- }
- err := netlink.RouteReplace(&nr)
- if err != nil {
- t.l.WithError(err).WithField("cidr", cidr).Warn("Failed to set default route MTU, retrying")
- //retry twice more -- on some systems there appears to be a race condition where if we set routes too soon, netlink says `invalid argument`
- for i := 0; i < 2; i++ {
- time.Sleep(100 * time.Millisecond)
- err = netlink.RouteReplace(&nr)
- if err == nil {
- break
- } else {
- t.l.WithError(err).WithField("cidr", cidr).WithField("mtu", t.DefaultMTU).Warn("Failed to set default route MTU, retrying")
- }
- }
- if err != nil {
- return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
- }
- }
- return nil
- }
- func (t *tun) addRoutes(logErrors bool) error {
- // Path routes
- routes := *t.Routes.Load()
- for _, r := range routes {
- if !r.Install {
- continue
- }
- dr := &net.IPNet{
- IP: r.Cidr.Masked().Addr().AsSlice(),
- Mask: net.CIDRMask(r.Cidr.Bits(), r.Cidr.Addr().BitLen()),
- }
- nr := netlink.Route{
- LinkIndex: t.deviceIndex,
- Dst: dr,
- MTU: r.MTU,
- AdvMSS: t.advMSS(r),
- Scope: unix.RT_SCOPE_LINK,
- }
- if r.Metric > 0 {
- nr.Priority = r.Metric
- }
- err := netlink.RouteReplace(&nr)
- if err != nil {
- retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err)
- if logErrors {
- retErr.Log(t.l)
- } else {
- return retErr
- }
- } else {
- t.l.WithField("route", r).Info("Added route")
- }
- }
- return nil
- }
- func (t *tun) removeRoutes(routes []Route) {
- for _, r := range routes {
- if !r.Install {
- continue
- }
- dr := &net.IPNet{
- IP: r.Cidr.Masked().Addr().AsSlice(),
- Mask: net.CIDRMask(r.Cidr.Bits(), r.Cidr.Addr().BitLen()),
- }
- nr := netlink.Route{
- LinkIndex: t.deviceIndex,
- Dst: dr,
- MTU: r.MTU,
- AdvMSS: t.advMSS(r),
- Scope: unix.RT_SCOPE_LINK,
- }
- if r.Metric > 0 {
- nr.Priority = r.Metric
- }
- err := netlink.RouteDel(&nr)
- if err != nil {
- t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
- } else {
- t.l.WithField("route", r).Info("Removed route")
- }
- }
- }
- func (t *tun) Name() string {
- return t.Device
- }
- func (t *tun) advMSS(r Route) int {
- mtu := r.MTU
- if r.MTU == 0 {
- mtu = t.DefaultMTU
- }
- // We only need to set advmss if the route MTU does not match the device MTU
- if mtu != t.MaxMTU {
- return mtu - 40
- }
- return 0
- }
- func (t *tun) watchRoutes() {
- rch := make(chan netlink.RouteUpdate)
- doneChan := make(chan struct{})
- if err := netlink.RouteSubscribe(rch, doneChan); err != nil {
- t.l.WithError(err).Errorf("failed to subscribe to system route changes")
- return
- }
- t.routeChan = doneChan
- go func() {
- for {
- select {
- case r := <-rch:
- t.updateRoutes(r)
- case <-doneChan:
- // netlink.RouteSubscriber will close the rch for us
- return
- }
- }
- }()
- }
- func (t *tun) updateRoutes(r netlink.RouteUpdate) {
- if r.Gw == nil {
- // Not a gateway route, ignore
- t.l.WithField("route", r).Debug("Ignoring route update, not a gateway route")
- return
- }
- gwAddr, ok := netip.AddrFromSlice(r.Gw)
- if !ok {
- t.l.WithField("route", r).Debug("Ignoring route update, invalid gateway address")
- return
- }
- gwAddr = gwAddr.Unmap()
- withinNetworks := false
- for i := range t.vpnNetworks {
- if t.vpnNetworks[i].Contains(gwAddr) {
- withinNetworks = true
- break
- }
- }
- if !withinNetworks {
- // Gateway isn't in our overlay network, ignore
- t.l.WithField("route", r).Debug("Ignoring route update, not in our networks")
- return
- }
- dstAddr, ok := netip.AddrFromSlice(r.Dst.IP)
- if !ok {
- t.l.WithField("route", r).Debug("Ignoring route update, invalid destination address")
- return
- }
- ones, _ := r.Dst.Mask.Size()
- dst := netip.PrefixFrom(dstAddr, ones)
- newTree := t.routeTree.Load().Clone()
- if r.Type == unix.RTM_NEWROUTE {
- t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Adding route")
- newTree.Insert(dst, gwAddr)
- } else {
- newTree.Delete(dst)
- t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Removing route")
- }
- t.routeTree.Store(newTree)
- }
- func (t *tun) Close() error {
- if t.routeChan != nil {
- close(t.routeChan)
- }
- if t.ReadWriteCloser != nil {
- _ = t.ReadWriteCloser.Close()
- }
- if t.ioctlFd > 0 {
- _ = os.NewFile(t.ioctlFd, "ioctlFd").Close()
- }
- return nil
- }
|