123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553 |
- //go:build !android && !e2e_testing
- // +build !android,!e2e_testing
- package overlay
- import (
- "bytes"
- "fmt"
- "io"
- "net"
- "os"
- "strings"
- "sync/atomic"
- "unsafe"
- "github.com/sirupsen/logrus"
- "github.com/slackhq/nebula/cidr"
- "github.com/slackhq/nebula/config"
- "github.com/slackhq/nebula/iputil"
- "github.com/slackhq/nebula/util"
- "github.com/vishvananda/netlink"
- "golang.org/x/sys/unix"
- )
- type tun struct {
- io.ReadWriteCloser
- fd int
- Device string
- cidr *net.IPNet
- MaxMTU int
- DefaultMTU int
- TXQueueLen int
- deviceIndex int
- ioctlFd uintptr
- Routes atomic.Pointer[[]Route]
- routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
- routeChan chan struct{}
- useSystemRoutes bool
- l *logrus.Logger
- }
// ifReq is the request buffer passed by pointer to the TUNSETIFF,
// SIOCGIFFLAGS and SIOCSIFFLAGS ioctls in this file. The field order and sizes
// are what the kernel reads, so they must not be rearranged.
type ifReq struct {
	// Name is the interface name; unused trailing bytes stay zero.
	Name [16]byte
	// Flags carries the IFF_* bits being set or read back.
	Flags uint16
	// pad is filler after the flags; nothing in this file reads or writes it.
	pad [8]byte
}
- type ifreqAddr struct {
- Name [16]byte
- Addr unix.RawSockaddrInet4
- pad [8]byte
- }
// ifreqMTU is the request buffer passed by pointer to the SIOCSIFMTU ioctl in
// setMTU. The field order and sizes are what the kernel reads, so they must
// not be rearranged.
type ifreqMTU struct {
	// Name is the interface name; unused trailing bytes stay zero.
	Name [16]byte
	// MTU is the value to apply to the interface.
	MTU int32
	// pad is filler after the value; nothing in this file reads or writes it.
	pad [8]byte
}
// ifreqQLEN is the request buffer passed by pointer to the SIOCSIFTXQLEN ioctl
// in Activate. The field order and sizes are what the kernel reads, so they
// must not be rearranged.
type ifreqQLEN struct {
	// Name is the interface name; unused trailing bytes stay zero.
	Name [16]byte
	// Value is the transmit queue length to apply.
	Value int32
	// pad is filler after the value; nothing in this file reads or writes it.
	pad [8]byte
}
- func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) {
- file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
- t, err := newTunGeneric(c, l, file, cidr)
- if err != nil {
- return nil, err
- }
- t.Device = "tun0"
- return t, nil
- }
- func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, multiqueue bool) (*tun, error) {
- fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
- if err != nil {
- // If /dev/net/tun doesn't exist, try to create it (will happen in docker)
- if os.IsNotExist(err) {
- err = os.MkdirAll("/dev/net", 0755)
- if err != nil {
- return nil, fmt.Errorf("/dev/net/tun doesn't exist, failed to mkdir -p /dev/net: %w", err)
- }
- err = unix.Mknod("/dev/net/tun", unix.S_IFCHR|0600, int(unix.Mkdev(10, 200)))
- if err != nil {
- return nil, fmt.Errorf("failed to create /dev/net/tun: %w", err)
- }
- fd, err = unix.Open("/dev/net/tun", os.O_RDWR, 0)
- if err != nil {
- return nil, fmt.Errorf("created /dev/net/tun, but still failed: %w", err)
- }
- } else {
- return nil, err
- }
- }
- var req ifReq
- req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI)
- if multiqueue {
- req.Flags |= unix.IFF_MULTI_QUEUE
- }
- copy(req.Name[:], c.GetString("tun.dev", ""))
- if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
- return nil, err
- }
- name := strings.Trim(string(req.Name[:]), "\x00")
- file := os.NewFile(uintptr(fd), "/dev/net/tun")
- t, err := newTunGeneric(c, l, file, cidr)
- if err != nil {
- return nil, err
- }
- t.Device = name
- return t, nil
- }
- func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr *net.IPNet) (*tun, error) {
- t := &tun{
- ReadWriteCloser: file,
- fd: int(file.Fd()),
- cidr: cidr,
- TXQueueLen: c.GetInt("tun.tx_queue", 500),
- useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
- l: l,
- }
- err := t.reload(c, true)
- if err != nil {
- return nil, err
- }
- c.RegisterReloadCallback(func(c *config.C) {
- err := t.reload(c, false)
- if err != nil {
- util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
- }
- })
- return t, nil
- }
- func (t *tun) reload(c *config.C, initial bool) error {
- routeChange, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
- if err != nil {
- return err
- }
- if !initial && !routeChange && !c.HasChanged("tun.mtu") {
- return nil
- }
- routeTree, err := makeRouteTree(t.l, routes, true)
- if err != nil {
- return err
- }
- oldDefaultMTU := t.DefaultMTU
- oldMaxMTU := t.MaxMTU
- newDefaultMTU := c.GetInt("tun.mtu", DefaultMTU)
- newMaxMTU := newDefaultMTU
- for i, r := range routes {
- if r.MTU == 0 {
- routes[i].MTU = newDefaultMTU
- }
- if r.MTU > t.MaxMTU {
- newMaxMTU = r.MTU
- }
- }
- t.MaxMTU = newMaxMTU
- t.DefaultMTU = newDefaultMTU
- // Teach nebula how to handle the routes before establishing them in the system table
- oldRoutes := t.Routes.Swap(&routes)
- t.routeTree.Store(routeTree)
- if !initial {
- if oldMaxMTU != newMaxMTU {
- t.setMTU()
- t.l.Infof("Set max MTU to %v was %v", t.MaxMTU, oldMaxMTU)
- }
- if oldDefaultMTU != newDefaultMTU {
- err := t.setDefaultRoute()
- if err != nil {
- t.l.Warn(err)
- } else {
- t.l.Infof("Set default MTU to %v was %v", t.DefaultMTU, oldDefaultMTU)
- }
- }
- // Remove first, if the system removes a wanted route hopefully it will be re-added next
- t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
- // Ensure any routes we actually want are installed
- err = t.addRoutes(true)
- if err != nil {
- // This should never be called since addRoutes should log its own errors in a reload condition
- util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l)
- }
- }
- return nil
- }
- func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
- fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
- if err != nil {
- return nil, err
- }
- var req ifReq
- req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_MULTI_QUEUE)
- copy(req.Name[:], t.Device)
- if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
- return nil, err
- }
- file := os.NewFile(uintptr(fd), "/dev/net/tun")
- return file, nil
- }
- func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
- _, r := t.routeTree.Load().MostSpecificContains(ip)
- return r
- }
- func (t *tun) Write(b []byte) (int, error) {
- var nn int
- max := len(b)
- for {
- n, err := unix.Write(t.fd, b[nn:max])
- if n > 0 {
- nn += n
- }
- if nn == len(b) {
- return nn, err
- }
- if err != nil {
- return nn, err
- }
- if n == 0 {
- return nn, io.ErrUnexpectedEOF
- }
- }
- }
- func (t *tun) deviceBytes() (o [16]byte) {
- for i, c := range t.Device {
- o[i] = byte(c)
- }
- return
- }
- func (t *tun) Activate() error {
- devName := t.deviceBytes()
- if t.useSystemRoutes {
- t.watchRoutes()
- }
- var addr, mask [4]byte
- copy(addr[:], t.cidr.IP.To4())
- copy(mask[:], t.cidr.Mask)
- s, err := unix.Socket(
- unix.AF_INET,
- unix.SOCK_DGRAM,
- unix.IPPROTO_IP,
- )
- if err != nil {
- return err
- }
- t.ioctlFd = uintptr(s)
- ifra := ifreqAddr{
- Name: devName,
- Addr: unix.RawSockaddrInet4{
- Family: unix.AF_INET,
- Addr: addr,
- },
- }
- // Set the device ip address
- if err = ioctl(t.ioctlFd, unix.SIOCSIFADDR, uintptr(unsafe.Pointer(&ifra))); err != nil {
- return fmt.Errorf("failed to set tun address: %s", err)
- }
- // Set the device network
- ifra.Addr.Addr = mask
- if err = ioctl(t.ioctlFd, unix.SIOCSIFNETMASK, uintptr(unsafe.Pointer(&ifra))); err != nil {
- return fmt.Errorf("failed to set tun netmask: %s", err)
- }
- // Set the device name
- ifrf := ifReq{Name: devName}
- if err = ioctl(t.ioctlFd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
- return fmt.Errorf("failed to set tun device name: %s", err)
- }
- // Setup our default MTU
- t.setMTU()
- // Set the transmit queue length
- ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)}
- if err = ioctl(t.ioctlFd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
- // If we can't set the queue length nebula will still work but it may lead to packet loss
- t.l.WithError(err).Error("Failed to set tun tx queue length")
- }
- // Bring up the interface
- ifrf.Flags = ifrf.Flags | unix.IFF_UP
- if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
- return fmt.Errorf("failed to bring the tun device up: %s", err)
- }
- link, err := netlink.LinkByName(t.Device)
- if err != nil {
- return fmt.Errorf("failed to get tun device link: %s", err)
- }
- t.deviceIndex = link.Attrs().Index
- if err = t.setDefaultRoute(); err != nil {
- return err
- }
- // Set the routes
- if err = t.addRoutes(false); err != nil {
- return err
- }
- // Run the interface
- ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
- if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
- return fmt.Errorf("failed to run tun device: %s", err)
- }
- return nil
- }
- func (t *tun) setMTU() {
- // Set the MTU on the device
- ifm := ifreqMTU{Name: t.deviceBytes(), MTU: int32(t.MaxMTU)}
- if err := ioctl(t.ioctlFd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
- // This is currently a non fatal condition because the route table must have the MTU set appropriately as well
- t.l.WithError(err).Error("Failed to set tun mtu")
- }
- }
- func (t *tun) setDefaultRoute() error {
- // Default route
- dr := &net.IPNet{IP: t.cidr.IP.Mask(t.cidr.Mask), Mask: t.cidr.Mask}
- nr := netlink.Route{
- LinkIndex: t.deviceIndex,
- Dst: dr,
- MTU: t.DefaultMTU,
- AdvMSS: t.advMSS(Route{}),
- Scope: unix.RT_SCOPE_LINK,
- Src: t.cidr.IP,
- Protocol: unix.RTPROT_KERNEL,
- Table: unix.RT_TABLE_MAIN,
- Type: unix.RTN_UNICAST,
- }
- err := netlink.RouteReplace(&nr)
- if err != nil {
- return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
- }
- return nil
- }
- func (t *tun) addRoutes(logErrors bool) error {
- // Path routes
- routes := *t.Routes.Load()
- for _, r := range routes {
- if !r.Install {
- continue
- }
- nr := netlink.Route{
- LinkIndex: t.deviceIndex,
- Dst: r.Cidr,
- MTU: r.MTU,
- AdvMSS: t.advMSS(r),
- Scope: unix.RT_SCOPE_LINK,
- }
- if r.Metric > 0 {
- nr.Priority = r.Metric
- }
- err := netlink.RouteReplace(&nr)
- if err != nil {
- retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err)
- if logErrors {
- retErr.Log(t.l)
- } else {
- return retErr
- }
- } else {
- t.l.WithField("route", r).Info("Added route")
- }
- }
- return nil
- }
- func (t *tun) removeRoutes(routes []Route) {
- for _, r := range routes {
- if !r.Install {
- continue
- }
- nr := netlink.Route{
- LinkIndex: t.deviceIndex,
- Dst: r.Cidr,
- MTU: r.MTU,
- AdvMSS: t.advMSS(r),
- Scope: unix.RT_SCOPE_LINK,
- }
- if r.Metric > 0 {
- nr.Priority = r.Metric
- }
- err := netlink.RouteDel(&nr)
- if err != nil {
- t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
- } else {
- t.l.WithField("route", r).Info("Removed route")
- }
- }
- }
- func (t *tun) Cidr() *net.IPNet {
- return t.cidr
- }
- func (t *tun) Name() string {
- return t.Device
- }
- func (t *tun) advMSS(r Route) int {
- mtu := r.MTU
- if r.MTU == 0 {
- mtu = t.DefaultMTU
- }
- // We only need to set advmss if the route MTU does not match the device MTU
- if mtu != t.MaxMTU {
- return mtu - 40
- }
- return 0
- }
- func (t *tun) watchRoutes() {
- rch := make(chan netlink.RouteUpdate)
- doneChan := make(chan struct{})
- if err := netlink.RouteSubscribe(rch, doneChan); err != nil {
- t.l.WithError(err).Errorf("failed to subscribe to system route changes")
- return
- }
- t.routeChan = doneChan
- go func() {
- for {
- select {
- case r := <-rch:
- t.updateRoutes(r)
- case <-doneChan:
- // netlink.RouteSubscriber will close the rch for us
- return
- }
- }
- }()
- }
- func (t *tun) updateRoutes(r netlink.RouteUpdate) {
- if r.Gw == nil {
- // Not a gateway route, ignore
- t.l.WithField("route", r).Debug("Ignoring route update, not a gateway route")
- return
- }
- if !t.cidr.Contains(r.Gw) {
- // Gateway isn't in our overlay network, ignore
- t.l.WithField("route", r).Debug("Ignoring route update, not in our network")
- return
- }
- if x := r.Dst.IP.To4(); x == nil {
- // Nebula only handles ipv4 on the overlay currently
- t.l.WithField("route", r).Debug("Ignoring route update, destination is not ipv4")
- return
- }
- newTree := cidr.NewTree4[iputil.VpnIp]()
- if r.Type == unix.RTM_NEWROUTE {
- for _, oldR := range t.routeTree.Load().List() {
- newTree.AddCIDR(oldR.CIDR, oldR.Value)
- }
- t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Adding route")
- newTree.AddCIDR(r.Dst, iputil.Ip2VpnIp(r.Gw))
- } else {
- gw := iputil.Ip2VpnIp(r.Gw)
- for _, oldR := range t.routeTree.Load().List() {
- if bytes.Equal(oldR.CIDR.IP, r.Dst.IP) && bytes.Equal(oldR.CIDR.Mask, r.Dst.Mask) && oldR.Value == gw {
- // This is the record to delete
- t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Removing route")
- continue
- }
- newTree.AddCIDR(oldR.CIDR, oldR.Value)
- }
- }
- t.routeTree.Store(newTree)
- }
- func (t *tun) Close() error {
- if t.routeChan != nil {
- close(t.routeChan)
- }
- if t.ReadWriteCloser != nil {
- t.ReadWriteCloser.Close()
- }
- if t.ioctlFd > 0 {
- os.NewFile(t.ioctlFd, "ioctlFd").Close()
- }
- return nil
- }
|