tun_linux.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. //go:build !android && !e2e_testing
  2. // +build !android,!e2e_testing
  3. package overlay
  4. import (
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "net"
  9. "os"
  10. "strings"
  11. "sync/atomic"
  12. "unsafe"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/cidr"
  15. "github.com/slackhq/nebula/config"
  16. "github.com/slackhq/nebula/iputil"
  17. "github.com/slackhq/nebula/util"
  18. "github.com/vishvananda/netlink"
  19. "golang.org/x/sys/unix"
  20. )
  // tun holds an opened Linux tun device together with the MTU, queue and
  // routing state that this file manages for it.
  type tun struct {
  	// ReadWriteCloser is the opened device file.
  	io.ReadWriteCloser
  	// fd is the raw descriptor of the device file; Write uses it directly.
  	fd int
  	// Device is the interface name.
  	Device string
  	// cidr is the address and network assigned to the device in Activate.
  	cidr *net.IPNet
  	// MaxMTU is the MTU applied to the device itself by setMTU.
  	MaxMTU int
  	// DefaultMTU is the MTU given to routes that do not specify their own.
  	DefaultMTU int
  	// TXQueueLen is the transmit queue length applied in Activate.
  	TXQueueLen int
  	// deviceIndex is the link index looked up in Activate and used when
  	// building netlink routes.
  	deviceIndex int
  	// ioctlFd is the socket opened in Activate and used for interface ioctls.
  	ioctlFd uintptr
  	// Routes is the current route list; reload swaps it atomically.
  	Routes atomic.Pointer[[]Route]
  	// routeTree backs RouteFor; it is replaced by reload and updateRoutes.
  	routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]]
  	// routeChan is the done channel of the route subscription started by
  	// watchRoutes; Close closes it.
  	routeChan chan struct{}
  	// useSystemRoutes makes Activate start watchRoutes when true.
  	useSystemRoutes bool
  	// l is the logger used by every method.
  	l *logrus.Logger
  }
  // ifReq is the request structure passed to the TUNSETIFF, SIOCGIFFLAGS and
  // SIOCSIFFLAGS ioctls in this file.
  type ifReq struct {
  	// Name is the interface name, zero padded.
  	Name [16]byte
  	// Flags carries the interface flags being set or read back.
  	Flags uint16
  	// pad is never read or written by this file.
  	pad [8]byte
  }
  // ifreqAddr is the request structure passed to the SIOCSIFADDR and
  // SIOCSIFNETMASK ioctls in Activate.
  type ifreqAddr struct {
  	// Name is the interface name, zero padded.
  	Name [16]byte
  	// Addr holds the IPv4 address or the netmask, depending on the ioctl.
  	Addr unix.RawSockaddrInet4
  	// pad is never read or written by this file.
  	pad [8]byte
  }
  // ifreqMTU is the request structure passed to the SIOCSIFMTU ioctl in setMTU.
  type ifreqMTU struct {
  	// Name is the interface name, zero padded.
  	Name [16]byte
  	// MTU is the MTU to apply to the device.
  	MTU int32
  	// pad is never read or written by this file.
  	pad [8]byte
  }
  // ifreqQLEN is the request structure passed to the SIOCSIFTXQLEN ioctl in
  // Activate.
  type ifreqQLEN struct {
  	// Name is the interface name, zero padded.
  	Name [16]byte
  	// Value is the transmit queue length to apply.
  	Value int32
  	// pad is never read or written by this file.
  	pad [8]byte
  }
  57. func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) {
  58. file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
  59. t, err := newTunGeneric(c, l, file, cidr)
  60. if err != nil {
  61. return nil, err
  62. }
  63. t.Device = "tun0"
  64. return t, nil
  65. }
  66. func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, multiqueue bool) (*tun, error) {
  67. fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
  68. if err != nil {
  69. // If /dev/net/tun doesn't exist, try to create it (will happen in docker)
  70. if os.IsNotExist(err) {
  71. err = os.MkdirAll("/dev/net", 0755)
  72. if err != nil {
  73. return nil, fmt.Errorf("/dev/net/tun doesn't exist, failed to mkdir -p /dev/net: %w", err)
  74. }
  75. err = unix.Mknod("/dev/net/tun", unix.S_IFCHR|0600, int(unix.Mkdev(10, 200)))
  76. if err != nil {
  77. return nil, fmt.Errorf("failed to create /dev/net/tun: %w", err)
  78. }
  79. fd, err = unix.Open("/dev/net/tun", os.O_RDWR, 0)
  80. if err != nil {
  81. return nil, fmt.Errorf("created /dev/net/tun, but still failed: %w", err)
  82. }
  83. } else {
  84. return nil, err
  85. }
  86. }
  87. var req ifReq
  88. req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI)
  89. if multiqueue {
  90. req.Flags |= unix.IFF_MULTI_QUEUE
  91. }
  92. copy(req.Name[:], c.GetString("tun.dev", ""))
  93. if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
  94. return nil, err
  95. }
  96. name := strings.Trim(string(req.Name[:]), "\x00")
  97. file := os.NewFile(uintptr(fd), "/dev/net/tun")
  98. t, err := newTunGeneric(c, l, file, cidr)
  99. if err != nil {
  100. return nil, err
  101. }
  102. t.Device = name
  103. return t, nil
  104. }
  105. func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr *net.IPNet) (*tun, error) {
  106. t := &tun{
  107. ReadWriteCloser: file,
  108. fd: int(file.Fd()),
  109. cidr: cidr,
  110. TXQueueLen: c.GetInt("tun.tx_queue", 500),
  111. useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
  112. l: l,
  113. }
  114. err := t.reload(c, true)
  115. if err != nil {
  116. return nil, err
  117. }
  118. c.RegisterReloadCallback(func(c *config.C) {
  119. err := t.reload(c, false)
  120. if err != nil {
  121. util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
  122. }
  123. })
  124. return t, nil
  125. }
  126. func (t *tun) reload(c *config.C, initial bool) error {
  127. routeChange, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
  128. if err != nil {
  129. return err
  130. }
  131. if !initial && !routeChange && !c.HasChanged("tun.mtu") {
  132. return nil
  133. }
  134. routeTree, err := makeRouteTree(t.l, routes, true)
  135. if err != nil {
  136. return err
  137. }
  138. oldDefaultMTU := t.DefaultMTU
  139. oldMaxMTU := t.MaxMTU
  140. newDefaultMTU := c.GetInt("tun.mtu", DefaultMTU)
  141. newMaxMTU := newDefaultMTU
  142. for i, r := range routes {
  143. if r.MTU == 0 {
  144. routes[i].MTU = newDefaultMTU
  145. }
  146. if r.MTU > t.MaxMTU {
  147. newMaxMTU = r.MTU
  148. }
  149. }
  150. t.MaxMTU = newMaxMTU
  151. t.DefaultMTU = newDefaultMTU
  152. // Teach nebula how to handle the routes before establishing them in the system table
  153. oldRoutes := t.Routes.Swap(&routes)
  154. t.routeTree.Store(routeTree)
  155. if !initial {
  156. if oldMaxMTU != newMaxMTU {
  157. t.setMTU()
  158. t.l.Infof("Set max MTU to %v was %v", t.MaxMTU, oldMaxMTU)
  159. }
  160. if oldDefaultMTU != newDefaultMTU {
  161. err := t.setDefaultRoute()
  162. if err != nil {
  163. t.l.Warn(err)
  164. } else {
  165. t.l.Infof("Set default MTU to %v was %v", t.DefaultMTU, oldDefaultMTU)
  166. }
  167. }
  168. // Remove first, if the system removes a wanted route hopefully it will be re-added next
  169. t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
  170. // Ensure any routes we actually want are installed
  171. err = t.addRoutes(true)
  172. if err != nil {
  173. // This should never be called since addRoutes should log its own errors in a reload condition
  174. util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l)
  175. }
  176. }
  177. return nil
  178. }
  179. func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
  180. fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
  181. if err != nil {
  182. return nil, err
  183. }
  184. var req ifReq
  185. req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_MULTI_QUEUE)
  186. copy(req.Name[:], t.Device)
  187. if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
  188. return nil, err
  189. }
  190. file := os.NewFile(uintptr(fd), "/dev/net/tun")
  191. return file, nil
  192. }
  193. func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
  194. _, r := t.routeTree.Load().MostSpecificContains(ip)
  195. return r
  196. }
  197. func (t *tun) Write(b []byte) (int, error) {
  198. var nn int
  199. max := len(b)
  200. for {
  201. n, err := unix.Write(t.fd, b[nn:max])
  202. if n > 0 {
  203. nn += n
  204. }
  205. if nn == len(b) {
  206. return nn, err
  207. }
  208. if err != nil {
  209. return nn, err
  210. }
  211. if n == 0 {
  212. return nn, io.ErrUnexpectedEOF
  213. }
  214. }
  215. }
  216. func (t *tun) deviceBytes() (o [16]byte) {
  217. for i, c := range t.Device {
  218. o[i] = byte(c)
  219. }
  220. return
  221. }
  222. func (t *tun) Activate() error {
  223. devName := t.deviceBytes()
  224. if t.useSystemRoutes {
  225. t.watchRoutes()
  226. }
  227. var addr, mask [4]byte
  228. copy(addr[:], t.cidr.IP.To4())
  229. copy(mask[:], t.cidr.Mask)
  230. s, err := unix.Socket(
  231. unix.AF_INET,
  232. unix.SOCK_DGRAM,
  233. unix.IPPROTO_IP,
  234. )
  235. if err != nil {
  236. return err
  237. }
  238. t.ioctlFd = uintptr(s)
  239. ifra := ifreqAddr{
  240. Name: devName,
  241. Addr: unix.RawSockaddrInet4{
  242. Family: unix.AF_INET,
  243. Addr: addr,
  244. },
  245. }
  246. // Set the device ip address
  247. if err = ioctl(t.ioctlFd, unix.SIOCSIFADDR, uintptr(unsafe.Pointer(&ifra))); err != nil {
  248. return fmt.Errorf("failed to set tun address: %s", err)
  249. }
  250. // Set the device network
  251. ifra.Addr.Addr = mask
  252. if err = ioctl(t.ioctlFd, unix.SIOCSIFNETMASK, uintptr(unsafe.Pointer(&ifra))); err != nil {
  253. return fmt.Errorf("failed to set tun netmask: %s", err)
  254. }
  255. // Set the device name
  256. ifrf := ifReq{Name: devName}
  257. if err = ioctl(t.ioctlFd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
  258. return fmt.Errorf("failed to set tun device name: %s", err)
  259. }
  260. // Setup our default MTU
  261. t.setMTU()
  262. // Set the transmit queue length
  263. ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)}
  264. if err = ioctl(t.ioctlFd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
  265. // If we can't set the queue length nebula will still work but it may lead to packet loss
  266. t.l.WithError(err).Error("Failed to set tun tx queue length")
  267. }
  268. // Bring up the interface
  269. ifrf.Flags = ifrf.Flags | unix.IFF_UP
  270. if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
  271. return fmt.Errorf("failed to bring the tun device up: %s", err)
  272. }
  273. link, err := netlink.LinkByName(t.Device)
  274. if err != nil {
  275. return fmt.Errorf("failed to get tun device link: %s", err)
  276. }
  277. t.deviceIndex = link.Attrs().Index
  278. if err = t.setDefaultRoute(); err != nil {
  279. return err
  280. }
  281. // Set the routes
  282. if err = t.addRoutes(false); err != nil {
  283. return err
  284. }
  285. // Run the interface
  286. ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
  287. if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
  288. return fmt.Errorf("failed to run tun device: %s", err)
  289. }
  290. return nil
  291. }
  292. func (t *tun) setMTU() {
  293. // Set the MTU on the device
  294. ifm := ifreqMTU{Name: t.deviceBytes(), MTU: int32(t.MaxMTU)}
  295. if err := ioctl(t.ioctlFd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
  296. // This is currently a non fatal condition because the route table must have the MTU set appropriately as well
  297. t.l.WithError(err).Error("Failed to set tun mtu")
  298. }
  299. }
  300. func (t *tun) setDefaultRoute() error {
  301. // Default route
  302. dr := &net.IPNet{IP: t.cidr.IP.Mask(t.cidr.Mask), Mask: t.cidr.Mask}
  303. nr := netlink.Route{
  304. LinkIndex: t.deviceIndex,
  305. Dst: dr,
  306. MTU: t.DefaultMTU,
  307. AdvMSS: t.advMSS(Route{}),
  308. Scope: unix.RT_SCOPE_LINK,
  309. Src: t.cidr.IP,
  310. Protocol: unix.RTPROT_KERNEL,
  311. Table: unix.RT_TABLE_MAIN,
  312. Type: unix.RTN_UNICAST,
  313. }
  314. err := netlink.RouteReplace(&nr)
  315. if err != nil {
  316. return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
  317. }
  318. return nil
  319. }
  320. func (t *tun) addRoutes(logErrors bool) error {
  321. // Path routes
  322. routes := *t.Routes.Load()
  323. for _, r := range routes {
  324. if !r.Install {
  325. continue
  326. }
  327. nr := netlink.Route{
  328. LinkIndex: t.deviceIndex,
  329. Dst: r.Cidr,
  330. MTU: r.MTU,
  331. AdvMSS: t.advMSS(r),
  332. Scope: unix.RT_SCOPE_LINK,
  333. }
  334. if r.Metric > 0 {
  335. nr.Priority = r.Metric
  336. }
  337. err := netlink.RouteReplace(&nr)
  338. if err != nil {
  339. retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err)
  340. if logErrors {
  341. retErr.Log(t.l)
  342. } else {
  343. return retErr
  344. }
  345. } else {
  346. t.l.WithField("route", r).Info("Added route")
  347. }
  348. }
  349. return nil
  350. }
  351. func (t *tun) removeRoutes(routes []Route) {
  352. for _, r := range routes {
  353. if !r.Install {
  354. continue
  355. }
  356. nr := netlink.Route{
  357. LinkIndex: t.deviceIndex,
  358. Dst: r.Cidr,
  359. MTU: r.MTU,
  360. AdvMSS: t.advMSS(r),
  361. Scope: unix.RT_SCOPE_LINK,
  362. }
  363. if r.Metric > 0 {
  364. nr.Priority = r.Metric
  365. }
  366. err := netlink.RouteDel(&nr)
  367. if err != nil {
  368. t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
  369. } else {
  370. t.l.WithField("route", r).Info("Removed route")
  371. }
  372. }
  373. }
  // Cidr returns the address and network assigned to the device.
  func (t *tun) Cidr() *net.IPNet {
  	return t.cidr
  }
  // Name returns the interface name of the device.
  func (t *tun) Name() string {
  	return t.Device
  }
  380. func (t *tun) advMSS(r Route) int {
  381. mtu := r.MTU
  382. if r.MTU == 0 {
  383. mtu = t.DefaultMTU
  384. }
  385. // We only need to set advmss if the route MTU does not match the device MTU
  386. if mtu != t.MaxMTU {
  387. return mtu - 40
  388. }
  389. return 0
  390. }
  391. func (t *tun) watchRoutes() {
  392. rch := make(chan netlink.RouteUpdate)
  393. doneChan := make(chan struct{})
  394. if err := netlink.RouteSubscribe(rch, doneChan); err != nil {
  395. t.l.WithError(err).Errorf("failed to subscribe to system route changes")
  396. return
  397. }
  398. t.routeChan = doneChan
  399. go func() {
  400. for {
  401. select {
  402. case r := <-rch:
  403. t.updateRoutes(r)
  404. case <-doneChan:
  405. // netlink.RouteSubscriber will close the rch for us
  406. return
  407. }
  408. }
  409. }()
  410. }
  411. func (t *tun) updateRoutes(r netlink.RouteUpdate) {
  412. if r.Gw == nil {
  413. // Not a gateway route, ignore
  414. t.l.WithField("route", r).Debug("Ignoring route update, not a gateway route")
  415. return
  416. }
  417. if !t.cidr.Contains(r.Gw) {
  418. // Gateway isn't in our overlay network, ignore
  419. t.l.WithField("route", r).Debug("Ignoring route update, not in our network")
  420. return
  421. }
  422. if x := r.Dst.IP.To4(); x == nil {
  423. // Nebula only handles ipv4 on the overlay currently
  424. t.l.WithField("route", r).Debug("Ignoring route update, destination is not ipv4")
  425. return
  426. }
  427. newTree := cidr.NewTree4[iputil.VpnIp]()
  428. if r.Type == unix.RTM_NEWROUTE {
  429. for _, oldR := range t.routeTree.Load().List() {
  430. newTree.AddCIDR(oldR.CIDR, oldR.Value)
  431. }
  432. t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Adding route")
  433. newTree.AddCIDR(r.Dst, iputil.Ip2VpnIp(r.Gw))
  434. } else {
  435. gw := iputil.Ip2VpnIp(r.Gw)
  436. for _, oldR := range t.routeTree.Load().List() {
  437. if bytes.Equal(oldR.CIDR.IP, r.Dst.IP) && bytes.Equal(oldR.CIDR.Mask, r.Dst.Mask) && oldR.Value == gw {
  438. // This is the record to delete
  439. t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Removing route")
  440. continue
  441. }
  442. newTree.AddCIDR(oldR.CIDR, oldR.Value)
  443. }
  444. }
  445. t.routeTree.Store(newTree)
  446. }
  447. func (t *tun) Close() error {
  448. if t.routeChan != nil {
  449. close(t.routeChan)
  450. }
  451. if t.ReadWriteCloser != nil {
  452. t.ReadWriteCloser.Close()
  453. }
  454. if t.ioctlFd > 0 {
  455. os.NewFile(t.ioctlFd, "ioctlFd").Close()
  456. }
  457. return nil
  458. }