tun_linux.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. //go:build !android && !e2e_testing
  2. // +build !android,!e2e_testing
  3. package overlay
  4. import (
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "net"
  9. "os"
  10. "strings"
  11. "sync/atomic"
  12. "unsafe"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/cidr"
  15. "github.com/slackhq/nebula/iputil"
  16. "github.com/vishvananda/netlink"
  17. "golang.org/x/sys/unix"
  18. )
  19. type tun struct {
  20. io.ReadWriteCloser
  21. fd int
  22. Device string
  23. cidr *net.IPNet
  24. MaxMTU int
  25. DefaultMTU int
  26. TXQueueLen int
  27. Routes []Route
  28. routeTree atomic.Pointer[cidr.Tree4]
  29. routeChan chan struct{}
  30. useSystemRoutes bool
  31. l *logrus.Logger
  32. }
  33. type ifReq struct {
  34. Name [16]byte
  35. Flags uint16
  36. pad [8]byte
  37. }
  38. func ioctl(a1, a2, a3 uintptr) error {
  39. _, _, errno := unix.Syscall(unix.SYS_IOCTL, a1, a2, a3)
  40. if errno != 0 {
  41. return errno
  42. }
  43. return nil
  44. }
  45. type ifreqAddr struct {
  46. Name [16]byte
  47. Addr unix.RawSockaddrInet4
  48. pad [8]byte
  49. }
  50. type ifreqMTU struct {
  51. Name [16]byte
  52. MTU int32
  53. pad [8]byte
  54. }
  55. type ifreqQLEN struct {
  56. Name [16]byte
  57. Value int32
  58. pad [8]byte
  59. }
  60. func newTunFromFd(l *logrus.Logger, deviceFd int, cidr *net.IPNet, defaultMTU int, routes []Route, txQueueLen int, useSystemRoutes bool) (*tun, error) {
  61. routeTree, err := makeRouteTree(l, routes, true)
  62. if err != nil {
  63. return nil, err
  64. }
  65. file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
  66. t := &tun{
  67. ReadWriteCloser: file,
  68. fd: int(file.Fd()),
  69. Device: "tun0",
  70. cidr: cidr,
  71. DefaultMTU: defaultMTU,
  72. TXQueueLen: txQueueLen,
  73. Routes: routes,
  74. useSystemRoutes: useSystemRoutes,
  75. l: l,
  76. }
  77. t.routeTree.Store(routeTree)
  78. return t, nil
  79. }
  80. func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, txQueueLen int, multiqueue bool, useSystemRoutes bool) (*tun, error) {
  81. fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
  82. if err != nil {
  83. return nil, err
  84. }
  85. var req ifReq
  86. req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI)
  87. if multiqueue {
  88. req.Flags |= unix.IFF_MULTI_QUEUE
  89. }
  90. copy(req.Name[:], deviceName)
  91. if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
  92. return nil, err
  93. }
  94. name := strings.Trim(string(req.Name[:]), "\x00")
  95. file := os.NewFile(uintptr(fd), "/dev/net/tun")
  96. maxMTU := defaultMTU
  97. for _, r := range routes {
  98. if r.MTU == 0 {
  99. r.MTU = defaultMTU
  100. }
  101. if r.MTU > maxMTU {
  102. maxMTU = r.MTU
  103. }
  104. }
  105. routeTree, err := makeRouteTree(l, routes, true)
  106. if err != nil {
  107. return nil, err
  108. }
  109. t := &tun{
  110. ReadWriteCloser: file,
  111. fd: int(file.Fd()),
  112. Device: name,
  113. cidr: cidr,
  114. MaxMTU: maxMTU,
  115. DefaultMTU: defaultMTU,
  116. TXQueueLen: txQueueLen,
  117. Routes: routes,
  118. useSystemRoutes: useSystemRoutes,
  119. l: l,
  120. }
  121. t.routeTree.Store(routeTree)
  122. return t, nil
  123. }
  124. func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
  125. fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
  126. if err != nil {
  127. return nil, err
  128. }
  129. var req ifReq
  130. req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_MULTI_QUEUE)
  131. copy(req.Name[:], t.Device)
  132. if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
  133. return nil, err
  134. }
  135. file := os.NewFile(uintptr(fd), "/dev/net/tun")
  136. return file, nil
  137. }
  138. func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp {
  139. r := t.routeTree.Load().MostSpecificContains(ip)
  140. if r != nil {
  141. return r.(iputil.VpnIp)
  142. }
  143. return 0
  144. }
  145. func (t *tun) Write(b []byte) (int, error) {
  146. var nn int
  147. max := len(b)
  148. for {
  149. n, err := unix.Write(t.fd, b[nn:max])
  150. if n > 0 {
  151. nn += n
  152. }
  153. if nn == len(b) {
  154. return nn, err
  155. }
  156. if err != nil {
  157. return nn, err
  158. }
  159. if n == 0 {
  160. return nn, io.ErrUnexpectedEOF
  161. }
  162. }
  163. }
  164. func (t *tun) deviceBytes() (o [16]byte) {
  165. for i, c := range t.Device {
  166. o[i] = byte(c)
  167. }
  168. return
  169. }
  170. func (t *tun) Activate() error {
  171. devName := t.deviceBytes()
  172. if t.useSystemRoutes {
  173. t.watchRoutes()
  174. }
  175. var addr, mask [4]byte
  176. copy(addr[:], t.cidr.IP.To4())
  177. copy(mask[:], t.cidr.Mask)
  178. s, err := unix.Socket(
  179. unix.AF_INET,
  180. unix.SOCK_DGRAM,
  181. unix.IPPROTO_IP,
  182. )
  183. if err != nil {
  184. return err
  185. }
  186. fd := uintptr(s)
  187. ifra := ifreqAddr{
  188. Name: devName,
  189. Addr: unix.RawSockaddrInet4{
  190. Family: unix.AF_INET,
  191. Addr: addr,
  192. },
  193. }
  194. // Set the device ip address
  195. if err = ioctl(fd, unix.SIOCSIFADDR, uintptr(unsafe.Pointer(&ifra))); err != nil {
  196. return fmt.Errorf("failed to set tun address: %s", err)
  197. }
  198. // Set the device network
  199. ifra.Addr.Addr = mask
  200. if err = ioctl(fd, unix.SIOCSIFNETMASK, uintptr(unsafe.Pointer(&ifra))); err != nil {
  201. return fmt.Errorf("failed to set tun netmask: %s", err)
  202. }
  203. // Set the device name
  204. ifrf := ifReq{Name: devName}
  205. if err = ioctl(fd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
  206. return fmt.Errorf("failed to set tun device name: %s", err)
  207. }
  208. // Set the MTU on the device
  209. ifm := ifreqMTU{Name: devName, MTU: int32(t.MaxMTU)}
  210. if err = ioctl(fd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
  211. // This is currently a non fatal condition because the route table must have the MTU set appropriately as well
  212. t.l.WithError(err).Error("Failed to set tun mtu")
  213. }
  214. // Set the transmit queue length
  215. ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)}
  216. if err = ioctl(fd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
  217. // If we can't set the queue length nebula will still work but it may lead to packet loss
  218. t.l.WithError(err).Error("Failed to set tun tx queue length")
  219. }
  220. // Bring up the interface
  221. ifrf.Flags = ifrf.Flags | unix.IFF_UP
  222. if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
  223. return fmt.Errorf("failed to bring the tun device up: %s", err)
  224. }
  225. // Set the routes
  226. link, err := netlink.LinkByName(t.Device)
  227. if err != nil {
  228. return fmt.Errorf("failed to get tun device link: %s", err)
  229. }
  230. // Default route
  231. dr := &net.IPNet{IP: t.cidr.IP.Mask(t.cidr.Mask), Mask: t.cidr.Mask}
  232. nr := netlink.Route{
  233. LinkIndex: link.Attrs().Index,
  234. Dst: dr,
  235. MTU: t.DefaultMTU,
  236. AdvMSS: t.advMSS(Route{}),
  237. Scope: unix.RT_SCOPE_LINK,
  238. Src: t.cidr.IP,
  239. Protocol: unix.RTPROT_KERNEL,
  240. Table: unix.RT_TABLE_MAIN,
  241. Type: unix.RTN_UNICAST,
  242. }
  243. err = netlink.RouteReplace(&nr)
  244. if err != nil {
  245. return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
  246. }
  247. // Path routes
  248. for _, r := range t.Routes {
  249. if !r.Install {
  250. continue
  251. }
  252. nr := netlink.Route{
  253. LinkIndex: link.Attrs().Index,
  254. Dst: r.Cidr,
  255. MTU: r.MTU,
  256. AdvMSS: t.advMSS(r),
  257. Scope: unix.RT_SCOPE_LINK,
  258. }
  259. if r.Metric > 0 {
  260. nr.Priority = r.Metric
  261. }
  262. err = netlink.RouteAdd(&nr)
  263. if err != nil {
  264. return fmt.Errorf("failed to set mtu %v on route %v; %v", r.MTU, r.Cidr, err)
  265. }
  266. }
  267. // Run the interface
  268. ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
  269. if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
  270. return fmt.Errorf("failed to run tun device: %s", err)
  271. }
  272. return nil
  273. }
  274. func (t *tun) Cidr() *net.IPNet {
  275. return t.cidr
  276. }
  277. func (t *tun) Name() string {
  278. return t.Device
  279. }
  280. func (t *tun) advMSS(r Route) int {
  281. mtu := r.MTU
  282. if r.MTU == 0 {
  283. mtu = t.DefaultMTU
  284. }
  285. // We only need to set advmss if the route MTU does not match the device MTU
  286. if mtu != t.MaxMTU {
  287. return mtu - 40
  288. }
  289. return 0
  290. }
  291. func (t *tun) watchRoutes() {
  292. rch := make(chan netlink.RouteUpdate)
  293. doneChan := make(chan struct{})
  294. if err := netlink.RouteSubscribe(rch, doneChan); err != nil {
  295. t.l.WithError(err).Errorf("failed to subscribe to system route changes")
  296. return
  297. }
  298. t.routeChan = doneChan
  299. go func() {
  300. for {
  301. select {
  302. case r := <-rch:
  303. t.updateRoutes(r)
  304. case <-doneChan:
  305. // netlink.RouteSubscriber will close the rch for us
  306. return
  307. }
  308. }
  309. }()
  310. }
  311. func (t *tun) updateRoutes(r netlink.RouteUpdate) {
  312. if r.Gw == nil {
  313. // Not a gateway route, ignore
  314. t.l.WithField("route", r).Debug("Ignoring route update, not a gateway route")
  315. return
  316. }
  317. if !t.cidr.Contains(r.Gw) {
  318. // Gateway isn't in our overlay network, ignore
  319. t.l.WithField("route", r).Debug("Ignoring route update, not in our network")
  320. return
  321. }
  322. if x := r.Dst.IP.To4(); x == nil {
  323. // Nebula only handles ipv4 on the overlay currently
  324. t.l.WithField("route", r).Debug("Ignoring route update, destination is not ipv4")
  325. return
  326. }
  327. newTree := cidr.NewTree4()
  328. if r.Type == unix.RTM_NEWROUTE {
  329. for _, oldR := range t.routeTree.Load().List() {
  330. newTree.AddCIDR(oldR.CIDR, oldR.Value)
  331. }
  332. t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Adding route")
  333. newTree.AddCIDR(r.Dst, iputil.Ip2VpnIp(r.Gw))
  334. } else {
  335. gw := iputil.Ip2VpnIp(r.Gw)
  336. for _, oldR := range t.routeTree.Load().List() {
  337. if bytes.Equal(oldR.CIDR.IP, r.Dst.IP) && bytes.Equal(oldR.CIDR.Mask, r.Dst.Mask) && *oldR.Value != nil && (*oldR.Value).(iputil.VpnIp) == gw {
  338. // This is the record to delete
  339. t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Removing route")
  340. continue
  341. }
  342. newTree.AddCIDR(oldR.CIDR, oldR.Value)
  343. }
  344. }
  345. t.routeTree.Store(newTree)
  346. }
  347. func (t *tun) Close() error {
  348. if t.routeChan != nil {
  349. close(t.routeChan)
  350. }
  351. if t.ReadWriteCloser != nil {
  352. t.ReadWriteCloser.Close()
  353. }
  354. return nil
  355. }