interface.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. package nebula
  2. import (
  3. "context"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "net/netip"
  9. "os"
  10. "runtime"
  11. "sync/atomic"
  12. "time"
  13. "github.com/rcrowley/go-metrics"
  14. "github.com/sirupsen/logrus"
  15. "github.com/slackhq/nebula/config"
  16. "github.com/slackhq/nebula/firewall"
  17. "github.com/slackhq/nebula/header"
  18. "github.com/slackhq/nebula/overlay"
  19. "github.com/slackhq/nebula/udp"
  20. )
// mtu is the size, in bytes, of the packet and output buffers that listenIn
// allocates for each tun reader.
const mtu = 9001
// InterfaceConfig carries the dependencies and settings that NewInterface
// copies into a new Interface.
//
// NewInterface rejects a config whose Outside, Inside, pki, Firewall or
// connectionManager is nil; the remaining fields are copied without validation.
type InterfaceConfig struct {
	HostMap *HostMap
	// Outside is the udp connection; required.
	Outside udp.Conn
	// Inside is the overlay (tun) device; required.
	Inside overlay.Device
	// pki supplies the certificate state the VPN address and network are read from; required.
	pki    *PKI
	Cipher string
	// Firewall is required.
	Firewall         *Firewall
	ServeDns         bool
	HandshakeManager *HandshakeManager
	lightHouse       *LightHouse
	// connectionManager is required; NewInterface sets its intf field to the new Interface.
	connectionManager  *connectionManager
	DropLocalBroadcast bool
	DropMulticast      bool
	// routines sizes the Interface's writers and readers slices and is the
	// number of listenOut/listenIn goroutine pairs started by run.
	routines       int
	MessageMetrics *MessageMetrics
	version        string
	relayManager   *relayManager
	// NOTE(review): punchy is not read by NewInterface in this file — confirm whether it is still needed here.
	punchy *Punchy
	// tryPromoteEvery, reQueryEvery and reQueryWait are the initial values for
	// the Interface's atomic fields of the same names.
	tryPromoteEvery       uint32
	reQueryEvery          uint32
	reQueryWait           time.Duration
	ConntrackCacheTimeout time.Duration
	l                     *logrus.Logger
}
// Interface ties together the outside udp connection(s) and the inside overlay
// (tun) device, along with the managers, firewall and metrics used while
// moving packets between them. It is built by NewInterface, brought up by
// activate, started by run and shut down by Close.
type Interface struct {
	hostMap           *HostMap
	outside           udp.Conn
	inside            overlay.Device
	pki               *PKI
	cipher            string
	// firewall is replaced by reloadFirewall when the "firewall" config changes.
	firewall          *Firewall
	connectionManager *connectionManager
	handshakeManager  *HandshakeManager
	serveDns          bool
	// createTime is set to time.Now() by NewInterface.
	createTime time.Time
	lightHouse *LightHouse
	// myBroadcastAddr is only set when the certificate's first address is IPv4.
	myBroadcastAddr netip.Addr
	// myVpnNet is built from the first IP and mask in the certificate.
	myVpnNet           netip.Prefix
	dropLocalBroadcast bool
	dropMulticast      bool

	// routines is the number of listenOut and listenIn goroutines started by run.
	routines int
	// disconnectInvalid mirrors the "pki.disconnect_invalid" config value.
	disconnectInvalid atomic.Bool
	// closed is set by Close; listenIn checks it to tell an expected
	// os.ErrClosed read error from a fatal one.
	closed       atomic.Bool
	relayManager *relayManager

	// tryPromoteEvery and reQueryEvery are reloadable from
	// "counters.try_promote" and "counters.requery_every_packets".
	tryPromoteEvery atomic.Uint32
	reQueryEvery    atomic.Uint32
	// reQueryWait holds a time.Duration stored as int64; reloadable from
	// "timers.requery_wait_duration".
	reQueryWait atomic.Int64

	// sendRecvErrorConfig is set from "listen.send_recv_error" by reloadSendRecvError.
	sendRecvErrorConfig sendRecvErrorConfig

	// rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
	rebindCount int8
	version     string

	conntrackCacheTimeout time.Duration

	// writers has one slot per routine. It is allocated by NewInterface but
	// not populated in this file; listenOut reads writers[i] for i > 0 and
	// Close closes every entry.
	writers []udp.Conn
	// readers has one slot per routine and is filled in by activate.
	readers []io.ReadWriteCloser

	metricHandshakes    metrics.Histogram
	messageMetrics      *MessageMetrics
	cachedPacketMetrics *cachedPacketMetrics

	l *logrus.Logger
}
  81. type EncWriter interface {
  82. SendVia(via *HostInfo,
  83. relay *Relay,
  84. ad,
  85. nb,
  86. out []byte,
  87. nocopy bool,
  88. )
  89. SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp netip.Addr, p, nb, out []byte)
  90. SendMessageToHostInfo(t header.MessageType, st header.MessageSubType, hostinfo *HostInfo, p, nb, out []byte)
  91. Handshake(vpnIp netip.Addr)
  92. }
  93. type sendRecvErrorConfig uint8
  94. const (
  95. sendRecvErrorAlways sendRecvErrorConfig = iota
  96. sendRecvErrorNever
  97. sendRecvErrorPrivate
  98. )
  99. func (s sendRecvErrorConfig) ShouldSendRecvError(ip netip.AddrPort) bool {
  100. switch s {
  101. case sendRecvErrorPrivate:
  102. return ip.Addr().IsPrivate()
  103. case sendRecvErrorAlways:
  104. return true
  105. case sendRecvErrorNever:
  106. return false
  107. default:
  108. panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
  109. }
  110. }
  111. func (s sendRecvErrorConfig) String() string {
  112. switch s {
  113. case sendRecvErrorAlways:
  114. return "always"
  115. case sendRecvErrorNever:
  116. return "never"
  117. case sendRecvErrorPrivate:
  118. return "private"
  119. default:
  120. return fmt.Sprintf("invalid(%d)", s)
  121. }
  122. }
  123. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  124. if c.Outside == nil {
  125. return nil, errors.New("no outside connection")
  126. }
  127. if c.Inside == nil {
  128. return nil, errors.New("no inside interface (tun)")
  129. }
  130. if c.pki == nil {
  131. return nil, errors.New("no certificate state")
  132. }
  133. if c.Firewall == nil {
  134. return nil, errors.New("no firewall rules")
  135. }
  136. if c.connectionManager == nil {
  137. return nil, errors.New("no connection manager")
  138. }
  139. certificate := c.pki.GetCertState().Certificate
  140. myVpnAddr, ok := netip.AddrFromSlice(certificate.Details.Ips[0].IP)
  141. if !ok {
  142. return nil, fmt.Errorf("invalid ip address in certificate: %s", certificate.Details.Ips[0].IP)
  143. }
  144. myVpnMask, ok := netip.AddrFromSlice(certificate.Details.Ips[0].Mask)
  145. if !ok {
  146. return nil, fmt.Errorf("invalid ip mask in certificate: %s", certificate.Details.Ips[0].Mask)
  147. }
  148. myVpnAddr = myVpnAddr.Unmap()
  149. myVpnMask = myVpnMask.Unmap()
  150. if myVpnAddr.BitLen() != myVpnMask.BitLen() {
  151. return nil, fmt.Errorf("ip address and mask are different lengths in certificate")
  152. }
  153. ones, _ := certificate.Details.Ips[0].Mask.Size()
  154. myVpnNet := netip.PrefixFrom(myVpnAddr, ones)
  155. ifce := &Interface{
  156. pki: c.pki,
  157. hostMap: c.HostMap,
  158. outside: c.Outside,
  159. inside: c.Inside,
  160. cipher: c.Cipher,
  161. firewall: c.Firewall,
  162. serveDns: c.ServeDns,
  163. handshakeManager: c.HandshakeManager,
  164. createTime: time.Now(),
  165. lightHouse: c.lightHouse,
  166. dropLocalBroadcast: c.DropLocalBroadcast,
  167. dropMulticast: c.DropMulticast,
  168. routines: c.routines,
  169. version: c.version,
  170. writers: make([]udp.Conn, c.routines),
  171. readers: make([]io.ReadWriteCloser, c.routines),
  172. myVpnNet: myVpnNet,
  173. relayManager: c.relayManager,
  174. connectionManager: c.connectionManager,
  175. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  176. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  177. messageMetrics: c.MessageMetrics,
  178. cachedPacketMetrics: &cachedPacketMetrics{
  179. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  180. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  181. },
  182. l: c.l,
  183. }
  184. if myVpnAddr.Is4() {
  185. addr := myVpnNet.Masked().Addr().As4()
  186. binary.BigEndian.PutUint32(addr[:], binary.BigEndian.Uint32(addr[:])|^binary.BigEndian.Uint32(certificate.Details.Ips[0].Mask))
  187. ifce.myBroadcastAddr = netip.AddrFrom4(addr)
  188. }
  189. ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
  190. ifce.reQueryEvery.Store(c.reQueryEvery)
  191. ifce.reQueryWait.Store(int64(c.reQueryWait))
  192. ifce.connectionManager.intf = ifce
  193. return ifce, nil
  194. }
  195. // activate creates the interface on the host. After the interface is created, any
  196. // other services that want to bind listeners to its IP may do so successfully. However,
  197. // the interface isn't going to process anything until run() is called.
  198. func (f *Interface) activate() {
  199. // actually turn on tun dev
  200. addr, err := f.outside.LocalAddr()
  201. if err != nil {
  202. f.l.WithError(err).Error("Failed to get udp listen address")
  203. }
  204. f.l.WithField("interface", f.inside.Name()).WithField("network", f.inside.Cidr().String()).
  205. WithField("build", f.version).WithField("udpAddr", addr).
  206. WithField("boringcrypto", boringEnabled()).
  207. Info("Nebula interface is active")
  208. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  209. // Prepare n tun queues
  210. var reader io.ReadWriteCloser = f.inside
  211. for i := 0; i < f.routines; i++ {
  212. if i > 0 {
  213. reader, err = f.inside.NewMultiQueueReader()
  214. if err != nil {
  215. f.l.Fatal(err)
  216. }
  217. }
  218. f.readers[i] = reader
  219. }
  220. if err := f.inside.Activate(); err != nil {
  221. f.inside.Close()
  222. f.l.Fatal(err)
  223. }
  224. }
  225. func (f *Interface) run() {
  226. // Launch n queues to read packets from udp
  227. for i := 0; i < f.routines; i++ {
  228. go f.listenOut(i)
  229. }
  230. // Launch n queues to read packets from tun dev
  231. for i := 0; i < f.routines; i++ {
  232. go f.listenIn(f.readers[i], i)
  233. }
  234. }
  235. func (f *Interface) listenOut(i int) {
  236. runtime.LockOSThread()
  237. var li udp.Conn
  238. // TODO clean this up with a coherent interface for each outside connection
  239. if i > 0 {
  240. li = f.writers[i]
  241. } else {
  242. li = f.outside
  243. }
  244. lhh := f.lightHouse.NewRequestHandler()
  245. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  246. li.ListenOut(readOutsidePackets(f), lhHandleRequest(lhh, f), conntrackCache, i)
  247. }
  248. func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
  249. runtime.LockOSThread()
  250. packet := make([]byte, mtu)
  251. out := make([]byte, mtu)
  252. fwPacket := &firewall.Packet{}
  253. nb := make([]byte, 12, 12)
  254. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  255. for {
  256. n, err := reader.Read(packet)
  257. if err != nil {
  258. if errors.Is(err, os.ErrClosed) && f.closed.Load() {
  259. return
  260. }
  261. f.l.WithError(err).Error("Error while reading outbound packet")
  262. // This only seems to happen when something fatal happens to the fd, so exit.
  263. os.Exit(2)
  264. }
  265. f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
  266. }
  267. }
  268. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  269. c.RegisterReloadCallback(f.reloadFirewall)
  270. c.RegisterReloadCallback(f.reloadSendRecvError)
  271. c.RegisterReloadCallback(f.reloadDisconnectInvalid)
  272. c.RegisterReloadCallback(f.reloadMisc)
  273. for _, udpConn := range f.writers {
  274. c.RegisterReloadCallback(udpConn.ReloadConfig)
  275. }
  276. }
  277. func (f *Interface) reloadDisconnectInvalid(c *config.C) {
  278. initial := c.InitialLoad()
  279. if initial || c.HasChanged("pki.disconnect_invalid") {
  280. f.disconnectInvalid.Store(c.GetBool("pki.disconnect_invalid", true))
  281. if !initial {
  282. f.l.Infof("pki.disconnect_invalid changed to %v", f.disconnectInvalid.Load())
  283. }
  284. }
  285. }
  286. func (f *Interface) reloadFirewall(c *config.C) {
  287. //TODO: need to trigger/detect if the certificate changed too
  288. if c.HasChanged("firewall") == false {
  289. f.l.Debug("No firewall config change detected")
  290. return
  291. }
  292. fw, err := NewFirewallFromConfig(f.l, f.pki.GetCertState().Certificate, c)
  293. if err != nil {
  294. f.l.WithError(err).Error("Error while creating firewall during reload")
  295. return
  296. }
  297. oldFw := f.firewall
  298. conntrack := oldFw.Conntrack
  299. conntrack.Lock()
  300. defer conntrack.Unlock()
  301. fw.rulesVersion = oldFw.rulesVersion + 1
  302. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  303. // safe and just reset conntrack in this case.
  304. if fw.rulesVersion == 0 {
  305. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  306. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  307. WithField("rulesVersion", fw.rulesVersion).
  308. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  309. } else {
  310. fw.Conntrack = conntrack
  311. }
  312. f.firewall = fw
  313. oldFw.Destroy()
  314. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  315. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  316. WithField("rulesVersion", fw.rulesVersion).
  317. Info("New firewall has been installed")
  318. }
  319. func (f *Interface) reloadSendRecvError(c *config.C) {
  320. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  321. stringValue := c.GetString("listen.send_recv_error", "always")
  322. switch stringValue {
  323. case "always":
  324. f.sendRecvErrorConfig = sendRecvErrorAlways
  325. case "never":
  326. f.sendRecvErrorConfig = sendRecvErrorNever
  327. case "private":
  328. f.sendRecvErrorConfig = sendRecvErrorPrivate
  329. default:
  330. if c.GetBool("listen.send_recv_error", true) {
  331. f.sendRecvErrorConfig = sendRecvErrorAlways
  332. } else {
  333. f.sendRecvErrorConfig = sendRecvErrorNever
  334. }
  335. }
  336. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  337. Info("Loaded send_recv_error config")
  338. }
  339. }
  340. func (f *Interface) reloadMisc(c *config.C) {
  341. if c.HasChanged("counters.try_promote") {
  342. n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
  343. f.tryPromoteEvery.Store(n)
  344. f.l.Info("counters.try_promote has changed")
  345. }
  346. if c.HasChanged("counters.requery_every_packets") {
  347. n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
  348. f.reQueryEvery.Store(n)
  349. f.l.Info("counters.requery_every_packets has changed")
  350. }
  351. if c.HasChanged("timers.requery_wait_duration") {
  352. n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
  353. f.reQueryWait.Store(int64(n))
  354. f.l.Info("timers.requery_wait_duration has changed")
  355. }
  356. }
  357. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  358. ticker := time.NewTicker(i)
  359. defer ticker.Stop()
  360. udpStats := udp.NewUDPStatsEmitter(f.writers)
  361. certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
  362. for {
  363. select {
  364. case <-ctx.Done():
  365. return
  366. case <-ticker.C:
  367. f.firewall.EmitStats()
  368. f.handshakeManager.EmitStats()
  369. udpStats()
  370. certExpirationGauge.Update(int64(f.pki.GetCertState().Certificate.Details.NotAfter.Sub(time.Now()) / time.Second))
  371. }
  372. }
  373. }
  374. func (f *Interface) Close() error {
  375. f.closed.Store(true)
  376. for _, u := range f.writers {
  377. err := u.Close()
  378. if err != nil {
  379. f.l.WithError(err).Error("Error while closing udp socket")
  380. }
  381. }
  382. // Release the tun device
  383. return f.inside.Close()
  384. }