interface.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "net/netip"
  7. "os"
  8. "runtime"
  9. "sync/atomic"
  10. "time"
  11. "github.com/gaissmai/bart"
  12. "github.com/rcrowley/go-metrics"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/config"
  15. "github.com/slackhq/nebula/firewall"
  16. "github.com/slackhq/nebula/header"
  17. "github.com/slackhq/nebula/overlay"
  18. "github.com/slackhq/nebula/udp"
  19. )
  20. const mtu = 9001
  21. const virtioNetHdrLen = overlay.VirtioNetHdrLen
  22. type InterfaceConfig struct {
  23. HostMap *HostMap
  24. Outside udp.Conn
  25. Inside overlay.Device
  26. pki *PKI
  27. Cipher string
  28. Firewall *Firewall
  29. ServeDns bool
  30. HandshakeManager *HandshakeManager
  31. lightHouse *LightHouse
  32. connectionManager *connectionManager
  33. DropLocalBroadcast bool
  34. DropMulticast bool
  35. routines int
  36. MessageMetrics *MessageMetrics
  37. version string
  38. relayManager *relayManager
  39. punchy *Punchy
  40. tryPromoteEvery uint32
  41. reQueryEvery uint32
  42. reQueryWait time.Duration
  43. ConntrackCacheTimeout time.Duration
  44. l *logrus.Logger
  45. }
  46. type batchMetrics struct {
  47. udpReadSize metrics.Histogram
  48. tunReadSize metrics.Histogram
  49. udpWriteSize metrics.Histogram
  50. tunWriteSize metrics.Histogram
  51. }
  52. type Interface struct {
  53. hostMap *HostMap
  54. outside udp.Conn
  55. inside overlay.Device
  56. pki *PKI
  57. firewall *Firewall
  58. connectionManager *connectionManager
  59. handshakeManager *HandshakeManager
  60. serveDns bool
  61. createTime time.Time
  62. lightHouse *LightHouse
  63. myBroadcastAddrsTable *bart.Lite
  64. myVpnAddrs []netip.Addr // A list of addresses assigned to us via our certificate
  65. myVpnAddrsTable *bart.Lite
  66. myVpnNetworks []netip.Prefix // A list of networks assigned to us via our certificate
  67. myVpnNetworksTable *bart.Lite
  68. dropLocalBroadcast bool
  69. dropMulticast bool
  70. routines int
  71. disconnectInvalid atomic.Bool
  72. closed atomic.Bool
  73. relayManager *relayManager
  74. tryPromoteEvery atomic.Uint32
  75. reQueryEvery atomic.Uint32
  76. reQueryWait atomic.Int64
  77. sendRecvErrorConfig sendRecvErrorConfig
  78. // rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
  79. rebindCount int8
  80. version string
  81. conntrackCacheTimeout time.Duration
  82. writers []udp.Conn
  83. readers []overlay.BatchReadWriter
  84. metricHandshakes metrics.Histogram
  85. messageMetrics *MessageMetrics
  86. cachedPacketMetrics *cachedPacketMetrics
  87. batchMetrics *batchMetrics
  88. l *logrus.Logger
  89. }
  90. type EncWriter interface {
  91. SendVia(via *HostInfo,
  92. relay *Relay,
  93. ad,
  94. nb,
  95. out []byte,
  96. nocopy bool,
  97. )
  98. SendMessageToVpnAddr(t header.MessageType, st header.MessageSubType, vpnAddr netip.Addr, p, nb, out []byte)
  99. SendMessageToHostInfo(t header.MessageType, st header.MessageSubType, hostinfo *HostInfo, p, nb, out []byte)
  100. Handshake(vpnAddr netip.Addr)
  101. GetHostInfo(vpnAddr netip.Addr) *HostInfo
  102. GetCertState() *CertState
  103. }
  // sendRecvErrorConfig is the parsed form of the listen.send_recv_error
  // setting: always, never, or only for private endpoints.
  type sendRecvErrorConfig uint8

  const (
  	// sendRecvErrorAlways is the zero value.
  	sendRecvErrorAlways sendRecvErrorConfig = iota
  	sendRecvErrorNever
  	sendRecvErrorPrivate
  )

  // ShouldSendRecvError reports whether the policy s allows a recv error to be
  // sent to endpoint. Under sendRecvErrorPrivate the answer is whether the
  // endpoint address is private according to netip.Addr.IsPrivate. It panics
  // when s is not one of the declared constants.
  func (s sendRecvErrorConfig) ShouldSendRecvError(endpoint netip.AddrPort) bool {
  	if s == sendRecvErrorAlways {
  		return true
  	}
  	if s == sendRecvErrorNever {
  		return false
  	}
  	if s == sendRecvErrorPrivate {
  		return endpoint.Addr().IsPrivate()
  	}
  	panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
  }

  // String returns the configuration keyword for s ("always", "never" or
  // "private"), or "invalid(N)" for any other value.
  func (s sendRecvErrorConfig) String() string {
  	keywords := [...]string{
  		sendRecvErrorAlways:  "always",
  		sendRecvErrorNever:   "never",
  		sendRecvErrorPrivate: "private",
  	}
  	if int(s) < len(keywords) {
  		return keywords[s]
  	}
  	return fmt.Sprintf("invalid(%d)", s)
  }
  134. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  135. if c.Outside == nil {
  136. return nil, errors.New("no outside connection")
  137. }
  138. if c.Inside == nil {
  139. return nil, errors.New("no inside interface (tun)")
  140. }
  141. if c.pki == nil {
  142. return nil, errors.New("no certificate state")
  143. }
  144. if c.Firewall == nil {
  145. return nil, errors.New("no firewall rules")
  146. }
  147. if c.connectionManager == nil {
  148. return nil, errors.New("no connection manager")
  149. }
  150. cs := c.pki.getCertState()
  151. ifce := &Interface{
  152. pki: c.pki,
  153. hostMap: c.HostMap,
  154. outside: c.Outside,
  155. inside: c.Inside,
  156. firewall: c.Firewall,
  157. serveDns: c.ServeDns,
  158. handshakeManager: c.HandshakeManager,
  159. createTime: time.Now(),
  160. lightHouse: c.lightHouse,
  161. dropLocalBroadcast: c.DropLocalBroadcast,
  162. dropMulticast: c.DropMulticast,
  163. routines: c.routines,
  164. version: c.version,
  165. writers: make([]udp.Conn, c.routines),
  166. readers: make([]overlay.BatchReadWriter, c.routines),
  167. myVpnNetworks: cs.myVpnNetworks,
  168. myVpnNetworksTable: cs.myVpnNetworksTable,
  169. myVpnAddrs: cs.myVpnAddrs,
  170. myVpnAddrsTable: cs.myVpnAddrsTable,
  171. myBroadcastAddrsTable: cs.myVpnBroadcastAddrsTable,
  172. relayManager: c.relayManager,
  173. connectionManager: c.connectionManager,
  174. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  175. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  176. messageMetrics: c.MessageMetrics,
  177. cachedPacketMetrics: &cachedPacketMetrics{
  178. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  179. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  180. },
  181. batchMetrics: &batchMetrics{
  182. udpReadSize: metrics.GetOrRegisterHistogram("batch.udp_read_size", nil, metrics.NewUniformSample(1024)),
  183. tunReadSize: metrics.GetOrRegisterHistogram("batch.tun_read_size", nil, metrics.NewUniformSample(1024)),
  184. udpWriteSize: metrics.GetOrRegisterHistogram("batch.udp_write_size", nil, metrics.NewUniformSample(1024)),
  185. tunWriteSize: metrics.GetOrRegisterHistogram("batch.tun_write_size", nil, metrics.NewUniformSample(1024)),
  186. },
  187. l: c.l,
  188. }
  189. ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
  190. ifce.reQueryEvery.Store(c.reQueryEvery)
  191. ifce.reQueryWait.Store(int64(c.reQueryWait))
  192. ifce.connectionManager.intf = ifce
  193. return ifce, nil
  194. }
  195. // activate creates the interface on the host. After the interface is created, any
  196. // other services that want to bind listeners to its IP may do so successfully. However,
  197. // the interface isn't going to process anything until run() is called.
  198. func (f *Interface) activate() {
  199. // actually turn on tun dev
  200. addr, err := f.outside.LocalAddr()
  201. if err != nil {
  202. f.l.WithError(err).Error("Failed to get udp listen address")
  203. }
  204. f.l.WithField("interface", f.inside.Name()).WithField("networks", f.myVpnNetworks).
  205. WithField("build", f.version).WithField("udpAddr", addr).
  206. WithField("boringcrypto", boringEnabled()).
  207. Info("Nebula interface is active")
  208. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  209. // Prepare n tun queues
  210. var reader overlay.BatchReadWriter = f.inside
  211. for i := 0; i < f.routines; i++ {
  212. if i > 0 {
  213. reader, err = f.inside.NewMultiQueueReader()
  214. if err != nil {
  215. f.l.Fatal(err)
  216. }
  217. }
  218. f.readers[i] = reader
  219. }
  220. if err := f.inside.Activate(); err != nil {
  221. f.inside.Close()
  222. f.l.Fatal(err)
  223. }
  224. }
  225. func (f *Interface) run() {
  226. // Launch n queues to read packets from udp
  227. for i := 0; i < f.routines; i++ {
  228. go f.listenOut(i)
  229. }
  230. // Launch n queues to read packets from tun dev
  231. for i := 0; i < f.routines; i++ {
  232. go f.listenIn(f.readers[i], i)
  233. }
  234. }
  235. func (f *Interface) listenOut(i int) {
  236. runtime.LockOSThread()
  237. var li udp.Conn
  238. if i > 0 {
  239. li = f.writers[i]
  240. } else {
  241. li = f.outside
  242. }
  243. ctCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  244. lhh := f.lightHouse.NewRequestHandler()
  245. // Pre-allocate output buffers for batch processing
  246. batchSize := li.BatchSize()
  247. outs := make([][]byte, batchSize)
  248. for idx := range outs {
  249. // Allocate full buffer with virtio header space
  250. outs[idx] = make([]byte, virtioNetHdrLen, virtioNetHdrLen+udp.MTU)
  251. }
  252. h := &header.H{}
  253. fwPacket := &firewall.Packet{}
  254. nb := make([]byte, 12)
  255. li.ListenOutBatch(func(addrs []netip.AddrPort, payloads [][]byte, count int) {
  256. f.readOutsidePacketsBatch(addrs, payloads, count, outs[:count], nb, i, h, fwPacket, lhh, ctCache.Get(f.l))
  257. })
  258. }
  259. func (f *Interface) listenIn(reader overlay.BatchReadWriter, i int) {
  260. runtime.LockOSThread()
  261. batchSize := reader.BatchSize()
  262. // Allocate buffers for batch reading
  263. bufs := make([][]byte, batchSize)
  264. for idx := range bufs {
  265. bufs[idx] = make([]byte, mtu)
  266. }
  267. sizes := make([]int, batchSize)
  268. // Allocate output buffers for batch processing (one per packet)
  269. // Each has virtio header headroom to avoid copies on write
  270. outs := make([][]byte, batchSize)
  271. for idx := range outs {
  272. outBuf := make([]byte, virtioNetHdrLen+mtu)
  273. outs[idx] = outBuf[virtioNetHdrLen:] // Slice starting after headroom
  274. }
  275. // Pre-allocate batch accumulation buffers for sending
  276. batchPackets := make([][]byte, 0, batchSize)
  277. batchAddrs := make([]netip.AddrPort, 0, batchSize)
  278. // Pre-allocate nonce buffer (reused for all encryptions)
  279. nb := make([]byte, 12)
  280. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  281. for {
  282. n, err := reader.BatchRead(bufs, sizes)
  283. if err != nil {
  284. if errors.Is(err, os.ErrClosed) && f.closed.Load() {
  285. return
  286. }
  287. f.l.WithError(err).Error("Error while batch reading outbound packets")
  288. // This only seems to happen when something fatal happens to the fd, so exit.
  289. os.Exit(2)
  290. }
  291. f.batchMetrics.tunReadSize.Update(int64(n))
  292. // Process all packets in the batch at once
  293. f.consumeInsidePackets(bufs, sizes, n, outs, nb, i, conntrackCache.Get(f.l), &batchPackets, &batchAddrs)
  294. }
  295. }
  296. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  297. c.RegisterReloadCallback(f.reloadFirewall)
  298. c.RegisterReloadCallback(f.reloadSendRecvError)
  299. c.RegisterReloadCallback(f.reloadDisconnectInvalid)
  300. c.RegisterReloadCallback(f.reloadMisc)
  301. for _, udpConn := range f.writers {
  302. c.RegisterReloadCallback(udpConn.ReloadConfig)
  303. }
  304. }
  305. func (f *Interface) reloadDisconnectInvalid(c *config.C) {
  306. initial := c.InitialLoad()
  307. if initial || c.HasChanged("pki.disconnect_invalid") {
  308. f.disconnectInvalid.Store(c.GetBool("pki.disconnect_invalid", true))
  309. if !initial {
  310. f.l.Infof("pki.disconnect_invalid changed to %v", f.disconnectInvalid.Load())
  311. }
  312. }
  313. }
  314. func (f *Interface) reloadFirewall(c *config.C) {
  315. //TODO: need to trigger/detect if the certificate changed too
  316. if c.HasChanged("firewall") == false {
  317. f.l.Debug("No firewall config change detected")
  318. return
  319. }
  320. fw, err := NewFirewallFromConfig(f.l, f.pki.getCertState(), c)
  321. if err != nil {
  322. f.l.WithError(err).Error("Error while creating firewall during reload")
  323. return
  324. }
  325. oldFw := f.firewall
  326. conntrack := oldFw.Conntrack
  327. conntrack.Lock()
  328. defer conntrack.Unlock()
  329. fw.rulesVersion = oldFw.rulesVersion + 1
  330. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  331. // safe and just reset conntrack in this case.
  332. if fw.rulesVersion == 0 {
  333. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  334. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  335. WithField("rulesVersion", fw.rulesVersion).
  336. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  337. } else {
  338. fw.Conntrack = conntrack
  339. }
  340. f.firewall = fw
  341. oldFw.Destroy()
  342. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  343. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  344. WithField("rulesVersion", fw.rulesVersion).
  345. Info("New firewall has been installed")
  346. }
  347. func (f *Interface) reloadSendRecvError(c *config.C) {
  348. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  349. stringValue := c.GetString("listen.send_recv_error", "always")
  350. switch stringValue {
  351. case "always":
  352. f.sendRecvErrorConfig = sendRecvErrorAlways
  353. case "never":
  354. f.sendRecvErrorConfig = sendRecvErrorNever
  355. case "private":
  356. f.sendRecvErrorConfig = sendRecvErrorPrivate
  357. default:
  358. if c.GetBool("listen.send_recv_error", true) {
  359. f.sendRecvErrorConfig = sendRecvErrorAlways
  360. } else {
  361. f.sendRecvErrorConfig = sendRecvErrorNever
  362. }
  363. }
  364. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  365. Info("Loaded send_recv_error config")
  366. }
  367. }
  368. func (f *Interface) reloadMisc(c *config.C) {
  369. if c.HasChanged("counters.try_promote") {
  370. n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
  371. f.tryPromoteEvery.Store(n)
  372. f.l.Info("counters.try_promote has changed")
  373. }
  374. if c.HasChanged("counters.requery_every_packets") {
  375. n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
  376. f.reQueryEvery.Store(n)
  377. f.l.Info("counters.requery_every_packets has changed")
  378. }
  379. if c.HasChanged("timers.requery_wait_duration") {
  380. n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
  381. f.reQueryWait.Store(int64(n))
  382. f.l.Info("timers.requery_wait_duration has changed")
  383. }
  384. }
  385. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  386. ticker := time.NewTicker(i)
  387. defer ticker.Stop()
  388. udpStats := udp.NewUDPStatsEmitter(f.writers)
  389. certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
  390. certInitiatingVersion := metrics.GetOrRegisterGauge("certificate.initiating_version", nil)
  391. certMaxVersion := metrics.GetOrRegisterGauge("certificate.max_version", nil)
  392. for {
  393. select {
  394. case <-ctx.Done():
  395. return
  396. case <-ticker.C:
  397. f.firewall.EmitStats()
  398. f.handshakeManager.EmitStats()
  399. udpStats()
  400. certState := f.pki.getCertState()
  401. defaultCrt := certState.GetDefaultCertificate()
  402. certExpirationGauge.Update(int64(defaultCrt.NotAfter().Sub(time.Now()) / time.Second))
  403. certInitiatingVersion.Update(int64(defaultCrt.Version()))
  404. // Report the max certificate version we are capable of using
  405. if certState.v2Cert != nil {
  406. certMaxVersion.Update(int64(certState.v2Cert.Version()))
  407. } else {
  408. certMaxVersion.Update(int64(certState.v1Cert.Version()))
  409. }
  410. }
  411. }
  412. }
  413. func (f *Interface) GetHostInfo(vpnIp netip.Addr) *HostInfo {
  414. return f.hostMap.QueryVpnAddr(vpnIp)
  415. }
  416. func (f *Interface) GetCertState() *CertState {
  417. return f.pki.getCertState()
  418. }
  419. func (f *Interface) Close() error {
  420. f.closed.Store(true)
  421. for _, u := range f.writers {
  422. err := u.Close()
  423. if err != nil {
  424. f.l.WithError(err).Error("Error while closing udp socket")
  425. }
  426. }
  427. // Release the tun device
  428. return f.inside.Close()
  429. }