interface.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net/netip"
  8. "runtime"
  9. "sync"
  10. "sync/atomic"
  11. "time"
  12. "github.com/gaissmai/bart"
  13. "github.com/rcrowley/go-metrics"
  14. "github.com/sirupsen/logrus"
  15. "github.com/slackhq/nebula/config"
  16. "github.com/slackhq/nebula/firewall"
  17. "github.com/slackhq/nebula/header"
  18. "github.com/slackhq/nebula/overlay"
  19. "github.com/slackhq/nebula/udp"
  20. )
// mtu is the size, in bytes, of the packet and output buffers that listenIn
// allocates for every overlay reader.
const mtu = 9001
  22. type InterfaceConfig struct {
  23. HostMap *HostMap
  24. Outside udp.Conn
  25. Inside overlay.Device
  26. pki *PKI
  27. Cipher string
  28. Firewall *Firewall
  29. ServeDns bool
  30. HandshakeManager *HandshakeManager
  31. lightHouse *LightHouse
  32. connectionManager *connectionManager
  33. DropLocalBroadcast bool
  34. DropMulticast bool
  35. routines int
  36. MessageMetrics *MessageMetrics
  37. version string
  38. relayManager *relayManager
  39. punchy *Punchy
  40. tryPromoteEvery uint32
  41. reQueryEvery uint32
  42. reQueryWait time.Duration
  43. ConntrackCacheTimeout time.Duration
  44. l *logrus.Logger
  45. }
  46. type Interface struct {
  47. hostMap *HostMap
  48. outside udp.Conn
  49. inside overlay.Device
  50. pki *PKI
  51. firewall *Firewall
  52. connectionManager *connectionManager
  53. handshakeManager *HandshakeManager
  54. serveDns bool
  55. createTime time.Time
  56. lightHouse *LightHouse
  57. myBroadcastAddrsTable *bart.Lite
  58. myVpnAddrs []netip.Addr // A list of addresses assigned to us via our certificate
  59. myVpnAddrsTable *bart.Lite
  60. myVpnNetworks []netip.Prefix // A list of networks assigned to us via our certificate
  61. myVpnNetworksTable *bart.Lite
  62. dropLocalBroadcast bool
  63. dropMulticast bool
  64. routines int
  65. disconnectInvalid atomic.Bool
  66. closed atomic.Bool
  67. relayManager *relayManager
  68. tryPromoteEvery atomic.Uint32
  69. reQueryEvery atomic.Uint32
  70. reQueryWait atomic.Int64
  71. sendRecvErrorConfig recvErrorConfig
  72. acceptRecvErrorConfig recvErrorConfig
  73. // rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
  74. rebindCount int8
  75. version string
  76. conntrackCacheTimeout time.Duration
  77. writers []udp.Conn
  78. readers []io.ReadWriteCloser
  79. wg sync.WaitGroup
  80. metricHandshakes metrics.Histogram
  81. messageMetrics *MessageMetrics
  82. cachedPacketMetrics *cachedPacketMetrics
  83. l *logrus.Logger
  84. }
  85. type EncWriter interface {
  86. SendVia(via *HostInfo,
  87. relay *Relay,
  88. ad,
  89. nb,
  90. out []byte,
  91. nocopy bool,
  92. )
  93. SendMessageToVpnAddr(t header.MessageType, st header.MessageSubType, vpnAddr netip.Addr, p, nb, out []byte)
  94. SendMessageToHostInfo(t header.MessageType, st header.MessageSubType, hostinfo *HostInfo, p, nb, out []byte)
  95. Handshake(vpnAddr netip.Addr)
  96. GetHostInfo(vpnAddr netip.Addr) *HostInfo
  97. GetCertState() *CertState
  98. }
  99. type recvErrorConfig uint8
  100. const (
  101. recvErrorAlways recvErrorConfig = iota
  102. recvErrorNever
  103. recvErrorPrivate
  104. )
  105. func (s recvErrorConfig) ShouldRecvError(endpoint netip.AddrPort) bool {
  106. switch s {
  107. case recvErrorPrivate:
  108. return endpoint.Addr().IsPrivate()
  109. case recvErrorAlways:
  110. return true
  111. case recvErrorNever:
  112. return false
  113. default:
  114. panic(fmt.Errorf("invalid recvErrorConfig value: %d", s))
  115. }
  116. }
  117. func (s recvErrorConfig) String() string {
  118. switch s {
  119. case recvErrorAlways:
  120. return "always"
  121. case recvErrorNever:
  122. return "never"
  123. case recvErrorPrivate:
  124. return "private"
  125. default:
  126. return fmt.Sprintf("invalid(%d)", s)
  127. }
  128. }
  129. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  130. if c.Outside == nil {
  131. return nil, errors.New("no outside connection")
  132. }
  133. if c.Inside == nil {
  134. return nil, errors.New("no inside interface (tun)")
  135. }
  136. if c.pki == nil {
  137. return nil, errors.New("no certificate state")
  138. }
  139. if c.Firewall == nil {
  140. return nil, errors.New("no firewall rules")
  141. }
  142. if c.connectionManager == nil {
  143. return nil, errors.New("no connection manager")
  144. }
  145. cs := c.pki.getCertState()
  146. ifce := &Interface{
  147. pki: c.pki,
  148. hostMap: c.HostMap,
  149. outside: c.Outside,
  150. inside: c.Inside,
  151. firewall: c.Firewall,
  152. serveDns: c.ServeDns,
  153. handshakeManager: c.HandshakeManager,
  154. createTime: time.Now(),
  155. lightHouse: c.lightHouse,
  156. dropLocalBroadcast: c.DropLocalBroadcast,
  157. dropMulticast: c.DropMulticast,
  158. routines: c.routines,
  159. version: c.version,
  160. writers: make([]udp.Conn, c.routines),
  161. readers: make([]io.ReadWriteCloser, c.routines),
  162. myVpnNetworks: cs.myVpnNetworks,
  163. myVpnNetworksTable: cs.myVpnNetworksTable,
  164. myVpnAddrs: cs.myVpnAddrs,
  165. myVpnAddrsTable: cs.myVpnAddrsTable,
  166. myBroadcastAddrsTable: cs.myVpnBroadcastAddrsTable,
  167. relayManager: c.relayManager,
  168. connectionManager: c.connectionManager,
  169. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  170. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  171. messageMetrics: c.MessageMetrics,
  172. cachedPacketMetrics: &cachedPacketMetrics{
  173. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  174. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  175. },
  176. l: c.l,
  177. }
  178. ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
  179. ifce.reQueryEvery.Store(c.reQueryEvery)
  180. ifce.reQueryWait.Store(int64(c.reQueryWait))
  181. ifce.connectionManager.intf = ifce
  182. return ifce, nil
  183. }
  184. // activate creates the interface on the host. After the interface is created, any
  185. // other services that want to bind listeners to its IP may do so successfully. However,
  186. // the interface isn't going to process anything until run() is called.
  187. func (f *Interface) activate() error {
  188. // actually turn on tun dev
  189. addr, err := f.outside.LocalAddr()
  190. if err != nil {
  191. f.l.WithError(err).Error("Failed to get udp listen address")
  192. }
  193. f.l.WithField("interface", f.inside.Name()).WithField("networks", f.myVpnNetworks).
  194. WithField("build", f.version).WithField("udpAddr", addr).
  195. WithField("boringcrypto", boringEnabled()).
  196. Info("Nebula interface is active")
  197. if f.routines > 1 {
  198. if !f.inside.SupportsMultiqueue() || !f.outside.SupportsMultipleReaders() {
  199. f.routines = 1
  200. f.l.Warn("routines is not supported on this platform, falling back to a single routine")
  201. }
  202. }
  203. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  204. // Prepare n tun queues
  205. var reader io.ReadWriteCloser = f.inside
  206. for i := 0; i < f.routines; i++ {
  207. if i > 0 {
  208. reader, err = f.inside.NewMultiQueueReader()
  209. if err != nil {
  210. return err
  211. }
  212. }
  213. f.readers[i] = reader
  214. }
  215. if err = f.inside.Activate(); err != nil {
  216. f.inside.Close()
  217. return err
  218. }
  219. return nil
  220. }
  221. func (f *Interface) run() (func(), error) {
  222. // Launch n queues to read packets from udp
  223. for i := 0; i < f.routines; i++ {
  224. f.wg.Add(1)
  225. go f.listenOut(i)
  226. }
  227. // Launch n queues to read packets from tun dev
  228. for i := 0; i < f.routines; i++ {
  229. f.wg.Add(1)
  230. go f.listenIn(f.readers[i], i)
  231. }
  232. return f.wg.Wait, nil
  233. }
  234. func (f *Interface) listenOut(i int) {
  235. runtime.LockOSThread()
  236. var li udp.Conn
  237. if i > 0 {
  238. li = f.writers[i]
  239. } else {
  240. li = f.outside
  241. }
  242. ctCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  243. lhh := f.lightHouse.NewRequestHandler()
  244. plaintext := make([]byte, udp.MTU)
  245. h := &header.H{}
  246. fwPacket := &firewall.Packet{}
  247. nb := make([]byte, 12, 12)
  248. err := li.ListenOut(func(fromUdpAddr netip.AddrPort, payload []byte) {
  249. f.readOutsidePackets(ViaSender{UdpAddr: fromUdpAddr}, plaintext[:0], payload, h, fwPacket, lhh, nb, i, ctCache.Get(f.l))
  250. })
  251. if err != nil && !f.closed.Load() {
  252. f.l.WithError(err).Error("Error while reading packet inbound packet, closing")
  253. //TODO: Trigger Control to close
  254. }
  255. f.l.Debugf("underlay reader %v is done", i)
  256. f.wg.Done()
  257. }
  258. func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
  259. runtime.LockOSThread()
  260. packet := make([]byte, mtu)
  261. out := make([]byte, mtu)
  262. fwPacket := &firewall.Packet{}
  263. nb := make([]byte, 12, 12)
  264. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  265. for {
  266. n, err := reader.Read(packet)
  267. if err != nil {
  268. if !f.closed.Load() {
  269. f.l.WithError(err).Error("Error while reading outbound packet, closing")
  270. //TODO: Trigger Control to close
  271. }
  272. break
  273. }
  274. f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
  275. }
  276. f.l.Debugf("overlay reader %v is done", i)
  277. f.wg.Done()
  278. }
  279. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  280. c.RegisterReloadCallback(f.reloadFirewall)
  281. c.RegisterReloadCallback(f.reloadSendRecvError)
  282. c.RegisterReloadCallback(f.reloadAcceptRecvError)
  283. c.RegisterReloadCallback(f.reloadDisconnectInvalid)
  284. c.RegisterReloadCallback(f.reloadMisc)
  285. for _, udpConn := range f.writers {
  286. c.RegisterReloadCallback(udpConn.ReloadConfig)
  287. }
  288. }
  289. func (f *Interface) reloadDisconnectInvalid(c *config.C) {
  290. initial := c.InitialLoad()
  291. if initial || c.HasChanged("pki.disconnect_invalid") {
  292. f.disconnectInvalid.Store(c.GetBool("pki.disconnect_invalid", true))
  293. if !initial {
  294. f.l.Infof("pki.disconnect_invalid changed to %v", f.disconnectInvalid.Load())
  295. }
  296. }
  297. }
  298. func (f *Interface) reloadFirewall(c *config.C) {
  299. //TODO: need to trigger/detect if the certificate changed too
  300. if c.HasChanged("firewall") == false {
  301. f.l.Debug("No firewall config change detected")
  302. return
  303. }
  304. fw, err := NewFirewallFromConfig(f.l, f.pki.getCertState(), c)
  305. if err != nil {
  306. f.l.WithError(err).Error("Error while creating firewall during reload")
  307. return
  308. }
  309. oldFw := f.firewall
  310. conntrack := oldFw.Conntrack
  311. conntrack.Lock()
  312. defer conntrack.Unlock()
  313. fw.rulesVersion = oldFw.rulesVersion + 1
  314. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  315. // safe and just reset conntrack in this case.
  316. if fw.rulesVersion == 0 {
  317. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  318. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  319. WithField("rulesVersion", fw.rulesVersion).
  320. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  321. } else {
  322. fw.Conntrack = conntrack
  323. }
  324. f.firewall = fw
  325. oldFw.Destroy()
  326. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  327. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  328. WithField("rulesVersion", fw.rulesVersion).
  329. Info("New firewall has been installed")
  330. }
  331. func (f *Interface) reloadSendRecvError(c *config.C) {
  332. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  333. stringValue := c.GetString("listen.send_recv_error", "always")
  334. switch stringValue {
  335. case "always":
  336. f.sendRecvErrorConfig = recvErrorAlways
  337. case "never":
  338. f.sendRecvErrorConfig = recvErrorNever
  339. case "private":
  340. f.sendRecvErrorConfig = recvErrorPrivate
  341. default:
  342. if c.GetBool("listen.send_recv_error", true) {
  343. f.sendRecvErrorConfig = recvErrorAlways
  344. } else {
  345. f.sendRecvErrorConfig = recvErrorNever
  346. }
  347. }
  348. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  349. Info("Loaded send_recv_error config")
  350. }
  351. }
  352. func (f *Interface) reloadAcceptRecvError(c *config.C) {
  353. if c.InitialLoad() || c.HasChanged("listen.accept_recv_error") {
  354. stringValue := c.GetString("listen.accept_recv_error", "always")
  355. switch stringValue {
  356. case "always":
  357. f.acceptRecvErrorConfig = recvErrorAlways
  358. case "never":
  359. f.acceptRecvErrorConfig = recvErrorNever
  360. case "private":
  361. f.acceptRecvErrorConfig = recvErrorPrivate
  362. default:
  363. if c.GetBool("listen.accept_recv_error", true) {
  364. f.acceptRecvErrorConfig = recvErrorAlways
  365. } else {
  366. f.acceptRecvErrorConfig = recvErrorNever
  367. }
  368. }
  369. f.l.WithField("acceptRecvError", f.acceptRecvErrorConfig.String()).
  370. Info("Loaded accept_recv_error config")
  371. }
  372. }
  373. func (f *Interface) reloadMisc(c *config.C) {
  374. if c.HasChanged("counters.try_promote") {
  375. n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
  376. f.tryPromoteEvery.Store(n)
  377. f.l.Info("counters.try_promote has changed")
  378. }
  379. if c.HasChanged("counters.requery_every_packets") {
  380. n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
  381. f.reQueryEvery.Store(n)
  382. f.l.Info("counters.requery_every_packets has changed")
  383. }
  384. if c.HasChanged("timers.requery_wait_duration") {
  385. n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
  386. f.reQueryWait.Store(int64(n))
  387. f.l.Info("timers.requery_wait_duration has changed")
  388. }
  389. }
  390. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  391. ticker := time.NewTicker(i)
  392. defer ticker.Stop()
  393. udpStats := udp.NewUDPStatsEmitter(f.writers)
  394. certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
  395. certInitiatingVersion := metrics.GetOrRegisterGauge("certificate.initiating_version", nil)
  396. certMaxVersion := metrics.GetOrRegisterGauge("certificate.max_version", nil)
  397. for {
  398. select {
  399. case <-ctx.Done():
  400. return
  401. case <-ticker.C:
  402. f.firewall.EmitStats()
  403. f.handshakeManager.EmitStats()
  404. udpStats()
  405. certState := f.pki.getCertState()
  406. defaultCrt := certState.GetDefaultCertificate()
  407. certExpirationGauge.Update(int64(defaultCrt.NotAfter().Sub(time.Now()) / time.Second))
  408. certInitiatingVersion.Update(int64(defaultCrt.Version()))
  409. // Report the max certificate version we are capable of using
  410. if certState.v2Cert != nil {
  411. certMaxVersion.Update(int64(certState.v2Cert.Version()))
  412. } else {
  413. certMaxVersion.Update(int64(certState.v1Cert.Version()))
  414. }
  415. }
  416. }
  417. }
  418. func (f *Interface) GetHostInfo(vpnIp netip.Addr) *HostInfo {
  419. return f.hostMap.QueryVpnAddr(vpnIp)
  420. }
  421. func (f *Interface) GetCertState() *CertState {
  422. return f.pki.getCertState()
  423. }
  424. func (f *Interface) Close() error {
  425. f.closed.Store(true)
  426. // Release the udp readers
  427. for _, u := range f.writers {
  428. err := u.Close()
  429. if err != nil {
  430. f.l.WithError(err).Error("Error while closing udp socket")
  431. }
  432. }
  433. // Release the tun readers
  434. for i, u := range f.readers {
  435. err := u.Close()
  436. if err != nil {
  437. f.l.WithError(err).WithField("i", i).Error("Error while closing tun device")
  438. }
  439. }
  440. return nil
  441. }