interface.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. package nebula
  2. import (
  3. "context"
  4. "crypto/fips140"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "net/netip"
  9. "os"
  10. "runtime"
  11. "sync/atomic"
  12. "time"
  13. "github.com/gaissmai/bart"
  14. "github.com/rcrowley/go-metrics"
  15. "github.com/sirupsen/logrus"
  16. "github.com/slackhq/nebula/config"
  17. "github.com/slackhq/nebula/firewall"
  18. "github.com/slackhq/nebula/header"
  19. "github.com/slackhq/nebula/overlay"
  20. "github.com/slackhq/nebula/udp"
  21. )
// mtu is the size, in bytes, of the packet and output buffers that listenIn
// allocates for each inside (tun) reader.
const mtu = 9001
// InterfaceConfig bundles the dependencies and settings that NewInterface
// copies into a new Interface.
type InterfaceConfig struct {
	HostMap *HostMap
	// Outside is required; NewInterface rejects a nil value.
	Outside udp.Conn
	// Inside is required; NewInterface rejects a nil value.
	Inside overlay.Device
	// pki is required; NewInterface reads the current certificate state from it.
	pki *PKI
	// Cipher is not read by any code visible in this file.
	Cipher string
	// Firewall is required; NewInterface rejects a nil value.
	Firewall         *Firewall
	ServeDns         bool
	HandshakeManager *HandshakeManager
	lightHouse       *LightHouse
	// connectionManager is required; NewInterface also points its intf field
	// back at the Interface it creates.
	connectionManager  *connectionManager
	DropLocalBroadcast bool
	DropMulticast      bool
	// routines sizes the writers and readers slices of the new Interface and
	// is the number of listenOut/listenIn goroutines that run starts.
	routines       int
	MessageMetrics *MessageMetrics
	version        string
	relayManager   *relayManager
	// punchy is not read by any code visible in this file.
	punchy *Punchy
	// tryPromoteEvery, reQueryEvery and reQueryWait seed the atomic fields of
	// the same names on Interface.
	tryPromoteEvery       uint32
	reQueryEvery          uint32
	reQueryWait           time.Duration
	ConntrackCacheTimeout time.Duration
	l                     *logrus.Logger
}
// Interface ties the outside UDP connection(s) to the inside overlay device
// and holds the state shared by the listenOut and listenIn goroutines that
// run starts.
type Interface struct {
	hostMap           *HostMap
	outside           udp.Conn
	inside            overlay.Device
	pki               *PKI
	firewall          *Firewall
	connectionManager *connectionManager
	handshakeManager  *HandshakeManager
	serveDns          bool
	// createTime is set to time.Now() by NewInterface.
	createTime            time.Time
	lightHouse            *LightHouse
	myBroadcastAddrsTable *bart.Lite
	myVpnAddrs            []netip.Addr // A list of addresses assigned to us via our certificate
	myVpnAddrsTable       *bart.Lite
	myVpnNetworks         []netip.Prefix // A list of networks assigned to us via our certificate
	myVpnNetworksTable    *bart.Lite
	dropLocalBroadcast    bool
	dropMulticast         bool
	// routines is the number of listenOut and listenIn goroutines run starts.
	routines int
	// disconnectInvalid mirrors the pki.disconnect_invalid setting; it is
	// written by reloadDisconnectInvalid.
	disconnectInvalid atomic.Bool
	// closed is set by Close so listenIn can return quietly when its reader
	// reports os.ErrClosed.
	closed       atomic.Bool
	relayManager *relayManager
	// tryPromoteEvery, reQueryEvery and reQueryWait are seeded by NewInterface
	// and updated by reloadMisc. reQueryWait holds a time.Duration as an int64.
	tryPromoteEvery atomic.Uint32
	reQueryEvery    atomic.Uint32
	reQueryWait     atomic.Int64
	// sendRecvErrorConfig is set by reloadSendRecvError from listen.send_recv_error.
	sendRecvErrorConfig sendRecvErrorConfig
	// rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
	rebindCount           int8
	version               string
	conntrackCacheTimeout time.Duration
	// writers is allocated with one slot per routine by NewInterface but is not
	// populated in this file. listenOut(i) uses writers[i] for i > 0 and
	// outside for i == 0.
	writers []udp.Conn
	// readers is filled in by activate: slot 0 is the inside device itself and
	// the rest come from NewMultiQueueReader.
	readers             []io.ReadWriteCloser
	metricHandshakes    metrics.Histogram
	messageMetrics      *MessageMetrics
	cachedPacketMetrics *cachedPacketMetrics
	l                   *logrus.Logger
}
  84. type EncWriter interface {
  85. SendVia(via *HostInfo,
  86. relay *Relay,
  87. ad,
  88. nb,
  89. out []byte,
  90. nocopy bool,
  91. )
  92. SendMessageToVpnAddr(t header.MessageType, st header.MessageSubType, vpnAddr netip.Addr, p, nb, out []byte)
  93. SendMessageToHostInfo(t header.MessageType, st header.MessageSubType, hostinfo *HostInfo, p, nb, out []byte)
  94. Handshake(vpnAddr netip.Addr)
  95. GetHostInfo(vpnAddr netip.Addr) *HostInfo
  96. GetCertState() *CertState
  97. }
  98. type sendRecvErrorConfig uint8
  99. const (
  100. sendRecvErrorAlways sendRecvErrorConfig = iota
  101. sendRecvErrorNever
  102. sendRecvErrorPrivate
  103. )
  104. func (s sendRecvErrorConfig) ShouldSendRecvError(endpoint netip.AddrPort) bool {
  105. switch s {
  106. case sendRecvErrorPrivate:
  107. return endpoint.Addr().IsPrivate()
  108. case sendRecvErrorAlways:
  109. return true
  110. case sendRecvErrorNever:
  111. return false
  112. default:
  113. panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
  114. }
  115. }
  116. func (s sendRecvErrorConfig) String() string {
  117. switch s {
  118. case sendRecvErrorAlways:
  119. return "always"
  120. case sendRecvErrorNever:
  121. return "never"
  122. case sendRecvErrorPrivate:
  123. return "private"
  124. default:
  125. return fmt.Sprintf("invalid(%d)", s)
  126. }
  127. }
  128. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  129. if c.Outside == nil {
  130. return nil, errors.New("no outside connection")
  131. }
  132. if c.Inside == nil {
  133. return nil, errors.New("no inside interface (tun)")
  134. }
  135. if c.pki == nil {
  136. return nil, errors.New("no certificate state")
  137. }
  138. if c.Firewall == nil {
  139. return nil, errors.New("no firewall rules")
  140. }
  141. if c.connectionManager == nil {
  142. return nil, errors.New("no connection manager")
  143. }
  144. cs := c.pki.getCertState()
  145. ifce := &Interface{
  146. pki: c.pki,
  147. hostMap: c.HostMap,
  148. outside: c.Outside,
  149. inside: c.Inside,
  150. firewall: c.Firewall,
  151. serveDns: c.ServeDns,
  152. handshakeManager: c.HandshakeManager,
  153. createTime: time.Now(),
  154. lightHouse: c.lightHouse,
  155. dropLocalBroadcast: c.DropLocalBroadcast,
  156. dropMulticast: c.DropMulticast,
  157. routines: c.routines,
  158. version: c.version,
  159. writers: make([]udp.Conn, c.routines),
  160. readers: make([]io.ReadWriteCloser, c.routines),
  161. myVpnNetworks: cs.myVpnNetworks,
  162. myVpnNetworksTable: cs.myVpnNetworksTable,
  163. myVpnAddrs: cs.myVpnAddrs,
  164. myVpnAddrsTable: cs.myVpnAddrsTable,
  165. myBroadcastAddrsTable: cs.myVpnBroadcastAddrsTable,
  166. relayManager: c.relayManager,
  167. connectionManager: c.connectionManager,
  168. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  169. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  170. messageMetrics: c.MessageMetrics,
  171. cachedPacketMetrics: &cachedPacketMetrics{
  172. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  173. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  174. },
  175. l: c.l,
  176. }
  177. ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
  178. ifce.reQueryEvery.Store(c.reQueryEvery)
  179. ifce.reQueryWait.Store(int64(c.reQueryWait))
  180. ifce.connectionManager.intf = ifce
  181. return ifce, nil
  182. }
  183. // activate creates the interface on the host. After the interface is created, any
  184. // other services that want to bind listeners to its IP may do so successfully. However,
  185. // the interface isn't going to process anything until run() is called.
  186. func (f *Interface) activate() {
  187. // actually turn on tun dev
  188. addr, err := f.outside.LocalAddr()
  189. if err != nil {
  190. f.l.WithError(err).Error("Failed to get udp listen address")
  191. }
  192. f.l.WithField("interface", f.inside.Name()).WithField("networks", f.myVpnNetworks).
  193. WithField("build", f.version).WithField("udpAddr", addr).
  194. WithField("boringcrypto", boringEnabled()).
  195. WithField("fips140", fips140.Enabled()).
  196. Info("Nebula interface is active")
  197. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  198. // Prepare n tun queues
  199. var reader io.ReadWriteCloser = f.inside
  200. for i := 0; i < f.routines; i++ {
  201. if i > 0 {
  202. reader, err = f.inside.NewMultiQueueReader()
  203. if err != nil {
  204. f.l.Fatal(err)
  205. }
  206. }
  207. f.readers[i] = reader
  208. }
  209. if err := f.inside.Activate(); err != nil {
  210. f.inside.Close()
  211. f.l.Fatal(err)
  212. }
  213. }
  214. func (f *Interface) run() {
  215. // Launch n queues to read packets from udp
  216. for i := 0; i < f.routines; i++ {
  217. go f.listenOut(i)
  218. }
  219. // Launch n queues to read packets from tun dev
  220. for i := 0; i < f.routines; i++ {
  221. go f.listenIn(f.readers[i], i)
  222. }
  223. }
  224. func (f *Interface) listenOut(i int) {
  225. runtime.LockOSThread()
  226. var li udp.Conn
  227. if i > 0 {
  228. li = f.writers[i]
  229. } else {
  230. li = f.outside
  231. }
  232. ctCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  233. lhh := f.lightHouse.NewRequestHandler()
  234. plaintext := make([]byte, udp.MTU)
  235. h := &header.H{}
  236. fwPacket := &firewall.Packet{}
  237. nb := make([]byte, 12, 12)
  238. li.ListenOut(func(fromUdpAddr netip.AddrPort, payload []byte) {
  239. f.readOutsidePackets(fromUdpAddr, nil, plaintext[:0], payload, h, fwPacket, lhh, nb, i, ctCache.Get(f.l))
  240. })
  241. }
  242. func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
  243. runtime.LockOSThread()
  244. packet := make([]byte, mtu)
  245. out := make([]byte, mtu)
  246. fwPacket := &firewall.Packet{}
  247. nb := make([]byte, 12, 12)
  248. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  249. for {
  250. n, err := reader.Read(packet)
  251. if err != nil {
  252. if errors.Is(err, os.ErrClosed) && f.closed.Load() {
  253. return
  254. }
  255. f.l.WithError(err).Error("Error while reading outbound packet")
  256. // This only seems to happen when something fatal happens to the fd, so exit.
  257. os.Exit(2)
  258. }
  259. f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
  260. }
  261. }
  262. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  263. c.RegisterReloadCallback(f.reloadFirewall)
  264. c.RegisterReloadCallback(f.reloadSendRecvError)
  265. c.RegisterReloadCallback(f.reloadDisconnectInvalid)
  266. c.RegisterReloadCallback(f.reloadMisc)
  267. for _, udpConn := range f.writers {
  268. c.RegisterReloadCallback(udpConn.ReloadConfig)
  269. }
  270. }
  271. func (f *Interface) reloadDisconnectInvalid(c *config.C) {
  272. initial := c.InitialLoad()
  273. if initial || c.HasChanged("pki.disconnect_invalid") {
  274. f.disconnectInvalid.Store(c.GetBool("pki.disconnect_invalid", true))
  275. if !initial {
  276. f.l.Infof("pki.disconnect_invalid changed to %v", f.disconnectInvalid.Load())
  277. }
  278. }
  279. }
  280. func (f *Interface) reloadFirewall(c *config.C) {
  281. //TODO: need to trigger/detect if the certificate changed too
  282. if c.HasChanged("firewall") == false {
  283. f.l.Debug("No firewall config change detected")
  284. return
  285. }
  286. fw, err := NewFirewallFromConfig(f.l, f.pki.getCertState(), c)
  287. if err != nil {
  288. f.l.WithError(err).Error("Error while creating firewall during reload")
  289. return
  290. }
  291. oldFw := f.firewall
  292. conntrack := oldFw.Conntrack
  293. conntrack.Lock()
  294. defer conntrack.Unlock()
  295. fw.rulesVersion = oldFw.rulesVersion + 1
  296. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  297. // safe and just reset conntrack in this case.
  298. if fw.rulesVersion == 0 {
  299. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  300. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  301. WithField("rulesVersion", fw.rulesVersion).
  302. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  303. } else {
  304. fw.Conntrack = conntrack
  305. }
  306. f.firewall = fw
  307. oldFw.Destroy()
  308. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  309. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  310. WithField("rulesVersion", fw.rulesVersion).
  311. Info("New firewall has been installed")
  312. }
  313. func (f *Interface) reloadSendRecvError(c *config.C) {
  314. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  315. stringValue := c.GetString("listen.send_recv_error", "always")
  316. switch stringValue {
  317. case "always":
  318. f.sendRecvErrorConfig = sendRecvErrorAlways
  319. case "never":
  320. f.sendRecvErrorConfig = sendRecvErrorNever
  321. case "private":
  322. f.sendRecvErrorConfig = sendRecvErrorPrivate
  323. default:
  324. if c.GetBool("listen.send_recv_error", true) {
  325. f.sendRecvErrorConfig = sendRecvErrorAlways
  326. } else {
  327. f.sendRecvErrorConfig = sendRecvErrorNever
  328. }
  329. }
  330. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  331. Info("Loaded send_recv_error config")
  332. }
  333. }
  334. func (f *Interface) reloadMisc(c *config.C) {
  335. if c.HasChanged("counters.try_promote") {
  336. n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
  337. f.tryPromoteEvery.Store(n)
  338. f.l.Info("counters.try_promote has changed")
  339. }
  340. if c.HasChanged("counters.requery_every_packets") {
  341. n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
  342. f.reQueryEvery.Store(n)
  343. f.l.Info("counters.requery_every_packets has changed")
  344. }
  345. if c.HasChanged("timers.requery_wait_duration") {
  346. n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
  347. f.reQueryWait.Store(int64(n))
  348. f.l.Info("timers.requery_wait_duration has changed")
  349. }
  350. }
  351. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  352. ticker := time.NewTicker(i)
  353. defer ticker.Stop()
  354. udpStats := udp.NewUDPStatsEmitter(f.writers)
  355. certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
  356. certInitiatingVersion := metrics.GetOrRegisterGauge("certificate.initiating_version", nil)
  357. certMaxVersion := metrics.GetOrRegisterGauge("certificate.max_version", nil)
  358. for {
  359. select {
  360. case <-ctx.Done():
  361. return
  362. case <-ticker.C:
  363. f.firewall.EmitStats()
  364. f.handshakeManager.EmitStats()
  365. udpStats()
  366. certState := f.pki.getCertState()
  367. defaultCrt := certState.GetDefaultCertificate()
  368. certExpirationGauge.Update(int64(defaultCrt.NotAfter().Sub(time.Now()) / time.Second))
  369. certInitiatingVersion.Update(int64(defaultCrt.Version()))
  370. // Report the max certificate version we are capable of using
  371. if certState.v2Cert != nil {
  372. certMaxVersion.Update(int64(certState.v2Cert.Version()))
  373. } else {
  374. certMaxVersion.Update(int64(certState.v1Cert.Version()))
  375. }
  376. }
  377. }
  378. }
// GetHostInfo returns the result of querying the host map for vpnIp.
func (f *Interface) GetHostInfo(vpnIp netip.Addr) *HostInfo {
	return f.hostMap.QueryVpnAddr(vpnIp)
}
// GetCertState returns the certificate state currently held by the
// interface's PKI.
func (f *Interface) GetCertState() *CertState {
	return f.pki.getCertState()
}
  385. func (f *Interface) Close() error {
  386. f.closed.Store(true)
  387. for _, u := range f.writers {
  388. err := u.Close()
  389. if err != nil {
  390. f.l.WithError(err).Error("Error while closing udp socket")
  391. }
  392. }
  393. // Release the tun device
  394. return f.inside.Close()
  395. }