interface.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net/netip"
  8. "sync"
  9. "sync/atomic"
  10. "time"
  11. "github.com/gaissmai/bart"
  12. "github.com/rcrowley/go-metrics"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/config"
  15. "github.com/slackhq/nebula/firewall"
  16. "github.com/slackhq/nebula/header"
  17. "github.com/slackhq/nebula/overlay"
  18. "github.com/slackhq/nebula/udp"
  19. )
// mtu is the size, in bytes, of the packet and output buffers that each tun
// reader routine allocates in listenIn.
const mtu = 9001
// InterfaceConfig carries the dependencies and settings that NewInterface
// copies into a new Interface.
type InterfaceConfig struct {
	HostMap *HostMap

	// Outside, Inside, pki, Firewall and connectionManager are required:
	// NewInterface returns an error when any of them is nil.
	Outside udp.Conn
	Inside  overlay.Device
	pki     *PKI

	Cipher             string
	Firewall           *Firewall
	ServeDns           bool
	HandshakeManager   *HandshakeManager
	lightHouse         *LightHouse
	connectionManager  *connectionManager
	DropLocalBroadcast bool
	DropMulticast      bool

	// routines sizes the writers and readers slices of the Interface and is
	// the number of udp and tun reader goroutines that run starts.
	routines int

	MessageMetrics *MessageMetrics
	version        string
	relayManager   *relayManager
	punchy         *Punchy

	// tryPromoteEvery, reQueryEvery and reQueryWait seed the atomic fields of
	// the same names on Interface; reloadMisc may overwrite those later.
	tryPromoteEvery uint32
	reQueryEvery    uint32
	reQueryWait     time.Duration

	ConntrackCacheTimeout time.Duration

	l *logrus.Logger
}
// Interface ties together the udp connection(s), the overlay (tun) device and
// the supporting managers, and tracks the reader goroutines started by run.
type Interface struct {
	hostMap           *HostMap
	outside           udp.Conn
	inside            overlay.Device
	pki               *PKI
	firewall          *Firewall
	connectionManager *connectionManager
	handshakeManager  *HandshakeManager
	serveDns          bool
	createTime        time.Time // set to time.Now() by NewInterface
	lightHouse        *LightHouse

	// The address lists and tables below are copied from the certificate
	// state when NewInterface runs.
	myBroadcastAddrsTable *bart.Lite
	myVpnAddrs            []netip.Addr // A list of addresses assigned to us via our certificate
	myVpnAddrsTable       *bart.Lite
	myVpnNetworks         []netip.Prefix // A list of networks assigned to us via our certificate
	myVpnNetworksTable    *bart.Lite

	dropLocalBroadcast bool
	dropMulticast      bool
	routines           int
	disconnectInvalid  atomic.Bool
	closed             atomic.Bool // set by Close; listenIn checks it before logging a read error
	relayManager       *relayManager

	tryPromoteEvery atomic.Uint32
	reQueryEvery    atomic.Uint32
	reQueryWait     atomic.Int64 // a time.Duration stored as int64

	sendRecvErrorConfig sendRecvErrorConfig

	// rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
	rebindCount int8
	version     string

	conntrackCacheTimeout time.Duration

	// writers and readers each have one slot per routine. NewInterface only
	// allocates them; readers is filled by activate, while writers is not
	// populated anywhere in this file.
	writers []udp.Conn
	readers []io.ReadWriteCloser
	wg      sync.WaitGroup // counts the goroutines started by run

	metricHandshakes    metrics.Histogram
	messageMetrics      *MessageMetrics
	cachedPacketMetrics *cachedPacketMetrics

	l *logrus.Logger
}
  83. type EncWriter interface {
  84. SendVia(via *HostInfo,
  85. relay *Relay,
  86. ad,
  87. nb,
  88. out []byte,
  89. nocopy bool,
  90. )
  91. SendMessageToVpnAddr(t header.MessageType, st header.MessageSubType, vpnAddr netip.Addr, p, nb, out []byte)
  92. SendMessageToHostInfo(t header.MessageType, st header.MessageSubType, hostinfo *HostInfo, p, nb, out []byte)
  93. Handshake(vpnAddr netip.Addr)
  94. GetHostInfo(vpnAddr netip.Addr) *HostInfo
  95. GetCertState() *CertState
  96. }
  97. type sendRecvErrorConfig uint8
  98. const (
  99. sendRecvErrorAlways sendRecvErrorConfig = iota
  100. sendRecvErrorNever
  101. sendRecvErrorPrivate
  102. )
  103. func (s sendRecvErrorConfig) ShouldSendRecvError(endpoint netip.AddrPort) bool {
  104. switch s {
  105. case sendRecvErrorPrivate:
  106. return endpoint.Addr().IsPrivate()
  107. case sendRecvErrorAlways:
  108. return true
  109. case sendRecvErrorNever:
  110. return false
  111. default:
  112. panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
  113. }
  114. }
  115. func (s sendRecvErrorConfig) String() string {
  116. switch s {
  117. case sendRecvErrorAlways:
  118. return "always"
  119. case sendRecvErrorNever:
  120. return "never"
  121. case sendRecvErrorPrivate:
  122. return "private"
  123. default:
  124. return fmt.Sprintf("invalid(%d)", s)
  125. }
  126. }
  127. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  128. if c.Outside == nil {
  129. return nil, errors.New("no outside connection")
  130. }
  131. if c.Inside == nil {
  132. return nil, errors.New("no inside interface (tun)")
  133. }
  134. if c.pki == nil {
  135. return nil, errors.New("no certificate state")
  136. }
  137. if c.Firewall == nil {
  138. return nil, errors.New("no firewall rules")
  139. }
  140. if c.connectionManager == nil {
  141. return nil, errors.New("no connection manager")
  142. }
  143. cs := c.pki.getCertState()
  144. ifce := &Interface{
  145. pki: c.pki,
  146. hostMap: c.HostMap,
  147. outside: c.Outside,
  148. inside: c.Inside,
  149. firewall: c.Firewall,
  150. serveDns: c.ServeDns,
  151. handshakeManager: c.HandshakeManager,
  152. createTime: time.Now(),
  153. lightHouse: c.lightHouse,
  154. dropLocalBroadcast: c.DropLocalBroadcast,
  155. dropMulticast: c.DropMulticast,
  156. routines: c.routines,
  157. version: c.version,
  158. writers: make([]udp.Conn, c.routines),
  159. readers: make([]io.ReadWriteCloser, c.routines),
  160. myVpnNetworks: cs.myVpnNetworks,
  161. myVpnNetworksTable: cs.myVpnNetworksTable,
  162. myVpnAddrs: cs.myVpnAddrs,
  163. myVpnAddrsTable: cs.myVpnAddrsTable,
  164. myBroadcastAddrsTable: cs.myVpnBroadcastAddrsTable,
  165. relayManager: c.relayManager,
  166. connectionManager: c.connectionManager,
  167. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  168. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  169. messageMetrics: c.MessageMetrics,
  170. cachedPacketMetrics: &cachedPacketMetrics{
  171. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  172. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  173. },
  174. l: c.l,
  175. }
  176. ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
  177. ifce.reQueryEvery.Store(c.reQueryEvery)
  178. ifce.reQueryWait.Store(int64(c.reQueryWait))
  179. ifce.connectionManager.intf = ifce
  180. return ifce, nil
  181. }
  182. // activate creates the interface on the host. After the interface is created, any
  183. // other services that want to bind listeners to its IP may do so successfully. However,
  184. // the interface isn't going to process anything until run() is called.
  185. func (f *Interface) activate() error {
  186. // actually turn on tun dev
  187. addr, err := f.outside.LocalAddr()
  188. if err != nil {
  189. f.l.WithError(err).Error("Failed to get udp listen address")
  190. }
  191. f.l.WithField("interface", f.inside.Name()).WithField("networks", f.myVpnNetworks).
  192. WithField("build", f.version).WithField("udpAddr", addr).
  193. WithField("boringcrypto", boringEnabled()).
  194. Info("Nebula interface is active")
  195. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  196. // Prepare n tun queues
  197. var reader io.ReadWriteCloser = f.inside
  198. for i := 0; i < f.routines; i++ {
  199. if i > 0 {
  200. reader, err = f.inside.NewMultiQueueReader()
  201. if err != nil {
  202. return err
  203. }
  204. }
  205. f.readers[i] = reader
  206. }
  207. if err = f.inside.Activate(); err != nil {
  208. f.inside.Close()
  209. return err
  210. }
  211. return nil
  212. }
  213. func (f *Interface) run() (func(), error) {
  214. // Launch n queues to read packets from udp
  215. for i := 0; i < f.routines; i++ {
  216. go f.listenOut(i)
  217. f.wg.Add(1)
  218. }
  219. // Launch n queues to read packets from tun dev
  220. for i := 0; i < f.routines; i++ {
  221. go f.listenIn(f.readers[i], i)
  222. f.wg.Add(1)
  223. }
  224. return f.wg.Wait, nil
  225. }
  226. func (f *Interface) listenOut(i int) {
  227. var li udp.Conn
  228. if i > 0 {
  229. li = f.writers[i]
  230. } else {
  231. li = f.outside
  232. }
  233. ctCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  234. lhh := f.lightHouse.NewRequestHandler()
  235. plaintext := make([]byte, udp.MTU)
  236. h := &header.H{}
  237. fwPacket := &firewall.Packet{}
  238. nb := make([]byte, 12, 12)
  239. li.ListenOut(func(fromUdpAddr netip.AddrPort, payload []byte) {
  240. f.readOutsidePackets(fromUdpAddr, nil, plaintext[:0], payload, h, fwPacket, lhh, nb, i, ctCache.Get(f.l))
  241. })
  242. f.l.Errorf("udp reader %v is done", i)
  243. f.wg.Done()
  244. }
  245. func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
  246. packet := make([]byte, mtu)
  247. out := make([]byte, mtu)
  248. fwPacket := &firewall.Packet{}
  249. nb := make([]byte, 12, 12)
  250. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  251. for {
  252. n, err := reader.Read(packet)
  253. if err != nil {
  254. if !f.closed.Load() {
  255. //TODO: should we close? yes
  256. f.l.WithError(err).Error("Error while reading outbound packet")
  257. }
  258. break
  259. }
  260. f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
  261. }
  262. f.l.Errorf("tun reader %v is done", i)
  263. f.wg.Done()
  264. }
  265. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  266. c.RegisterReloadCallback(f.reloadFirewall)
  267. c.RegisterReloadCallback(f.reloadSendRecvError)
  268. c.RegisterReloadCallback(f.reloadDisconnectInvalid)
  269. c.RegisterReloadCallback(f.reloadMisc)
  270. for _, udpConn := range f.writers {
  271. c.RegisterReloadCallback(udpConn.ReloadConfig)
  272. }
  273. }
  274. func (f *Interface) reloadDisconnectInvalid(c *config.C) {
  275. initial := c.InitialLoad()
  276. if initial || c.HasChanged("pki.disconnect_invalid") {
  277. f.disconnectInvalid.Store(c.GetBool("pki.disconnect_invalid", true))
  278. if !initial {
  279. f.l.Infof("pki.disconnect_invalid changed to %v", f.disconnectInvalid.Load())
  280. }
  281. }
  282. }
  283. func (f *Interface) reloadFirewall(c *config.C) {
  284. //TODO: need to trigger/detect if the certificate changed too
  285. if c.HasChanged("firewall") == false {
  286. f.l.Debug("No firewall config change detected")
  287. return
  288. }
  289. fw, err := NewFirewallFromConfig(f.l, f.pki.getCertState(), c)
  290. if err != nil {
  291. f.l.WithError(err).Error("Error while creating firewall during reload")
  292. return
  293. }
  294. oldFw := f.firewall
  295. conntrack := oldFw.Conntrack
  296. conntrack.Lock()
  297. defer conntrack.Unlock()
  298. fw.rulesVersion = oldFw.rulesVersion + 1
  299. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  300. // safe and just reset conntrack in this case.
  301. if fw.rulesVersion == 0 {
  302. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  303. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  304. WithField("rulesVersion", fw.rulesVersion).
  305. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  306. } else {
  307. fw.Conntrack = conntrack
  308. }
  309. f.firewall = fw
  310. oldFw.Destroy()
  311. f.l.WithField("firewallHashes", fw.GetRuleHashes()).
  312. WithField("oldFirewallHashes", oldFw.GetRuleHashes()).
  313. WithField("rulesVersion", fw.rulesVersion).
  314. Info("New firewall has been installed")
  315. }
  316. func (f *Interface) reloadSendRecvError(c *config.C) {
  317. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  318. stringValue := c.GetString("listen.send_recv_error", "always")
  319. switch stringValue {
  320. case "always":
  321. f.sendRecvErrorConfig = sendRecvErrorAlways
  322. case "never":
  323. f.sendRecvErrorConfig = sendRecvErrorNever
  324. case "private":
  325. f.sendRecvErrorConfig = sendRecvErrorPrivate
  326. default:
  327. if c.GetBool("listen.send_recv_error", true) {
  328. f.sendRecvErrorConfig = sendRecvErrorAlways
  329. } else {
  330. f.sendRecvErrorConfig = sendRecvErrorNever
  331. }
  332. }
  333. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  334. Info("Loaded send_recv_error config")
  335. }
  336. }
  337. func (f *Interface) reloadMisc(c *config.C) {
  338. if c.HasChanged("counters.try_promote") {
  339. n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
  340. f.tryPromoteEvery.Store(n)
  341. f.l.Info("counters.try_promote has changed")
  342. }
  343. if c.HasChanged("counters.requery_every_packets") {
  344. n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
  345. f.reQueryEvery.Store(n)
  346. f.l.Info("counters.requery_every_packets has changed")
  347. }
  348. if c.HasChanged("timers.requery_wait_duration") {
  349. n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
  350. f.reQueryWait.Store(int64(n))
  351. f.l.Info("timers.requery_wait_duration has changed")
  352. }
  353. }
  354. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  355. ticker := time.NewTicker(i)
  356. defer ticker.Stop()
  357. udpStats := udp.NewUDPStatsEmitter(f.writers)
  358. certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
  359. certInitiatingVersion := metrics.GetOrRegisterGauge("certificate.initiating_version", nil)
  360. certMaxVersion := metrics.GetOrRegisterGauge("certificate.max_version", nil)
  361. for {
  362. select {
  363. case <-ctx.Done():
  364. return
  365. case <-ticker.C:
  366. f.firewall.EmitStats()
  367. f.handshakeManager.EmitStats()
  368. udpStats()
  369. certState := f.pki.getCertState()
  370. defaultCrt := certState.GetDefaultCertificate()
  371. certExpirationGauge.Update(int64(defaultCrt.NotAfter().Sub(time.Now()) / time.Second))
  372. certInitiatingVersion.Update(int64(defaultCrt.Version()))
  373. // Report the max certificate version we are capable of using
  374. if certState.v2Cert != nil {
  375. certMaxVersion.Update(int64(certState.v2Cert.Version()))
  376. } else {
  377. certMaxVersion.Update(int64(certState.v1Cert.Version()))
  378. }
  379. }
  380. }
  381. }
// GetHostInfo returns the result of querying the host map for vpnIp.
func (f *Interface) GetHostInfo(vpnIp netip.Addr) *HostInfo {
	return f.hostMap.QueryVpnAddr(vpnIp)
}
// GetCertState returns the certificate state currently reported by the PKI.
func (f *Interface) GetCertState() *CertState {
	return f.pki.getCertState()
}
  388. func (f *Interface) Close() error {
  389. f.closed.Store(true)
  390. // Release the udp readers
  391. for _, u := range f.writers {
  392. err := u.Close()
  393. if err != nil {
  394. f.l.WithError(err).Error("Error while closing udp socket")
  395. }
  396. }
  397. // Release the tun readers
  398. for _, u := range f.readers {
  399. err := u.Close()
  400. if err != nil {
  401. f.l.WithError(err).Error("Error while closing tun device")
  402. }
  403. }
  404. return nil
  405. }