interface.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net"
  8. "os"
  9. "runtime"
  10. "sync/atomic"
  11. "time"
  12. "github.com/rcrowley/go-metrics"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/cert"
  15. "github.com/slackhq/nebula/config"
  16. "github.com/slackhq/nebula/firewall"
  17. "github.com/slackhq/nebula/header"
  18. "github.com/slackhq/nebula/iputil"
  19. "github.com/slackhq/nebula/overlay"
  20. "github.com/slackhq/nebula/udp"
  21. )
  22. const mtu = 9001
  23. type InterfaceConfig struct {
  24. HostMap *HostMap
  25. Outside udp.Conn
  26. Inside overlay.Device
  27. certState *CertState
  28. Cipher string
  29. Firewall *Firewall
  30. ServeDns bool
  31. HandshakeManager *HandshakeManager
  32. lightHouse *LightHouse
  33. checkInterval time.Duration
  34. pendingDeletionInterval time.Duration
  35. DropLocalBroadcast bool
  36. DropMulticast bool
  37. routines int
  38. MessageMetrics *MessageMetrics
  39. version string
  40. caPool *cert.NebulaCAPool
  41. disconnectInvalid bool
  42. relayManager *relayManager
  43. punchy *Punchy
  44. tryPromoteEvery uint32
  45. reQueryEvery uint32
  46. reQueryWait time.Duration
  47. ConntrackCacheTimeout time.Duration
  48. l *logrus.Logger
  49. }
  50. type Interface struct {
  51. hostMap *HostMap
  52. outside udp.Conn
  53. inside overlay.Device
  54. certState atomic.Pointer[CertState]
  55. cipher string
  56. firewall *Firewall
  57. connectionManager *connectionManager
  58. handshakeManager *HandshakeManager
  59. serveDns bool
  60. createTime time.Time
  61. lightHouse *LightHouse
  62. localBroadcast iputil.VpnIp
  63. myVpnIp iputil.VpnIp
  64. dropLocalBroadcast bool
  65. dropMulticast bool
  66. routines int
  67. caPool *cert.NebulaCAPool
  68. disconnectInvalid bool
  69. closed atomic.Bool
  70. relayManager *relayManager
  71. tryPromoteEvery atomic.Uint32
  72. reQueryEvery atomic.Uint32
  73. reQueryWait atomic.Int64
  74. sendRecvErrorConfig sendRecvErrorConfig
  75. // rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
  76. rebindCount int8
  77. version string
  78. conntrackCacheTimeout time.Duration
  79. writers []udp.Conn
  80. readers []io.ReadWriteCloser
  81. metricHandshakes metrics.Histogram
  82. messageMetrics *MessageMetrics
  83. cachedPacketMetrics *cachedPacketMetrics
  84. l *logrus.Logger
  85. }
  86. type EncWriter interface {
  87. SendVia(via *HostInfo,
  88. relay *Relay,
  89. ad,
  90. nb,
  91. out []byte,
  92. nocopy bool,
  93. )
  94. SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp iputil.VpnIp, p, nb, out []byte)
  95. SendMessageToHostInfo(t header.MessageType, st header.MessageSubType, hostinfo *HostInfo, p, nb, out []byte)
  96. Handshake(vpnIp iputil.VpnIp)
  97. }
  98. type sendRecvErrorConfig uint8
  99. const (
  100. sendRecvErrorAlways sendRecvErrorConfig = iota
  101. sendRecvErrorNever
  102. sendRecvErrorPrivate
  103. )
  104. func (s sendRecvErrorConfig) ShouldSendRecvError(ip net.IP) bool {
  105. switch s {
  106. case sendRecvErrorPrivate:
  107. return ip.IsPrivate()
  108. case sendRecvErrorAlways:
  109. return true
  110. case sendRecvErrorNever:
  111. return false
  112. default:
  113. panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
  114. }
  115. }
  116. func (s sendRecvErrorConfig) String() string {
  117. switch s {
  118. case sendRecvErrorAlways:
  119. return "always"
  120. case sendRecvErrorNever:
  121. return "never"
  122. case sendRecvErrorPrivate:
  123. return "private"
  124. default:
  125. return fmt.Sprintf("invalid(%d)", s)
  126. }
  127. }
  128. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  129. if c.Outside == nil {
  130. return nil, errors.New("no outside connection")
  131. }
  132. if c.Inside == nil {
  133. return nil, errors.New("no inside interface (tun)")
  134. }
  135. if c.certState == nil {
  136. return nil, errors.New("no certificate state")
  137. }
  138. if c.Firewall == nil {
  139. return nil, errors.New("no firewall rules")
  140. }
  141. myVpnIp := iputil.Ip2VpnIp(c.certState.certificate.Details.Ips[0].IP)
  142. ifce := &Interface{
  143. hostMap: c.HostMap,
  144. outside: c.Outside,
  145. inside: c.Inside,
  146. cipher: c.Cipher,
  147. firewall: c.Firewall,
  148. serveDns: c.ServeDns,
  149. handshakeManager: c.HandshakeManager,
  150. createTime: time.Now(),
  151. lightHouse: c.lightHouse,
  152. localBroadcast: myVpnIp | ^iputil.Ip2VpnIp(c.certState.certificate.Details.Ips[0].Mask),
  153. dropLocalBroadcast: c.DropLocalBroadcast,
  154. dropMulticast: c.DropMulticast,
  155. routines: c.routines,
  156. version: c.version,
  157. writers: make([]udp.Conn, c.routines),
  158. readers: make([]io.ReadWriteCloser, c.routines),
  159. caPool: c.caPool,
  160. disconnectInvalid: c.disconnectInvalid,
  161. myVpnIp: myVpnIp,
  162. relayManager: c.relayManager,
  163. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  164. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  165. messageMetrics: c.MessageMetrics,
  166. cachedPacketMetrics: &cachedPacketMetrics{
  167. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  168. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  169. },
  170. l: c.l,
  171. }
  172. ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
  173. ifce.reQueryEvery.Store(c.reQueryEvery)
  174. ifce.reQueryWait.Store(int64(c.reQueryWait))
  175. ifce.certState.Store(c.certState)
  176. ifce.connectionManager = newConnectionManager(ctx, c.l, ifce, c.checkInterval, c.pendingDeletionInterval, c.punchy)
  177. return ifce, nil
  178. }
  179. // activate creates the interface on the host. After the interface is created, any
  180. // other services that want to bind listeners to its IP may do so successfully. However,
  181. // the interface isn't going to process anything until run() is called.
  182. func (f *Interface) activate() {
  183. // actually turn on tun dev
  184. addr, err := f.outside.LocalAddr()
  185. if err != nil {
  186. f.l.WithError(err).Error("Failed to get udp listen address")
  187. }
  188. f.l.WithField("interface", f.inside.Name()).WithField("network", f.inside.Cidr().String()).
  189. WithField("build", f.version).WithField("udpAddr", addr).
  190. WithField("boringcrypto", boringEnabled()).
  191. Info("Nebula interface is active")
  192. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  193. // Prepare n tun queues
  194. var reader io.ReadWriteCloser = f.inside
  195. for i := 0; i < f.routines; i++ {
  196. if i > 0 {
  197. reader, err = f.inside.NewMultiQueueReader()
  198. if err != nil {
  199. f.l.Fatal(err)
  200. }
  201. }
  202. f.readers[i] = reader
  203. }
  204. if err := f.inside.Activate(); err != nil {
  205. f.inside.Close()
  206. f.l.Fatal(err)
  207. }
  208. }
  209. func (f *Interface) run() {
  210. // Launch n queues to read packets from udp
  211. for i := 0; i < f.routines; i++ {
  212. go f.listenOut(i)
  213. }
  214. // Launch n queues to read packets from tun dev
  215. for i := 0; i < f.routines; i++ {
  216. go f.listenIn(f.readers[i], i)
  217. }
  218. }
  219. func (f *Interface) listenOut(i int) {
  220. runtime.LockOSThread()
  221. var li udp.Conn
  222. // TODO clean this up with a coherent interface for each outside connection
  223. if i > 0 {
  224. li = f.writers[i]
  225. } else {
  226. li = f.outside
  227. }
  228. lhh := f.lightHouse.NewRequestHandler()
  229. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  230. li.ListenOut(readOutsidePackets(f), lhHandleRequest(lhh, f), conntrackCache, i)
  231. }
  232. func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
  233. runtime.LockOSThread()
  234. packet := make([]byte, mtu)
  235. out := make([]byte, mtu)
  236. fwPacket := &firewall.Packet{}
  237. nb := make([]byte, 12, 12)
  238. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  239. for {
  240. n, err := reader.Read(packet)
  241. if err != nil {
  242. if errors.Is(err, os.ErrClosed) && f.closed.Load() {
  243. return
  244. }
  245. f.l.WithError(err).Error("Error while reading outbound packet")
  246. // This only seems to happen when something fatal happens to the fd, so exit.
  247. os.Exit(2)
  248. }
  249. f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
  250. }
  251. }
  252. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  253. c.RegisterReloadCallback(f.reloadCA)
  254. c.RegisterReloadCallback(f.reloadCertKey)
  255. c.RegisterReloadCallback(f.reloadFirewall)
  256. c.RegisterReloadCallback(f.reloadSendRecvError)
  257. c.RegisterReloadCallback(f.reloadMisc)
  258. for _, udpConn := range f.writers {
  259. c.RegisterReloadCallback(udpConn.ReloadConfig)
  260. }
  261. }
  262. func (f *Interface) reloadCA(c *config.C) {
  263. // reload and check regardless
  264. // todo: need mutex?
  265. newCAs, err := loadCAFromConfig(f.l, c)
  266. if err != nil {
  267. f.l.WithError(err).Error("Could not refresh trusted CA certificates")
  268. return
  269. }
  270. f.caPool = newCAs
  271. f.l.WithField("fingerprints", f.caPool.GetFingerprints()).Info("Trusted CA certificates refreshed")
  272. }
  273. func (f *Interface) reloadCertKey(c *config.C) {
  274. // reload and check in all cases
  275. cs, err := NewCertStateFromConfig(c)
  276. if err != nil {
  277. f.l.WithError(err).Error("Could not refresh client cert")
  278. return
  279. }
  280. // did IP in cert change? if so, don't set
  281. currentCert := f.certState.Load().certificate
  282. oldIPs := currentCert.Details.Ips
  283. newIPs := cs.certificate.Details.Ips
  284. if len(oldIPs) > 0 && len(newIPs) > 0 && oldIPs[0].String() != newIPs[0].String() {
  285. f.l.WithField("new_ip", newIPs[0]).WithField("old_ip", oldIPs[0]).Error("IP in new cert was different from old")
  286. return
  287. }
  288. f.certState.Store(cs)
  289. f.l.WithField("cert", cs.certificate).Info("Client cert refreshed from disk")
  290. }
  291. func (f *Interface) reloadFirewall(c *config.C) {
  292. //TODO: need to trigger/detect if the certificate changed too
  293. if c.HasChanged("firewall") == false {
  294. f.l.Debug("No firewall config change detected")
  295. return
  296. }
  297. fw, err := NewFirewallFromConfig(f.l, f.certState.Load().certificate, c)
  298. if err != nil {
  299. f.l.WithError(err).Error("Error while creating firewall during reload")
  300. return
  301. }
  302. oldFw := f.firewall
  303. conntrack := oldFw.Conntrack
  304. conntrack.Lock()
  305. defer conntrack.Unlock()
  306. fw.rulesVersion = oldFw.rulesVersion + 1
  307. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  308. // safe and just reset conntrack in this case.
  309. if fw.rulesVersion == 0 {
  310. f.l.WithField("firewallHash", fw.GetRuleHash()).
  311. WithField("oldFirewallHash", oldFw.GetRuleHash()).
  312. WithField("rulesVersion", fw.rulesVersion).
  313. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  314. } else {
  315. fw.Conntrack = conntrack
  316. }
  317. f.firewall = fw
  318. oldFw.Destroy()
  319. f.l.WithField("firewallHash", fw.GetRuleHash()).
  320. WithField("oldFirewallHash", oldFw.GetRuleHash()).
  321. WithField("rulesVersion", fw.rulesVersion).
  322. Info("New firewall has been installed")
  323. }
  324. func (f *Interface) reloadSendRecvError(c *config.C) {
  325. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  326. stringValue := c.GetString("listen.send_recv_error", "always")
  327. switch stringValue {
  328. case "always":
  329. f.sendRecvErrorConfig = sendRecvErrorAlways
  330. case "never":
  331. f.sendRecvErrorConfig = sendRecvErrorNever
  332. case "private":
  333. f.sendRecvErrorConfig = sendRecvErrorPrivate
  334. default:
  335. if c.GetBool("listen.send_recv_error", true) {
  336. f.sendRecvErrorConfig = sendRecvErrorAlways
  337. } else {
  338. f.sendRecvErrorConfig = sendRecvErrorNever
  339. }
  340. }
  341. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  342. Info("Loaded send_recv_error config")
  343. }
  344. }
  345. func (f *Interface) reloadMisc(c *config.C) {
  346. if c.HasChanged("counters.try_promote") {
  347. n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
  348. f.tryPromoteEvery.Store(n)
  349. f.l.Info("counters.try_promote has changed")
  350. }
  351. if c.HasChanged("counters.requery_every_packets") {
  352. n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
  353. f.reQueryEvery.Store(n)
  354. f.l.Info("counters.requery_every_packets has changed")
  355. }
  356. if c.HasChanged("timers.requery_wait_duration") {
  357. n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
  358. f.reQueryWait.Store(int64(n))
  359. f.l.Info("timers.requery_wait_duration has changed")
  360. }
  361. }
  362. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  363. ticker := time.NewTicker(i)
  364. defer ticker.Stop()
  365. udpStats := udp.NewUDPStatsEmitter(f.writers)
  366. certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
  367. for {
  368. select {
  369. case <-ctx.Done():
  370. return
  371. case <-ticker.C:
  372. f.firewall.EmitStats()
  373. f.handshakeManager.EmitStats()
  374. udpStats()
  375. certExpirationGauge.Update(int64(f.certState.Load().certificate.Details.NotAfter.Sub(time.Now()) / time.Second))
  376. }
  377. }
  378. }
  379. func (f *Interface) Close() error {
  380. f.closed.Store(true)
  381. for _, u := range f.writers {
  382. err := u.Close()
  383. if err != nil {
  384. f.l.WithError(err).Error("Error while closing udp socket")
  385. }
  386. }
  387. // Release the tun device
  388. return f.inside.Close()
  389. }