interface.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net"
  8. "os"
  9. "runtime"
  10. "sync/atomic"
  11. "time"
  12. "github.com/rcrowley/go-metrics"
  13. "github.com/sirupsen/logrus"
  14. "github.com/slackhq/nebula/cert"
  15. "github.com/slackhq/nebula/config"
  16. "github.com/slackhq/nebula/firewall"
  17. "github.com/slackhq/nebula/iputil"
  18. "github.com/slackhq/nebula/overlay"
  19. "github.com/slackhq/nebula/udp"
  20. )
  // mtu is the length, in bytes, of the packet and output buffers that
  // listenIn allocates for every tun reader.
  const (
  	mtu = 9001
  )
  22. type InterfaceConfig struct {
  23. HostMap *HostMap
  24. Outside *udp.Conn
  25. Inside overlay.Device
  26. certState *CertState
  27. Cipher string
  28. Firewall *Firewall
  29. ServeDns bool
  30. HandshakeManager *HandshakeManager
  31. lightHouse *LightHouse
  32. checkInterval time.Duration
  33. pendingDeletionInterval time.Duration
  34. DropLocalBroadcast bool
  35. DropMulticast bool
  36. routines int
  37. MessageMetrics *MessageMetrics
  38. version string
  39. caPool *cert.NebulaCAPool
  40. disconnectInvalid bool
  41. relayManager *relayManager
  42. punchy *Punchy
  43. ConntrackCacheTimeout time.Duration
  44. l *logrus.Logger
  45. }
  46. type Interface struct {
  47. hostMap *HostMap
  48. outside *udp.Conn
  49. inside overlay.Device
  50. certState atomic.Pointer[CertState]
  51. cipher string
  52. firewall *Firewall
  53. connectionManager *connectionManager
  54. handshakeManager *HandshakeManager
  55. serveDns bool
  56. createTime time.Time
  57. lightHouse *LightHouse
  58. localBroadcast iputil.VpnIp
  59. myVpnIp iputil.VpnIp
  60. dropLocalBroadcast bool
  61. dropMulticast bool
  62. routines int
  63. caPool *cert.NebulaCAPool
  64. disconnectInvalid bool
  65. closed atomic.Bool
  66. relayManager *relayManager
  67. sendRecvErrorConfig sendRecvErrorConfig
  68. // rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
  69. rebindCount int8
  70. version string
  71. conntrackCacheTimeout time.Duration
  72. writers []*udp.Conn
  73. readers []io.ReadWriteCloser
  74. metricHandshakes metrics.Histogram
  75. messageMetrics *MessageMetrics
  76. cachedPacketMetrics *cachedPacketMetrics
  77. l *logrus.Logger
  78. }
  // sendRecvErrorConfig selects the policy applied by ShouldSendRecvError.
  type sendRecvErrorConfig uint8

  // The policies known to sendRecvErrorConfig; sendRecvErrorAlways is the zero
  // value.
  const (
  	sendRecvErrorAlways sendRecvErrorConfig = iota
  	sendRecvErrorNever
  	sendRecvErrorPrivate
  )

  // ShouldSendRecvError applies the policy s to ip: "always" yields true,
  // "never" yields false, and "private" yields ip.IsPrivate(). It panics when
  // s is not one of the declared policies.
  func (s sendRecvErrorConfig) ShouldSendRecvError(ip net.IP) bool {
  	if s == sendRecvErrorAlways {
  		return true
  	}
  	if s == sendRecvErrorNever {
  		return false
  	}
  	if s == sendRecvErrorPrivate {
  		return ip.IsPrivate()
  	}
  	panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
  }

  // String returns the configuration keyword for s ("always", "never" or
  // "private"), or "invalid(N)" for any other value.
  func (s sendRecvErrorConfig) String() string {
  	keywords := [...]string{
  		sendRecvErrorAlways:  "always",
  		sendRecvErrorNever:   "never",
  		sendRecvErrorPrivate: "private",
  	}
  	if int(s) < len(keywords) {
  		return keywords[s]
  	}
  	return fmt.Sprintf("invalid(%d)", s)
  }
  109. func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
  110. if c.Outside == nil {
  111. return nil, errors.New("no outside connection")
  112. }
  113. if c.Inside == nil {
  114. return nil, errors.New("no inside interface (tun)")
  115. }
  116. if c.certState == nil {
  117. return nil, errors.New("no certificate state")
  118. }
  119. if c.Firewall == nil {
  120. return nil, errors.New("no firewall rules")
  121. }
  122. myVpnIp := iputil.Ip2VpnIp(c.certState.certificate.Details.Ips[0].IP)
  123. ifce := &Interface{
  124. hostMap: c.HostMap,
  125. outside: c.Outside,
  126. inside: c.Inside,
  127. cipher: c.Cipher,
  128. firewall: c.Firewall,
  129. serveDns: c.ServeDns,
  130. handshakeManager: c.HandshakeManager,
  131. createTime: time.Now(),
  132. lightHouse: c.lightHouse,
  133. localBroadcast: myVpnIp | ^iputil.Ip2VpnIp(c.certState.certificate.Details.Ips[0].Mask),
  134. dropLocalBroadcast: c.DropLocalBroadcast,
  135. dropMulticast: c.DropMulticast,
  136. routines: c.routines,
  137. version: c.version,
  138. writers: make([]*udp.Conn, c.routines),
  139. readers: make([]io.ReadWriteCloser, c.routines),
  140. caPool: c.caPool,
  141. disconnectInvalid: c.disconnectInvalid,
  142. myVpnIp: myVpnIp,
  143. relayManager: c.relayManager,
  144. conntrackCacheTimeout: c.ConntrackCacheTimeout,
  145. metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
  146. messageMetrics: c.MessageMetrics,
  147. cachedPacketMetrics: &cachedPacketMetrics{
  148. sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
  149. dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
  150. },
  151. l: c.l,
  152. }
  153. ifce.certState.Store(c.certState)
  154. ifce.connectionManager = newConnectionManager(ctx, c.l, ifce, c.checkInterval, c.pendingDeletionInterval, c.punchy)
  155. return ifce, nil
  156. }
  157. // activate creates the interface on the host. After the interface is created, any
  158. // other services that want to bind listeners to its IP may do so successfully. However,
  159. // the interface isn't going to process anything until run() is called.
  160. func (f *Interface) activate() {
  161. // actually turn on tun dev
  162. addr, err := f.outside.LocalAddr()
  163. if err != nil {
  164. f.l.WithError(err).Error("Failed to get udp listen address")
  165. }
  166. f.l.WithField("interface", f.inside.Name()).WithField("network", f.inside.Cidr().String()).
  167. WithField("build", f.version).WithField("udpAddr", addr).
  168. Info("Nebula interface is active")
  169. metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
  170. // Prepare n tun queues
  171. var reader io.ReadWriteCloser = f.inside
  172. for i := 0; i < f.routines; i++ {
  173. if i > 0 {
  174. reader, err = f.inside.NewMultiQueueReader()
  175. if err != nil {
  176. f.l.Fatal(err)
  177. }
  178. }
  179. f.readers[i] = reader
  180. }
  181. if err := f.inside.Activate(); err != nil {
  182. f.inside.Close()
  183. f.l.Fatal(err)
  184. }
  185. }
  186. func (f *Interface) run() {
  187. // Launch n queues to read packets from udp
  188. for i := 0; i < f.routines; i++ {
  189. go f.listenOut(i)
  190. }
  191. // Launch n queues to read packets from tun dev
  192. for i := 0; i < f.routines; i++ {
  193. go f.listenIn(f.readers[i], i)
  194. }
  195. }
  196. func (f *Interface) listenOut(i int) {
  197. runtime.LockOSThread()
  198. var li *udp.Conn
  199. // TODO clean this up with a coherent interface for each outside connection
  200. if i > 0 {
  201. li = f.writers[i]
  202. } else {
  203. li = f.outside
  204. }
  205. lhh := f.lightHouse.NewRequestHandler()
  206. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  207. li.ListenOut(f.readOutsidePackets, lhh.HandleRequest, conntrackCache, i)
  208. }
  209. func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
  210. runtime.LockOSThread()
  211. packet := make([]byte, mtu)
  212. out := make([]byte, mtu)
  213. fwPacket := &firewall.Packet{}
  214. nb := make([]byte, 12, 12)
  215. conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
  216. for {
  217. n, err := reader.Read(packet)
  218. if err != nil {
  219. if errors.Is(err, os.ErrClosed) && f.closed.Load() {
  220. return
  221. }
  222. f.l.WithError(err).Error("Error while reading outbound packet")
  223. // This only seems to happen when something fatal happens to the fd, so exit.
  224. os.Exit(2)
  225. }
  226. f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
  227. }
  228. }
  229. func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
  230. c.RegisterReloadCallback(f.reloadCA)
  231. c.RegisterReloadCallback(f.reloadCertKey)
  232. c.RegisterReloadCallback(f.reloadFirewall)
  233. c.RegisterReloadCallback(f.reloadSendRecvError)
  234. for _, udpConn := range f.writers {
  235. c.RegisterReloadCallback(udpConn.ReloadConfig)
  236. }
  237. }
  238. func (f *Interface) reloadCA(c *config.C) {
  239. // reload and check regardless
  240. // todo: need mutex?
  241. newCAs, err := loadCAFromConfig(f.l, c)
  242. if err != nil {
  243. f.l.WithError(err).Error("Could not refresh trusted CA certificates")
  244. return
  245. }
  246. f.caPool = newCAs
  247. f.l.WithField("fingerprints", f.caPool.GetFingerprints()).Info("Trusted CA certificates refreshed")
  248. }
  249. func (f *Interface) reloadCertKey(c *config.C) {
  250. // reload and check in all cases
  251. cs, err := NewCertStateFromConfig(c)
  252. if err != nil {
  253. f.l.WithError(err).Error("Could not refresh client cert")
  254. return
  255. }
  256. // did IP in cert change? if so, don't set
  257. currentCert := f.certState.Load().certificate
  258. oldIPs := currentCert.Details.Ips
  259. newIPs := cs.certificate.Details.Ips
  260. if len(oldIPs) > 0 && len(newIPs) > 0 && oldIPs[0].String() != newIPs[0].String() {
  261. f.l.WithField("new_ip", newIPs[0]).WithField("old_ip", oldIPs[0]).Error("IP in new cert was different from old")
  262. return
  263. }
  264. f.certState.Store(cs)
  265. f.l.WithField("cert", cs.certificate).Info("Client cert refreshed from disk")
  266. }
  267. func (f *Interface) reloadFirewall(c *config.C) {
  268. //TODO: need to trigger/detect if the certificate changed too
  269. if c.HasChanged("firewall") == false {
  270. f.l.Debug("No firewall config change detected")
  271. return
  272. }
  273. fw, err := NewFirewallFromConfig(f.l, f.certState.Load().certificate, c)
  274. if err != nil {
  275. f.l.WithError(err).Error("Error while creating firewall during reload")
  276. return
  277. }
  278. oldFw := f.firewall
  279. conntrack := oldFw.Conntrack
  280. conntrack.Lock()
  281. defer conntrack.Unlock()
  282. fw.rulesVersion = oldFw.rulesVersion + 1
  283. // If rulesVersion is back to zero, we have wrapped all the way around. Be
  284. // safe and just reset conntrack in this case.
  285. if fw.rulesVersion == 0 {
  286. f.l.WithField("firewallHash", fw.GetRuleHash()).
  287. WithField("oldFirewallHash", oldFw.GetRuleHash()).
  288. WithField("rulesVersion", fw.rulesVersion).
  289. Warn("firewall rulesVersion has overflowed, resetting conntrack")
  290. } else {
  291. fw.Conntrack = conntrack
  292. }
  293. f.firewall = fw
  294. oldFw.Destroy()
  295. f.l.WithField("firewallHash", fw.GetRuleHash()).
  296. WithField("oldFirewallHash", oldFw.GetRuleHash()).
  297. WithField("rulesVersion", fw.rulesVersion).
  298. Info("New firewall has been installed")
  299. }
  300. func (f *Interface) reloadSendRecvError(c *config.C) {
  301. if c.InitialLoad() || c.HasChanged("listen.send_recv_error") {
  302. stringValue := c.GetString("listen.send_recv_error", "always")
  303. switch stringValue {
  304. case "always":
  305. f.sendRecvErrorConfig = sendRecvErrorAlways
  306. case "never":
  307. f.sendRecvErrorConfig = sendRecvErrorNever
  308. case "private":
  309. f.sendRecvErrorConfig = sendRecvErrorPrivate
  310. default:
  311. if c.GetBool("listen.send_recv_error", true) {
  312. f.sendRecvErrorConfig = sendRecvErrorAlways
  313. } else {
  314. f.sendRecvErrorConfig = sendRecvErrorNever
  315. }
  316. }
  317. f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
  318. Info("Loaded send_recv_error config")
  319. }
  320. }
  321. func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
  322. ticker := time.NewTicker(i)
  323. defer ticker.Stop()
  324. udpStats := udp.NewUDPStatsEmitter(f.writers)
  325. for {
  326. select {
  327. case <-ctx.Done():
  328. return
  329. case <-ticker.C:
  330. f.firewall.EmitStats()
  331. f.handshakeManager.EmitStats()
  332. udpStats()
  333. }
  334. }
  335. }
  336. func (f *Interface) Close() error {
  337. f.closed.Store(true)
  338. // Release the tun device
  339. return f.inside.Close()
  340. }