handshake_ix.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. package nebula
  2. import (
  3. "sync/atomic"
  4. "time"
  5. "github.com/flynn/noise"
  6. "github.com/slackhq/nebula/header"
  7. "github.com/slackhq/nebula/iputil"
  8. "github.com/slackhq/nebula/udp"
  9. )
  10. // NOISE IX Handshakes
  11. // This function constructs a handshake packet, but does not actually send it
  12. // Sending is done by the handshake manager
  13. func ixHandshakeStage0(f *Interface, vpnIp iputil.VpnIp, hostinfo *HostInfo) {
  14. // This queries the lighthouse if we don't know a remote for the host
  15. // We do it here to provoke the lighthouse to preempt our timer wheel and trigger the stage 1 packet to send
  16. // more quickly, effect is a quicker handshake.
  17. if hostinfo.remote == nil {
  18. f.lightHouse.QueryServer(vpnIp, f)
  19. }
  20. err := f.handshakeManager.AddIndexHostInfo(hostinfo)
  21. if err != nil {
  22. f.l.WithError(err).WithField("vpnIp", vpnIp).
  23. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to generate index")
  24. return
  25. }
  26. ci := hostinfo.ConnectionState
  27. hsProto := &NebulaHandshakeDetails{
  28. InitiatorIndex: hostinfo.localIndexId,
  29. Time: uint64(time.Now().UnixNano()),
  30. Cert: ci.certState.rawCertificateNoKey,
  31. }
  32. hsBytes := []byte{}
  33. hs := &NebulaHandshake{
  34. Details: hsProto,
  35. }
  36. hsBytes, err = hs.Marshal()
  37. if err != nil {
  38. f.l.WithError(err).WithField("vpnIp", vpnIp).
  39. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to marshal handshake message")
  40. return
  41. }
  42. h := header.Encode(make([]byte, header.Len), header.Version, header.Handshake, header.HandshakeIXPSK0, 0, 1)
  43. atomic.AddUint64(&ci.atomicMessageCounter, 1)
  44. msg, _, _, err := ci.H.WriteMessage(h, hsBytes)
  45. if err != nil {
  46. f.l.WithError(err).WithField("vpnIp", vpnIp).
  47. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to call noise.WriteMessage")
  48. return
  49. }
  50. // We are sending handshake packet 1, so we don't expect to receive
  51. // handshake packet 1 from the responder
  52. ci.window.Update(f.l, 1)
  53. hostinfo.HandshakePacket[0] = msg
  54. hostinfo.HandshakeReady = true
  55. hostinfo.handshakeStart = time.Now()
  56. }
  57. func ixHandshakeStage1(f *Interface, addr *udp.Addr, packet []byte, h *header.H) {
  58. ci := f.newConnectionState(f.l, false, noise.HandshakeIX, []byte{}, 0)
  59. // Mark packet 1 as seen so it doesn't show up as missed
  60. ci.window.Update(f.l, 1)
  61. msg, _, _, err := ci.H.ReadMessage(nil, packet[header.Len:])
  62. if err != nil {
  63. f.l.WithError(err).WithField("udpAddr", addr).
  64. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to call noise.ReadMessage")
  65. return
  66. }
  67. hs := &NebulaHandshake{}
  68. err = hs.Unmarshal(msg)
  69. /*
  70. l.Debugln("GOT INDEX: ", hs.Details.InitiatorIndex)
  71. */
  72. if err != nil || hs.Details == nil {
  73. f.l.WithError(err).WithField("udpAddr", addr).
  74. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed unmarshal handshake message")
  75. return
  76. }
  77. remoteCert, err := RecombineCertAndValidate(ci.H, hs.Details.Cert, f.caPool)
  78. if err != nil {
  79. f.l.WithError(err).WithField("udpAddr", addr).
  80. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).WithField("cert", remoteCert).
  81. Info("Invalid certificate from host")
  82. return
  83. }
  84. vpnIp := iputil.Ip2VpnIp(remoteCert.Details.Ips[0].IP)
  85. certName := remoteCert.Details.Name
  86. fingerprint, _ := remoteCert.Sha256Sum()
  87. issuer := remoteCert.Details.Issuer
  88. if vpnIp == f.myVpnIp {
  89. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  90. WithField("certName", certName).
  91. WithField("fingerprint", fingerprint).
  92. WithField("issuer", issuer).
  93. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Refusing to handshake with myself")
  94. return
  95. }
  96. if !f.lightHouse.GetRemoteAllowList().Allow(vpnIp, addr.IP) {
  97. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
  98. return
  99. }
  100. myIndex, err := generateIndex(f.l)
  101. if err != nil {
  102. f.l.WithError(err).WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  103. WithField("certName", certName).
  104. WithField("fingerprint", fingerprint).
  105. WithField("issuer", issuer).
  106. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to generate index")
  107. return
  108. }
  109. hostinfo := &HostInfo{
  110. ConnectionState: ci,
  111. localIndexId: myIndex,
  112. remoteIndexId: hs.Details.InitiatorIndex,
  113. vpnIp: vpnIp,
  114. HandshakePacket: make(map[uint8][]byte, 0),
  115. lastHandshakeTime: hs.Details.Time,
  116. }
  117. hostinfo.Lock()
  118. defer hostinfo.Unlock()
  119. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  120. WithField("certName", certName).
  121. WithField("fingerprint", fingerprint).
  122. WithField("issuer", issuer).
  123. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  124. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  125. Info("Handshake message received")
  126. hs.Details.ResponderIndex = myIndex
  127. hs.Details.Cert = ci.certState.rawCertificateNoKey
  128. // Update the time in case their clock is way off from ours
  129. hs.Details.Time = uint64(time.Now().UnixNano())
  130. hsBytes, err := hs.Marshal()
  131. if err != nil {
  132. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  133. WithField("certName", certName).
  134. WithField("fingerprint", fingerprint).
  135. WithField("issuer", issuer).
  136. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to marshal handshake message")
  137. return
  138. }
  139. nh := header.Encode(make([]byte, header.Len), header.Version, header.Handshake, header.HandshakeIXPSK0, hs.Details.InitiatorIndex, 2)
  140. msg, dKey, eKey, err := ci.H.WriteMessage(nh, hsBytes)
  141. if err != nil {
  142. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  143. WithField("certName", certName).
  144. WithField("fingerprint", fingerprint).
  145. WithField("issuer", issuer).
  146. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to call noise.WriteMessage")
  147. return
  148. } else if dKey == nil || eKey == nil {
  149. f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  150. WithField("certName", certName).
  151. WithField("fingerprint", fingerprint).
  152. WithField("issuer", issuer).
  153. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Noise did not arrive at a key")
  154. return
  155. }
  156. hostinfo.HandshakePacket[0] = make([]byte, len(packet[header.Len:]))
  157. copy(hostinfo.HandshakePacket[0], packet[header.Len:])
  158. // Regardless of whether you are the sender or receiver, you should arrive here
  159. // and complete standing up the connection.
  160. hostinfo.HandshakePacket[2] = make([]byte, len(msg))
  161. copy(hostinfo.HandshakePacket[2], msg)
  162. // We are sending handshake packet 2, so we don't expect to receive
  163. // handshake packet 2 from the initiator.
  164. ci.window.Update(f.l, 2)
  165. ci.peerCert = remoteCert
  166. ci.dKey = NewNebulaCipherState(dKey)
  167. ci.eKey = NewNebulaCipherState(eKey)
  168. hostinfo.remotes = f.lightHouse.QueryCache(vpnIp)
  169. hostinfo.SetRemote(addr)
  170. hostinfo.CreateRemoteCIDR(remoteCert)
  171. // Only overwrite existing record if we should win the handshake race
  172. overwrite := vpnIp > f.myVpnIp
  173. existing, err := f.handshakeManager.CheckAndComplete(hostinfo, 0, overwrite, f)
  174. if err != nil {
  175. switch err {
  176. case ErrAlreadySeen:
  177. // Update remote if preferred (Note we have to switch to locking
  178. // the existing hostinfo, and then switch back so the defer Unlock
  179. // higher in this function still works)
  180. hostinfo.Unlock()
  181. existing.Lock()
  182. // Update remote if preferred
  183. if existing.SetRemoteIfPreferred(f.hostMap, addr) {
  184. // Send a test packet to ensure the other side has also switched to
  185. // the preferred remote
  186. f.SendMessageToVpnIp(header.Test, header.TestRequest, vpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  187. }
  188. existing.Unlock()
  189. hostinfo.Lock()
  190. msg = existing.HandshakePacket[2]
  191. f.messageMetrics.Tx(header.Handshake, header.MessageSubType(msg[1]), 1)
  192. err := f.outside.WriteTo(msg, addr)
  193. if err != nil {
  194. f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
  195. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
  196. WithError(err).Error("Failed to send handshake message")
  197. } else {
  198. f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
  199. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
  200. Info("Handshake message sent")
  201. }
  202. return
  203. case ErrExistingHostInfo:
  204. // This means there was an existing tunnel and this handshake was older than the one we are currently based on
  205. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  206. WithField("certName", certName).
  207. WithField("oldHandshakeTime", existing.lastHandshakeTime).
  208. WithField("newHandshakeTime", hostinfo.lastHandshakeTime).
  209. WithField("fingerprint", fingerprint).
  210. WithField("issuer", issuer).
  211. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  212. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  213. Info("Handshake too old")
  214. // Send a test packet to trigger an authenticated tunnel test, this should suss out any lingering tunnel issues
  215. f.SendMessageToVpnIp(header.Test, header.TestRequest, vpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  216. return
  217. case ErrLocalIndexCollision:
  218. // This means we failed to insert because of collision on localIndexId. Just let the next handshake packet retry
  219. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  220. WithField("certName", certName).
  221. WithField("fingerprint", fingerprint).
  222. WithField("issuer", issuer).
  223. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  224. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  225. WithField("localIndex", hostinfo.localIndexId).WithField("collision", existing.vpnIp).
  226. Error("Failed to add HostInfo due to localIndex collision")
  227. return
  228. case ErrExistingHandshake:
  229. // We have a race where both parties think they are an initiator and this tunnel lost, let the other one finish
  230. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  231. WithField("certName", certName).
  232. WithField("fingerprint", fingerprint).
  233. WithField("issuer", issuer).
  234. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  235. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  236. Error("Prevented a pending handshake race")
  237. return
  238. default:
  239. // Shouldn't happen, but just in case someone adds a new error type to CheckAndComplete
  240. // And we forget to update it here
  241. f.l.WithError(err).WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  242. WithField("certName", certName).
  243. WithField("fingerprint", fingerprint).
  244. WithField("issuer", issuer).
  245. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  246. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  247. Error("Failed to add HostInfo to HostMap")
  248. return
  249. }
  250. }
  251. // Do the send
  252. f.messageMetrics.Tx(header.Handshake, header.MessageSubType(msg[1]), 1)
  253. err = f.outside.WriteTo(msg, addr)
  254. if err != nil {
  255. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  256. WithField("certName", certName).
  257. WithField("fingerprint", fingerprint).
  258. WithField("issuer", issuer).
  259. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  260. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  261. WithError(err).Error("Failed to send handshake")
  262. } else {
  263. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  264. WithField("certName", certName).
  265. WithField("fingerprint", fingerprint).
  266. WithField("issuer", issuer).
  267. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  268. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  269. WithField("sentCachedPackets", len(hostinfo.packetStore)).
  270. Info("Handshake message sent")
  271. }
  272. hostinfo.handshakeComplete(f.l, f.cachedPacketMetrics)
  273. return
  274. }
  275. func ixHandshakeStage2(f *Interface, addr *udp.Addr, hostinfo *HostInfo, packet []byte, h *header.H) bool {
  276. if hostinfo == nil {
  277. // Nothing here to tear down, got a bogus stage 2 packet
  278. return true
  279. }
  280. hostinfo.Lock()
  281. defer hostinfo.Unlock()
  282. if !f.lightHouse.GetRemoteAllowList().Allow(hostinfo.vpnIp, addr.IP) {
  283. f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
  284. return false
  285. }
  286. ci := hostinfo.ConnectionState
  287. if ci.ready {
  288. f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  289. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("header", h).
  290. Info("Handshake is already complete")
  291. // Update remote if preferred
  292. if hostinfo.SetRemoteIfPreferred(f.hostMap, addr) {
  293. // Send a test packet to ensure the other side has also switched to
  294. // the preferred remote
  295. f.SendMessageToVpnIp(header.Test, header.TestRequest, hostinfo.vpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  296. }
  297. // We already have a complete tunnel, there is nothing that can be done by processing further stage 1 packets
  298. return false
  299. }
  300. msg, eKey, dKey, err := ci.H.ReadMessage(nil, packet[header.Len:])
  301. if err != nil {
  302. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  303. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("header", h).
  304. Error("Failed to call noise.ReadMessage")
  305. // We don't want to tear down the connection on a bad ReadMessage because it could be an attacker trying
  306. // to DOS us. Every other error condition after should to allow a possible good handshake to complete in the
  307. // near future
  308. return false
  309. } else if dKey == nil || eKey == nil {
  310. f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  311. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  312. Error("Noise did not arrive at a key")
  313. // This should be impossible in IX but just in case, if we get here then there is no chance to recover
  314. // the handshake state machine. Tear it down
  315. return true
  316. }
  317. hs := &NebulaHandshake{}
  318. err = hs.Unmarshal(msg)
  319. if err != nil || hs.Details == nil {
  320. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  321. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).Error("Failed unmarshal handshake message")
  322. // The handshake state machine is complete, if things break now there is no chance to recover. Tear down and start again
  323. return true
  324. }
  325. remoteCert, err := RecombineCertAndValidate(ci.H, hs.Details.Cert, f.caPool)
  326. if err != nil {
  327. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  328. WithField("cert", remoteCert).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  329. Error("Invalid certificate from host")
  330. // The handshake state machine is complete, if things break now there is no chance to recover. Tear down and start again
  331. return true
  332. }
  333. vpnIp := iputil.Ip2VpnIp(remoteCert.Details.Ips[0].IP)
  334. certName := remoteCert.Details.Name
  335. fingerprint, _ := remoteCert.Sha256Sum()
  336. issuer := remoteCert.Details.Issuer
  337. // Ensure the right host responded
  338. if vpnIp != hostinfo.vpnIp {
  339. f.l.WithField("intendedVpnIp", hostinfo.vpnIp).WithField("haveVpnIp", vpnIp).
  340. WithField("udpAddr", addr).WithField("certName", certName).
  341. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  342. Info("Incorrect host responded to handshake")
  343. // Release our old handshake from pending, it should not continue
  344. f.handshakeManager.pendingHostMap.DeleteHostInfo(hostinfo)
  345. // Create a new hostinfo/handshake for the intended vpn ip
  346. //TODO: this adds it to the timer wheel in a way that aggressively retries
  347. newHostInfo := f.getOrHandshake(hostinfo.vpnIp)
  348. newHostInfo.Lock()
  349. // Block the current used address
  350. newHostInfo.remotes = hostinfo.remotes
  351. newHostInfo.remotes.BlockRemote(addr)
  352. // Get the correct remote list for the host we did handshake with
  353. hostinfo.remotes = f.lightHouse.QueryCache(vpnIp)
  354. f.l.WithField("blockedUdpAddrs", newHostInfo.remotes.CopyBlockedRemotes()).WithField("vpnIp", vpnIp).
  355. WithField("remotes", newHostInfo.remotes.CopyAddrs(f.hostMap.preferredRanges)).
  356. Info("Blocked addresses for handshakes")
  357. // Swap the packet store to benefit the original intended recipient
  358. hostinfo.ConnectionState.queueLock.Lock()
  359. newHostInfo.packetStore = hostinfo.packetStore
  360. hostinfo.packetStore = []*cachedPacket{}
  361. hostinfo.ConnectionState.queueLock.Unlock()
  362. // Finally, put the correct vpn ip in the host info, tell them to close the tunnel, and return true to tear down
  363. hostinfo.vpnIp = vpnIp
  364. f.sendCloseTunnel(hostinfo)
  365. newHostInfo.Unlock()
  366. return true
  367. }
  368. // Mark packet 2 as seen so it doesn't show up as missed
  369. ci.window.Update(f.l, 2)
  370. duration := time.Since(hostinfo.handshakeStart).Nanoseconds()
  371. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  372. WithField("certName", certName).
  373. WithField("fingerprint", fingerprint).
  374. WithField("issuer", issuer).
  375. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  376. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  377. WithField("durationNs", duration).
  378. WithField("sentCachedPackets", len(hostinfo.packetStore)).
  379. Info("Handshake message received")
  380. hostinfo.remoteIndexId = hs.Details.ResponderIndex
  381. hostinfo.lastHandshakeTime = hs.Details.Time
  382. // Store their cert and our symmetric keys
  383. ci.peerCert = remoteCert
  384. ci.dKey = NewNebulaCipherState(dKey)
  385. ci.eKey = NewNebulaCipherState(eKey)
  386. // Make sure the current udpAddr being used is set for responding
  387. hostinfo.SetRemote(addr)
  388. // Build up the radix for the firewall if we have subnets in the cert
  389. hostinfo.CreateRemoteCIDR(remoteCert)
  390. // Complete our handshake and update metrics, this will replace any existing tunnels for this vpnIp
  391. //TODO: Complete here does not do a race avoidance, it will just take the new tunnel. Is this ok?
  392. f.handshakeManager.Complete(hostinfo, f)
  393. hostinfo.handshakeComplete(f.l, f.cachedPacketMetrics)
  394. f.metricHandshakes.Update(duration)
  395. return false
  396. }