handshake_ix.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. package nebula
  2. import (
  3. "sync/atomic"
  4. "time"
  5. "github.com/flynn/noise"
  6. "github.com/golang/protobuf/proto"
  7. "github.com/slackhq/nebula/header"
  8. "github.com/slackhq/nebula/iputil"
  9. "github.com/slackhq/nebula/udp"
  10. )
  11. // NOISE IX Handshakes
  12. // This function constructs a handshake packet, but does not actually send it
  13. // Sending is done by the handshake manager
  14. func ixHandshakeStage0(f *Interface, vpnIp iputil.VpnIp, hostinfo *HostInfo) {
  15. // This queries the lighthouse if we don't know a remote for the host
  16. // We do it here to provoke the lighthouse to preempt our timer wheel and trigger the stage 1 packet to send
  17. // more quickly, effect is a quicker handshake.
  18. if hostinfo.remote == nil {
  19. f.lightHouse.QueryServer(vpnIp, f)
  20. }
  21. err := f.handshakeManager.AddIndexHostInfo(hostinfo)
  22. if err != nil {
  23. f.l.WithError(err).WithField("vpnIp", vpnIp).
  24. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to generate index")
  25. return
  26. }
  27. ci := hostinfo.ConnectionState
  28. hsProto := &NebulaHandshakeDetails{
  29. InitiatorIndex: hostinfo.localIndexId,
  30. Time: uint64(time.Now().UnixNano()),
  31. Cert: ci.certState.rawCertificateNoKey,
  32. }
  33. hsBytes := []byte{}
  34. hs := &NebulaHandshake{
  35. Details: hsProto,
  36. }
  37. hsBytes, err = proto.Marshal(hs)
  38. if err != nil {
  39. f.l.WithError(err).WithField("vpnIp", vpnIp).
  40. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to marshal handshake message")
  41. return
  42. }
  43. h := header.Encode(make([]byte, header.Len), header.Version, header.Handshake, header.HandshakeIXPSK0, 0, 1)
  44. atomic.AddUint64(&ci.atomicMessageCounter, 1)
  45. msg, _, _, err := ci.H.WriteMessage(h, hsBytes)
  46. if err != nil {
  47. f.l.WithError(err).WithField("vpnIp", vpnIp).
  48. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to call noise.WriteMessage")
  49. return
  50. }
  51. // We are sending handshake packet 1, so we don't expect to receive
  52. // handshake packet 1 from the responder
  53. ci.window.Update(f.l, 1)
  54. hostinfo.HandshakePacket[0] = msg
  55. hostinfo.HandshakeReady = true
  56. hostinfo.handshakeStart = time.Now()
  57. }
  58. func ixHandshakeStage1(f *Interface, addr *udp.Addr, packet []byte, h *header.H) {
  59. ci := f.newConnectionState(f.l, false, noise.HandshakeIX, []byte{}, 0)
  60. // Mark packet 1 as seen so it doesn't show up as missed
  61. ci.window.Update(f.l, 1)
  62. msg, _, _, err := ci.H.ReadMessage(nil, packet[header.Len:])
  63. if err != nil {
  64. f.l.WithError(err).WithField("udpAddr", addr).
  65. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to call noise.ReadMessage")
  66. return
  67. }
  68. hs := &NebulaHandshake{}
  69. err = proto.Unmarshal(msg, hs)
  70. /*
  71. l.Debugln("GOT INDEX: ", hs.Details.InitiatorIndex)
  72. */
  73. if err != nil || hs.Details == nil {
  74. f.l.WithError(err).WithField("udpAddr", addr).
  75. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed unmarshal handshake message")
  76. return
  77. }
  78. remoteCert, err := RecombineCertAndValidate(ci.H, hs.Details.Cert, f.caPool)
  79. if err != nil {
  80. f.l.WithError(err).WithField("udpAddr", addr).
  81. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).WithField("cert", remoteCert).
  82. Info("Invalid certificate from host")
  83. return
  84. }
  85. vpnIp := iputil.Ip2VpnIp(remoteCert.Details.Ips[0].IP)
  86. certName := remoteCert.Details.Name
  87. fingerprint, _ := remoteCert.Sha256Sum()
  88. issuer := remoteCert.Details.Issuer
  89. if vpnIp == f.myVpnIp {
  90. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  91. WithField("certName", certName).
  92. WithField("fingerprint", fingerprint).
  93. WithField("issuer", issuer).
  94. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Refusing to handshake with myself")
  95. return
  96. }
  97. if !f.lightHouse.remoteAllowList.Allow(vpnIp, addr.IP) {
  98. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
  99. return
  100. }
  101. myIndex, err := generateIndex(f.l)
  102. if err != nil {
  103. f.l.WithError(err).WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  104. WithField("certName", certName).
  105. WithField("fingerprint", fingerprint).
  106. WithField("issuer", issuer).
  107. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to generate index")
  108. return
  109. }
  110. hostinfo := &HostInfo{
  111. ConnectionState: ci,
  112. localIndexId: myIndex,
  113. remoteIndexId: hs.Details.InitiatorIndex,
  114. vpnIp: vpnIp,
  115. HandshakePacket: make(map[uint8][]byte, 0),
  116. lastHandshakeTime: hs.Details.Time,
  117. }
  118. hostinfo.Lock()
  119. defer hostinfo.Unlock()
  120. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  121. WithField("certName", certName).
  122. WithField("fingerprint", fingerprint).
  123. WithField("issuer", issuer).
  124. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  125. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  126. Info("Handshake message received")
  127. hs.Details.ResponderIndex = myIndex
  128. hs.Details.Cert = ci.certState.rawCertificateNoKey
  129. // Update the time in case their clock is way off from ours
  130. hs.Details.Time = uint64(time.Now().UnixNano())
  131. hsBytes, err := proto.Marshal(hs)
  132. if err != nil {
  133. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  134. WithField("certName", certName).
  135. WithField("fingerprint", fingerprint).
  136. WithField("issuer", issuer).
  137. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to marshal handshake message")
  138. return
  139. }
  140. nh := header.Encode(make([]byte, header.Len), header.Version, header.Handshake, header.HandshakeIXPSK0, hs.Details.InitiatorIndex, 2)
  141. msg, dKey, eKey, err := ci.H.WriteMessage(nh, hsBytes)
  142. if err != nil {
  143. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  144. WithField("certName", certName).
  145. WithField("fingerprint", fingerprint).
  146. WithField("issuer", issuer).
  147. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to call noise.WriteMessage")
  148. return
  149. } else if dKey == nil || eKey == nil {
  150. f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  151. WithField("certName", certName).
  152. WithField("fingerprint", fingerprint).
  153. WithField("issuer", issuer).
  154. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Noise did not arrive at a key")
  155. return
  156. }
  157. hostinfo.HandshakePacket[0] = make([]byte, len(packet[header.Len:]))
  158. copy(hostinfo.HandshakePacket[0], packet[header.Len:])
  159. // Regardless of whether you are the sender or receiver, you should arrive here
  160. // and complete standing up the connection.
  161. hostinfo.HandshakePacket[2] = make([]byte, len(msg))
  162. copy(hostinfo.HandshakePacket[2], msg)
  163. // We are sending handshake packet 2, so we don't expect to receive
  164. // handshake packet 2 from the initiator.
  165. ci.window.Update(f.l, 2)
  166. ci.peerCert = remoteCert
  167. ci.dKey = NewNebulaCipherState(dKey)
  168. ci.eKey = NewNebulaCipherState(eKey)
  169. hostinfo.remotes = f.lightHouse.QueryCache(vpnIp)
  170. hostinfo.SetRemote(addr)
  171. hostinfo.CreateRemoteCIDR(remoteCert)
  172. // Only overwrite existing record if we should win the handshake race
  173. overwrite := vpnIp > f.myVpnIp
  174. existing, err := f.handshakeManager.CheckAndComplete(hostinfo, 0, overwrite, f)
  175. if err != nil {
  176. switch err {
  177. case ErrAlreadySeen:
  178. // Update remote if preferred (Note we have to switch to locking
  179. // the existing hostinfo, and then switch back so the defer Unlock
  180. // higher in this function still works)
  181. hostinfo.Unlock()
  182. existing.Lock()
  183. // Update remote if preferred
  184. if existing.SetRemoteIfPreferred(f.hostMap, addr) {
  185. // Send a test packet to ensure the other side has also switched to
  186. // the preferred remote
  187. f.SendMessageToVpnIp(header.Test, header.TestRequest, vpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  188. }
  189. existing.Unlock()
  190. hostinfo.Lock()
  191. msg = existing.HandshakePacket[2]
  192. f.messageMetrics.Tx(header.Handshake, header.MessageSubType(msg[1]), 1)
  193. err := f.outside.WriteTo(msg, addr)
  194. if err != nil {
  195. f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
  196. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
  197. WithError(err).Error("Failed to send handshake message")
  198. } else {
  199. f.l.WithField("vpnIp", existing.vpnIp).WithField("udpAddr", addr).
  200. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
  201. Info("Handshake message sent")
  202. }
  203. return
  204. case ErrExistingHostInfo:
  205. // This means there was an existing tunnel and this handshake was older than the one we are currently based on
  206. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  207. WithField("certName", certName).
  208. WithField("oldHandshakeTime", existing.lastHandshakeTime).
  209. WithField("newHandshakeTime", hostinfo.lastHandshakeTime).
  210. WithField("fingerprint", fingerprint).
  211. WithField("issuer", issuer).
  212. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  213. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  214. Info("Handshake too old")
  215. // Send a test packet to trigger an authenticated tunnel test, this should suss out any lingering tunnel issues
  216. f.SendMessageToVpnIp(header.Test, header.TestRequest, vpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  217. return
  218. case ErrLocalIndexCollision:
  219. // This means we failed to insert because of collision on localIndexId. Just let the next handshake packet retry
  220. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  221. WithField("certName", certName).
  222. WithField("fingerprint", fingerprint).
  223. WithField("issuer", issuer).
  224. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  225. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  226. WithField("localIndex", hostinfo.localIndexId).WithField("collision", existing.vpnIp).
  227. Error("Failed to add HostInfo due to localIndex collision")
  228. return
  229. case ErrExistingHandshake:
  230. // We have a race where both parties think they are an initiator and this tunnel lost, let the other one finish
  231. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  232. WithField("certName", certName).
  233. WithField("fingerprint", fingerprint).
  234. WithField("issuer", issuer).
  235. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  236. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  237. Error("Prevented a pending handshake race")
  238. return
  239. default:
  240. // Shouldn't happen, but just in case someone adds a new error type to CheckAndComplete
  241. // And we forget to update it here
  242. f.l.WithError(err).WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  243. WithField("certName", certName).
  244. WithField("fingerprint", fingerprint).
  245. WithField("issuer", issuer).
  246. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  247. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  248. Error("Failed to add HostInfo to HostMap")
  249. return
  250. }
  251. }
  252. // Do the send
  253. f.messageMetrics.Tx(header.Handshake, header.MessageSubType(msg[1]), 1)
  254. err = f.outside.WriteTo(msg, addr)
  255. if err != nil {
  256. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  257. WithField("certName", certName).
  258. WithField("fingerprint", fingerprint).
  259. WithField("issuer", issuer).
  260. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  261. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  262. WithError(err).Error("Failed to send handshake")
  263. } else {
  264. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  265. WithField("certName", certName).
  266. WithField("fingerprint", fingerprint).
  267. WithField("issuer", issuer).
  268. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  269. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  270. WithField("sentCachedPackets", len(hostinfo.packetStore)).
  271. Info("Handshake message sent")
  272. }
  273. hostinfo.handshakeComplete(f.l, f.cachedPacketMetrics)
  274. return
  275. }
  276. func ixHandshakeStage2(f *Interface, addr *udp.Addr, hostinfo *HostInfo, packet []byte, h *header.H) bool {
  277. if hostinfo == nil {
  278. // Nothing here to tear down, got a bogus stage 2 packet
  279. return true
  280. }
  281. hostinfo.Lock()
  282. defer hostinfo.Unlock()
  283. if !f.lightHouse.remoteAllowList.Allow(hostinfo.vpnIp, addr.IP) {
  284. f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
  285. return false
  286. }
  287. ci := hostinfo.ConnectionState
  288. if ci.ready {
  289. f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  290. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("header", h).
  291. Info("Handshake is already complete")
  292. // Update remote if preferred
  293. if hostinfo.SetRemoteIfPreferred(f.hostMap, addr) {
  294. // Send a test packet to ensure the other side has also switched to
  295. // the preferred remote
  296. f.SendMessageToVpnIp(header.Test, header.TestRequest, hostinfo.vpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  297. }
  298. // We already have a complete tunnel, there is nothing that can be done by processing further stage 1 packets
  299. return false
  300. }
  301. msg, eKey, dKey, err := ci.H.ReadMessage(nil, packet[header.Len:])
  302. if err != nil {
  303. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  304. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("header", h).
  305. Error("Failed to call noise.ReadMessage")
  306. // We don't want to tear down the connection on a bad ReadMessage because it could be an attacker trying
  307. // to DOS us. Every other error condition after should to allow a possible good handshake to complete in the
  308. // near future
  309. return false
  310. } else if dKey == nil || eKey == nil {
  311. f.l.WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  312. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  313. Error("Noise did not arrive at a key")
  314. // This should be impossible in IX but just in case, if we get here then there is no chance to recover
  315. // the handshake state machine. Tear it down
  316. return true
  317. }
  318. hs := &NebulaHandshake{}
  319. err = proto.Unmarshal(msg, hs)
  320. if err != nil || hs.Details == nil {
  321. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  322. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).Error("Failed unmarshal handshake message")
  323. // The handshake state machine is complete, if things break now there is no chance to recover. Tear down and start again
  324. return true
  325. }
  326. remoteCert, err := RecombineCertAndValidate(ci.H, hs.Details.Cert, f.caPool)
  327. if err != nil {
  328. f.l.WithError(err).WithField("vpnIp", hostinfo.vpnIp).WithField("udpAddr", addr).
  329. WithField("cert", remoteCert).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  330. Error("Invalid certificate from host")
  331. // The handshake state machine is complete, if things break now there is no chance to recover. Tear down and start again
  332. return true
  333. }
  334. vpnIp := iputil.Ip2VpnIp(remoteCert.Details.Ips[0].IP)
  335. certName := remoteCert.Details.Name
  336. fingerprint, _ := remoteCert.Sha256Sum()
  337. issuer := remoteCert.Details.Issuer
  338. // Ensure the right host responded
  339. if vpnIp != hostinfo.vpnIp {
  340. f.l.WithField("intendedVpnIp", hostinfo.vpnIp).WithField("haveVpnIp", vpnIp).
  341. WithField("udpAddr", addr).WithField("certName", certName).
  342. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  343. Info("Incorrect host responded to handshake")
  344. // Release our old handshake from pending, it should not continue
  345. f.handshakeManager.pendingHostMap.DeleteHostInfo(hostinfo)
  346. // Create a new hostinfo/handshake for the intended vpn ip
  347. //TODO: this adds it to the timer wheel in a way that aggressively retries
  348. newHostInfo := f.getOrHandshake(hostinfo.vpnIp)
  349. newHostInfo.Lock()
  350. // Block the current used address
  351. newHostInfo.remotes = hostinfo.remotes
  352. newHostInfo.remotes.BlockRemote(addr)
  353. // Get the correct remote list for the host we did handshake with
  354. hostinfo.remotes = f.lightHouse.QueryCache(vpnIp)
  355. f.l.WithField("blockedUdpAddrs", newHostInfo.remotes.CopyBlockedRemotes()).WithField("vpnIp", vpnIp).
  356. WithField("remotes", newHostInfo.remotes.CopyAddrs(f.hostMap.preferredRanges)).
  357. Info("Blocked addresses for handshakes")
  358. // Swap the packet store to benefit the original intended recipient
  359. hostinfo.ConnectionState.queueLock.Lock()
  360. newHostInfo.packetStore = hostinfo.packetStore
  361. hostinfo.packetStore = []*cachedPacket{}
  362. hostinfo.ConnectionState.queueLock.Unlock()
  363. // Finally, put the correct vpn ip in the host info, tell them to close the tunnel, and return true to tear down
  364. hostinfo.vpnIp = vpnIp
  365. f.sendCloseTunnel(hostinfo)
  366. newHostInfo.Unlock()
  367. return true
  368. }
  369. // Mark packet 2 as seen so it doesn't show up as missed
  370. ci.window.Update(f.l, 2)
  371. duration := time.Since(hostinfo.handshakeStart).Nanoseconds()
  372. f.l.WithField("vpnIp", vpnIp).WithField("udpAddr", addr).
  373. WithField("certName", certName).
  374. WithField("fingerprint", fingerprint).
  375. WithField("issuer", issuer).
  376. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  377. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  378. WithField("durationNs", duration).
  379. WithField("sentCachedPackets", len(hostinfo.packetStore)).
  380. Info("Handshake message received")
  381. hostinfo.remoteIndexId = hs.Details.ResponderIndex
  382. hostinfo.lastHandshakeTime = hs.Details.Time
  383. // Store their cert and our symmetric keys
  384. ci.peerCert = remoteCert
  385. ci.dKey = NewNebulaCipherState(dKey)
  386. ci.eKey = NewNebulaCipherState(eKey)
  387. // Make sure the current udpAddr being used is set for responding
  388. hostinfo.SetRemote(addr)
  389. // Build up the radix for the firewall if we have subnets in the cert
  390. hostinfo.CreateRemoteCIDR(remoteCert)
  391. // Complete our handshake and update metrics, this will replace any existing tunnels for this vpnIp
  392. //TODO: Complete here does not do a race avoidance, it will just take the new tunnel. Is this ok?
  393. f.handshakeManager.Complete(hostinfo, f)
  394. hostinfo.handshakeComplete(f.l, f.cachedPacketMetrics)
  395. f.metricHandshakes.Update(duration)
  396. return false
  397. }