handshake_ix.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. package nebula
  2. import (
  3. "sync/atomic"
  4. "time"
  5. "github.com/flynn/noise"
  6. "github.com/golang/protobuf/proto"
  7. )
  8. // NOISE IX Handshakes
  9. // This function constructs a handshake packet, but does not actually send it
  10. // Sending is done by the handshake manager
  11. func ixHandshakeStage0(f *Interface, vpnIp uint32, hostinfo *HostInfo) {
  12. // This queries the lighthouse if we don't know a remote for the host
  13. // We do it here to provoke the lighthouse to preempt our timer wheel and trigger the stage 1 packet to send
  14. // more quickly, effect is a quicker handshake.
  15. if hostinfo.remote == nil {
  16. f.lightHouse.QueryServer(vpnIp, f)
  17. }
  18. err := f.handshakeManager.AddIndexHostInfo(hostinfo)
  19. if err != nil {
  20. f.l.WithError(err).WithField("vpnIp", IntIp(vpnIp)).
  21. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to generate index")
  22. return
  23. }
  24. ci := hostinfo.ConnectionState
  25. hsProto := &NebulaHandshakeDetails{
  26. InitiatorIndex: hostinfo.localIndexId,
  27. Time: uint64(time.Now().UnixNano()),
  28. Cert: ci.certState.rawCertificateNoKey,
  29. }
  30. hsBytes := []byte{}
  31. hs := &NebulaHandshake{
  32. Details: hsProto,
  33. }
  34. hsBytes, err = proto.Marshal(hs)
  35. if err != nil {
  36. f.l.WithError(err).WithField("vpnIp", IntIp(vpnIp)).
  37. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to marshal handshake message")
  38. return
  39. }
  40. header := HeaderEncode(make([]byte, HeaderLen), Version, uint8(handshake), handshakeIXPSK0, 0, 1)
  41. atomic.AddUint64(&ci.atomicMessageCounter, 1)
  42. msg, _, _, err := ci.H.WriteMessage(header, hsBytes)
  43. if err != nil {
  44. f.l.WithError(err).WithField("vpnIp", IntIp(vpnIp)).
  45. WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).Error("Failed to call noise.WriteMessage")
  46. return
  47. }
  48. // We are sending handshake packet 1, so we don't expect to receive
  49. // handshake packet 1 from the responder
  50. ci.window.Update(f.l, 1)
  51. hostinfo.HandshakePacket[0] = msg
  52. hostinfo.HandshakeReady = true
  53. hostinfo.handshakeStart = time.Now()
  54. }
  55. func ixHandshakeStage1(f *Interface, addr *udpAddr, packet []byte, h *Header) {
  56. ci := f.newConnectionState(f.l, false, noise.HandshakeIX, []byte{}, 0)
  57. // Mark packet 1 as seen so it doesn't show up as missed
  58. ci.window.Update(f.l, 1)
  59. msg, _, _, err := ci.H.ReadMessage(nil, packet[HeaderLen:])
  60. if err != nil {
  61. f.l.WithError(err).WithField("udpAddr", addr).
  62. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to call noise.ReadMessage")
  63. return
  64. }
  65. hs := &NebulaHandshake{}
  66. err = proto.Unmarshal(msg, hs)
  67. /*
  68. l.Debugln("GOT INDEX: ", hs.Details.InitiatorIndex)
  69. */
  70. if err != nil || hs.Details == nil {
  71. f.l.WithError(err).WithField("udpAddr", addr).
  72. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed unmarshal handshake message")
  73. return
  74. }
  75. remoteCert, err := RecombineCertAndValidate(ci.H, hs.Details.Cert, f.caPool)
  76. if err != nil {
  77. f.l.WithError(err).WithField("udpAddr", addr).
  78. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).WithField("cert", remoteCert).
  79. Info("Invalid certificate from host")
  80. return
  81. }
  82. vpnIP := ip2int(remoteCert.Details.Ips[0].IP)
  83. certName := remoteCert.Details.Name
  84. fingerprint, _ := remoteCert.Sha256Sum()
  85. issuer := remoteCert.Details.Issuer
  86. if vpnIP == ip2int(f.certState.certificate.Details.Ips[0].IP) {
  87. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  88. WithField("certName", certName).
  89. WithField("fingerprint", fingerprint).
  90. WithField("issuer", issuer).
  91. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Refusing to handshake with myself")
  92. return
  93. }
  94. if !f.lightHouse.remoteAllowList.Allow(vpnIP, addr.IP) {
  95. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
  96. return
  97. }
  98. myIndex, err := generateIndex(f.l)
  99. if err != nil {
  100. f.l.WithError(err).WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  101. WithField("certName", certName).
  102. WithField("fingerprint", fingerprint).
  103. WithField("issuer", issuer).
  104. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to generate index")
  105. return
  106. }
  107. hostinfo := &HostInfo{
  108. ConnectionState: ci,
  109. localIndexId: myIndex,
  110. remoteIndexId: hs.Details.InitiatorIndex,
  111. hostId: vpnIP,
  112. HandshakePacket: make(map[uint8][]byte, 0),
  113. lastHandshakeTime: hs.Details.Time,
  114. }
  115. hostinfo.Lock()
  116. defer hostinfo.Unlock()
  117. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  118. WithField("certName", certName).
  119. WithField("fingerprint", fingerprint).
  120. WithField("issuer", issuer).
  121. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  122. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  123. Info("Handshake message received")
  124. hs.Details.ResponderIndex = myIndex
  125. hs.Details.Cert = ci.certState.rawCertificateNoKey
  126. // Update the time in case their clock is way off from ours
  127. hs.Details.Time = uint64(time.Now().UnixNano())
  128. hsBytes, err := proto.Marshal(hs)
  129. if err != nil {
  130. f.l.WithError(err).WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).
  131. WithField("certName", certName).
  132. WithField("fingerprint", fingerprint).
  133. WithField("issuer", issuer).
  134. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to marshal handshake message")
  135. return
  136. }
  137. header := HeaderEncode(make([]byte, HeaderLen), Version, uint8(handshake), handshakeIXPSK0, hs.Details.InitiatorIndex, 2)
  138. msg, dKey, eKey, err := ci.H.WriteMessage(header, hsBytes)
  139. if err != nil {
  140. f.l.WithError(err).WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).
  141. WithField("certName", certName).
  142. WithField("fingerprint", fingerprint).
  143. WithField("issuer", issuer).
  144. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Failed to call noise.WriteMessage")
  145. return
  146. } else if dKey == nil || eKey == nil {
  147. f.l.WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).
  148. WithField("certName", certName).
  149. WithField("fingerprint", fingerprint).
  150. WithField("issuer", issuer).
  151. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Noise did not arrive at a key")
  152. return
  153. }
  154. hostinfo.HandshakePacket[0] = make([]byte, len(packet[HeaderLen:]))
  155. copy(hostinfo.HandshakePacket[0], packet[HeaderLen:])
  156. // Regardless of whether you are the sender or receiver, you should arrive here
  157. // and complete standing up the connection.
  158. hostinfo.HandshakePacket[2] = make([]byte, len(msg))
  159. copy(hostinfo.HandshakePacket[2], msg)
  160. // We are sending handshake packet 2, so we don't expect to receive
  161. // handshake packet 2 from the initiator.
  162. ci.window.Update(f.l, 2)
  163. ci.peerCert = remoteCert
  164. ci.dKey = NewNebulaCipherState(dKey)
  165. ci.eKey = NewNebulaCipherState(eKey)
  166. hostinfo.remotes = f.lightHouse.QueryCache(vpnIP)
  167. hostinfo.SetRemote(addr)
  168. hostinfo.CreateRemoteCIDR(remoteCert)
  169. // Only overwrite existing record if we should win the handshake race
  170. overwrite := vpnIP > ip2int(f.certState.certificate.Details.Ips[0].IP)
  171. existing, err := f.handshakeManager.CheckAndComplete(hostinfo, 0, overwrite, f)
  172. if err != nil {
  173. switch err {
  174. case ErrAlreadySeen:
  175. // Update remote if preferred (Note we have to switch to locking
  176. // the existing hostinfo, and then switch back so the defer Unlock
  177. // higher in this function still works)
  178. hostinfo.Unlock()
  179. existing.Lock()
  180. // Update remote if preferred
  181. if existing.SetRemoteIfPreferred(f.hostMap, addr) {
  182. // Send a test packet to ensure the other side has also switched to
  183. // the preferred remote
  184. f.SendMessageToVpnIp(test, testRequest, vpnIP, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  185. }
  186. existing.Unlock()
  187. hostinfo.Lock()
  188. msg = existing.HandshakePacket[2]
  189. f.messageMetrics.Tx(handshake, NebulaMessageSubType(msg[1]), 1)
  190. err := f.outside.WriteTo(msg, addr)
  191. if err != nil {
  192. f.l.WithField("vpnIp", IntIp(existing.hostId)).WithField("udpAddr", addr).
  193. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
  194. WithError(err).Error("Failed to send handshake message")
  195. } else {
  196. f.l.WithField("vpnIp", IntIp(existing.hostId)).WithField("udpAddr", addr).
  197. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("cached", true).
  198. Info("Handshake message sent")
  199. }
  200. return
  201. case ErrExistingHostInfo:
  202. // This means there was an existing tunnel and this handshake was older than the one we are currently based on
  203. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  204. WithField("certName", certName).
  205. WithField("oldHandshakeTime", existing.lastHandshakeTime).
  206. WithField("newHandshakeTime", hostinfo.lastHandshakeTime).
  207. WithField("fingerprint", fingerprint).
  208. WithField("issuer", issuer).
  209. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  210. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  211. Info("Handshake too old")
  212. // Send a test packet to trigger an authenticated tunnel test, this should suss out any lingering tunnel issues
  213. f.SendMessageToVpnIp(test, testRequest, vpnIP, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  214. return
  215. case ErrLocalIndexCollision:
  216. // This means we failed to insert because of collision on localIndexId. Just let the next handshake packet retry
  217. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  218. WithField("certName", certName).
  219. WithField("fingerprint", fingerprint).
  220. WithField("issuer", issuer).
  221. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  222. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  223. WithField("localIndex", hostinfo.localIndexId).WithField("collision", IntIp(existing.hostId)).
  224. Error("Failed to add HostInfo due to localIndex collision")
  225. return
  226. case ErrExistingHandshake:
  227. // We have a race where both parties think they are an initiator and this tunnel lost, let the other one finish
  228. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  229. WithField("certName", certName).
  230. WithField("fingerprint", fingerprint).
  231. WithField("issuer", issuer).
  232. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  233. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  234. Error("Prevented a pending handshake race")
  235. return
  236. default:
  237. // Shouldn't happen, but just in case someone adds a new error type to CheckAndComplete
  238. // And we forget to update it here
  239. f.l.WithError(err).WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  240. WithField("certName", certName).
  241. WithField("fingerprint", fingerprint).
  242. WithField("issuer", issuer).
  243. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  244. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  245. Error("Failed to add HostInfo to HostMap")
  246. return
  247. }
  248. }
  249. // Do the send
  250. f.messageMetrics.Tx(handshake, NebulaMessageSubType(msg[1]), 1)
  251. err = f.outside.WriteTo(msg, addr)
  252. if err != nil {
  253. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  254. WithField("certName", certName).
  255. WithField("fingerprint", fingerprint).
  256. WithField("issuer", issuer).
  257. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  258. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  259. WithError(err).Error("Failed to send handshake")
  260. } else {
  261. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  262. WithField("certName", certName).
  263. WithField("fingerprint", fingerprint).
  264. WithField("issuer", issuer).
  265. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  266. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  267. WithField("sentCachedPackets", len(hostinfo.packetStore)).
  268. Info("Handshake message sent")
  269. }
  270. hostinfo.handshakeComplete(f.l, f.cachedPacketMetrics)
  271. return
  272. }
  273. func ixHandshakeStage2(f *Interface, addr *udpAddr, hostinfo *HostInfo, packet []byte, h *Header) bool {
  274. if hostinfo == nil {
  275. // Nothing here to tear down, got a bogus stage 2 packet
  276. return true
  277. }
  278. hostinfo.Lock()
  279. defer hostinfo.Unlock()
  280. if !f.lightHouse.remoteAllowList.Allow(hostinfo.hostId, addr.IP) {
  281. f.l.WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
  282. return false
  283. }
  284. ci := hostinfo.ConnectionState
  285. if ci.ready {
  286. f.l.WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).
  287. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("header", h).
  288. Info("Handshake is already complete")
  289. // Update remote if preferred
  290. if hostinfo.SetRemoteIfPreferred(f.hostMap, addr) {
  291. // Send a test packet to ensure the other side has also switched to
  292. // the preferred remote
  293. f.SendMessageToVpnIp(test, testRequest, hostinfo.hostId, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  294. }
  295. // We already have a complete tunnel, there is nothing that can be done by processing further stage 1 packets
  296. return false
  297. }
  298. msg, eKey, dKey, err := ci.H.ReadMessage(nil, packet[HeaderLen:])
  299. if err != nil {
  300. f.l.WithError(err).WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).
  301. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).WithField("header", h).
  302. Error("Failed to call noise.ReadMessage")
  303. // We don't want to tear down the connection on a bad ReadMessage because it could be an attacker trying
  304. // to DOS us. Every other error condition after should to allow a possible good handshake to complete in the
  305. // near future
  306. return false
  307. } else if dKey == nil || eKey == nil {
  308. f.l.WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).
  309. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  310. Error("Noise did not arrive at a key")
  311. // This should be impossible in IX but just in case, if we get here then there is no chance to recover
  312. // the handshake state machine. Tear it down
  313. return true
  314. }
  315. hs := &NebulaHandshake{}
  316. err = proto.Unmarshal(msg, hs)
  317. if err != nil || hs.Details == nil {
  318. f.l.WithError(err).WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).
  319. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).Error("Failed unmarshal handshake message")
  320. // The handshake state machine is complete, if things break now there is no chance to recover. Tear down and start again
  321. return true
  322. }
  323. remoteCert, err := RecombineCertAndValidate(ci.H, hs.Details.Cert, f.caPool)
  324. if err != nil {
  325. f.l.WithError(err).WithField("vpnIp", IntIp(hostinfo.hostId)).WithField("udpAddr", addr).
  326. WithField("cert", remoteCert).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  327. Error("Invalid certificate from host")
  328. // The handshake state machine is complete, if things break now there is no chance to recover. Tear down and start again
  329. return true
  330. }
  331. vpnIP := ip2int(remoteCert.Details.Ips[0].IP)
  332. certName := remoteCert.Details.Name
  333. fingerprint, _ := remoteCert.Sha256Sum()
  334. issuer := remoteCert.Details.Issuer
  335. // Ensure the right host responded
  336. if vpnIP != hostinfo.hostId {
  337. f.l.WithField("intendedVpnIp", IntIp(hostinfo.hostId)).WithField("haveVpnIp", IntIp(vpnIP)).
  338. WithField("udpAddr", addr).WithField("certName", certName).
  339. WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  340. Info("Incorrect host responded to handshake")
  341. // Release our old handshake from pending, it should not continue
  342. f.handshakeManager.pendingHostMap.DeleteHostInfo(hostinfo)
  343. // Create a new hostinfo/handshake for the intended vpn ip
  344. //TODO: this adds it to the timer wheel in a way that aggressively retries
  345. newHostInfo := f.getOrHandshake(hostinfo.hostId)
  346. newHostInfo.Lock()
  347. // Block the current used address
  348. newHostInfo.remotes = hostinfo.remotes
  349. newHostInfo.remotes.BlockRemote(addr)
  350. // Get the correct remote list for the host we did handshake with
  351. hostinfo.remotes = f.lightHouse.QueryCache(vpnIP)
  352. f.l.WithField("blockedUdpAddrs", newHostInfo.remotes.CopyBlockedRemotes()).WithField("vpnIp", IntIp(vpnIP)).
  353. WithField("remotes", newHostInfo.remotes.CopyAddrs(f.hostMap.preferredRanges)).
  354. Info("Blocked addresses for handshakes")
  355. // Swap the packet store to benefit the original intended recipient
  356. hostinfo.ConnectionState.queueLock.Lock()
  357. newHostInfo.packetStore = hostinfo.packetStore
  358. hostinfo.packetStore = []*cachedPacket{}
  359. hostinfo.ConnectionState.queueLock.Unlock()
  360. // Finally, put the correct vpn ip in the host info, tell them to close the tunnel, and return true to tear down
  361. hostinfo.hostId = vpnIP
  362. f.sendCloseTunnel(hostinfo)
  363. newHostInfo.Unlock()
  364. return true
  365. }
  366. // Mark packet 2 as seen so it doesn't show up as missed
  367. ci.window.Update(f.l, 2)
  368. duration := time.Since(hostinfo.handshakeStart).Nanoseconds()
  369. f.l.WithField("vpnIp", IntIp(vpnIP)).WithField("udpAddr", addr).
  370. WithField("certName", certName).
  371. WithField("fingerprint", fingerprint).
  372. WithField("issuer", issuer).
  373. WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
  374. WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
  375. WithField("durationNs", duration).
  376. WithField("sentCachedPackets", len(hostinfo.packetStore)).
  377. Info("Handshake message received")
  378. hostinfo.remoteIndexId = hs.Details.ResponderIndex
  379. hostinfo.lastHandshakeTime = hs.Details.Time
  380. // Store their cert and our symmetric keys
  381. ci.peerCert = remoteCert
  382. ci.dKey = NewNebulaCipherState(dKey)
  383. ci.eKey = NewNebulaCipherState(eKey)
  384. // Make sure the current udpAddr being used is set for responding
  385. hostinfo.SetRemote(addr)
  386. // Build up the radix for the firewall if we have subnets in the cert
  387. hostinfo.CreateRemoteCIDR(remoteCert)
  388. // Complete our handshake and update metrics, this will replace any existing tunnels for this vpnIp
  389. //TODO: Complete here does not do a race avoidance, it will just take the new tunnel. Is this ok?
  390. f.handshakeManager.Complete(hostinfo, f)
  391. hostinfo.handshakeComplete(f.l, f.cachedPacketMetrics)
  392. f.metricHandshakes.Update(duration)
  393. return false
  394. }