handshake_manager.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. package nebula
  2. import (
  3. "crypto/rand"
  4. "encoding/binary"
  5. "fmt"
  6. "net"
  7. "time"
  8. "github.com/sirupsen/logrus"
  9. )
  // Default tuning values for the handshake retry schedule.
  //
  // handleOutbound re-arms a pending handshake with a delay of
  // tryInterval * attempt number, so the wait between attempts grows linearly
  // until the retry limit is reached.
  //
  // The historical note here puts the total at 23.5 seconds for a 100ms
  // interval and 20 retries.
  // NOTE(review): summing 100ms * (1..20) gives 21s; presumably timer wheel
  // granularity accounts for the difference — confirm.
  const (
  	// DefaultHandshakeTryInterval is the base interval between handshake
  	// attempts; it is also the tick period of the manager's timer wheels.
  	DefaultHandshakeTryInterval = 100 * time.Millisecond

  	// DefaultHandshakeRetries is the number of handshake attempts made
  	// before a pending host is dropped.
  	DefaultHandshakeRetries = 20

  	// DefaultHandshakeWaitRotation is the number of handshake attempts to do
  	// before starting to rotate through the other known ip addresses.
  	DefaultHandshakeWaitRotation = 5

  	// DefaultHandshakeTriggerBuffer is the capacity of the channel used to
  	// trigger outbound handshakes.
  	DefaultHandshakeTriggerBuffer = 64
  )
  19. var (
  20. defaultHandshakeConfig = HandshakeConfig{
  21. tryInterval: DefaultHandshakeTryInterval,
  22. retries: DefaultHandshakeRetries,
  23. waitRotation: DefaultHandshakeWaitRotation,
  24. triggerBuffer: DefaultHandshakeTriggerBuffer,
  25. }
  26. )
  27. type HandshakeConfig struct {
  28. tryInterval time.Duration
  29. retries int
  30. waitRotation int
  31. triggerBuffer int
  32. messageMetrics *MessageMetrics
  33. }
  34. type HandshakeManager struct {
  35. pendingHostMap *HostMap
  36. mainHostMap *HostMap
  37. lightHouse *LightHouse
  38. outside *udpConn
  39. config HandshakeConfig
  40. // can be used to trigger outbound handshake for the given vpnIP
  41. trigger chan uint32
  42. OutboundHandshakeTimer *SystemTimerWheel
  43. InboundHandshakeTimer *SystemTimerWheel
  44. messageMetrics *MessageMetrics
  45. }
  46. func NewHandshakeManager(tunCidr *net.IPNet, preferredRanges []*net.IPNet, mainHostMap *HostMap, lightHouse *LightHouse, outside *udpConn, config HandshakeConfig) *HandshakeManager {
  47. return &HandshakeManager{
  48. pendingHostMap: NewHostMap("pending", tunCidr, preferredRanges),
  49. mainHostMap: mainHostMap,
  50. lightHouse: lightHouse,
  51. outside: outside,
  52. config: config,
  53. trigger: make(chan uint32, config.triggerBuffer),
  54. OutboundHandshakeTimer: NewSystemTimerWheel(config.tryInterval, config.tryInterval*time.Duration(config.retries)),
  55. InboundHandshakeTimer: NewSystemTimerWheel(config.tryInterval, config.tryInterval*time.Duration(config.retries)),
  56. messageMetrics: config.messageMetrics,
  57. }
  58. }
  59. func (c *HandshakeManager) Run(f EncWriter) {
  60. clockSource := time.Tick(c.config.tryInterval)
  61. for {
  62. select {
  63. case vpnIP := <-c.trigger:
  64. l.WithField("vpnIp", IntIp(vpnIP)).Debug("HandshakeManager: triggered")
  65. c.handleOutbound(vpnIP, f, true)
  66. case now := <-clockSource:
  67. c.NextOutboundHandshakeTimerTick(now, f)
  68. c.NextInboundHandshakeTimerTick(now)
  69. }
  70. }
  71. }
  72. func (c *HandshakeManager) NextOutboundHandshakeTimerTick(now time.Time, f EncWriter) {
  73. c.OutboundHandshakeTimer.advance(now)
  74. for {
  75. ep := c.OutboundHandshakeTimer.Purge()
  76. if ep == nil {
  77. break
  78. }
  79. vpnIP := ep.(uint32)
  80. c.handleOutbound(vpnIP, f, false)
  81. }
  82. }
  83. func (c *HandshakeManager) handleOutbound(vpnIP uint32, f EncWriter, lighthouseTriggered bool) {
  84. index, err := c.pendingHostMap.GetIndexByVpnIP(vpnIP)
  85. if err != nil {
  86. return
  87. }
  88. hostinfo, err := c.pendingHostMap.QueryVpnIP(vpnIP)
  89. if err != nil {
  90. return
  91. }
  92. // If we haven't finished the handshake and we haven't hit max retries, query
  93. // lighthouse and then send the handshake packet again.
  94. if hostinfo.HandshakeCounter < c.config.retries && !hostinfo.HandshakeComplete {
  95. if hostinfo.remote == nil {
  96. // We continue to query the lighthouse because hosts may
  97. // come online during handshake retries. If the query
  98. // succeeds (no error), add the lighthouse info to hostinfo
  99. ips := c.lightHouse.QueryCache(vpnIP)
  100. // If we have no responses yet, or only one IP (the host hadn't
  101. // finished reporting its own IPs yet), then send another query to
  102. // the LH.
  103. if len(ips) <= 1 {
  104. ips, err = c.lightHouse.Query(vpnIP, f)
  105. }
  106. if err == nil {
  107. for _, ip := range ips {
  108. hostinfo.AddRemote(ip)
  109. }
  110. hostinfo.ForcePromoteBest(c.mainHostMap.preferredRanges)
  111. }
  112. } else if lighthouseTriggered {
  113. // We were triggered by a lighthouse HostQueryReply packet, but
  114. // we have already picked a remote for this host (this can happen
  115. // if we are configured with multiple lighthouses). So we can skip
  116. // this trigger and let the timerwheel handle the rest of the
  117. // process
  118. return
  119. }
  120. hostinfo.HandshakeCounter++
  121. // We want to use the "best" calculated ip for the first 5 attempts, after that we just blindly rotate through
  122. // all the others until we can stand up a connection.
  123. if hostinfo.HandshakeCounter > c.config.waitRotation {
  124. hostinfo.rotateRemote()
  125. }
  126. // Ensure the handshake is ready to avoid a race in timer tick and stage 0 handshake generation
  127. if hostinfo.HandshakeReady && hostinfo.remote != nil {
  128. c.messageMetrics.Tx(handshake, NebulaMessageSubType(hostinfo.HandshakePacket[0][1]), 1)
  129. err := c.outside.WriteTo(hostinfo.HandshakePacket[0], hostinfo.remote)
  130. if err != nil {
  131. hostinfo.logger().WithField("udpAddr", hostinfo.remote).
  132. WithField("initiatorIndex", hostinfo.localIndexId).
  133. WithField("remoteIndex", hostinfo.remoteIndexId).
  134. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  135. WithError(err).Error("Failed to send handshake message")
  136. } else {
  137. //TODO: this log line is assuming a lot of stuff around the cached stage 0 handshake packet, we should
  138. // keep the real packet struct around for logging purposes
  139. hostinfo.logger().WithField("udpAddr", hostinfo.remote).
  140. WithField("initiatorIndex", hostinfo.localIndexId).
  141. WithField("remoteIndex", hostinfo.remoteIndexId).
  142. WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
  143. Info("Handshake message sent")
  144. }
  145. }
  146. // Readd to the timer wheel so we continue trying wait HandshakeTryInterval * counter longer for next try
  147. if !lighthouseTriggered {
  148. //l.Infoln("Interval: ", HandshakeTryInterval*time.Duration(hostinfo.HandshakeCounter))
  149. c.OutboundHandshakeTimer.Add(vpnIP, c.config.tryInterval*time.Duration(hostinfo.HandshakeCounter))
  150. }
  151. } else {
  152. c.pendingHostMap.DeleteVpnIP(vpnIP)
  153. c.pendingHostMap.DeleteIndex(index)
  154. }
  155. }
  156. func (c *HandshakeManager) NextInboundHandshakeTimerTick(now time.Time) {
  157. c.InboundHandshakeTimer.advance(now)
  158. for {
  159. ep := c.InboundHandshakeTimer.Purge()
  160. if ep == nil {
  161. break
  162. }
  163. index := ep.(uint32)
  164. vpnIP, err := c.pendingHostMap.GetVpnIPByIndex(index)
  165. if err != nil {
  166. continue
  167. }
  168. c.pendingHostMap.DeleteIndex(index)
  169. c.pendingHostMap.DeleteVpnIP(vpnIP)
  170. }
  171. }
  172. func (c *HandshakeManager) AddVpnIP(vpnIP uint32) *HostInfo {
  173. hostinfo := c.pendingHostMap.AddVpnIP(vpnIP)
  174. // We lock here and use an array to insert items to prevent locking the
  175. // main receive thread for very long by waiting to add items to the pending map
  176. c.OutboundHandshakeTimer.Add(vpnIP, c.config.tryInterval)
  177. return hostinfo
  178. }
  179. func (c *HandshakeManager) DeleteVpnIP(vpnIP uint32) {
  180. //l.Debugln("Deleting pending vpn ip :", IntIp(vpnIP))
  181. c.pendingHostMap.DeleteVpnIP(vpnIP)
  182. }
  183. func (c *HandshakeManager) AddIndex(index uint32, ci *ConnectionState) (*HostInfo, error) {
  184. hostinfo, err := c.pendingHostMap.AddIndex(index, ci)
  185. if err != nil {
  186. return nil, fmt.Errorf("Issue adding index: %d", index)
  187. }
  188. //c.mainHostMap.AddIndexHostInfo(index, hostinfo)
  189. c.InboundHandshakeTimer.Add(index, time.Second*10)
  190. return hostinfo, nil
  191. }
  192. func (c *HandshakeManager) AddIndexHostInfo(index uint32, h *HostInfo) {
  193. c.pendingHostMap.AddIndexHostInfo(index, h)
  194. }
  195. func (c *HandshakeManager) DeleteIndex(index uint32) {
  196. //l.Debugln("Deleting pending index :", index)
  197. c.pendingHostMap.DeleteIndex(index)
  198. }
  199. func (c *HandshakeManager) QueryIndex(index uint32) (*HostInfo, error) {
  200. return c.pendingHostMap.QueryIndex(index)
  201. }
  202. func (c *HandshakeManager) EmitStats() {
  203. c.pendingHostMap.EmitStats("pending")
  204. c.mainHostMap.EmitStats("main")
  205. }
  206. // Utility functions below
  207. func generateIndex() (uint32, error) {
  208. b := make([]byte, 4)
  209. _, err := rand.Read(b)
  210. if err != nil {
  211. l.Errorln(err)
  212. return 0, err
  213. }
  214. index := binary.BigEndian.Uint32(b)
  215. if l.Level >= logrus.DebugLevel {
  216. l.WithField("index", index).
  217. Debug("Generated index")
  218. }
  219. return index, nil
  220. }