Browse Source

remove deadlock in GetOrHandshake (#1151)

We had a rare deadlock in GetOrHandshake because we kept the hostmap
lock when we do the call to StartHandshake. StartHandshake can block
while sending to the lighthouse query worker channel, and that worker
needs to be able to grab the hostmap lock to do its work. Other calls
for StartHandshake don't hold the hostmap lock so we should be able to
drop it here.

This lock was originally added with: https://github.com/slackhq/nebula/pull/954
Wade Simmons 1 năm trước cách đây
mục cha
commit
4eb1da0958
1 tập tin đã thay đổi với 4 bổ sung4 xóa
  1. 4 4
      handshake_manager.go

+ 4 - 4
handshake_manager.go

@@ -356,10 +356,11 @@ func (hm *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, lighthouseTrigger
 // GetOrHandshake will try to find a hostinfo with a fully formed tunnel or start a new handshake if one is not present
 // The 2nd argument will be true if the hostinfo is ready to transmit traffic
 func (hm *HandshakeManager) GetOrHandshake(vpnIp iputil.VpnIp, cacheCb func(*HandshakeHostInfo)) (*HostInfo, bool) {
-	// Check the main hostmap and maintain a read lock if our host is not there
 	hm.mainHostMap.RLock()
-	if h, ok := hm.mainHostMap.Hosts[vpnIp]; ok {
-		hm.mainHostMap.RUnlock()
+	h, ok := hm.mainHostMap.Hosts[vpnIp]
+	hm.mainHostMap.RUnlock()
+
+	if ok {
 		// Do not attempt promotion if you are a lighthouse
 		if !hm.lightHouse.amLighthouse {
 			h.TryPromoteBest(hm.mainHostMap.GetPreferredRanges(), hm.f)
@@ -367,7 +368,6 @@ func (hm *HandshakeManager) GetOrHandshake(vpnIp iputil.VpnIp, cacheCb func(*Han
 		return h, true
 	}
 
-	defer hm.mainHostMap.RUnlock()
 	return hm.StartHandshake(vpnIp, cacheCb), false
 }