Browse Source

Fix for crazy Windows threading bug... repeatedly adding and removing a network now doesn't leave networks in limbo.

Adam Ierymenko 11 years ago
parent
commit
de4e29288d

+ 1 - 1
ext/installfiles/windows/ZeroTier One.aip

@@ -20,7 +20,7 @@
     <ROW Property="CTRLS" Value="2"/>
     <ROW Property="MSIFASTINSTALL" MultiBuildValue="DefaultBuild:2"/>
     <ROW Property="Manufacturer" Value="ZeroTier Networks LLC"/>
-    <ROW Property="ProductCode" Value="1033:{44B21E2C-C01D-4D96-AD50-9466735F2D70} " Type="16"/>
+    <ROW Property="ProductCode" Value="1033:{39DFC63F-B6EA-40E7-880B-DC4A5E70F75A} " Type="16"/>
     <ROW Property="ProductLanguage" Value="1033"/>
     <ROW Property="ProductName" Value="ZeroTier One"/>
     <ROW Property="ProductVersion" Value="0.9.2" Type="32"/>

+ 10 - 0
main.cpp

@@ -25,6 +25,10 @@
  * LLC. Start here: http://www.zerotier.com/
  */
 
+// Uncomment on Windows to assume -C and run in console instead of service
+// Useful for Visual Studio debugging (launch VS as Administrator to run)
+#define ZT_WIN_RUN_IN_CONSOLE
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -586,12 +590,18 @@ int main(int argc,char **argv)
 	const char *homeDir = (const char *)0;
 	unsigned int udpPort = ZT_DEFAULT_UDP_PORT;
 	unsigned int tcpPort = 0;
+
 #ifdef __UNIX_LIKE__
 	bool runAsDaemon = false;
 #endif
 #ifdef __WINDOWS__
+#ifdef ZT_WIN_RUN_IN_CONSOLE
+	bool winRunFromCommandLine = true;
+#else
 	bool winRunFromCommandLine = false;
 #endif
+#endif // __WINDOWS__
+
 	for(int i=1;i<argc;++i) {
 		if (argv[i][0] == '-') {
 			switch(argv[i][1]) {

+ 40 - 28
node/Network.cpp

@@ -66,10 +66,13 @@ Network::~Network()
 {
 	Thread::join(_setupThread);
 
-	if (_tap)
-		_r->tapFactory->close(_tap,_destroyOnDelete);
+	{
+		Mutex::Lock _l(_lock);
+		if (_tap)
+			_r->tapFactory->close(_tap,_destroyed);
+	}
 
-	if (_destroyOnDelete) {
+	if (_destroyed) {
 		Utils::rm(std::string(_r->homePath + ZT_PATH_SEPARATOR_S + "networks.d" + ZT_PATH_SEPARATOR_S + idString() + ".conf"));
 		Utils::rm(std::string(_r->homePath + ZT_PATH_SEPARATOR_S + "networks.d" + ZT_PATH_SEPARATOR_S + idString() + ".mcerts"));
 	} else {
@@ -80,12 +83,6 @@ Network::~Network()
 
 SharedPtr<Network> Network::newInstance(const RuntimeEnvironment *renv,NodeConfig *nc,uint64_t id)
 {
-	/* We construct Network via a static method to ensure that it is immediately
-	 * wrapped in a SharedPtr<>. Otherwise if there is traffic on the Ethernet
-	 * tap device, a SharedPtr<> wrap can occur in the Ethernet frame handler
-	 * that then causes the Network instance to be deleted before it is finished
-	 * being constructed. C++ edge cases, how I love thee. */
-
 	SharedPtr<Network> nw(new Network());
 	nw->_id = id;
 	nw->_nc = nc;
@@ -94,7 +91,7 @@ SharedPtr<Network> Network::newInstance(const RuntimeEnvironment *renv,NodeConfi
 	nw->_tap = (EthernetTap *)0;
 	nw->_enabled = true;
 	nw->_lastConfigUpdate = 0;
-	nw->_destroyOnDelete = false;
+	nw->_destroyed = false;
 	nw->_netconfFailure = NETCONF_FAILURE_NONE;
 
 	if (nw->controller() == renv->identity.address()) // TODO: fix Switch to allow packets to self
@@ -143,6 +140,9 @@ bool Network::setConfiguration(const Dictionary &conf,bool saveToDisk)
 {
 	Mutex::Lock _l(_lock);
 
+	if (_destroyed)
+		return false;
+
 	try {
 		SharedPtr<NetworkConfig> newConfig(new NetworkConfig(conf)); // throws if invalid
 		if ((newConfig->networkId() == _id)&&(newConfig->issuedTo() == _r->identity.address())) {
@@ -242,6 +242,10 @@ bool Network::isAllowed(const Address &peer) const
 void Network::clean()
 {
 	Mutex::Lock _l(_lock);
+
+	if (_destroyed)
+		return;
+
 	uint64_t now = Utils::now();
 
 	if ((_config)&&(_config->isPublic())) {
@@ -277,23 +281,17 @@ void Network::clean()
 Network::Status Network::status() const
 {
 	Mutex::Lock _l(_lock);
-	if (_tap) {
-		switch(_netconfFailure) {
-			case NETCONF_FAILURE_ACCESS_DENIED:
-				return NETWORK_ACCESS_DENIED;
-			case NETCONF_FAILURE_NOT_FOUND:
-				return NETWORK_NOT_FOUND;
-			case NETCONF_FAILURE_NONE:
-				if (_lastConfigUpdate > 0)
-					return NETWORK_OK;
-				else return NETWORK_WAITING_FOR_FIRST_AUTOCONF;
-			case NETCONF_FAILURE_INIT_FAILED:
-			default:
-				return NETWORK_INITIALIZATION_FAILED;
-		}
-	} else if (_netconfFailure == NETCONF_FAILURE_INIT_FAILED) {
-		return NETWORK_INITIALIZATION_FAILED;
-	} else return NETWORK_INITIALIZING;
+	switch(_netconfFailure) {
+		case NETCONF_FAILURE_ACCESS_DENIED:
+			return NETWORK_ACCESS_DENIED;
+		case NETCONF_FAILURE_NOT_FOUND:
+			return NETWORK_NOT_FOUND;
+		case NETCONF_FAILURE_NONE:
+			return ((_lastConfigUpdate > 0) ? ((_tap) ? NETWORK_OK : NETWORK_INITIALIZING) : NETWORK_WAITING_FOR_FIRST_AUTOCONF);
+		//case NETCONF_FAILURE_INIT_FAILED:
+		default:
+			return NETWORK_INITIALIZATION_FAILED;
+	}
 }
 
 void Network::_CBhandleTapData(void *arg,const MAC &from,const MAC &to,unsigned int etherType,const Buffer<4096> &data)
@@ -366,7 +364,7 @@ void Network::threadMain()
 	{
 		Mutex::Lock _l(_lock);
 		if (_tap) // the tap creation thread can technically be re-launched, though this isn't done right now
-			_r->tapFactory->close(_tap,_destroyOnDelete);
+			_r->tapFactory->close(_tap,false);
 		_tap = t;
 		if (t) {
 			if (_config)
@@ -409,6 +407,20 @@ void Network::setEnabled(bool enabled)
 		_tap->setEnabled(enabled);
 }
 
+void Network::destroy()
+{
+	Mutex::Lock _l(_lock);
+
+	_enabled = false;
+	_destroyed = true;
+
+	Thread::join(_setupThread);
+
+	if (_tap)
+		_r->tapFactory->close(_tap,true);
+	_tap = (EthernetTap *)0;
+}
+
 void Network::_restoreState()
 {
 	Buffer<ZT_NETWORK_CERT_WRITE_BUF_SIZE> buf;

+ 10 - 6
node/Network.hpp

@@ -99,11 +99,6 @@ private:
 	 */
 	static SharedPtr<Network> newInstance(const RuntimeEnvironment *renv,NodeConfig *nc,uint64_t id);
 
-	/**
-	 * Causes all persistent disk presence to be erased on delete, and this network won't be reloaded on next startup
-	 */
-	inline void destroyOnDelete() throw() { _destroyOnDelete = true; }
-
 public:
 	/**
 	 * Broadcast multicast group: ff:ff:ff:ff:ff:ff / 0
@@ -419,6 +414,15 @@ public:
 	 */
 	void setEnabled(bool enabled);
 
+	/**
+	 * Destroy this network
+	 *
+	 * This causes the network to disable itself, destroy its tap device, and on
+	 * delete to delete all trace of itself on disk and remove any persistent tap
+	 * device instances. Call this when a network is being removed from the system.
+	 */
+	void destroy();
+
 private:
 	static void _CBhandleTapData(void *arg,const MAC &from,const MAC &to,unsigned int etherType,const Buffer<4096> &data);
 
@@ -453,7 +457,7 @@ private:
 	SharedPtr<NetworkConfig> _config;
 	volatile uint64_t _lastConfigUpdate;
 
-	volatile bool _destroyOnDelete;
+	volatile bool _destroyed;
 
 	volatile enum {
 		NETCONF_FAILURE_NONE,

+ 1 - 1
node/NodeConfig.cpp

@@ -286,7 +286,7 @@ void NodeConfig::_doCommand(IpcConnection *ipcc,const char *commandLine)
 				if (nw == _networks.end()) {
 					ipcc->printf("404 leave %.16llx ERROR: not a member of that network"ZT_EOL_S,(unsigned long long)nwid);
 				} else {
-					nw->second->destroyOnDelete();
+					nw->second->destroy();
 					_networks.erase(nw);
 				}
 			} else {

+ 12 - 3
node/Thread.hpp

@@ -31,12 +31,13 @@
 #include <stdexcept>
 
 #include "Constants.hpp"
-#include "AtomicCounter.hpp"
 
 #ifdef __WINDOWS__
 
+#include <WinSock2.h>
 #include <Windows.h>
 #include <string.h>
+#include "Mutex.hpp"
 
 namespace ZeroTier {
 
@@ -56,6 +57,7 @@ public:
 		throw()
 	{
 		_th = NULL;
+		_tid = 0;
 	}
 
 	template<typename C>
@@ -71,8 +73,15 @@ public:
 
 	static inline void join(const Thread &t)
 	{
-		if (t._th != NULL)
-			WaitForSingleObject(t._th,INFINITE);
+		if (t._th != NULL) {
+			for(;;) {
+				DWORD ec = STILL_ACTIVE;
+				GetExitCodeThread(t._th,&ec);
+				if (ec == STILL_ACTIVE)
+					WaitForSingleObject(t._th,1000);
+				else break;
+			}
+		}
 	}
 
 	static inline void sleep(unsigned long ms)

+ 88 - 117
osnet/WindowsEthernetTap.cpp

@@ -233,7 +233,6 @@ WindowsEthernetTap::WindowsEthernetTap(
 		DWORD tmp = mtu;
 		RegSetKeyValueA(nwAdapters,mySubkeyName.c_str(),"MTU",REG_DWORD,(LPCVOID)&tmp,sizeof(tmp));
 
-		//tmp = NDIS_DEVICE_TYPE_ENDPOINT;
 		tmp = 0;
 		RegSetKeyValueA(nwAdapters,mySubkeyName.c_str(),"*NdisDeviceType",REG_DWORD,(LPCVOID)&tmp,sizeof(tmp));
 		tmp = IF_TYPE_ETHERNET_CSMACD;
@@ -571,130 +570,102 @@ void WindowsEthernetTap::threadMain()
 			continue;
 		}
 
-		uint32_t tmpi = 1;
-		DWORD bytesReturned = 0;
-		DeviceIoControl(_tap,TAP_WIN_IOCTL_SET_MEDIA_STATUS,&tmpi,sizeof(tmpi),&tmpi,sizeof(tmpi),&bytesReturned,NULL);
+		{
+			uint32_t tmpi = 1;
+			DWORD bytesReturned = 0;
+			DeviceIoControl(_tap,TAP_WIN_IOCTL_SET_MEDIA_STATUS,&tmpi,sizeof(tmpi),&tmpi,sizeof(tmpi),&bytesReturned,NULL);
+			bytesReturned = 0;
+			DeviceIoControl(_tap,TAP_WIN_IOCTL_SET_MEDIA_STATUS,&tmpi,sizeof(tmpi),&tmpi,sizeof(tmpi),&bytesReturned,NULL);
+		}
+
+		{
+#ifdef ZT_WINDOWS_CREATE_FAKE_DEFAULT_ROUTE
+			/* This inserts a fake default route and a fake ARP entry, forcing
+			 * Windows to detect this as a "real" network and apply proper
+			 * firewall rules.
+			 *
+			 * This hack is completely stupid, but Windows made me do it
+			 * by being broken and insane.
+			 *
+			 * Background: Windows tries to detect its network location by
+			 * matching it to the ARP address of the default route. Networks
+			 * without default routes are "unidentified networks" and cannot
+			 * have their firewall classification changed by the user (easily).
+			 *
+			 * Yes, you read that right.
+			 *
+			 * The common workaround is to set *NdisDeviceType to 1, which
+			 * totally disables all Windows firewall functionality. This is
+			 * the answer you'll find on most forums for things like OpenVPN.
+			 *
+			 * Yes, you read that right.
+			 *
+			 * The default route workaround is also known, but for this to
+			 * work there must be a known default IP that resolves to a known
+			 * ARP address. This works for an OpenVPN tunnel, but not here
+			 * because this isn't a tunnel. It's a mesh. There is no "other
+			 * end," or any other known always on IP.
+			 *
+			 * So let's make a fake one and shove it in there along with its
+			 * fake static ARP entry. Also makes it instant-on and static.
+			 *
+			 * We'll have to see what DHCP does with this. In the future we
+			 * probably will not want to do this on DHCP-enabled networks, so
+			 * when we enable DHCP we will go in and yank this wacko hacko from
+			 * the routing table before doing so.
+			 *
+			 * Like Jesse Pinkman would say: "YEEEEAAH BITCH!" */
+			const uint32_t fakeIp = htonl(0x19fffffe); // 25.255.255.254 -- unrouted IPv4 block
+			for(int i=0;i<8;++i) {
+				MIB_IPNET_ROW2 ipnr;
+				memset(&ipnr,0,sizeof(ipnr));
+				ipnr.Address.si_family = AF_INET;
+				ipnr.Address.Ipv4.sin_addr.s_addr = fakeIp;
+				ipnr.InterfaceLuid.Value = _deviceLuid.Value;
+				ipnr.PhysicalAddress[0] = _mac[0] ^ 0x10; // just make something up that's consistent and not part of this net
+				ipnr.PhysicalAddress[1] = 0x00;
+				ipnr.PhysicalAddress[2] = (UCHAR)((_deviceGuid.Data1 >> 24) & 0xff);
+				ipnr.PhysicalAddress[3] = (UCHAR)((_deviceGuid.Data1 >> 16) & 0xff);
+				ipnr.PhysicalAddress[4] = (UCHAR)((_deviceGuid.Data1 >> 8) & 0xff);
+				ipnr.PhysicalAddress[5] = (UCHAR)(_deviceGuid.Data1 & 0xff);
+				ipnr.PhysicalAddressLength = 6;
+				ipnr.State = NlnsPermanent;
+				ipnr.IsRouter = 1;
+				ipnr.IsUnreachable = 0;
+				ipnr.ReachabilityTime.LastReachable = 0x0fffffff;
+				ipnr.ReachabilityTime.LastUnreachable = 1;
+				DWORD result = CreateIpNetEntry2(&ipnr);
+				if (result != NO_ERROR)
+					Sleep(500);
+				else break;
+			}
+			for(int i=0;i<8;++i) {
+				MIB_IPFORWARD_ROW2 nr;
+				memset(&nr,0,sizeof(nr));
+				InitializeIpForwardEntry(&nr);
+				nr.InterfaceLuid.Value = _deviceLuid.Value;
+				nr.DestinationPrefix.Prefix.si_family = AF_INET; // rest is left as 0.0.0.0/0
+				nr.NextHop.si_family = AF_INET;
+				nr.NextHop.Ipv4.sin_addr.s_addr = fakeIp;
+				nr.Metric = 9999; // do not use as real default route
+				nr.Protocol = MIB_IPPROTO_NETMGMT;
+				DWORD result = CreateIpForwardEntry2(&nr);
+				if (result != NO_ERROR)
+					Sleep(500);
+				else break;
+			}
+#endif
+		}
 
 		if (throwOneAway) {
 			throwOneAway = false;
 			CloseHandle(_tap);
 			_tap = INVALID_HANDLE_VALUE;
-			Sleep(250);
+			Sleep(1000);
 			continue;
 		} else break;
 	}
 
-	/* code not currently used, but keep it around cause it was hard to figure out...
-	CoInitializeEx(NULL,COINIT_MULTITHREADED);
-	CComPtr<INetworkListManager> nlm;
-	nlm.CoCreateInstance(CLSID_NetworkListManager);
-	if (nlm) {
-		for(int i=0;i<8;++i) { // wait up to 8s for the NLM (network awareness) to find and initialize its awareness of our new network
-			CComPtr<IEnumNetworks> nlmNets;
-			bool foundMyNet = false;
-			if (SUCCEEDED(nlm->GetNetworks(NLM_ENUM_NETWORK_ALL,&nlmNets))) {
-				DWORD dwReturn = 0;
-				while (!foundMyNet) {
-					CComPtr<INetwork> nlmNet;
-					HRESULT hr = nlmNets->Next(1,&nlmNet,&dwReturn);
-					if ((hr == S_OK)&&(dwReturn > 0)&&(nlmNet)) {
-						CComPtr<IEnumNetworkConnections> nlmNetConns;
-						if (SUCCEEDED(nlmNet->GetNetworkConnections(&nlmNetConns))) {
-							for(;;) {
-								CComPtr<INetworkConnection> nlmNetConn;
-								hr = nlmNetConns->Next(1,&nlmNetConn,&dwReturn);
-								if ((hr == S_OK)&&(dwReturn > 0)&&(nlmNetConn)) {
-									GUID netAdapterId;
-									nlmNetConn->GetAdapterId(&netAdapterId);
-									if (netAdapterId == _deviceGuid) {
-										foundMyNet = true;
-										printf("*** Found my net!\n");
-										nlmNet->SetName(L"ZeroTier One Network");
-										break;
-									}
-								} else break;
-							}
-						}
-					} else break;
-				}
-			}
-			if (foundMyNet)
-				break;
-			else Thread::sleep(1000);
-		}
-	}
-	*/
-
-#ifdef ZT_WINDOWS_CREATE_FAKE_DEFAULT_ROUTE
-	/* This inserts a fake default route and a fake ARP entry, forcing
-	 * Windows to detect this as a "real" network and apply proper
-	 * firewall rules.
-	 *
-	 * This hack is completely stupid, but Windows made me do it
-	 * by being broken and insane.
-	 *
-	 * Background: Windows tries to detect its network location by
-	 * matching it to the ARP address of the default route. Networks
-	 * without default routes are "unidentified networks" and cannot
-	 * have their firewall classification changed by the user (easily).
-	 *
-	 * Yes, you read that right.
-	 *
-	 * The common workaround is to set *NdisDeviceType to 1, which
-	 * totally disables all Windows firewall functionality. This is
-	 * the answer you'll find on most forums for things like OpenVPN.
-	 *
-	 * Yes, you read that right.
-	 *
-	 * The default route workaround is also known, but for this to
-	 * work there must be a known default IP that resolves to a known
-	 * ARP address. This works for an OpenVPN tunnel, but not here
-	 * because this isn't a tunnel. It's a mesh. There is no "other
-	 * end," or any other known always on IP.
-	 *
-	 * So let's make a fake one and shove it in there along with its
-	 * fake static ARP entry. Also makes it instant-on and static.
-	 *
-	 * We'll have to see what DHCP does with this. In the future we
-	 * probably will not want to do this on DHCP-enabled networks, so
-	 * when we enable DHCP we will go in and yank this wacko hacko from
-	 * the routing table before doing so.
-	 *
-	 * Like Jesse Pinkman would say: "YEEEEAAH BITCH!" */
-	for(int i=0;i<8;++i) { // also wait up to 8s for this, though if we got the NLM part we're probably okay
-		MIB_IPFORWARD_ROW2 nr;
-		memset(&nr,0,sizeof(nr));
-		InitializeIpForwardEntry(&nr);
-		nr.InterfaceLuid.Value = _deviceLuid.Value;
-		nr.DestinationPrefix.Prefix.si_family = AF_INET; // rest is left as 0.0.0.0/0
-		nr.NextHop.si_family = AF_INET;
-		nr.NextHop.Ipv4.sin_addr.s_addr = htonl(0x19fffffe); // 25.255.255.254 -- unrouted IPv4 block
-		nr.Metric = 9999; // do not use as real default route
-		nr.Protocol = MIB_IPPROTO_NETMGMT;
-		DWORD result = CreateIpForwardEntry2(&nr);
-		if (result == NO_ERROR) {
-			MIB_IPNET_ROW2 ipnr;
-			memset(&ipnr,0,sizeof(ipnr));
-			ipnr.Address.si_family = AF_INET;
-			ipnr.Address.Ipv4.sin_addr.s_addr = nr.NextHop.Ipv4.sin_addr.s_addr;
-			ipnr.InterfaceLuid.Value = _deviceLuid.Value;
-			ipnr.PhysicalAddress[0] = _mac[0] ^ 0x10; // just make something up that's consistent and not part of this net
-			ipnr.PhysicalAddress[1] = 0x00;
-			ipnr.PhysicalAddress[2] = (UCHAR)((_deviceGuid.Data1 >> 24) & 0xff);
-			ipnr.PhysicalAddress[3] = (UCHAR)((_deviceGuid.Data1 >> 16) & 0xff);
-			ipnr.PhysicalAddress[4] = (UCHAR)((_deviceGuid.Data1 >> 8) & 0xff);
-			ipnr.PhysicalAddress[5] = (UCHAR)(_deviceGuid.Data1 & 0xff);
-			ipnr.PhysicalAddressLength = 6;
-			ipnr.State = NlnsPermanent;
-			ipnr.IsRouter = 1;
-			ipnr.IsUnreachable = 0;
-			ipnr.ReachabilityTime.LastReachable = 0x0fffffff;
-			CreateIpNetEntry2(&ipnr);
-			break; // stop retrying, we're done
-		} else Thread::sleep(1000);
-	}
-#endif
-
 	memset(&tapOvlRead,0,sizeof(tapOvlRead));
 	tapOvlRead.hEvent = CreateEvent(NULL,TRUE,FALSE,NULL);
 	memset(&tapOvlWrite,0,sizeof(tapOvlWrite));
@@ -710,7 +681,7 @@ void WindowsEthernetTap::threadMain()
 
 	for(;;) {
 		if (!_run) break;
-		DWORD r = WaitForMultipleObjectsEx(writeInProgress ? 3 : 2,wait4,FALSE,10000,TRUE);
+		DWORD r = WaitForMultipleObjectsEx(writeInProgress ? 3 : 2,wait4,FALSE,5000,TRUE);
 		if (!_run) break;
 
 		if ((r == WAIT_TIMEOUT)||(r == WAIT_FAILED))

+ 4 - 13
osnet/WindowsEthernetTapFactory.cpp

@@ -75,6 +75,9 @@ EthernetTap *WindowsEthernetTapFactory::open(
 
 void WindowsEthernetTapFactory::close(EthernetTap *tap,bool destroyPersistentDevices)
 {
+	if (!tap)
+		return;
+
 	std::string instanceId(((WindowsEthernetTap *)tap)->instanceId());
 	Mutex::Lock _l(_devices_m);
 
@@ -120,20 +123,8 @@ void WindowsEthernetTapFactory::destroyAllPersistentTapDevices(const char *pathT
 						dataLen = sizeof(data);
 						if (RegGetValueA(nwAdapters,subkeyName,"DeviceInstanceID",RRF_RT_ANY,&type,(PVOID)data,&dataLen) == ERROR_SUCCESS)
 							instanceIdPath.assign(data,dataLen);
-						if (instanceIdPath.length() != 0) {
+						if (instanceIdPath.length() != 0)
 							instanceIdPathsToRemove.insert(instanceIdPath);
-							/*
-							type = 0;
-							dataLen = sizeof(data);
-							if (RegGetValueA(nwAdapters,subkeyName,"_ZeroTierTapIdentifier",RRF_RT_ANY,&type,(PVOID)data,&dataLen) == ERROR_SUCCESS) {
-								if (dataLen <= 0) {
-									instanceIdPathsToRemove.insert(instanceIdPath);
-								} else {
-									instanceIdPathsToRemove.insert(instanceIdPath);
-								}
-							}
-							*/
-						}
 					}
 				}
 			} else break; // end of list or failure