Selaa lähdekoodia

Merge branch 'dev' into temporal

Grant Limberg 2 vuotta sitten
vanhempi
commit
714ef59814

+ 108 - 111
.github/workflows/validate-1m-linux.sh → .github/workflows/validate-linux.sh

@@ -3,32 +3,41 @@
 # This test script joins Earth and pokes some stuff
 
 TEST_NETWORK=8056c2e21c000001
-RUN_LENGTH=60
+RUN_LENGTH=30
 TEST_FINISHED=false
 ZTO_VER=$(git describe --tags $(git rev-list --tags --max-count=1))
 ZTO_COMMIT=$(git rev-parse HEAD)
 ZTO_COMMIT_SHORT=$(git rev-parse --short HEAD)
 TEST_DIR_PREFIX="$ZTO_VER-$ZTO_COMMIT_SHORT-test-results"
-EXIT_TEST_FAILED=0
+
+TEST_OK=0
+TEST_FAIL=1
 
 echo "Performing test on: $ZTO_VER-$ZTO_COMMIT_SHORT"
 TEST_FILEPATH_PREFIX="$TEST_DIR_PREFIX/$ZTO_COMMIT_SHORT"
 mkdir $TEST_DIR_PREFIX
 
+# How long we will wait for ZT to come online before considering it a failure
+MAX_WAIT_SECS=30
+
 ################################################################################
 # Multi-node connectivity and performance test                                 #
 ################################################################################
-main() {
-	echo -e "\nRunning test for $RUN_LENGTH seconds"
+
+test() {
+
+	echo -e "\nPerforming pre-flight checks"
 
 	check_exit_on_invalid_identity
 
-	NS1="ip netns exec ns1"
-	NS2="ip netns exec ns2"
+	echo -e "\nRunning test for $RUN_LENGTH seconds"
+
+	export NS1="ip netns exec ns1"
+	export NS2="ip netns exec ns2"
 
-	ZT1="$NS1 ./zerotier-cli -p9996 -D$(pwd)/node1"
+	export ZT1="$NS1 ./zerotier-cli -p9996 -D$(pwd)/node1"
 	# Specify custom port on one node to ensure that feature works
-	ZT2="$NS2 ./zerotier-cli -p9997 -D$(pwd)/node2"
+	export ZT2="$NS2 ./zerotier-cli -p9997 -D$(pwd)/node2"
 
 	echo -e "\nSetting up network namespaces..."
 	echo "Setting up ns1"
@@ -71,35 +80,15 @@ main() {
 	# Allow forwarding
 	sysctl -w net.ipv4.ip_forward=1
 
-	echo -e "\nPing from host to namespaces"
-
-	ping -c 3 192.168.0.1
-	ping -c 3 192.168.1.1
-
-	echo -e "\nPing from namespace to host"
-
-	$NS1 ping -c 3 192.168.0.1
-	$NS1 ping -c 3 192.168.0.1
-	$NS2 ping -c 3 192.168.0.2
-	$NS2 ping -c 3 192.168.0.2
-
-	echo -e "\nPing from ns1 to ns2"
-
-	$NS1 ping -c 3 192.168.0.1
-
-	echo -e "\nPing from ns2 to ns1"
-
-	$NS2 ping -c 3 192.168.0.1
-
 	################################################################################
 	# Memory Leak Check                                                            #
 	################################################################################
 
-	FILENAME_MEMORY_LOG="$TEST_FILEPATH_PREFIX-memory.log"
+	export FILENAME_MEMORY_LOG="$TEST_FILEPATH_PREFIX-memory.log"
 
 	echo -e "\nStarting a ZeroTier instance in each namespace..."
 
-	time_test_start=$(date +%s)
+	export time_test_start=$(date +%s)
 
 	# Spam the CLI as ZeroTier is starting
 	spam_cli 100
@@ -113,32 +102,52 @@ main() {
 		./zerotier-one node1 -p9996 -U >>node_1.log 2>&1 &
 
 	# Second instance, not run in memory profiler
+	# Don't set up internet access until _after_ zerotier is running
+	# This has been a source of stuckness in the past.
+	$NS2 ip addr del 192.168.1.2/24 dev veth3
 	$NS2 sudo ./zerotier-one node2 -U -p9997 >>node_2.log 2>&1 &
+	sleep 1;
+	$NS2 ip addr add 192.168.1.2/24 dev veth3
+	$NS2 ip route add default via 192.168.1.1
+
+	echo -e "\nPing from host to namespaces"
+
+	ping -c 3 192.168.0.1
+	ping -c 3 192.168.1.1
+
+	echo -e "\nPing from namespace to host"
+
+	$NS1 ping -c 3 192.168.0.1
+	$NS1 ping -c 3 192.168.0.1
+	$NS2 ping -c 3 192.168.0.2
+	$NS2 ping -c 3 192.168.0.2
+
+	echo -e "\nPing from ns1 to ns2"
+
+	$NS1 ping -c 3 192.168.0.1
+
+	echo -e "\nPing from ns2 to ns1"
+
+	$NS2 ping -c 3 192.168.0.1
 
 	################################################################################
 	# Online Check                                                                 #
 	################################################################################
 
 	echo "Waiting for ZeroTier to come online before attempting test..."
-	MAX_WAIT_SECS="${MAX_WAIT_SECS:-120}"
 	node1_online=false
 	node2_online=false
 	both_instances_online=false
 	time_zt_node1_start=$(date +%s)
 	time_zt_node2_start=$(date +%s)
 
-	for ((s = 0; s <= MAX_WAIT_SECS; s++)); do
+	for ((s = 0; s <= $MAX_WAIT_SECS; s++)); do
 		node1_online="$($ZT1 -j info | jq '.online' 2>/dev/null)"
 		node2_online="$($ZT2 -j info | jq '.online' 2>/dev/null)"
 		echo "Checking for online status: try #$s, node1:$node1_online, node2:$node2_online"
-		if [[ "$node1_online" == "true" ]]; then
-			time_zt_node1_online=$(date +%s)
-		fi
-		if [[ "$node2_online" == "true" ]]; then
-			time_zt_node2_online=$(date +%s)
-		fi
 		if [[ "$node2_online" == "true" && "$node1_online" == "true" ]]; then
-			both_instances_online=true
+			export both_instances_online=true
+			export time_to_both_nodes_online=$(date +%s)
 			break
 		fi
 		sleep 1
@@ -152,21 +161,20 @@ main() {
 	tree node2
 
 	echo -e "\n\nRunning ZeroTier processes:"
-	echo -e "\nNode 1:"
+	echo -e "\nNode 1:\n"
 	$NS1 ps aux | grep zerotier-one
-	echo -e "\nNode 2:"
+	echo -e "\nNode 2:\n"
 	$NS2 ps aux | grep zerotier-one
 
 	echo -e "\n\nStatus of each instance:"
 
-	echo -e "\n\nNode 1:"
+	echo -e "\n\nNode 1:\n"
 	$ZT1 status
-	echo -e "\n\nNode 2:"
+	echo -e "\n\nNode 2:\n"
 	$ZT2 status
 
 	if [[ "$both_instances_online" != "true" ]]; then
-		echo "One or more instances of ZeroTier failed to come online. Aborting test."
-		exit 1
+		exit_test_and_generate_report $TEST_FAIL "one or more nodes failed to come online"
 	fi
 
 	echo -e "\nJoining networks"
@@ -190,18 +198,14 @@ main() {
 	$NS1 ping -c 16 $node2_ip4 >$PING12_FILENAME
 	$NS2 ping -c 16 $node1_ip4 >$PING21_FILENAME
 
-	# Parse ping statistics
-	ping_loss_percent_1_to_2="${ping_loss_percent_1_to_2:-100.0}"
-	ping_loss_percent_2_to_1="${ping_loss_percent_2_to_1:-100.0}"
-
 	ping_loss_percent_1_to_2=$(cat $PING12_FILENAME |
 		grep "packet loss" | awk '{print $6}' | sed 's/%//')
 	ping_loss_percent_2_to_1=$(cat $PING21_FILENAME |
 		grep "packet loss" | awk '{print $6}' | sed 's/%//')
 
 	# Normalize loss value
-	ping_loss_percent_1_to_2=$(echo "scale=2; $ping_loss_percent_1_to_2/100.0" | bc)
-	ping_loss_percent_2_to_1=$(echo "scale=2; $ping_loss_percent_2_to_1/100.0" | bc)
+	export ping_loss_percent_1_to_2=$(echo "scale=2; $ping_loss_percent_1_to_2/100.0" | bc)
+	export ping_loss_percent_2_to_1=$(echo "scale=2; $ping_loss_percent_2_to_1/100.0" | bc)
 
 	################################################################################
 	# CLI Check                                                                    #
@@ -252,11 +256,9 @@ main() {
 
 	# TODO: Validate JSON
 
-	################################################################################
-	# Performance Test                                                             #
-	################################################################################
+	# Performance Test
 
-	FILENAME_PERF_JSON="$TEST_FILEPATH_PREFIX-iperf.json"
+	export FILENAME_PERF_JSON="$TEST_FILEPATH_PREFIX-iperf.json"
 
 	echo -e "\nBeginning performance test:"
 
@@ -272,24 +274,7 @@ main() {
 
 	cat $FILENAME_PERF_JSON
 
-	################################################################################
-	# Collect ZeroTier dump files                                                  #
-	################################################################################
-
-	echo -e "\nCollecting ZeroTier dump files"
-
-	node1_id=$($ZT1 -j status | jq -r .address)
-	node2_id=$($ZT2 -j status | jq -r .address)
-
-	$ZT1 dump
-	mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node1_id.txt"
-
-	$ZT2 dump
-	mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node2_id.txt"
-
-	################################################################################
-	# Let ZeroTier idle long enough for various timers                             #
-	################################################################################
+	# Let ZeroTier idle long enough for various timers
 
 	echo -e "\nIdling ZeroTier for $RUN_LENGTH seconds..."
 	sleep $RUN_LENGTH
@@ -301,9 +286,14 @@ main() {
 
 	sleep 5
 
-	################################################################################
-	# Stop test                                                                    #
-	################################################################################
+	exit_test_and_generate_report $TEST_OK "completed test"
+}
+
+################################################################################
+# Generate report                                                              #
+################################################################################
+
+exit_test_and_generate_report() {
 
 	echo -e "\nStopping memory check..."
 	sudo pkill -15 -f valgrind
@@ -311,16 +301,27 @@ main() {
 
 	time_test_end=$(date +%s)
 
-	################################################################################
-	# Rename ZeroTier stdout/stderr logs                                           #
-	################################################################################
+	echo "Exiting test with reason: $2 ($1)"
 
-	mv node_1.log "$TEST_FILEPATH_PREFIX-node-log-$node1_id.txt"
-	mv node_2.log "$TEST_FILEPATH_PREFIX-node-log-$node2_id.txt"
+	# Collect ZeroTier dump files
 
-	################################################################################
-	# Generate report                                                              #
-	################################################################################
+	echo -e "\nCollecting ZeroTier dump files"
+
+	node1_id=$($ZT1 -j status | jq -r .address)
+	node2_id=$($ZT2 -j status | jq -r .address)
+
+	$ZT1 dump
+	mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node1_id.txt"
+
+	$ZT2 dump
+	mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node2_id.txt"
+
+	# Copy ZeroTier stdout/stderr logs
+
+	cp node_1.log "$TEST_FILEPATH_PREFIX-node-log-$node1_id.txt"
+	cp node_2.log "$TEST_FILEPATH_PREFIX-node-log-$node2_id.txt"
+
+	# Generate report
 
 	cat $FILENAME_MEMORY_LOG
 
@@ -329,9 +330,7 @@ main() {
 	POSSIBLY_LOST=$(xmlstarlet sel -t -v '/valgrindoutput/error/xwhat' \
 		$FILENAME_MEMORY_LOG | grep "possibly" | awk '{print $1;}')
 
-	################################################################################
-	# Generate coverage report artifact and summary                                #
-	################################################################################
+	# Generate coverage report artifact and summary
 
 	FILENAME_COVERAGE_JSON="$TEST_FILEPATH_PREFIX-coverage.json"
 	FILENAME_COVERAGE_HTML="$TEST_FILEPATH_PREFIX-coverage.html"
@@ -351,22 +350,23 @@ main() {
 	COVERAGE_LINE_TOTAL="${COVERAGE_LINE_TOTAL:-0}"
 	COVERAGE_LINE_PERCENT="${COVERAGE_LINE_PERCENT:-0}"
 
-	################################################################################
-	# Default values                                                               #
-	################################################################################
+	# Default values
 
 	DEFINITELY_LOST="${DEFINITELY_LOST:-0}"
 	POSSIBLY_LOST="${POSSIBLY_LOST:-0}"
+	ping_loss_percent_1_to_2="${ping_loss_percent_1_to_2:-100.0}"
+	ping_loss_percent_2_to_1="${ping_loss_percent_2_to_1:-100.0}"
+	time_to_both_nodes_online="${time_to_both_nodes_online:--1}"
 
-	################################################################################
-	# Summarize and emit json for trend reporting                                  #
-	################################################################################
+	# Summarize and emit json for trend reporting
 
 	FILENAME_SUMMARY="$TEST_FILEPATH_PREFIX-summary.json"
 
 	time_length_test=$((time_test_end - time_test_start))
-	time_length_zt_node1_online=$((time_zt_node1_online - time_zt_start))
-	time_length_zt_node2_online=$((time_zt_node2_online - time_zt_start))
+	if [[ $time_to_both_nodes_online != -1 ]];
+	then
+		time_to_both_nodes_online=$((time_to_both_nodes_online - time_test_start))
+	fi
 	#time_length_zt_join=$((time_zt_join_end-time_zt_join_start))
 	#time_length_zt_leave=$((time_zt_leave_end-time_zt_leave_start))
 	#time_length_zt_can_still_ping=$((time_zt_can_still_ping-time_zt_leave_start))
@@ -378,31 +378,27 @@ main() {
   "commit":"$ZTO_COMMIT",
   "arch_m":"$(uname -m)",
   "arch_a":"$(uname -a)",
+  "binary_size":"$(stat -c %s zerotier-one)",
   "time_length_test":$time_length_test,
-  "time_length_zt_node1_online":$time_length_zt_node1_online,
-  "time_length_zt_node2_online":$time_length_zt_node2_online,
+  "time_to_both_nodes_online":$time_to_both_nodes_online,
   "num_possible_bytes_lost": $POSSIBLY_LOST,
   "num_definite_bytes_lost": $DEFINITELY_LOST,
-  "num_incorrect_settings": $POSSIBLY_LOST,
   "num_bad_formattings": $POSSIBLY_LOST,
-  "percent_coverage_branches": $POSSIBLY_LOST,
   "coverage_lines_covered": $COVERAGE_LINE_COVERED,
   "coverage_lines_total": $COVERAGE_LINE_TOTAL,
   "coverage_lines_percent": $COVERAGE_LINE_PERCENT,
   "ping_loss_percent_1_to_2": $ping_loss_percent_1_to_2,
   "ping_loss_percent_2_to_1": $ping_loss_percent_2_to_1,
-  "mean_latency_ping_random": $POSSIBLY_LOST,
-  "mean_latency_ping_netns": $POSSIBLY_LOST,
-  "mean_pdv_random": $POSSIBLY_LOST,
-  "mean_pdv_netns": $POSSIBLY_LOST,
-  "mean_perf_netns": $POSSIBLY_LOST,
-  "exit_test_failed": $EXIT_TEST_FAILED
+  "test_exit_code": $1,
+  "test_exit_reason":"$2"
 }
 EOF
 	)
 
 	echo $summary >$FILENAME_SUMMARY
 	cat $FILENAME_SUMMARY
+
+	exit 0
 }
 
 ################################################################################
@@ -437,6 +433,10 @@ spam_cli() {
 	done
 }
 
+################################################################################
+# Check for proper exit on load of invalid identity                            #
+################################################################################
+
 check_exit_on_invalid_identity() {
 	echo "Checking ZeroTier exits on invalid identity..."
 	mkdir -p $(pwd)/exit_test
@@ -448,17 +448,14 @@ check_exit_on_invalid_identity() {
 	$ZT1 &
 	my_pid=$!
 
-	echo "Waiting 5 secons"
+	echo "Waiting 5 seconds"
 	sleep 5
 
 	# check if process is running
 	kill -0 $my_pid
 	if [ $? -eq 0 ]; then
-		EXIT_TEST_FAILED=1
-		echo "Exit test FAILED: Process still running after being fed an invalid identity"
-	else
-		echo "Exit test PASSED"
+		exit_test_and_generate_report $TEST_FAIL "Exit test FAILED: Process still running after being fed an invalid identity"
 	fi
 }
 
-main "$@"
+test "$@"

+ 6 - 3
.github/workflows/report.sh → .github/workflows/validate-report.sh

@@ -5,6 +5,8 @@
 ################################################################################
 
 DEFINITELY_LOST=$(cat *test-results/*summary.json | jq .num_definite_bytes_lost)
+EXIT_CODE=$(cat *test-results/*summary.json | jq .test_exit_code) # key name must match the summary JSON written by validate-linux.sh
+EXIT_REASON=$(cat *test-results/*summary.json | jq -r .test_exit_reason) # -r strips the JSON quotes for the log message
 
 cat *test-results/*summary.json
 
@@ -14,8 +16,9 @@ if [[ "$DEFINITELY_LOST" -gt 0 ]]; then
       exit 1
 fi
 
-EXIT_TEST_FAILED=$(cat *test-results/*summary.json | jq .exit_test_failed)
+# Catch-all for other non-zero exit codes
 
-if [[ "$EXIT_TEST_FAILED" -gt 0 ]]; then
+if [[ "$EXIT_CODE" -gt 0 ]]; then
+      echo "Test failed: $EXIT_REASON"
       exit 1
-fi
+fi

+ 4 - 4
.github/workflows/validate.yml

@@ -40,8 +40,8 @@ jobs:
       run: |
         sudo apt install -y valgrind xmlstarlet gcovr iperf3 tree
         make one ZT_COVERAGE=1 ZT_TRACE=1
-        sudo chmod +x ./.github/workflows/validate-1m-linux.sh
-        sudo ./.github/workflows/validate-1m-linux.sh
+        sudo chmod +x ./.github/workflows/validate-linux.sh
+        sudo ./.github/workflows/validate-linux.sh
 
     - name: Archive test results
       uses: actions/upload-artifact@v3
@@ -51,6 +51,6 @@ jobs:
 
     - name: final-report
       run: |
-        sudo chmod +x ./.github/workflows/report.sh
-        sudo ./.github/workflows/report.sh
+        sudo chmod +x ./.github/workflows/validate-report.sh
+        sudo ./.github/workflows/validate-report.sh
 

+ 7 - 1
node/Node.cpp

@@ -248,9 +248,15 @@ public:
 		const std::vector<InetAddress> *const alwaysContactEndpoints = _alwaysContact.get(p->address());
 		if (alwaysContactEndpoints) {
 
-			// Contact upstream peers as infrequently as possible
 			ZT_PeerRole role = RR->topology->role(p->address());
+
+			// Contact upstream peers as infrequently as possible
 			int roleBasedTimerScale = (role == ZT_PEER_ROLE_LEAF) ? 2 : 16;
+
+			// Unless we don't have any paths to the roots, in which case we shouldn't wait a long time to contact them
+			bool hasPaths = p->paths(RR->node->now()).size() > 0;
+			roleBasedTimerScale = (role != ZT_PEER_ROLE_LEAF && !hasPaths) ? 0 : roleBasedTimerScale;
+
 			if ((RR->node->now() - p->lastSentFullHello()) <= (ZT_PATH_HEARTBEAT_PERIOD * roleBasedTimerScale)) {
 				return;
 			}

+ 2 - 3
osdep/MacDNSHelper.mm

@@ -107,7 +107,6 @@ void MacDNSHelper::removeDNS(uint64_t nwid)
 bool MacDNSHelper::addIps4(uint64_t nwid, const MAC mac, const char *dev, const std::vector<InetAddress>& addrs)
 {
     const char* ipStr = {0};
-    const char* ipStr2 = {0};
     char buf2[256] = {0};
 
     bool hasV4 = false;
@@ -116,7 +115,6 @@ bool MacDNSHelper::addIps4(uint64_t nwid, const MAC mac, const char *dev, const
             hasV4 = true;
 
             ipStr = addrs[i].toIpString(buf2);
-            ipStr2 = addrs[i].toIpString(buf2);
 
             break;
         }
@@ -141,7 +139,8 @@ bool MacDNSHelper::addIps4(uint64_t nwid, const MAC mac, const char *dev, const
     CFStringRef cfdev = CFStringCreateWithCString(NULL, dev, kCFStringEncodingUTF8);
 
     CFStringRef cfserver = CFStringCreateWithCString(NULL, "127.0.0.1", kCFStringEncodingUTF8);
-    CFStringRef cfrouter = CFStringCreateWithCString(NULL, ipStr2, kCFStringEncodingUTF8);
+    // using the ip from the zerotier network breaks routing on the mac
+    CFStringRef cfrouter = CFStringCreateWithCString(NULL, "127.0.0.1", kCFStringEncodingUTF8);
 
     const int SIZE = 4;
     CFStringRef keys[SIZE];

+ 40 - 25
osdep/ManagedRoute.cpp

@@ -252,7 +252,7 @@ static std::vector<_RTE> _getRTEs(const InetAddress &target,bool contains)
 
 static void _routeCmd(const char *op,const InetAddress &target,const InetAddress &via,const char *ifscope,const char *localInterface)
 {
-	//char f1[1024],f2[1024]; printf("%s %s %s %s %s\n",op,target.toString(f1),via.toString(f2),ifscope,localInterface);
+	// char f1[1024],f2[1024]; printf("cmd %s %s %s %s %s\n",op,target.toString(f1),via.toString(f2),ifscope,localInterface);
 	long p = (long)fork();
 	if (p > 0) {
 		int exitcode = -1;
@@ -479,6 +479,9 @@ bool ManagedRoute::sync()
 		if (hasRoute) { break; }
 	}
 
+	// char buf[255];
+	// fprintf(stderr, "hasRoute %d %s\n", !!hasRoute, _target.toString(buf));
+
 
 	if (!hasRoute) {
 		if (_target && _target.netmaskBits() == 0) {
@@ -486,46 +489,58 @@ bool ManagedRoute::sync()
 			char newSystemDevice[128];
 			newSystemDevice[0] = (char)0;
 
-			// Find system default route that this route should override
-			// We need to put it back when default route is turned off
-			for(std::vector<_RTE>::iterator r(rtes.begin());r!=rtes.end();++r) {
-				if (r->via) {
-					if ( !_systemVia && r->isDefault == 1 && (strcmp(r->device,_device) != 0) ) {
+			// If macos has a network hiccup, it deletes what _systemVia we had set.
+			// Then we don't know how to set the default route again.
+			// So use the one we had set previously. Don't overwrite it.
+			if (!_systemVia) {
+				// Find system default route that this route should override
+				// We need to put it back when default route is turned off
+				for(std::vector<_RTE>::iterator r(rtes.begin());r!=rtes.end();++r) {
+					if (r->via) {
+						if ( !_systemVia && r->isDefault == 1 && (strcmp(r->device,_device) != 0) ) {
 
-						newSystemVia = r->via;
-						Utils::scopy(newSystemDevice,sizeof(newSystemDevice),r->device);
+							newSystemVia = r->via;
+							Utils::scopy(newSystemDevice,sizeof(newSystemDevice),r->device);
+						}
 					}
 				}
+				if (newSystemVia) { _systemVia = newSystemVia; }
 			}
 
-			if (!newSystemVia) { return false; }
 
-			// Get device corresponding to route if we don't have that already
-			if ((newSystemVia)&&(!newSystemDevice[0])) {
-				rtes = _getRTEs(newSystemVia,true);
-				for(std::vector<_RTE>::iterator r(rtes.begin());r!=rtes.end();++r) {
-					if ( (r->device[0]) && (strcmp(r->device,_device) != 0) && r->target.netmaskBits() != 0) {
-						Utils::scopy(newSystemDevice,sizeof(newSystemDevice),r->device);
-						break;
+			// char buf1[255], buf2[255];
+			// fprintf(stderr, "_systemVia %s new %s\n", _systemVia.toString(buf1), newSystemVia.toString(buf2));
+			if (!_systemVia) { return false; }
+
+			if (!_systemDevice[0]) {
+				// Get device corresponding to route if we don't have that already
+				if ((newSystemVia)&&(!newSystemDevice[0])) {
+					rtes = _getRTEs(newSystemVia,true);
+					for(std::vector<_RTE>::iterator r(rtes.begin());r!=rtes.end();++r) {
+						if ( (r->device[0]) && (strcmp(r->device,_device) != 0) && r->target.netmaskBits() != 0) {
+							Utils::scopy(newSystemDevice,sizeof(newSystemDevice),r->device);
+							break;
+						}
 					}
 				}
-			}
-			if (!newSystemDevice[0]) { return false; }
-
 
-			// update the system via in case it changed out from under us
-			// while we were in default route mode
+				if (newSystemDevice[0]) {
+					Utils::scopy(_systemDevice,sizeof(_systemDevice),newSystemDevice);
+				}
+			}
+			// fprintf(stderr, "_systemDevice %s new %s\n", _systemDevice, newSystemDevice);
+			if (!_systemDevice[0]) { return false; }
 
-			_systemVia = newSystemVia;
-			Utils::scopy(_systemDevice,sizeof(_systemDevice),newSystemDevice);
 
-			// Do the actual default route commands
+			// Do Default Route route commands
 			_routeCmd("delete",_target,_systemVia,(const char *)0,(const char *)0);
 			_routeCmd("add",_target,_via,(const char *)0,(const char *)0);
 			_routeCmd("add",_target,_systemVia,_systemDevice,(const char *)0);
+
 			_applied[_target] = true;
+
 		} else {
-			// Do the actual route commands
+			// Do Non-Default route commands
 			_applied[_target] = true;
 			_routeCmd("add",leftt,_via,(const char *)0,(_via) ? (const char *)0 : _device);
 		}

+ 22 - 0
osdep/PortMapper.cpp

@@ -79,6 +79,7 @@ public:
 		throw()
 	{
 		int mode = 0; // 0 == NAT-PMP, 1 == UPnP
+		int retrytime = 500;
 
 #ifdef ZT_PORTMAPPER_TRACE
 		fprintf(stderr,"PortMapper: started for UDP port %d" ZT_EOL_S,localPort);
@@ -86,6 +87,26 @@ public:
 
 		while (run) {
 
+			{
+				// use initnatpmp to check if we can bind a port at all
+				natpmp_t _natpmp;
+				int result = initnatpmp(&_natpmp,0,0);
+				if (result == NATPMP_ERR_CANNOTGETGATEWAY || result == NATPMP_ERR_SOCKETERROR) {
+					closenatpmp(&_natpmp);
+#ifdef ZT_PORTMAPPER_TRACE
+					PM_TRACE("PortMapper: init failed %d. You might not have an internet connection yet. Trying again in %d" ZT_EOL_S, result, retrytime);
+#endif
+					Thread::sleep(retrytime);
+					retrytime = retrytime * 2;
+					if (retrytime > ZT_PORTMAPPER_REFRESH_DELAY / 10) {
+						retrytime = ZT_PORTMAPPER_REFRESH_DELAY / 10;
+					}
+					continue;
+				} else {
+					closenatpmp(&_natpmp);
+					retrytime = 500;
+				}
+			}
 			// ---------------------------------------------------------------------
 			// NAT-PMP mode (preferred)
 			// ---------------------------------------------------------------------
@@ -172,6 +193,7 @@ public:
 #ifdef ZT_PORTMAPPER_TRACE
                     PM_TRACE("PortMapper: NAT-PMP: request failed, switching to UPnP mode" ZT_EOL_S);
 #endif
+					continue;
 				}
 			}
 			// ---------------------------------------------------------------------

+ 2 - 1
service/OneService.cpp

@@ -2617,8 +2617,9 @@ public:
 					r->second->sync();
 			}
 			for(std::map< InetAddress, SharedPtr<ManagedRoute> >::iterator r(n.managedRoutes().begin());r!=n.managedRoutes().end();++r) {
-				if (r->second->via())
+				if (r->second->via() && (!r->second->target().isDefaultRoute() || _node->online())) {
 					r->second->sync();
+				}
 			}
 		}