Browse Source

Merge pull request #2091 from zerotier/dev-validator-fix-dump-at-abort

Separate test and summary step in validator script
Joseph Henry 2 years ago
parent
commit
27a9cc6254

+ 73 - 89
.github/workflows/validate-1m-linux.sh → .github/workflows/validate-linux.sh

@@ -3,26 +3,35 @@
 # This test script joins Earth and pokes some stuff
 
 TEST_NETWORK=8056c2e21c000001
-RUN_LENGTH=60
+RUN_LENGTH=20
 TEST_FINISHED=false
 ZTO_VER=$(git describe --tags $(git rev-list --tags --max-count=1))
 ZTO_COMMIT=$(git rev-parse HEAD)
 ZTO_COMMIT_SHORT=$(git rev-parse --short HEAD)
 TEST_DIR_PREFIX="$ZTO_VER-$ZTO_COMMIT_SHORT-test-results"
-EXIT_TEST_FAILED=0
+
+TEST_OK=0
+TEST_FAIL=1
 
 echo "Performing test on: $ZTO_VER-$ZTO_COMMIT_SHORT"
 TEST_FILEPATH_PREFIX="$TEST_DIR_PREFIX/$ZTO_COMMIT_SHORT"
 mkdir $TEST_DIR_PREFIX
 
+# How long we will wait for ZT to come online before considering it a failure
+MAX_WAIT_SECS=60
+
 ################################################################################
 # Multi-node connectivity and performance test                                 #
 ################################################################################
-main() {
-	echo -e "\nRunning test for $RUN_LENGTH seconds"
+
+test() {
+
+	echo -e "\nPerforming pre-flight checks"
 
 	check_exit_on_invalid_identity
 
+	echo -e "\nRunning test for $RUN_LENGTH seconds"
+
 	NS1="ip netns exec ns1"
 	NS2="ip netns exec ns2"
 
@@ -75,11 +84,11 @@ main() {
 	# Memory Leak Check                                                            #
 	################################################################################
 
-	FILENAME_MEMORY_LOG="$TEST_FILEPATH_PREFIX-memory.log"
+	export FILENAME_MEMORY_LOG="$TEST_FILEPATH_PREFIX-memory.log"
 
 	echo -e "\nStarting a ZeroTier instance in each namespace..."
 
-	time_test_start=$(date +%s)
+	export time_test_start=$(date +%s)
 
 	# Spam the CLI as ZeroTier is starting
 	spam_cli 100
@@ -127,7 +136,6 @@ main() {
 	################################################################################
 
 	echo "Waiting for ZeroTier to come online before attempting test..."
-	MAX_WAIT_SECS="${MAX_WAIT_SECS:-120}"
 	node1_online=false
 	node2_online=false
 	both_instances_online=false
@@ -139,13 +147,13 @@ main() {
 		node2_online="$($ZT2 -j info | jq '.online' 2>/dev/null)"
 		echo "Checking for online status: try #$s, node1:$node1_online, node2:$node2_online"
 		if [[ "$node1_online" == "true" ]]; then
-			time_zt_node1_online=$(date +%s)
+			export time_zt_node1_online=$(date +%s)
 		fi
 		if [[ "$node2_online" == "true" ]]; then
-			time_zt_node2_online=$(date +%s)
+			export time_zt_node2_online=$(date +%s)
 		fi
 		if [[ "$node2_online" == "true" && "$node1_online" == "true" ]]; then
-			both_instances_online=true
+			export both_instances_online=true
 			break
 		fi
 		sleep 1
@@ -172,10 +180,7 @@ main() {
 	$ZT2 status
 
 	if [[ "$both_instances_online" != "true" ]]; then
-		echo "One or more instances of ZeroTier failed to come online. Aborting test."
-  		collect_zt_dump_files
-		collect_zt_log_files
-		exit 0
+		exit_test_and_generate_report $TEST_FAIL "one or more nodes failed to come online"
 	fi
 
 	echo -e "\nJoining networks"
@@ -199,18 +204,14 @@ main() {
 	$NS1 ping -c 16 $node2_ip4 >$PING12_FILENAME
 	$NS2 ping -c 16 $node1_ip4 >$PING21_FILENAME
 
-	# Parse ping statistics
-	ping_loss_percent_1_to_2="${ping_loss_percent_1_to_2:-100.0}"
-	ping_loss_percent_2_to_1="${ping_loss_percent_2_to_1:-100.0}"
-
 	ping_loss_percent_1_to_2=$(cat $PING12_FILENAME |
 		grep "packet loss" | awk '{print $6}' | sed 's/%//')
 	ping_loss_percent_2_to_1=$(cat $PING21_FILENAME |
 		grep "packet loss" | awk '{print $6}' | sed 's/%//')
 
 	# Normalize loss value
-	ping_loss_percent_1_to_2=$(echo "scale=2; $ping_loss_percent_1_to_2/100.0" | bc)
-	ping_loss_percent_2_to_1=$(echo "scale=2; $ping_loss_percent_2_to_1/100.0" | bc)
+	export ping_loss_percent_1_to_2=$(echo "scale=2; $ping_loss_percent_1_to_2/100.0" | bc)
+	export ping_loss_percent_2_to_1=$(echo "scale=2; $ping_loss_percent_2_to_1/100.0" | bc)
 
 	################################################################################
 	# CLI Check                                                                    #
@@ -261,11 +262,9 @@ main() {
 
 	# TODO: Validate JSON
 
-	################################################################################
-	# Performance Test                                                             #
-	################################################################################
+	# Performance Test
 
-	FILENAME_PERF_JSON="$TEST_FILEPATH_PREFIX-iperf.json"
+	export FILENAME_PERF_JSON="$TEST_FILEPATH_PREFIX-iperf.json"
 
 	echo -e "\nBeginning performance test:"
 
@@ -281,15 +280,7 @@ main() {
 
 	cat $FILENAME_PERF_JSON
 
-	################################################################################
-	# Collect ZeroTier dump files                                                  #
-	################################################################################
-
- 	collect_zt_dump_files
-
-	################################################################################
-	# Let ZeroTier idle long enough for various timers                             #
-	################################################################################
+	# Let ZeroTier idle long enough for various timers
 
 	echo -e "\nIdling ZeroTier for $RUN_LENGTH seconds..."
 	sleep $RUN_LENGTH
@@ -301,25 +292,44 @@ main() {
 
 	sleep 5
 
-	################################################################################
-	# Stop test                                                                    #
-	################################################################################
+	# Stop test
 
 	echo -e "\nStopping memory check..."
 	sudo pkill -15 -f valgrind
 	sleep 10
 
-	time_test_end=$(date +%s)
+	export time_test_end=$(date +%s)
 
-	################################################################################
-	# Copy ZeroTier stdout/stderr logs                                             #
-	################################################################################
+	exit_test_and_generate_report $TEST_OK "completed test"
+}
 
-	collect_zt_log_files
+################################################################################
+# Generate report                                                              #
+################################################################################
 
-	################################################################################
-	# Generate report                                                              #
-	################################################################################
+exit_test_and_generate_report() {
+
+	echo "Exiting test with reason: $2 ($1)"
+
+	# Collect ZeroTier dump files
+
+	echo -e "\nCollecting ZeroTier dump files"
+
+	node1_id=$($ZT1 -j status | jq -r .address)
+	node2_id=$($ZT2 -j status | jq -r .address)
+
+	$ZT1 dump
+	mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node1_id.txt"
+
+	$ZT2 dump
+	mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node2_id.txt"
+
+	# Copy ZeroTier stdout/stderr logs
+
+	cp node_1.log "$TEST_FILEPATH_PREFIX-node-log-$node1_id.txt"
+	cp node_2.log "$TEST_FILEPATH_PREFIX-node-log-$node2_id.txt"
+
+	# Generate report
 
 	cat $FILENAME_MEMORY_LOG
 
@@ -328,9 +338,7 @@ main() {
 	POSSIBLY_LOST=$(xmlstarlet sel -t -v '/valgrindoutput/error/xwhat' \
 		$FILENAME_MEMORY_LOG | grep "possibly" | awk '{print $1;}')
 
-	################################################################################
-	# Generate coverage report artifact and summary                                #
-	################################################################################
+	# Generate coverage report artifact and summary
 
 	FILENAME_COVERAGE_JSON="$TEST_FILEPATH_PREFIX-coverage.json"
 	FILENAME_COVERAGE_HTML="$TEST_FILEPATH_PREFIX-coverage.html"
@@ -350,22 +358,20 @@ main() {
 	COVERAGE_LINE_TOTAL="${COVERAGE_LINE_TOTAL:-0}"
 	COVERAGE_LINE_PERCENT="${COVERAGE_LINE_PERCENT:-0}"
 
-	################################################################################
-	# Default values                                                               #
-	################################################################################
+	# Default values
 
 	DEFINITELY_LOST="${DEFINITELY_LOST:-0}"
 	POSSIBLY_LOST="${POSSIBLY_LOST:-0}"
+	ping_loss_percent_1_to_2="${ping_loss_percent_1_to_2:-100.0}"
+	ping_loss_percent_2_to_1="${ping_loss_percent_2_to_1:-100.0}"
 
-	################################################################################
-	# Summarize and emit json for trend reporting                                  #
-	################################################################################
+	# Summarize and emit json for trend reporting
 
 	FILENAME_SUMMARY="$TEST_FILEPATH_PREFIX-summary.json"
 
 	time_length_test=$((time_test_end - time_test_start))
-	time_length_zt_node1_online=$((time_zt_node1_online - time_zt_start))
-	time_length_zt_node2_online=$((time_zt_node2_online - time_zt_start))
+	time_to_node1_online=$((time_zt_node1_online - time_zt_start))
+	time_to_node2_online=$((time_zt_node2_online - time_zt_start))
 	#time_length_zt_join=$((time_zt_join_end-time_zt_join_start))
 	#time_length_zt_leave=$((time_zt_leave_end-time_zt_leave_start))
 	#time_length_zt_can_still_ping=$((time_zt_can_still_ping-time_zt_leave_start))
@@ -377,25 +383,20 @@ main() {
   "commit":"$ZTO_COMMIT",
   "arch_m":"$(uname -m)",
   "arch_a":"$(uname -a)",
+  "binary_size":"$(stat -c %s zerotier-one)",
   "time_length_test":$time_length_test,
-  "time_length_zt_node1_online":$time_length_zt_node1_online,
-  "time_length_zt_node2_online":$time_length_zt_node2_online,
+  "time_to_node1_online":$time_to_node1_online,
+  "time_to_node2_online":$time_to_node2_online,
   "num_possible_bytes_lost": $POSSIBLY_LOST,
   "num_definite_bytes_lost": $DEFINITELY_LOST,
-  "num_incorrect_settings": $POSSIBLY_LOST,
   "num_bad_formattings": $POSSIBLY_LOST,
-  "percent_coverage_branches": $POSSIBLY_LOST,
   "coverage_lines_covered": $COVERAGE_LINE_COVERED,
   "coverage_lines_total": $COVERAGE_LINE_TOTAL,
   "coverage_lines_percent": $COVERAGE_LINE_PERCENT,
   "ping_loss_percent_1_to_2": $ping_loss_percent_1_to_2,
   "ping_loss_percent_2_to_1": $ping_loss_percent_2_to_1,
-  "mean_latency_ping_random": $POSSIBLY_LOST,
-  "mean_latency_ping_netns": $POSSIBLY_LOST,
-  "mean_pdv_random": $POSSIBLY_LOST,
-  "mean_pdv_netns": $POSSIBLY_LOST,
-  "mean_perf_netns": $POSSIBLY_LOST,
-  "exit_test_failed": $EXIT_TEST_FAILED
+  "test_exit_code": $1,
+  "test_exit_reason":"$2"
 }
 EOF
 	)
@@ -436,6 +437,10 @@ spam_cli() {
 	done
 }
 
+################################################################################
+# Check for proper exit on load of invalid identity                            #
+################################################################################
+
 check_exit_on_invalid_identity() {
 	echo "Checking ZeroTier exits on invalid identity..."
 	mkdir -p $(pwd)/exit_test
@@ -447,35 +452,14 @@ check_exit_on_invalid_identity() {
 	$ZT1 &
 	my_pid=$!
 
-	echo "Waiting 5 secons"
+	echo "Waiting 5 seconds"
 	sleep 5
 
 	# check if process is running
 	kill -0 $my_pid
 	if [ $? -eq 0 ]; then
-		EXIT_TEST_FAILED=1
-		echo "Exit test FAILED: Process still running after being fed an invalid identity"
-	else
-		echo "Exit test PASSED"
+		exit_test_and_generate_report $TEST_FAIL "Exit test FAILED: Process still running after being fed an invalid identity"
 	fi
 }
 
-collect_zt_dump_files() {
-	echo -e "\nCollecting ZeroTier dump files"
-
-	node1_id=$($ZT1 -j status | jq -r .address)
-	node2_id=$($ZT2 -j status | jq -r .address)
-
-	$ZT1 dump
-	mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node1_id.txt"
-
-	$ZT2 dump
-	mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node2_id.txt"
-}
-
-collect_zt_log_files() {
-	cp node_1.log "$TEST_FILEPATH_PREFIX-node-log-$node1_id.txt"
-	cp node_2.log "$TEST_FILEPATH_PREFIX-node-log-$node2_id.txt"
-}
-
-main "$@"
+test "$@"

+ 6 - 3
.github/workflows/report.sh → .github/workflows/validate-report.sh

@@ -5,6 +5,8 @@
 ################################################################################
 
 DEFINITELY_LOST=$(cat *test-results/*summary.json | jq .num_definite_bytes_lost)
+EXIT_CODE=$(cat *test-results/*summary.json | jq .test_exit_code)
+EXIT_REASON=$(cat *test-results/*summary.json | jq -r .test_exit_reason)
 
 cat *test-results/*summary.json
 
@@ -14,8 +16,9 @@ if [[ "$DEFINITELY_LOST" -gt 0 ]]; then
       exit 1
 fi
 
-EXIT_TEST_FAILED=$(cat *test-results/*summary.json | jq .exit_test_failed)
+# Catch-all for other non-zero exit codes
 
-if [[ "$EXIT_TEST_FAILED" -gt 0 ]]; then
+if [[ "$EXIT_CODE" -gt 0 ]]; then
+      echo "Test failed: $EXIT_REASON"
       exit 1
-fi
+fi

+ 4 - 4
.github/workflows/validate.yml

@@ -40,8 +40,8 @@ jobs:
       run: |
         sudo apt install -y valgrind xmlstarlet gcovr iperf3 tree
         make one ZT_COVERAGE=1 ZT_TRACE=1
-        sudo chmod +x ./.github/workflows/validate-1m-linux.sh
-        sudo ./.github/workflows/validate-1m-linux.sh
+        sudo chmod +x ./.github/workflows/validate-linux.sh
+        sudo ./.github/workflows/validate-linux.sh
 
     - name: Archive test results
       uses: actions/upload-artifact@v3
@@ -51,6 +51,6 @@ jobs:
 
     - name: final-report
       run: |
-        sudo chmod +x ./.github/workflows/report.sh
-        sudo ./.github/workflows/report.sh
+        sudo chmod +x ./.github/workflows/validate-report.sh
+        sudo ./.github/workflows/validate-report.sh