Browse Source

Catch `SIGILL`, `SIGFPE`, `SIGSEGV` in the test runner

Feoramund 1 year ago
parent
commit
d1723664a7

+ 63 - 2
core/testing/runner.odin

@@ -69,6 +69,8 @@ Task_Timeout :: struct {
 run_test_task :: proc(task: thread.Task) {
 	data := cast(^Task_Data)(task.data)
 
+	setup_task_signal_handler(task.user_index)
+
 	chan.send(data.t.channel, Event_New_Test {
 		test_index = task.user_index,
 	})
@@ -76,6 +78,8 @@ run_test_task :: proc(task: thread.Task) {
 	chan.send(data.t.channel, Event_State_Change {
 		new_state = .Running,
 	})
+	
+	context.assertion_failure_proc = test_assertion_failure_proc
 
 	context.logger = {
 		procedure = test_logger_proc,
@@ -389,6 +393,8 @@ runner :: proc(internal_tests: []Internal_Test) -> bool {
 	fmt.wprint(stdout, ansi.CSI + ansi.DECTCEM_HIDE)
 
 	when FANCY_OUTPUT {
+		signals_were_raised := false
+
 		redraw_report(stdout, report)
 		draw_status_bar(stdout, thread_count_status_string, total_done_count, total_test_count)
 	}
@@ -557,12 +563,57 @@ runner :: proc(internal_tests: []Internal_Test) -> bool {
 			}
 		}
 
-		if should_abort() {
+		if should_stop_runner() {
 			fmt.wprintln(stderr, "\nCaught interrupt signal. Stopping all tests.")
 			thread.pool_shutdown(&pool)
 			break main_loop
 		}
 
+		when FANCY_OUTPUT {
+			// Because the bounds checking procs send directly to STDERR with
+			// no way to redirect or handle them, we need to at least try to
+			// let the user see those messages when using the animated progress
+			// report. This flag may be set by the block of code below if a
+			// signal is raised.
+			//
+			// It'll be purely by luck if the output is interleaved properly,
+			// given the nature of non-thread-safe printing.
+			//
+			// At worst, if Odin did not print any error for this signal, we'll
+			// just re-display the progress report. The fatal log error message
+			// should be enough to clue the user in that something dire has
+			// occurred.
+			bypass_progress_overwrite := false
+		}
+
+		if test_index, reason, ok := should_stop_test(); ok {
+			#no_bounds_check report.all_test_states[test_index] = .Failed
+			#no_bounds_check it := internal_tests[test_index]
+			#no_bounds_check pkg := report.packages_by_name[it.pkg]
+			pkg.frame_ready = false
+
+			fmt.assertf(thread.pool_stop_task(&pool, test_index),
+				"A signal (%v) was raised to stop test #%i %s.%s, but it was unable to be found.",
+				reason, test_index, it.pkg, it.name)
+
+			if test_index not_in failed_test_reason_map {
+				// We only write a new error message here if there wasn't one
+				// already, because the message we can provide based only on
+				// the signal won't be very useful, whereas asserts and panics
+				// will provide a user-written error message.
+				failed_test_reason_map[test_index] = fmt.aprintf("Signal caught: %v", reason, allocator = shared_log_allocator)
+				pkg_log.fatalf("Caught signal to stop test #%i %s.%s for: %v.", test_index, it.pkg, it.name, reason)
+
+				when FANCY_OUTPUT {
+					signals_were_raised = true
+					bypass_progress_overwrite = true
+				}
+			}
+
+			total_failure_count += 1
+			total_done_count += 1
+		}
+
 		// -- Redraw.
 
 		when FANCY_OUTPUT {
@@ -570,7 +621,9 @@ runner :: proc(internal_tests: []Internal_Test) -> bool {
 				continue main_loop
 			}
 
-			fmt.wprintf(stdout, ansi_redraw_string, total_done_count, total_test_count)
+			if !bypass_progress_overwrite {
+				fmt.wprintf(stdout, ansi_redraw_string, total_done_count, total_test_count)
+			}
 		} else {
 			if total_done_count != last_done_count {
 				fmt.wprintf(stdout, OSC_WINDOW_TITLE, total_done_count, total_test_count)
@@ -698,6 +751,14 @@ runner :: proc(internal_tests: []Internal_Test) -> bool {
 
 	fmt.wprint(stdout, ansi.CSI + ansi.DECTCEM_SHOW)
 
+	when FANCY_OUTPUT {
+		if signals_were_raised {
+			fmt.wprintln(batch_writer, `
+Signals were raised during this test run. Log messages are likely to have collided with each other.
+To partly mitigate this, redirect STDERR to a file or use the -define:ODIN_TEST_FANCY=false option.`)
+		}
+	}
+
 	fmt.wprintln(stderr, bytes.buffer_to_string(&batch_buffer))
 
 	return total_success_count == total_test_count

+ 33 - 0
core/testing/signal_handler.odin

@@ -0,0 +1,33 @@
+//+private
+package testing
+
+import "base:runtime"
+import pkg_log "core:log"
+
+// Why a test was forcibly stopped, as reported by `should_stop_test`.
+Stop_Reason :: enum {
+	Unknown,             // Zero value; used when the signal matches none of the cases below.
+	Illegal_Instruction, // SIGILL
+	Arithmetic_Error,    // SIGFPE
+	Segmentation_Fault,  // SIGSEGV
+}
+
+// Assertion-failure handler used while a test is running.
+//
+// It records the failure as a fatal log message attributed to `loc`, then
+// traps. The test task installs it as `context.assertion_failure_proc`.
+// NOTE(review): the trap is presumably what surfaces as the signal the
+// runner catches for "asserts and panics" - confirm per platform.
+test_assertion_failure_proc :: proc(prefix, message: string, loc: runtime.Source_Code_Location) -> ! {
+	pkg_log.fatalf("%s: %s", prefix, message, location = loc)
+	runtime.trap()
+}
+
+// Installs the runner's signal handlers by forwarding to the
+// platform-specific `_setup_signal_handler`.
+setup_signal_handler :: proc() {
+	_setup_signal_handler()
+}
+
+// Tags the calling thread with the index of the test it is about to run by
+// forwarding to the platform-specific `_setup_task_signal_handler`.
+setup_task_signal_handler :: proc(test_index: int) {
+	_setup_task_signal_handler(test_index)
+}
+
+// Reports whether the whole test run has been asked to stop; forwards to
+// the platform-specific `_should_stop_runner`.
+should_stop_runner :: proc() -> bool {
+	return _should_stop_runner()
+}
+
+// Reports whether a single test must be stopped. When `ok` is true,
+// `test_index` and `reason` identify the test and why; otherwise they carry
+// no meaning. Forwards to the platform-specific `_should_stop_test`.
+should_stop_test :: proc() -> (test_index: int, reason: Stop_Reason, ok: bool) {
+	return _should_stop_test()
+}

+ 117 - 7
core/testing/signal_handler_libc.odin

@@ -4,16 +4,126 @@ package testing
 
 import "base:intrinsics"
 import "core:c/libc"
+import "core:encoding/ansi"
+import "core:sync"
+
+@(private="file") stop_runner_flag: libc.sig_atomic_t
+
+@(private="file") stop_test_gate:   sync.Mutex
+@(private="file") stop_test_index:  libc.sig_atomic_t
+@(private="file") stop_test_reason: libc.sig_atomic_t
+@(private="file") stop_test_alert:  libc.sig_atomic_t
+
+@(private="file", thread_local)
+local_test_index: libc.sig_atomic_t
+
+// Signal handler that only raises `stop_runner_flag`; the flag is read back
+// by `_should_stop_runner`.
+@(private="file")
+stop_runner_callback :: proc "c" (sig: libc.int) {
+	intrinsics.atomic_store(&stop_runner_flag, 1)
+}
 
 @(private="file")
-abort_flag: libc.sig_atomic_t
+// Signal handler installed for SIGILL, SIGFPE and SIGSEGV.
+//
+// On the thread marked with `local_test_index == -1` (set by
+// `_setup_signal_handler`) the error is unrecoverable: the cursor is
+// restored, an advisory is written to stderr, and the process aborts.
+//
+// On any other thread the handler takes `stop_test_gate`, publishes the test
+// index and signal number, raises `stop_test_alert`, and then spins. The
+// gate is released by `unlock_stop_test_gate` once the alert has been read.
+stop_test_callback :: proc "c" (sig: libc.int) {
+	if local_test_index == -1 {
+		// We're the test runner, and we ourselves have caught a signal from
+		// which there is no recovery.
+		//
+		// The most we can do now is make sure the user's cursor is visible,
+		// nuke the entire process, and hope a useful core dump survives.
+
+		// NOTE(Feoramund): Using these write calls in a signal handler is
+		// undefined behavior in C99 but possibly tolerated in POSIX 2008.
+		// Either way, we may as well try to salvage what we can.
+		show_cursor := ansi.CSI + ansi.DECTCEM_SHOW
+		libc.fwrite(raw_data(show_cursor), size_of(byte), len(show_cursor), libc.stdout)
+		libc.fflush(libc.stdout)
+
+		// This is an attempt at being compliant by avoiding printf.
+		//
+		// Digits are written right-to-left, starting one byte before the end
+		// of the buffer, so the final byte is never written.
+		sigbuf: [8]byte
+		sigstr: string
+		{
+			signum := cast(int)sig
+			i := len(sigbuf) - 2
+			// The `i >= 0` guard keeps an unexpectedly large value from
+			// indexing below the buffer inside a signal handler.
+			for signum > 0 && i >= 0 {
+				m := signum % 10
+				signum /= 10
+				sigbuf[i] = cast(u8)('0' + m)
+				i -= 1
+			}
+			// `i` now sits one slot before the most significant digit.
+			// Slice only the digits; `sigbuf[i:]` would also emit the
+			// untouched zero bytes on either side of them.
+			sigstr = cast(string)sigbuf[i + 1:len(sigbuf) - 1]
+		}
+
+		advisory_a := `
+The test runner's main thread has caught an unrecoverable error (signal `
+		advisory_b := `) and will now forcibly terminate.
+This is a dire bug and should be reported to the Odin developers.
+`
+		libc.fwrite(raw_data(advisory_a), size_of(byte), len(advisory_a), libc.stderr)
+		libc.fwrite(raw_data(sigstr), size_of(byte), len(sigstr), libc.stderr)
+		libc.fwrite(raw_data(advisory_b), size_of(byte), len(advisory_b), libc.stderr)
+
+		// Try to get a core dump.
+		libc.abort()
+	}
+
+	if sync.mutex_guard(&stop_test_gate) {
+		// Holding the gate keeps a second failing test from overwriting
+		// these three values before they have been read.
+		intrinsics.atomic_store(&stop_test_index, local_test_index)
+		intrinsics.atomic_store(&stop_test_reason, cast(libc.sig_atomic_t)sig)
+		intrinsics.atomic_store(&stop_test_alert, 1)
+
+		for {
+			// Idle until this thread is terminated by the runner,
+			// otherwise we may continue to generate signals.
+			intrinsics.cpu_relax()
+		}
+	}
+}
+
+// Marks the calling thread as the runner (`local_test_index = -1`) and
+// registers the libc signal handlers: SIGINT/SIGTERM stop the whole run,
+// while SIGILL/SIGFPE/SIGSEGV are routed to `stop_test_callback`.
+_setup_signal_handler :: proc() {
+	// -1 is the sentinel `stop_test_callback` checks to recognise the runner.
+	local_test_index = -1
+
+	// Catch user interrupt / CTRL-C.
+	libc.signal(libc.SIGINT, stop_runner_callback)
+	// Catch polite termination request.
+	libc.signal(libc.SIGTERM, stop_runner_callback)
+
+	// For tests:
+	// Catch asserts and panics.
+	libc.signal(libc.SIGILL, stop_test_callback)
+	// Catch arithmetic errors.
+	libc.signal(libc.SIGFPE, stop_test_callback)
+	// Catch segmentation faults (illegal memory access).
+	libc.signal(libc.SIGSEGV, stop_test_callback)
+}
+
+// Stores `test_index` in the thread-local `local_test_index`, which
+// `stop_test_callback` publishes if a signal is raised on this thread.
+_setup_task_signal_handler :: proc(test_index: int) {
+	local_test_index = cast(libc.sig_atomic_t)test_index
+}
+
+// True once `stop_runner_callback` has set `stop_runner_flag`.
+_should_stop_runner :: proc() -> bool {
+	return intrinsics.atomic_load(&stop_runner_flag) == 1
+}
 
-setup_signal_handler :: proc() {
-	libc.signal(libc.SIGINT, proc "c" (sig: libc.int) {
-		intrinsics.atomic_add(&abort_flag, 1)
-	})
+// Deferred companion of `_should_stop_test` (see its `deferred_out`
+// attribute). It receives that procedure's return values and releases
+// `stop_test_gate` only when a stop request was actually reported; the gate
+// is locked in `stop_test_callback`.
+@(private="file")
+unlock_stop_test_gate :: proc(_: int, _: Stop_Reason, ok: bool) {
+	if ok {
+		sync.mutex_unlock(&stop_test_gate)
+	}
 }
 
-should_abort :: proc() -> bool {
-	return intrinsics.atomic_load(&abort_flag) > 0
+// Polls for a stop request published by `stop_test_callback`.
+//
+// When `stop_test_alert` is set, the alert is cleared and the stored test
+// index and signal number are returned with `ok = true`; the signal number
+// is mapped onto `Stop_Reason`, staying `.Unknown` if no case matches.
+// Otherwise all results are zero values and `ok` is false.
+//
+// `deferred_out` passes the results to `unlock_stop_test_gate`, which
+// releases the gate when `ok` is true.
+@(deferred_out=unlock_stop_test_gate)
+_should_stop_test :: proc() -> (test_index: int, reason: Stop_Reason, ok: bool) {
+	if intrinsics.atomic_load(&stop_test_alert) == 1 {
+		intrinsics.atomic_store(&stop_test_alert, 0)
+
+		test_index = cast(int)intrinsics.atomic_load(&stop_test_index)
+		switch intrinsics.atomic_load(&stop_test_reason) {
+		case libc.SIGFPE: reason = .Arithmetic_Error
+		case libc.SIGILL: reason = .Illegal_Instruction
+		case libc.SIGSEGV: reason = .Segmentation_Fault
+		}
+		ok = true
+	}
+
+	return
 }

+ 10 - 2
core/testing/signal_handler_other.odin

@@ -2,10 +2,18 @@
 //+build js, wasi, freestanding
 package testing
 
-setup_signal_handler :: proc() {
+// Stub for the js, wasi and freestanding builds: installs nothing.
+_setup_signal_handler :: proc() {
 	// Do nothing.
 }
 
-should_abort :: proc() -> bool {
+// Stub for the js, wasi and freestanding builds: `test_index` is ignored.
+_setup_task_signal_handler :: proc(test_index: int) {
+	// Do nothing.
+}
+
+// Stub for the js, wasi and freestanding builds: never requests a stop.
+_should_stop_runner :: proc() -> bool {
 	return false
 }
+
+// Stub for the js, wasi and freestanding builds: always reports that no
+// test needs stopping (`ok` is false, the other results are zero values).
+_should_stop_test :: proc() -> (test_index: int, reason: Stop_Reason, ok: bool) {
+	return 0, {}, false
+}