瀏覽代碼

Re-implement the error squiggles with visual width

Feoramund 1 年之前
父節點
當前提交
8ed5cb283b
共有 2 個文件被更改，包括 143 次插入和 164 次刪除
  1. 13 5
      src/check_stmt.cpp
  2. 130 159
      src/error.cpp

+ 13 - 5
src/check_stmt.cpp

@@ -582,20 +582,28 @@ gb_internal Type *check_assignment_variable(CheckerContext *ctx, Operand *lhs, O
 				isize offset = show_error_on_line(e->token.pos, token_pos_end(e->token));
 				if (offset < 0) {
 					if (is_type_map(e->type)) {
-						error_line("\t\tSuggestion: Did you mean? 'for key, &%.*s in ...'\n", LIT(e->token.string));
+						error_line("\tSuggestion: Did you mean? 'for key, &%.*s in ...'\n", LIT(e->token.string));
 					} else {
-						error_line("\t\tSuggestion: Did you mean? 'for &%.*s in ...'\n", LIT(e->token.string));
+						error_line("\tSuggestion: Did you mean? 'for &%.*s in ...'\n", LIT(e->token.string));
 					}
 				} else {
-					error_line("\t\t'%.*s' is immutable, declare it as '&%.*s' to make it mutable\n", LIT(e->token.string), LIT(e->token.string));
+					error_line("\t");
+					for (isize i = 0; i < offset-1; i++) {
+						error_line(" ");
+					}
+					error_line("'%.*s' is immutable, declare it as '&%.*s' to make it mutable\n", LIT(e->token.string), LIT(e->token.string));
 				}
 
 			} else if (e && e->flags & EntityFlag_SwitchValue) {
 				isize offset = show_error_on_line(e->token.pos, token_pos_end(e->token));
 				if (offset < 0) {
-					error_line("\t\tSuggestion: Did you mean? 'switch &%.*s in ...'\n", LIT(e->token.string));
+					error_line("\tSuggestion: Did you mean? 'switch &%.*s in ...'\n", LIT(e->token.string));
 				} else {
-					error_line("\t\t'%.*s' is immutable, declare it as '&%.*s' to make it mutable\n", LIT(e->token.string), LIT(e->token.string));
+					error_line("\t");
+					for (isize i = 0; i < offset-1; i++) {
+						error_line(" ");
+					}
+					error_line("'%.*s' is immutable, declare it as '&%.*s' to make it mutable\n", LIT(e->token.string), LIT(e->token.string));
 				}
 			}
 		}

+ 130 - 159
src/error.cpp

@@ -283,9 +283,6 @@ gb_internal isize show_error_on_line(TokenPos const &pos, TokenPos end) {
 		error_out("\t( empty line )\n");
 		terminal_reset_colours();
 
-		// Preserve the old return behaviour. Even if we can't guarantee the
-		// exact visual space offset, there are two places that check this to
-		// change what sort of suggestion they offer.
 		if (the_line == nullptr) {
 			return -1;
 		} else {
@@ -293,244 +290,218 @@ gb_internal isize show_error_on_line(TokenPos const &pos, TokenPos end) {
 		}
 	}
 
-	// Specifically use basic ASCII arrows here, in case the terminal
-	// doesn't support anything fancy. This is meant to be a good fallback.
-	char const *mark_error_sign  = "><";
-	char const *open_error_sign  = ">>";
-	char const *close_error_sign = "<<";
-	const TerminalColour marker_colour = TerminalColour_Yellow;
-
-	// ANSI SGR:
-	// 0      = Reset.
-	// 58:5:2 = Underline colour, 8-bit, green. (non-standard)
-	// 4:3    = Wiggly underline.               (non-standard)
-	char const *wiggly_underline_sgr  = "";
-	char const *disable_underline_sgr = "";
-	if (has_ansi_terminal_colours()) {
-		wiggly_underline_sgr  = "\x1b[0;58:5:2;4:3m";
-		disable_underline_sgr = "\x1b[24m";
-	}
-
 	// These two will be used like an Odin slice later.
 	char const *line_text = the_line;
 	i32 line_length_bytes = cast(i32)gb_string_length(the_line);
 
-	// NOTE(Feoramund): The numbers below are in Unicode codepoints
-	// (or runes), not visual glyph width. Calculating the visual width of
-	// a cluster of Unicode codepoints is vexing, and `utf8proc_charwidth`
-	// is inadequate.
-	//
-	// We're counting codepoints here so we don't slice one down the
-	// middle during truncation. It will still look strange if we slice
-	// a cluster down the middle. (i.e. a letter and a combining diacritic)
-	//
-	// Luckily, if our assumption about 1 codepoint == 1 glyph is wrong,
-	// we only suffer a shorter or longer line displayed in total, but all
-	// of our highlighting and marking will be precise.
-	// (Unless there's an invalid Unicode codepoint, in which case, no guarantees.)
-	//
-	// The line will be longer if a codepoint occupies more than one space
-	// (CJK in most cases) and shorter if a codepoint is invisible or is
-	// a type of joiner or combining codepoint.
-	//
-	// If we get a complete Unicode glyph counter, it would be as simple as
-	// replacing `utf8_decode` below to make all of this work perfectly.
+	ucg_grapheme* graphemes;
+	i32 line_length_runes = 0;
+	i32 line_length_graphemes = 0;
+	i32 line_width = 0;
+
+	int ucg_result = ucg_decode_grapheme_clusters(
+		permanent_allocator(), (const uint8_t*)line_text, line_length_bytes,
+		&graphemes, &line_length_runes, &line_length_graphemes, &line_width);
+
+	if (ucg_result < 0) {
+		// There was a UTF-8 parsing error.
+		// Insert a dummy grapheme so the start of the invalid rune can be pointed at.
+		graphemes = (ucg_grapheme*)gb_resize(permanent_allocator(),
+			graphemes,
+			sizeof(ucg_grapheme) * (line_length_graphemes),
+			sizeof(ucg_grapheme) * (1 + line_length_graphemes));
 
+		ucg_grapheme append = {
+			error_start_index_bytes,
+			line_length_runes,
+			1,
+		};
+
+		graphemes[line_length_graphemes] = append;
+	}
+
+	// The units below are counted in visual, monospace cells.
 	enum {
 		MAX_LINE_LENGTH  = 80,
 		MAX_TAB_WIDTH    = 8,
 		ELLIPSIS_PADDING = 8, // `...  ...`
-		MAX_MARK_WIDTH   = 4, // `><` or `>>` and `<<`
 		MIN_LEFT_VIEW    = 8,
 
 		// A rough estimate of how many characters we'll insert, at most:
-		MAX_INSERTED_WIDTH     = MAX_TAB_WIDTH + ELLIPSIS_PADDING + MAX_MARK_WIDTH,
+		MAX_INSERTED_WIDTH     = MAX_TAB_WIDTH + ELLIPSIS_PADDING,
 
 		MAX_LINE_LENGTH_PADDED = MAX_LINE_LENGTH - MAX_INSERTED_WIDTH,
 	};
 
-	// For the purposes of truncating long lines, we calculate how many
-	// runes the line is composed of, first. We'll take note of at which
-	// rune index the error starts, too.
-	i32 error_start_index_runes = 0;
-
-	i32 line_length_runes = 0;
-	for (i32 i = 0; i < line_length_bytes; /**/) {
-		Rune rune;
-
-		if (i == error_start_index_bytes) {
-			error_start_index_runes = line_length_runes;
-		}
-
-		i32 bytes_read = cast(i32)utf8_decode(cast(const u8 *)line_text + i, line_length_bytes - i, &rune);
-		if (rune == GB_RUNE_INVALID || bytes_read <= 0) {
-			// Bail out; we won't even try to truncate the line later.
-			line_length_runes = 0;
+	i32 error_start_index_graphemes = 0;
+	for (i32 i = 0; i < line_length_graphemes; i += 1) {
+		if (graphemes[i].byte_index == error_start_index_bytes) {
+			error_start_index_graphemes = i;
 			break;
 		}
-
-		line_length_runes += 1;
-		i += bytes_read;
 	}
 
-	if (error_start_index_runes == 0 && error_start_index_bytes != 0 && line_length_runes != 0) {
-		// The error index in runes was not found, but we did find a valid Unicode string.
+	if (error_start_index_graphemes == 0 && error_start_index_bytes != 0 && line_length_graphemes != 0) {
+		// The error index in graphemes was not found, but we did find a valid Unicode string.
 		//
 		// This is an edge case where the error is sitting on a newline or the
 		// end of the line, as that is the only location we could not have checked.
-		error_start_index_runes = line_length_runes;
+		error_start_index_graphemes = line_length_graphemes;
 	}
 
 	error_out("\t");
 
 	bool show_right_ellipsis = false;
 
-	if (line_length_runes > MAX_LINE_LENGTH_PADDED) {
+	i32 squiggle_padding = 0;
+	i32 window_open_bytes = 0;
+	i32 window_close_bytes = 0;
+	if (line_width > MAX_LINE_LENGTH_PADDED) {
 		// Now that we know the line is over the length limit, we have to
-		// compose a runic window in which to display the error.
-		i32 window_width = MAX_LINE_LENGTH_PADDED;
-
-		i32 extend_right = 0;
-		i32 extend_left = 0;
-		if (error_start_index_runes + window_width > line_length_runes - 1) {
-			// Trade space from the right to the left.
-			extend_right = line_length_runes - error_start_index_runes;
-			extend_left = window_width - extend_right;
-		} else if (MIN_LEFT_VIEW - error_start_index_runes > 0) {
-			// Trade space from the left to the right.
-			extend_left = error_start_index_runes;
-			extend_right = window_width - extend_left;
-		} else {
-			// Square in the middle somewhere.
-			extend_left = MIN_LEFT_VIEW;
-			extend_right = window_width - extend_left;
+		// compose a visual window in which to display the error.
+		i32 window_size_left = 0;
+		i32 window_size_right = 0;
+		i32 window_open_graphemes = 0;
+
+		for (i32 i = error_start_index_graphemes - 1; i > 0; i -= 1) {
+			window_size_left += graphemes[i].width;
+			if (window_size_left >= MIN_LEFT_VIEW) {
+				window_open_graphemes = i;
+				window_open_bytes = graphemes[i].byte_index;
+				break;
+			}
 		}
 
-		i32 window_right_runes = gb_min(error_start_index_runes + extend_right, line_length_runes);
-		i32 window_left_runes = gb_max(0, error_start_index_runes - extend_left);
-
-		i32 window_right_bytes = 0;
-		i32 window_left_bytes = 0;
-
-		i32 i_runes = 0;
-		for (i32 i = 0; i < line_length_bytes; /**/) {
-			if (i_runes == window_left_runes ) { window_left_bytes  = i; }
-			if (i_runes == window_right_runes) { window_right_bytes = i; }
-
-			// No need for error-checking.
-			//
-			// We've already validated the string at this point, otherwise
-			// `line_length_runes` would be 0, and we would not have
-			// entered this block.
-			i32 bytes_read = cast(i32)utf8_decode(cast(const u8 *)line_text + i, line_length_bytes - i, nullptr);
-
-			i_runes += 1;
-			i += bytes_read;
+		for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+			window_size_right += graphemes[i].width;
+			if (window_size_right >= MAX_LINE_LENGTH_PADDED - MIN_LEFT_VIEW) {
+				window_close_bytes = graphemes[i].byte_index;
+				break;
+			}
+		}
+		if (window_close_bytes == 0) {
+			// The window ends at the end of the line.
+			window_close_bytes = line_length_bytes;
 		}
 
-		if (window_right_bytes == 0) {
-			// The end of the window is the end of the line.
-			window_right_bytes = line_length_bytes;
+		if (window_size_right < MAX_LINE_LENGTH_PADDED - MIN_LEFT_VIEW) {
+			// Hit the end of the string early on the right side; expand backwards.
+			for (i32 i = window_open_graphemes - 1; i > 0; i -= 1) {
+				window_size_left += graphemes[i].width;
+				if (window_size_left + window_size_right >= MAX_LINE_LENGTH_PADDED) {
+					window_open_graphemes = i;
+					window_open_bytes = graphemes[i].byte_index;
+					break;
+				}
+			}
 		}
 
-		GB_ASSERT_MSG(window_right_runes >= window_left_runes, "Error line truncation window has wrong rune indices. (left, right: %i, %i)", window_left_runes, window_right_runes);
-		GB_ASSERT_MSG(window_right_bytes >= window_left_bytes, "Error line truncation window has wrong byte indices. (left, right: %i, %i)", window_left_bytes, window_right_bytes);
+		GB_ASSERT_MSG(window_close_bytes >= window_open_bytes, "Error line truncation window has wrong byte indices. (open, close: %i, %i)", window_open_bytes, window_close_bytes);
 
-		if (window_right_bytes != line_length_bytes) {
+		if (window_close_bytes != line_length_bytes) {
 			show_right_ellipsis = true;
 		}
 
-		// The text will advance; all indices and lengths will become relative.
-		// We must keep our other iterators in sync.
-		// NOTE: Uncomment the rune versions if they ever get used beyond this point.
-
 		// Close the window, going left.
-		line_length_bytes = window_right_bytes;
+		line_length_bytes = window_close_bytes;
 
 		// Adjust the slice of text. In Odin, this would be:
 		// `line_text = line_text[window_open_bytes:]`
-		line_text += window_left_bytes;
-		line_length_bytes -= window_left_bytes;
-		// line_length_runes -= window_left_runes;
+		line_text += window_open_bytes;
+		line_length_bytes -= window_open_bytes;
 		GB_ASSERT_MSG(line_length_bytes >= 0, "Bounds-checking error: line_length_bytes");
 
-		// Part of advancing `line_text`:
-		error_start_index_bytes -= window_left_bytes;
-		// error_start_index_runes -= window_left_runes;
-		GB_ASSERT_MSG(error_start_index_bytes >= 0, "Bounds-checking error: error_start_index_bytes");
-
-		if (window_left_bytes > 0) {
+		if (window_open_bytes > 0) {
 			error_out("... ");
+			squiggle_padding += 4;
 		}
+	} else {
+		// No truncation needed.
+		window_open_bytes = 0;
+		window_close_bytes = line_length_bytes;
+	}
+
+	for (i32 i = error_start_index_graphemes; i > 0; i -= 1) {
+		if (graphemes[i].byte_index == window_open_bytes) {
+			break;
+		}
+		squiggle_padding += graphemes[i].width;
 	}
 
 	// Start printing code.
 
 	terminal_set_colours(TerminalStyle_Normal, TerminalColour_White);
-	error_out("%.*s", error_start_index_bytes, line_text);
+	error_out("%.*s", line_length_bytes, line_text);
 
-	// Odin-like: `line_text = line_text[error_start_index_bytes:]`
-	line_text += error_start_index_bytes;
-	line_length_bytes -= error_start_index_bytes;
-	GB_ASSERT_MSG(line_length_bytes >= 0, "Bounds-checking error: line_length_bytes");
+	i32 squiggle_length = 0;
+	bool trailing_squiggle = false;
 
 	if (end.file_id == pos.file_id) {
 		// The error has an endpoint.
-		terminal_set_colours(TerminalStyle_Bold, marker_colour);
-		error_out(open_error_sign);
 
 		if (end.line > pos.line) {
 			// Error goes to next line.
-			error_out(wiggly_underline_sgr);
-			error_out("%.*s", line_length_bytes, line_text);
-
-			error_out(disable_underline_sgr);
-
 			// Always show the ellipsis in this case
 			show_right_ellipsis = true;
 
+			for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+				squiggle_length += graphemes[i].width;
+				trailing_squiggle = true;
+			}
+
 		} else if (end.line == pos.line && end.column > pos.column) {
 			// Error terminates before line end.
-			i32 error_length_bytes = gb_min(end.column - pos.column, line_length_bytes);
-
-			error_out(wiggly_underline_sgr);
-			error_out("%.*s", error_length_bytes, line_text);
-			line_text += error_length_bytes;
-			line_length_bytes -= error_length_bytes;
-			GB_ASSERT_MSG(line_length_bytes >= 0, "Bounds-checking error: line_length_bytes");
+			i32 adjusted_end_index = graphemes[error_start_index_graphemes].byte_index + end.column - pos.column;
 
-			error_out(disable_underline_sgr);
-
-			if (!show_right_ellipsis) {
-				// The line hasn't been truncated; show the end marker.
-				terminal_set_colours(TerminalStyle_Bold, marker_colour);
-				error_out(close_error_sign);
+			for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+				if (graphemes[i].byte_index >= adjusted_end_index) {
+					break;
+				} else if (graphemes[i].byte_index >= window_close_bytes) {
+					trailing_squiggle = true;
+					break;
+				}
+				squiggle_length += graphemes[i].width;
 			}
-
-			terminal_set_colours(TerminalStyle_Normal, TerminalColour_White);
-			error_out("%.*s", line_length_bytes, line_text);
 		}
-
 	} else {
 		// The error is at one spot; no range known.
-		terminal_set_colours(TerminalStyle_Bold, marker_colour);
-		error_out(mark_error_sign);
-
-		terminal_set_colours(TerminalStyle_Normal, TerminalColour_White);
-		error_out("%.*s", line_length_bytes, line_text);
+		squiggle_length = 1;
 	}
 
 	if (show_right_ellipsis) {
 		error_out(" ...");
 	}
 
+	error_out("\n\t");
+
+	for (i32 i = squiggle_padding; i > 0; i -= 1) {
+		error_out(" ");
+	}
+
+	terminal_set_colours(TerminalStyle_Bold, TerminalColour_Green);
+
+	if (squiggle_length > 0) {
+		error_out("^");
+		squiggle_length -= 1;
+	}
+	for (/**/; squiggle_length > 1; squiggle_length -= 1) {
+		error_out("~");
+	}
+	if (squiggle_length > 0) {
+		if (trailing_squiggle) {
+			error_out("~ ...");
+		} else {
+			error_out("^");
+		}
+	}
+
 	// NOTE(Feoramund): Specifically print a newline, then reset colours,
 	// instead of the other way around. Otherwise the printing mechanism
 	// will collapse the newline for reasons currently beyond my ken.
 	error_out("\n");
 	terminal_reset_colours();
 
-	return error_start_index_bytes;
+	return squiggle_padding;
 }
 
 gb_internal void error_out_empty(void) {