Browse Source

GDScript LSP: Rework and extend BBCode to markdown docstring conversion

The original implementation was minimal and produced poorly formatted docstrings, with no line breaks, in code editors other than VSCode.

Co-authored-by: HolonProduction <[email protected]>
Nathan Lovato 5 months ago
parent
commit
cd2bd8f624
2 changed files with 232 additions and 21 deletions
  1. 169 21
      modules/gdscript/language_server/godot_lsp.h
  2. 63 0
      modules/gdscript/tests/test_lsp.h

+ 169 - 21
modules/gdscript/language_server/godot_lsp.h

@@ -1927,28 +1927,67 @@ static String marked_documentation(const String &p_bbcode) {
 	String markdown = p_bbcode.strip_edges();
 	String markdown = p_bbcode.strip_edges();
 
 
 	Vector<String> lines = markdown.split("\n");
 	Vector<String> lines = markdown.split("\n");
-	bool in_code_block = false;
-	int code_block_indent = -1;
+	bool in_codeblock_tag = false;
+	// This is for handling the special [codeblocks] syntax used by the built-in class reference.
+	bool in_codeblocks_tag = false;
+	bool in_codeblocks_gdscript_tag = false;
 
 
 	markdown = "";
 	markdown = "";
 	for (int i = 0; i < lines.size(); i++) {
 	for (int i = 0; i < lines.size(); i++) {
 		String line = lines[i];
 		String line = lines[i];
-		int block_start = line.find("[codeblock]");
+
+		// For [codeblocks] tags we locate a child [gdscript] tag and turn that
+		// into a GDScript code listing. Other languages and the surrounding tag
+		// are skipped.
+		if (line.contains("[codeblocks]")) {
+			in_codeblocks_tag = true;
+			continue;
+		}
+		if (in_codeblocks_tag && line.contains("[/codeblocks]")) {
+			in_codeblocks_tag = false;
+			continue;
+		}
+		if (in_codeblocks_tag) {
+			if (line.contains("[gdscript]")) {
+				in_codeblocks_gdscript_tag = true;
+				line = "```gdscript";
+			} else if (in_codeblocks_gdscript_tag && line.contains("[/gdscript]")) {
+				line = "```";
+				in_codeblocks_gdscript_tag = false;
+			} else if (!in_codeblocks_gdscript_tag) {
+				continue;
+			}
+		}
+
+		// We need to account for both [codeblock] and [codeblock lang=...].
+		String codeblock_lang = "gdscript";
+		int block_start = line.find("[codeblock");
 		if (block_start != -1) {
 		if (block_start != -1) {
-			code_block_indent = block_start;
-			in_code_block = true;
-			line = "\n";
-		} else if (in_code_block) {
-			line = "\t" + line.substr(code_block_indent);
+			int bracket_pos = line.find_char(']', block_start);
+			if (bracket_pos != -1) {
+				int lang_start = line.find("lang=", block_start);
+				if (lang_start != -1 && lang_start < bracket_pos) {
+					constexpr int LANG_PARAM_LENGTH = 5; // Length of "lang=".
+					int lang_value_start = lang_start + LANG_PARAM_LENGTH;
+					int lang_end = bracket_pos;
+					if (lang_value_start < lang_end) {
+						codeblock_lang = line.substr(lang_value_start, lang_end - lang_value_start);
+					}
+				}
+				in_codeblock_tag = true;
+				line = "```" + codeblock_lang;
+			}
 		}
 		}
 
 
-		if (in_code_block && line.contains("[/codeblock]")) {
-			line = "\n";
-			in_code_block = false;
+		if (in_codeblock_tag && line.contains("[/codeblock]")) {
+			line = "```";
+			in_codeblock_tag = false;
 		}
 		}
 
 
-		if (!in_code_block) {
+		if (!in_codeblock_tag) {
 			line = line.strip_edges();
 			line = line.strip_edges();
+			line = line.replace("[br]", "\n\n");
+
 			line = line.replace("[code]", "`");
 			line = line.replace("[code]", "`");
 			line = line.replace("[/code]", "`");
 			line = line.replace("[/code]", "`");
 			line = line.replace("[i]", "*");
 			line = line.replace("[i]", "*");
@@ -1957,17 +1996,126 @@ static String marked_documentation(const String &p_bbcode) {
 			line = line.replace("[/b]", "**");
 			line = line.replace("[/b]", "**");
 			line = line.replace("[u]", "__");
 			line = line.replace("[u]", "__");
 			line = line.replace("[/u]", "__");
 			line = line.replace("[/u]", "__");
-			line = line.replace("[method ", "`");
-			line = line.replace("[member ", "`");
-			line = line.replace("[signal ", "`");
-			line = line.replace("[enum ", "`");
-			line = line.replace("[constant ", "`");
-			line = line.replace_chars("[]", '`');
+			line = line.replace("[s]", "~~");
+			line = line.replace("[/s]", "~~");
+			line = line.replace("[kbd]", "`");
+			line = line.replace("[/kbd]", "`");
+			line = line.replace("[center]", "");
+			line = line.replace("[/center]", "");
+			line = line.replace("[/font]", "");
+			line = line.replace("[/color]", "");
+			line = line.replace("[/img]", "");
+
+			// Convert remaining simple bracketed class names to backticks and literal brackets.
+			// This handles cases like [Node2D], [Sprite2D], etc. and [lb] and [rb].
+			int pos = 0;
+			while ((pos = line.find_char('[', pos)) != -1) {
+				// Replace the special cases for [lb] and [rb] first and walk
+				// past them to avoid conflicts with class names.
+				const bool is_within_bounds = pos + 4 <= line.length();
+				if (is_within_bounds && line.substr(pos, 4) == "[lb]") {
+					line = line.substr(0, pos) + "\\[" + line.substr(pos + 4);
+					// Advance past the two newly inserted characters (`\` and `[`) so the
+					// next `line.find_char()` call does not stop at the same position.
+					pos += 2;
+					continue;
+				} else if (is_within_bounds && line.substr(pos, 4) == "[rb]") {
+					line = line.substr(0, pos) + "\\]" + line.substr(pos + 4);
+					pos += 2;
+					continue;
+				}
+
+				// Replace class names in brackets.
+				int end_pos = line.find_char(']', pos);
+				if (end_pos == -1) {
+					break;
+				}
+
+				String content = line.substr(pos + 1, end_pos - pos - 1);
+				// We only convert if it looks like a simple class name (no spaces, no special chars).
+				// GDScript supports unicode characters as identifiers so we only exclude markers of other BBCode tags to avoid conflicts.
+				bool is_class_name = (!content.is_empty() && content != "url" && !content.contains_char(' ') && !content.contains_char('=') && !content.contains_char('/'));
+				if (is_class_name) {
+					line = line.substr(0, pos) + "`" + content + "`" + line.substr(end_pos + 1);
+					pos += content.length() + 2;
+				} else {
+					pos = end_pos + 1;
+				}
+			}
+
+			constexpr int URL_OPEN_TAG_LENGTH = 5; // Length of "[url=".
+			constexpr int URL_CLOSE_TAG_LENGTH = 6; // Length of "[/url]".
+
+			// This is for the case [url=$url]$text[/url].
+			pos = 0;
+			while ((pos = line.find("[url=", pos)) != -1) {
+				int url_end = line.find_char(']', pos);
+				int close_start = line.find("[/url]", url_end);
+				if (url_end == -1 || close_start == -1) {
+					break;
+				}
+
+				String url = line.substr(pos + URL_OPEN_TAG_LENGTH, url_end - pos - URL_OPEN_TAG_LENGTH);
+				String text = line.substr(url_end + 1, close_start - url_end - 1);
+				String replacement = "[" + text + "](" + url + ")";
+				line = line.substr(0, pos) + replacement + line.substr(close_start + URL_CLOSE_TAG_LENGTH);
+				pos += replacement.length();
+			}
+
+			// This is for the case [url]$url[/url].
+			pos = 0;
+			while ((pos = line.find("[url]", pos)) != -1) {
+				int close_pos = line.find("[/url]", pos);
+				if (close_pos == -1) {
+					break;
+				}
+
+				String url = line.substr(pos + URL_OPEN_TAG_LENGTH, close_pos - pos - URL_OPEN_TAG_LENGTH);
+				String replacement = "[" + url + "](" + url + ")";
+				line = line.substr(0, pos) + replacement + line.substr(close_pos + URL_CLOSE_TAG_LENGTH);
+				pos += replacement.length();
+			}
+
+			// Replace the various link types with inline code ([class MyNode] to `MyNode`).
+			// Uses a while loop because there can occasionally be multiple links of the same type in a single line.
+			const Vector<String> link_start_patterns = {
+				"[class ", "[method ", "[member ", "[signal ", "[enum ", "[constant ",
+				"[annotation ", "[constructor ", "[operator ", "[theme_item ", "[param "
+			};
+			for (const String &pattern : link_start_patterns) {
+				int pattern_pos = 0;
+				while ((pattern_pos = line.find(pattern, pattern_pos)) != -1) {
+					int end_pos = line.find_char(']', pattern_pos);
+					if (end_pos == -1) {
+						break;
+					}
+
+					String content = line.substr(pattern_pos + pattern.length(), end_pos - pattern_pos - pattern.length());
+					String replacement = "`" + content + "`";
+					line = line.substr(0, pattern_pos) + replacement + line.substr(end_pos + 1);
+					pattern_pos += replacement.length();
+				}
+			}
+
+			// Remove tags with attributes like [color=red], as they don't have a direct Markdown
+			// equivalent supported by external tools.
+			const String attribute_tags[] = {
+				"color", "font", "img"
+			};
+			for (const String &tag_name : attribute_tags) {
+				int tag_pos = 0;
+				while ((tag_pos = line.find("[" + tag_name + "=", tag_pos)) != -1) {
+					int end_pos = line.find_char(']', tag_pos);
+					if (end_pos == -1) {
+						break;
+					}
+
+					line = line.substr(0, tag_pos) + line.substr(end_pos + 1);
+				}
+			}
 		}
 		}
 
 
-		if (!in_code_block && i < lines.size() - 1) {
-			line += "\n\n";
-		} else if (i < lines.size() - 1) {
+		if (i < lines.size() - 1) {
 			line += "\n";
 			line += "\n";
 		}
 		}
 		markdown += line;
 		markdown += line;

+ 63 - 0
modules/gdscript/tests/test_lsp.h

@@ -530,6 +530,69 @@ func f():
 		memdelete(efs);
 		memdelete(efs);
 		finish_language();
 		finish_language();
 	}
 	}
+
+	TEST_CASE("BBCode to markdown conversion") {
+		// Checks LSP::marked_documentation(), which converts BBCode docstrings into the
+		// Markdown markup sent to the LSP client on documentation requests.
+
+		// Basic inline formatting tags map to their Markdown equivalents.
+		CHECK_EQ(LSP::marked_documentation("[b]bold[/b]"), "**bold**");
+		CHECK_EQ(LSP::marked_documentation("[i]italic[/i]"), "*italic*");
+		CHECK_EQ(LSP::marked_documentation("[u]underline[/u]"), "__underline__");
+		CHECK_EQ(LSP::marked_documentation("[s]strikethrough[/s]"), "~~strikethrough~~");
+		CHECK_EQ(LSP::marked_documentation("[code]code[/code]"), "`code`");
+		CHECK_EQ(LSP::marked_documentation("[kbd]Ctrl + S[/kbd]"), "`Ctrl + S`");
+
+		// Line breaks. [br] is expected to become a paragraph break (blank line)
+		// because the conversion function works line by line and does not
+		// distinguish Markdown inline elements from block elements.
+		CHECK_EQ(LSP::marked_documentation("Line1[br]Line2"), "Line1\n\nLine2");
+
+		// Tags with no Markdown equivalent (center, color, font) should be stripped, keeping only their text.
+		CHECK_EQ(LSP::marked_documentation("[center]Centered text[/center]"), "Centered text");
+		CHECK_EQ(LSP::marked_documentation("[color=red]red text[/color]"), "red text");
+		CHECK_EQ(LSP::marked_documentation("[font=Arial]Arial text[/font]"), "Arial text");
+
+		// The following checks cover the link patterns specific to Godot's built-in docs, all of which we render as inline code.
+		CHECK_EQ(LSP::marked_documentation("Class link: [Node2D], [Sprite2D]"), "Class link: `Node2D`, `Sprite2D`");
+		CHECK_EQ(LSP::marked_documentation("Single class [RigidBody2D]"), "Single class `RigidBody2D`");
+		CHECK_EQ(LSP::marked_documentation("[method Node2D.set_position]"), "`Node2D.set_position`");
+		CHECK_EQ(LSP::marked_documentation("[member Node2D.position]"), "`Node2D.position`");
+		CHECK_EQ(LSP::marked_documentation("[signal Node.ready]"), "`Node.ready`");
+		CHECK_EQ(LSP::marked_documentation("[constant Color.RED]"), "`Color.RED`");
+		CHECK_EQ(LSP::marked_documentation("[enum Node.ProcessMode]"), "`Node.ProcessMode`");
+		CHECK_EQ(LSP::marked_documentation("[annotation @GDScript.@export]"), "`@GDScript.@export`");
+		CHECK_EQ(LSP::marked_documentation("[constructor Vector2.Vector2]"), "`Vector2.Vector2`");
+		CHECK_EQ(LSP::marked_documentation("[operator Vector2.operator +]"), "`Vector2.operator +`");
+		CHECK_EQ(LSP::marked_documentation("[theme_item Button.font]"), "`Button.font`");
+		CHECK_EQ(LSP::marked_documentation("[param delta]"), "`delta`");
+
+		// [url] tags become Markdown links, both with explicit link text and with the bare URL as text.
+		CHECK_EQ(LSP::marked_documentation("[url=https://godotengine.org]link to Godot Engine[/url]"),
+				"[link to Godot Engine](https://godotengine.org)");
+		CHECK_EQ(LSP::marked_documentation("[url]https://godotengine.org/[/url]"),
+				"[https://godotengine.org/](https://godotengine.org/)");
+
+		// Code listings become fenced code blocks; the fence is tagged gdscript unless lang=... is given.
+		CHECK_EQ(LSP::marked_documentation("[codeblock]\nfunc test():\n    print(\"Hello, Godot!\")\n[/codeblock]"),
+				"```gdscript\nfunc test():\n    print(\"Hello, Godot!\")\n```");
+		CHECK_EQ(LSP::marked_documentation("[codeblock lang=csharp]\npublic void Test()\n{\n    GD.Print(\"Hello, Godot!\");\n}\n[/codeblock]"),
+				"```csharp\npublic void Test()\n{\n    GD.Print(\"Hello, Godot!\");\n}\n```");
+		// Code listings with multiple languages (the [codeblocks] tag is used in the built-in reference).
+		// Only the [gdscript] child is converted to a code block, like the built-in editor; the expected output keeps a trailing newline after the closing fence.
+		// NOTE: There is always a GDScript code listing in the built-in class reference.
+		CHECK_EQ(LSP::marked_documentation("[codeblocks]\n[gdscript]\nprint(hash(\"a\")) # Prints 177670\n[/gdscript]\n[csharp]\nGD.Print(GD.Hash(\"a\")); // Prints 177670\n[/csharp]\n[/codeblocks]"),
+				"```gdscript\nprint(hash(\"a\")) # Prints 177670\n```\n");
+
+		// [lb] and [rb] stand for literal square brackets and are expected to come out backslash-escaped in Markdown.
+		CHECK_EQ(LSP::marked_documentation("[lb]literal brackets[rb]"), "\\[literal brackets\\]");
+		CHECK_EQ(LSP::marked_documentation("[lb]literal[rb] with [ClassName]"), "\\[literal\\] with `ClassName`");
+
+		// Different patterns must not conflict with each other, especially class
+		// names and URLs, since Markdown links also use square brackets.
+		CHECK_EQ(LSP::marked_documentation("Class [Sprite2D] with [url=https://godotengine.org]link[/url]"),
+				"Class `Sprite2D` with [link](https://godotengine.org)");
+	}
 }
 }
 
 
 } // namespace GDScriptTests
 } // namespace GDScriptTests