Browse Source

[xml] Improve XML tests, test `core:encoding/entity`.

Jeroen van Rijn 3 năm trước cách đây
mục cha
commit
d65d6edb0e

+ 7 - 0
core/encoding/entity/entity.odin

@@ -115,7 +115,14 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
 
 				We don't need to check if we need to write a `<`, because if it isn't CDATA or a comment,
 				it couldn't have been part of an XML tag body to be decoded here.
+
+				Keep in mind that we could already *be* inside a CDATA tag.
+				If so, write `<` as a literal and continue.
 			*/
+			if in_data {
+				write_rune(&builder, '<')
+				continue
+			}
 			in_data = _handle_xml_special(&t, &builder, options) or_return
 
 		case ']':

+ 1 - 0
core/encoding/entity/example/entity_example.odin

@@ -50,6 +50,7 @@ _main :: proc() {
 	using fmt
 
 	options := xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities }}
+
 	doc, _ := xml.parse(#load("test.html"), options)
 
 	defer xml.destroy(doc)

+ 1 - 1
core/encoding/entity/example/test.html

@@ -22,7 +22,7 @@
 		</div>
 		<!-- EXPECTED: Foozle]! © BOX ® /BOX42&;1234& -->
 		<div>
-			&verbar; &vert; &VerticalLine; &fjlig; &grave; &bsol; &reg; &rhov; &CounterClockwiseContourIntegral;
+			&verbar; &vert; &VerticalLine; &fjlig; &grave; &bsol; &reg; &rhov; &CounterClockwiseContourIntegral; &bsemi;
 		</div>
 	</body>
 </html>

+ 29 - 0
tests/core/assets/XML/entities.html

@@ -0,0 +1,29 @@
+<html>
+	<head>
+		<title>Entity Reference Test</title>
+		<style>
+			body {
+				background: #000; color: #eee;
+				width: 40%;
+				margin-left:  auto;
+				margin-right: auto;
+				font-size: 14pt;
+			}
+		</style>
+	</head>
+	<body>
+		<h1>Entity Reference Test</h1>
+		<div id="test_cdata_in_comment" foo="">
+			Foozle]!&#32;&copy;&#x20;<!-- <![CDATA[&#32;&reg;&#x20;]]> -->42&;1234&
+		</div>
+		<!-- foo attribute should be empty but present -->
+		<!-- EXPECTED: Foozle]! © 42&;1234& -->
+		<div id="test_cdata_unwrap_and_passthrough">
+			Foozle]!&#32;&copy;&#x20;<![CDATA[BOX&#32;&reg;&#x20;/BOX]]>42&;1234&
+		</div>
+		<!-- EXPECTED: Foozle]! © BOX ® /BOX42&;1234& -->
+		<div>
+			&verbar; &vert; &VerticalLine; &fjlig; &grave; &bsol; &reg; &rhov; &CounterClockwiseContourIntegral; &bsemi;
+		</div>
+	</body>
+</html>

+ 0 - 0
tests/core/assets/XML/nl_NL-xliff-1.0.xliff → tests/core/assets/XML/nl_NL-xliff-1.2.xliff


+ 1 - 1
tests/core/assets/XML/utf8.xml

@@ -4,5 +4,5 @@
 <부끄러운:barzle>
 	<name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>
 	<nickname>Owl of Shame</nickname>
-	<data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] < ]]> Nonsense.</data>
+	<data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] <$]]> Nonsense.</data>
 </부끄러운:barzle>

+ 230 - 152
tests/core/encoding/xml/test_core_xml.odin

@@ -3,16 +3,16 @@ package test_core_xml
 import "core:encoding/xml"
 import "core:testing"
 import "core:mem"
+import "core:strings"
+import "core:io"
 import "core:fmt"
+import "core:hash"
 
 Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) {
 	// Custom (silent) error handler.
 }
 
-OPTIONS :: xml.Options{
-	flags            = {
-		.Ignore_Unsupported, .Intern_Comments,
-	},
+OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, },
 	expected_doctype = "",
 }
 
@@ -22,76 +22,153 @@ TEST_fail  := 0
 TEST :: struct {
 	filename: string,
 	options:  xml.Options,
-	expected: struct {
-		error:        xml.Error,
-		xml_version:  string,
-		xml_encoding: string,
-		doctype:      string,
-	},
+	err:      xml.Error,
+	crc32:    u32,
 }
 
+/*
+	Relative to ODIN_ROOT
+*/
+TEST_FILE_PATH_PREFIX :: "tests/core/assets/XML"
+
 TESTS :: []TEST{
 	/*
 		First we test that certain files parse without error.
 	*/
+
+	{
+		/*
+		<?xml version="1.0" encoding="utf-8"?>
+		<!DOCTYPE 恥ずべきフクロウ>
+		<恥ずべきフクロウ 올빼미_id="Foozle&#32;<![CDATA[<greeting>Hello, world!"</greeting>]]>Barzle">
+		<부끄러운:barzle>
+			<name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>
+			<nickname>Owl of Shame</nickname>
+			<data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] <$]]> Nonsense.</data>
+		</부끄러운:barzle>
+		*/
+
+		/*
+			Tests UTF-8 idents and values.
+			Tests namespaced ident.
+			Tests that nested partial CDATA start doesn't trip up parser.
+		*/
+		filename  = "utf8.xml",
+		options   = {
+			flags = {
+				.Ignore_Unsupported, .Intern_Comments,
+			},
+			expected_doctype = "恥ずべきフクロウ",
+		},
+		crc32     = 0x30d82264,
+	},
+
+	{
+		/*
+			Same as above.
+			Unbox CDATA in data tag.
+		*/
+		filename  = "utf8.xml",
+		options   = {
+			flags = {
+				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA,
+			},
+			expected_doctype = "恥ずべきフクロウ",
+		},
+		crc32     = 0x6d38ac58,
+	},
+
+	{
+		/*
+			Simple Qt TS translation file.
+			`core:i18n` requires it to be parsed properly.
+		*/
+		filename  = "nl_NL-qt-ts.ts",
+		options   = {
+			flags = {
+				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
+			},
+			expected_doctype = "TS",
+		},
+		crc32     = 0x7bce2630,
+	},
+
 	{
-		filename  = "assets/XML/utf8.xml",
-		options   = OPTIONS,
-		expected  = {
-			error        = .None,
-			xml_version  = "1.0",
-			xml_encoding = "utf-8",
-			doctype      = "恥ずべきフクロウ",
+		/*
+			Simple XLiff 1.2 file.
+			`core:i18n` requires it to be parsed properly.
+		*/
+		filename  = "nl_NL-xliff-1.2.xliff",
+		options   = {
+			flags = {
+				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
+			},
+			expected_doctype = "xliff",
 		},
+		crc32     = 0x43f19d61,
 	},
+
+	{
+		/*
+			Simple XLiff 2.0 file.
+			`core:i18n` requires it to be parsed properly.
+		*/
+		filename  = "nl_NL-xliff-2.0.xliff",
+		options   = {
+			flags = {
+				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
+			},
+			expected_doctype = "xliff",
+		},
+		crc32     = 0x961e7635,
+	},
+
 	{
-		filename  = "assets/XML/nl_NL-qt-ts.ts",
-		options   = OPTIONS,
-		expected  = {
-			error        = .None,
-			xml_version  = "1.0",
-			xml_encoding = "utf-8",
-			doctype      = "TS",
+		filename  = "entities.html",
+		options   = {
+			flags = {
+				.Ignore_Unsupported, .Intern_Comments,
+			},
+			expected_doctype = "html",
 		},
+		crc32     = 0xdb4a1e79,
 	},
+
 	{
-		filename  = "assets/XML/nl_NL-xliff-1.0.xliff",
-		options   = OPTIONS,
-		expected  = {
-			error        = .None,
-			xml_version  = "1.0",
-			xml_encoding = "UTF-8",
-			doctype      = "",
+		filename  = "entities.html",
+		options   = {
+			flags = {
+				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA,
+			},
+			expected_doctype = "html",
 		},
+		crc32     = 0x82588917,
 	},
+
 	{
-		filename  = "assets/XML/nl_NL-xliff-2.0.xliff",
-		options   = OPTIONS,
-		expected  = {
-			error        = .None,
-			xml_version  = "1.0",
-			xml_encoding = "utf-8",
-			doctype      = "",
+		filename  = "entities.html",
+		options   = {
+			flags = {
+				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
+			},
+			expected_doctype = "html",
 		},
+		crc32     = 0x5e74d8a6,
 	},
 
 	/*
 		Then we test that certain errors are returned as expected.
 	*/
 	{
-		filename  = "assets/XML/utf8.xml",
+		filename  = "utf8.xml",
 		options   = {
 			flags            = {
 				.Ignore_Unsupported, .Intern_Comments,
 			},
 			expected_doctype = "Odin",
 		},
-		expected  = {
-			error        = .Invalid_DocType,
-			xml_version  = "1.0",
-			xml_encoding = "utf-8",
-			doctype      = "恥ずべきフクロウ",
-		},
+		err       = .Invalid_DocType,
+		crc32     = 0x49b83d0a,
 	},
 }
 
@@ -115,150 +192,151 @@ when ODIN_TEST {
     }
 }
 
-main :: proc() {
-    t := testing.T{}
-
-	track: mem.Tracking_Allocator
-	mem.tracking_allocator_init(&track, context.allocator)
-	context.allocator = mem.tracking_allocator(&track)
+test_file_path :: proc(filename: string) -> (path: string) {
 
-	run_tests(&t)
+	path = fmt.tprintf("%v%v/%v", ODIN_ROOT, TEST_FILE_PATH_PREFIX, filename)
+	temp := transmute([]u8)path
 
-	if len(track.allocation_map) > 0 {
-		for _, v in track.allocation_map {
-			err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size)
-			expect(&t, false, err_msg)
+	for r, i in path {
+		if r == '\\' {
+			temp[i] = '/'
 		}
-	}	
-
-    fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
+	}
+	return path
 }
 
-@test
-run_tests :: proc(t: ^testing.T) {
-	using fmt
-
-	count := 0
+doc_to_string :: proc(doc: ^xml.Document) -> (result: string) {
+	/*
+		Effectively a clone of the debug printer in the xml package.
+		We duplicate it here so that the way it prints an XML document to a string is stable.
 
-	for test in TESTS {
-		printf("Trying to parse %v\n\n", test.filename)
+		This way we can hash the output. If it changes, it means that the document or how it was parsed changed,
+		not how it was printed. One less source of variability.
+	*/
+	print :: proc(writer: io.Writer, doc: ^xml.Document) -> (written: int, err: io.Error) {
+		if doc == nil { return }
+		using fmt
 
-		doc, err := xml.parse(test.filename, test.options, Silent)
-		defer xml.destroy(doc)
+		written += wprintf(writer, "[XML Prolog]\n")
 
-		err_msg := tprintf("Expected return value %v, got %v", test.expected.error, err)
-		expect(t, err == test.expected.error, err_msg)
+		for attr in doc.prolog {
+			written += wprintf(writer, "\t%v: %v\n", attr.key, attr.val)
+		}
 
-		if len(test.expected.xml_version) > 0 {
-			xml_version := ""
-			for attr in doc.prolog {
-				if attr.key == "version" {
-					xml_version = attr.val
-				}
-			}
+		written += wprintf(writer, "[Encoding] %v\n", doc.encoding)
 
-			err_msg  = tprintf("Expected XML version %v, got %v", test.expected.xml_version, xml_version)
-			expect(t, xml_version == test.expected.xml_version, err_msg)
-		}
+		if len(doc.doctype.ident) > 0 {
+			written += wprintf(writer, "[DOCTYPE]  %v\n", doc.doctype.ident)
 
-		if len(test.expected.xml_encoding) > 0 {
-			xml_encoding := ""
-			for attr in doc.prolog {
-				if attr.key == "encoding" {
-					xml_encoding = attr.val
-				}
+			if len(doc.doctype.rest) > 0 {
+			 	wprintf(writer, "\t%v\n", doc.doctype.rest)
 			}
-
-			err_msg  = tprintf("Expected XML encoding %v, got %v", test.expected.xml_encoding, xml_encoding)
-			expect(t, xml_encoding == test.expected.xml_encoding, err_msg)
 		}
 
-		err_msg  = tprintf("Expected DOCTYPE %v, got %v", test.expected.doctype, doc.doctype.ident)
-		expect(t, doc.doctype.ident == test.expected.doctype, err_msg)
-
-		/*
-			File-specific tests.
-		*/
-		switch count {
-		case 0:
-			expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
-			attr := doc.root.attribs[0]
+		for comment in doc.comments {
+			written += wprintf(writer, "[Pre-root comment]  %v\n", comment)
+		}
 
-			attr_key_expected := "올빼미_id"
-			attr_val_expected := "Foozle&#32;<![CDATA[<greeting>Hello, world!\"</greeting>]]>Barzle"
+		if doc.root != nil {
+		 	wprintln(writer, " --- ")
+		 	print_element(writer, doc.root)
+		 	wprintln(writer, " --- ")		
+		 }
 
-			attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
-			expect(t, attr.key == attr_key_expected, attr_err)
+		return written, .None
+	}
 
-			attr_err  = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
-			expect(t, attr.val == attr_val_expected, attr_err)
+	print_element :: proc(writer: io.Writer, element: ^xml.Element, indent := 0) -> (written: int, err: io.Error) {
+		if element == nil { return }
+		using fmt
 
-			expect(t, len(doc.root.children) > 0, "Expected the root tag to have children.")
-			child := doc.root.children[0]
+		tab :: proc(writer: io.Writer, indent: int) {
+			for _ in 0..=indent {
+				wprintf(writer, "\t")
+			}
+		}
 
-			first_child_ident := "부끄러운:barzle"
-			attr_err  = tprintf("Expected first child tag's ident to be %v, got %v", first_child_ident, child.ident)
-			expect(t, child.ident == first_child_ident, attr_err)
+		tab(writer, indent)
 
-		case 2:
-			expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
+		if element.kind == .Element {
+			wprintf(writer, "<%v>\n", element.ident)
+			if len(element.value) > 0 {
+				tab(writer, indent + 1)
+				wprintf(writer, "[Value] %v\n", element.value)
+			}
 
-			{
-				attr := doc.root.attribs[0]
+			for attr in element.attribs {
+				tab(writer, indent + 1)
+				wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val)
+			}
 
-				attr_key_expected := "version"
-				attr_val_expected := "1.2"
+			for child in element.children {
+				print_element(writer, child, indent + 1)
+			}
+		} else if element.kind == .Comment {
+			wprintf(writer, "[COMMENT] %v\n", element.value)
+		}
 
-				attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
-				expect(t, attr.key == attr_key_expected, attr_err)
+		return written, .None
+	}
 
-				attr_err  = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
-				expect(t, attr.val == attr_val_expected, attr_err)
-			}
+	buf: strings.Builder
+	defer strings.destroy_builder(&buf)
 
-			{
-				attr := doc.root.attribs[1]
+	print(strings.to_writer(&buf), doc)
+	return strings.clone(strings.to_string(buf))
+}
 
-				attr_key_expected := "xmlns"
-				attr_val_expected := "urn:oasis:names:tc:xliff:document:1.2"
+@test
+run_tests :: proc(t: ^testing.T) {
+	using fmt
 
-				attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
-				expect(t, attr.key == attr_key_expected, attr_err)
+	for test in TESTS {
+		path := test_file_path(test.filename)
+		printf("\nTrying to parse %v\n\n", path)
 
-				attr_err  = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
-				expect(t, attr.val == attr_val_expected, attr_err)
-			}
+		doc, err := xml.parse(path, test.options, Silent)
+		defer xml.destroy(doc)
 
-		case 3:
-			expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
+		tree_string := doc_to_string(doc)
+		tree_bytes  := transmute([]u8)tree_string
+		defer delete(tree_bytes)
 
-			{
-				attr := doc.root.attribs[0]
+		crc32 := hash.crc32(tree_bytes)
 
-				attr_key_expected := "xmlns"
-				attr_val_expected := "urn:oasis:names:tc:xliff:document:2.0"
+		failed := err != test.err
+		err_msg := tprintf("Expected return value %v, got %v", test.err, err)
+		expect(t, err == test.err, err_msg)
 
-				attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
-				expect(t, attr.key == attr_key_expected, attr_err)
+		failed |= crc32 != test.crc32
+		err_msg  = tprintf("Expected CRC 0x%08x, got 0x%08x", test.crc32, crc32)
+		expect(t, crc32 == test.crc32, err_msg)
 
-				attr_err  = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
-				expect(t, attr.val == attr_val_expected, attr_err)
-			}
+		if failed {
+			/*
+				Don't fully print big trees.
+			*/
+			tree_string = tree_string[:min(2_048, len(tree_string))]
+			println(tree_string)
+		}
+	}
+}
 
-			{
-				attr := doc.root.attribs[1]
+main :: proc() {
+    t := testing.T{}
 
-				attr_key_expected := "version"
-				attr_val_expected := "2.0"
+	track: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track, context.allocator)
+	context.allocator = mem.tracking_allocator(&track)
 
-				attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
-				expect(t, attr.key == attr_key_expected, attr_err)
+	run_tests(&t)
 
-				attr_err  = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
-				expect(t, attr.val == attr_val_expected, attr_err)
-			}
+	if len(track.allocation_map) > 0 {
+		for _, v in track.allocation_map {
+			err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size)
+			expect(&t, false, err_msg)
 		}
+	}	
 
-		count += 1
-	}
+    fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 }