Jeroen van Rijn 3 роки тому
батько
коміт
7ec88d2430

+ 2 - 2
core/encoding/entity/example/entity_example.odin

@@ -64,8 +64,8 @@ main :: proc() {
 	mem.tracking_allocator_init(&track, context.allocator)
 	context.allocator = mem.tracking_allocator(&track)
 
-	_main()
-	//_entities()
+	// _main()
+	_entities()
 
 	if len(track.allocation_map) > 0 {
 		println()

+ 30 - 23
core/encoding/xml/example/xml_example.odin

@@ -2,35 +2,40 @@ package xml_example
 
 import "core:encoding/xml"
 import "core:os"
-import "core:path"
 import "core:mem"
 import "core:fmt"
-
-/*
-	Silent error handler for the parser.
-*/
-Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {}
-
-OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", }
+import "core:time"
+import "core:strings"
+import "core:hash"
 
 example :: proc() {
 	using fmt
 
-	filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml")
-	defer delete(filename)
+	doc: ^xml.Document
+	err: xml.Error
+
+	DOC :: #load("../../../../tests/core/assets/XML/unicode.xml")
 
-	doc, err := xml.parse(filename, OPTIONS, Error_Handler)
+	parse_duration: time.Duration
+	{
+		time.SCOPED_TICK_DURATION(&parse_duration)
+		doc, err = xml.parse(DOC, xml.Options{flags={.Ignore_Unsupported}})
+	}
 	defer xml.destroy(doc)
 
+	ms := time.duration_milliseconds(parse_duration)
+	speed := (f64(1000.0) / ms) * f64(len(DOC)) / 1_024.0 / 1_024.0
+	fmt.printf("Parse time: %v bytes in %.2f ms (%.2f MiB/s).\n", len(DOC), ms, speed)
+
 	if err != .None {
 		printf("Load/Parse error: %v\n", err)
 		if err == .File_Error {
-			printf("\"%v\" not found. Did you run \"tests\\download_assets.py\"?", filename)
+			println("\"unicode.xml\" not found. Did you run \"tests\\download_assets.py\"?")
 		}
 		os.exit(1)
 	}
 
-	printf("\"%v\" loaded and parsed.\n", filename)
+	println("\"unicode.xml\" loaded and parsed.")
 
 	charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist")
 	if !charlist_ok {
@@ -40,17 +45,19 @@ example :: proc() {
 
 	printf("Found `<charlist>` with %v children.\n", len(charlist.children))
 
-	for char in charlist.children {
-		if char.ident != "character" {
-			eprintf("Expected `<character>`, got `<%v>`\n", char.ident)
-			os.exit(1)
-		}
+	crc32 := doc_hash(doc)
+	printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32)
+}
 
-		if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok {
-			eprintln("`<character dec=\"...\">` attribute not found.")
-			os.exit(1)
-		}
-	}
+doc_hash :: proc(doc: ^xml.Document, print := false) -> (crc32: u32) {
+	buf: strings.Builder
+	defer strings.destroy_builder(&buf)
+	w := strings.to_writer(&buf)
+
+	xml.print(w, doc)
+	tree := strings.to_string(buf)
+	if print { fmt.println(tree) }
+	return hash.crc32(transmute([]u8)tree)
 }
 
 main :: proc() {

+ 23 - 3
core/encoding/xml/xml_reader.odin

@@ -71,6 +71,12 @@ Option_Flag :: enum {
 		This option decodes them when encountered.
 	*/
 	Decode_SGML_Entities,
+
+	/*
+		If a tag body has a comment, it will be stripped unless this option is given.
+	*/
+	Keep_Tag_Body_Comments,
+
 }
 Option_Flags :: bit_set[Option_Flag; u8]
 
@@ -413,15 +419,29 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
 			/*
 				This should be a tag's body text.
 			*/
-			body_text   := scan_string(t, t.offset) or_return
+			body_text        := scan_string(t, t.offset) or_return
+			needs_processing := .Unbox_CDATA          in opts.flags
+			needs_processing |= .Decode_SGML_Entities in opts.flags
+
+			if !needs_processing {
+				element.value = strings.intern_get(&doc.intern, body_text)
+				continue
+			}
 
-			decode_opts := entity.XML_Decode_Options{ .Comment_Strip }
+			decode_opts := entity.XML_Decode_Options{}
+			if .Keep_Tag_Body_Comments not_in opts.flags {
+				decode_opts += { .Comment_Strip }
+			}
 
 			if .Decode_SGML_Entities not_in opts.flags {
 				decode_opts += { .No_Entity_Decode }
 			}
+
 			if .Unbox_CDATA in opts.flags {
-				decode_opts += { .Unbox_CDATA, .Decode_CDATA }
+				decode_opts += { .Unbox_CDATA }
+				if .Decode_SGML_Entities in opts.flags {
+					decode_opts += { .Decode_CDATA }
+				}
 			}
 
 			decoded, decode_err := entity.decode_xml(body_text, decode_opts)

+ 4 - 6
tests/core/encoding/xml/test_core_xml.odin

@@ -8,9 +8,7 @@ import "core:io"
 import "core:fmt"
 import "core:hash"
 
-Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) {
-	// Custom (silent) error handler.
-}
+Silent :: proc(pos: xml.Pos, format: string, args: ..any) {}
 
 OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, },
 	expected_doctype = "",
@@ -75,7 +73,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "恥ずべきフクロウ",
 		},
-		crc32     = 0x6d38ac58,
+		crc32     = 0xad31d8e8,
 	},
 
 	{
@@ -131,7 +129,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "html",
 		},
-		crc32     = 0xdb4a1e79,
+		crc32     = 0x573c1033,
 	},
 
 	{
@@ -306,7 +304,7 @@ run_tests :: proc(t: ^testing.T) {
 		expect(t, err == test.err, err_msg)
 
 		failed |= crc32 != test.crc32
-		err_msg  = tprintf("Expected CRC 0x%08x, got 0x%08x", test.crc32, crc32)
+		err_msg  = tprintf("Expected CRC 0x%08x, got 0x%08x, with options %v", test.crc32, crc32, test.options)
 		expect(t, crc32 == test.crc32, err_msg)
 
 		if failed {