Просмотр исходного кода

Fold XML attribute whitespace.

Jeroen van Rijn 1 год назад
Родитель
Коммит
2fe961cbcd

+ 24 - 10
core/encoding/entity/entity.odin

@@ -89,7 +89,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
 	t := Tokenizer{src=input}
 	in_data := false
 
-	prev: rune
+	prev: rune = ' '
 
 	loop: for {
 		advance(&t) or_return
@@ -153,18 +153,32 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
 					write_string(&builder, ";")
 				}
 			} else {
-				// https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-line-ends
-				switch t.r {
-				case '\n', 0x85, 0x2028:
-					write_rune(&builder, '\n')
-				case '\r': // Do nothing until next character
-				case:
-					if prev == '\r' { // Turn a single carriage return into a \n
+				// Handle AV Normalization: https://www.w3.org/TR/2006/REC-xml11-20060816/#AVNormalize
+				if .Normalize_Whitespace in options {
+					switch t.r {
+					case ' ', '\r', '\n', '\t':
+						if prev != ' ' {
+							write_rune(&builder, ' ')
+							prev = ' '
+						}
+					case:
+						write_rune(&builder, t.r)
+						prev = t.r
+					}
+				} else {
+					// https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-line-ends
+					switch t.r {
+					case '\n', 0x85, 0x2028:
 						write_rune(&builder, '\n')
+					case '\r': // Do nothing until next character
+					case:
+						if prev == '\r' { // Turn a single carriage return into a \n
+							write_rune(&builder, '\n')
+						}
+						write_rune(&builder, t.r)
 					}
-					write_rune(&builder, t.r)
+					prev = t.r
 				}
-				prev = t.r
 			}
 		}
 	}

+ 8 - 0
tests/core/assets/XML/attribute-whitespace.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE foozle>
+<foozle>Barzle</foozle>
+<부끄러운:barzle>
+	<name foo:bar="   birmese
+	kittens   have
+	fur   ">Indeed!</name>
+</부끄러운:barzle>

+ 14 - 0
tests/core/encoding/xml/test_core_xml.odin

@@ -146,6 +146,20 @@ xml_test_entities_unbox_decode :: proc(t: ^testing.T) {
 	})
 }
 
+@(test)
+xml_test_attribute_whitespace :: proc(t: ^testing.T) {
+	run_test(t, {
+		// Attribute value with leading, trailing and embedded whitespace runs, including newlines.
+		// No parser flags are set; `crc32` is presumably the checksum of the expected output (see `run_test`).
+		filename  = "XML/attribute-whitespace.xml",
+		options   = {
+			flags = {},
+			expected_doctype = "foozle",
+		},
+		crc32     = 0x8f5fd6c1,
+	})
+}
+
 @(test)
 xml_test_invalid_doctype :: proc(t: ^testing.T) {
 	run_test(t, {