3 years ago · 6e61abc7d0
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -26,6 +26,7 @@ package xml
 
				 		Jeroen van Rijn: Initial implementation.
			
 
				 */
			
 
				 
			
 
				+import "core:bytes"
			
 
				 import "core:strings"
			
 
				 import "core:encoding/entity"
			
 
				 import "core:mem"
			
@@ -39,6 +40,12 @@ DEFAULT_Options :: Options{
 
				 }
			
 
				 
			
 
				 Option_Flag :: enum {
			
 
				+	/*
			
 
				+		If the caller says that input may be modified, we can perform in-situ parsing.
			
 
				+		If this flag isn't provided, the XML parser first duplicates the input so that it can.
			
 
				+	*/
			
 
				+	Input_May_Be_Modified,
			
 
				+
			
 
				 	/*
			
 
				 		Document MUST start with `<?xml` prolog.
			
 
				 	*/
			
@@ -78,7 +85,7 @@ Option_Flag :: enum {
 
				 	Keep_Tag_Body_Comments,
			
 
				 
			
 
				 }
			
 
				-Option_Flags :: bit_set[Option_Flag; u8]
			
 
				+Option_Flags :: bit_set[Option_Flag; u16]
			
 
				 
			
 
				 Document :: struct {
			
 
				 	root:     ^Element,
			
@@ -104,7 +111,12 @@ Document :: struct {
 
				 	*/
			
 
				 	tokenizer: ^Tokenizer,
			
 
				 	allocator: mem.Allocator,
			
 
				-	intern:    strings.Intern,
			
 
				+
			
 
				+	/*
			
 
				+		Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified.
			
 
				+	*/
			
 
				+	input:           []u8,
			
 
				+	strings_to_free: [dynamic]string,
			
 
				 }
			
 
				 
			
 
				 Element :: struct {
			
@@ -205,21 +217,30 @@ Error :: enum {
 
				 	Implementation starts here.
			
 
				 */
			
 
				 parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
			
 
				+	data := data
			
 
				 	context.allocator = allocator
			
 
				 
			
 
				 	opts := validate_options(options) or_return
			
 
				 
			
 
				+	/*
			
 
				+		If `.Input_May_Be_Modified` is not specified, we duplicate the input so that we can modify it in-place.
			
 
				+	*/
			
 
				+	if .Input_May_Be_Modified not_in opts.flags {
			
 
				+		data = bytes.clone(data)
			
 
				+	}
			
 
				+
			
 
				 	t := &Tokenizer{}
			
 
				 	init(t, string(data), path, error_handler)
			
 
				 
			
 
				 	doc = new(Document)
			
 
				 	doc.allocator = allocator
			
 
				 	doc.tokenizer = t
			
 
				+	doc.input     = data
			
 
				 
			
 
				-	strings.intern_init(&doc.intern, allocator, allocator)
			
 
				+	// strings.intern_init(&doc.intern, allocator, allocator)
			
 
				 
			
 
				-	err =               .Unexpected_Token
			
 
				-	element, parent:    ^Element
			
 
				+	err =            .Unexpected_Token
			
 
				+	element, parent: ^Element
			
 
				 
			
 
				 	tag_is_open := false
			
 
				 
			
@@ -292,8 +313,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
 
				 					case:
			
 
				 						if .Error_on_Unsupported in opts.flags {
			
 
				 							error(t, t.offset, "Unhandled: <!%v\n", next.text)
			
 
				-							err = .Unhandled_Bang
			
 
				-							return	
			
 
				+							return doc, .Unhandled_Bang
			
 
				 						}
			
 
				 						skip_element(t) or_return
			
 
				 					}
			
@@ -307,8 +327,6 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
 
				 					comment := scan_comment(t) or_return
			
 
				 
			
 
				 					if .Intern_Comments in opts.flags {
			
 
				-						comment = strings.intern_get(&doc.intern, comment)
			
 
				-
			
 
				 						if doc.root == nil {
			
 
				 							append(&doc.comments, comment)
			
 
				 						} else {
			
@@ -343,7 +361,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
 
				 				}
			
 
				 
			
 
				 				element.parent = parent
			
 
				-				element.ident  = strings.intern_get(&doc.intern, open.text)
			
 
				+				element.ident  = open.text
			
 
				 
			
 
				 				parse_attributes(doc, &element.attribs) or_return
			
 
				 
			
@@ -424,7 +442,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
 
				 			needs_processing |= .Decode_SGML_Entities in opts.flags
			
 
				 
			
 
				 			if !needs_processing {
			
 
				-				element.value = strings.intern_get(&doc.intern, body_text)
			
 
				+				element.value = body_text
			
 
				 				continue
			
 
				 			}
			
 
				 
			
@@ -445,12 +463,11 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
 
				 			}
			
 
				 
			
 
				 			decoded, decode_err := entity.decode_xml(body_text, decode_opts)
			
 
				-			defer delete(decoded)
			
 
				-
			
 
				 			if decode_err == .None {
			
 
				-				element.value = strings.intern_get(&doc.intern, decoded)
			
 
				+				element.value = decoded
			
 
				+				append(&doc.strings_to_free, decoded)
			
 
				 			} else {
			
 
				-				element.value = strings.intern_get(&doc.intern, body_text)
			
 
				+				element.value = body_text
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
@@ -468,12 +485,13 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
 
				 
			
 
				 parse_from_file :: proc(filename: string, options := DEFAULT_Options, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
			
 
				 	context.allocator = allocator
			
 
				+	options := options
			
 
				 
			
 
				 	data, data_ok := os.read_entire_file(filename)
			
 
				-	defer delete(data)
			
 
				-
			
 
				 	if !data_ok { return {}, .File_Error }
			
 
				 
			
 
				+	options.flags += { .Input_May_Be_Modified }
			
 
				+
			
 
				 	return parse_from_slice(data, options, filename, error_handler, allocator)
			
 
				 }
			
 
				 
			
@@ -499,10 +517,16 @@ destroy :: proc(doc: ^Document) {
 
				 	if doc == nil { return }
			
 
				 
			
 
				 	free_element(doc.root)
			
 
				-	strings.intern_destroy(&doc.intern)
			
 
				 
			
 
				 	delete(doc.prolog)
			
 
				 	delete(doc.comments)
			
 
				+	delete(doc.input)
			
 
				+
			
 
				+	for s in doc.strings_to_free {
			
 
				+		delete(s)
			
 
				+	}
			
 
				+	delete(doc.strings_to_free)
			
 
				+
			
 
				 	free(doc)
			
 
				 }
			
 
				 
			
@@ -538,8 +562,8 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error)
 
				 	_       = expect(t, .Eq)     or_return
			
 
				 	value  := expect(t, .String) or_return
			
 
				 
			
 
				-	attr.key = strings.intern_get(&doc.intern, key.text)
			
 
				-	attr.val = strings.intern_get(&doc.intern, value.text)
			
 
				+	attr.key = key.text
			
 
				+	attr.val = value.text
			
 
				 
			
 
				 	err = .None
			
 
				 	return
			
@@ -651,7 +675,7 @@ parse_doctype :: proc(doc: ^Document) -> (err: Error) {
 
				 	t := doc.tokenizer
			
 
				 
			
 
				 	tok := expect(t, .Ident) or_return
			
 
				-	doc.doctype.ident = strings.intern_get(&doc.intern, tok.text)
			
 
				+	doc.doctype.ident = tok.text
			
 
				 
			
 
				 	skip_whitespace(t)
			
 
				 	offset := t.offset
			
@@ -660,6 +684,6 @@ parse_doctype :: proc(doc: ^Document) -> (err: Error) {
 
				 	/*
			
 
				 		-1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it.
			
 
				 	*/
			
 
				-	doc.doctype.rest = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1]))
			
 
				+	doc.doctype.rest = string(t.src[offset : t.offset - 1])
			
 
				 	return .None
			
 
				 }