3 年之前 · b5c828fe4e
--- a/core/encoding/xml/debug_print.odin
+++ b/core/encoding/xml/debug_print.odin
@@ -0,0 +1,73 @@
 
				+package xml
			
 
				+/*
			
 
				+	An XML 1.0 / 1.1 parser
			
 
				+
			
 
				+	Copyright 2021 Jeroen van Rijn <[email protected]>.
			
 
				+	Made available under Odin's BSD-3 license.
			
 
				+
			
 
				+	A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
			
 
				+
			
 
				+	List of contributors:
			
 
				+		Jeroen van Rijn: Initial implementation.
			
 
				+*/
			
 
				+import "core:fmt"
			
 
				+
			
 
				+/*
				+	Debug helper: dumps a parsed `Document` to stdout.
				+
				+	Prints the prolog attributes, the encoding and the DOCTYPE, followed by the
				+	element tree when the document has a root element. Asserts that `doc` is not nil.
				+*/
				+print :: proc(doc: ^Document) {
				+	assert(doc != nil)
				+
				+	fmt.println("[XML Prolog]")
				+	for attr in doc.prolog {
				+		fmt.printf("\t%v: %v\n", attr.key, attr.val)
				+	}
				+
				+	fmt.printf("[Encoding] %v\n",  doc.encoding)
				+	fmt.printf("[DOCTYPE]  %v\n",  doc.doctype.ident)
				+	if len(doc.doctype.rest) > 0 {
				+		fmt.printf("\t%v\n", doc.doctype.rest)
				+	}
				+
				+	// Nothing more to show for a document without a root element.
				+	if doc.root == nil {
				+		return
				+	}
				+
				+	fmt.println(" --- ")
				+	print_element(0, doc.root)
				+	fmt.println(" --- ")
				+}
			
 
				+
			
 
				+/*
				+	Recursively prints `element` and its subtree to stdout.
				+
				+	`indent` is the nesting depth; every level adds one tab. Elements print their
				+	identifier, then value and attributes one level deeper, then their children.
				+	Comments print their text. A nil `element` prints nothing.
				+*/
				+print_element :: proc(indent: int, element: ^Element) {
				+	if element == nil {
				+		return
				+	}
				+
				+	// Writes `depth` tabs, clamped between zero and the length of the tab string.
				+	tab :: proc(depth: int) {
				+		tabs := "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
				+
				+		count := clamp(depth, 0, len(tabs))
				+		fmt.printf("%v", tabs[:count])
				+	}
				+
				+	tab(indent)
				+
				+	switch element.kind {
				+	case .Element:
				+		fmt.printf("<%v>\n", element.ident)
				+
				+		if len(element.value) > 0 {
				+			tab(indent + 1)
				+			fmt.printf("[Value] %v\n", element.value)
				+		}
				+
				+		for attr in element.attribs {
				+			tab(indent + 1)
				+			fmt.printf("[Attr] %v: %v\n", attr.key, attr.val)
				+		}
				+
				+		for child in element.children {
				+			print_element(indent + 1, child)
				+		}
				+
				+	case .Comment:
				+		fmt.printf("[COMMENT] %v\n", element.value)
				+	}
				+}
			
--- a/core/encoding/xml/example/xml_example.odin
+++ b/core/encoding/xml/example/xml_example.odin
@@ -0,0 +1,55 @@
 
				+package xml_example
			
 
				+
			
 
				+import "core:encoding/xml"
			
 
				+import "core:mem"
			
 
				+import "core:fmt"
			
 
				+
			
 
				+// Error handler handed to `xml.parse` by `_main`. The body is empty, so every reported message is dropped.
				+Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {
				+
				+}
			
 
				+
			
 
				+// Path of the XML test asset; also passed to `xml.parse` as the document's path.
				+FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff"
				+// Contents of `FILENAME`, embedded with `#load`.
				+DOC      :: #load(FILENAME)
				+
				+// Parser options for the example: ignore unsupported constructs, keep comments, expect no particular DOCTYPE.
				+OPTIONS  :: xml.Options{
				+	flags            = {
				+		.Ignore_Unsupported, .Intern_Comments,
				+	},
				+	expected_doctype = "",
				+}
			
 
				+
			
 
				+/*
				+	Echoes the embedded document, parses it, dumps the parse result with `xml.print`
				+	and finally reports either the parse error or "DONE!".
				+*/
				+_main :: proc() {
				+	fmt.println("--- DOCUMENT TO PARSE  ---")
				+	fmt.println(string(DOC))
				+	fmt.println("--- /DOCUMENT TO PARSE ---\n")
				+
				+	doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler)
				+	defer xml.destroy(doc)
				+
				+	xml.print(doc)
				+
				+	if err == .None {
				+		fmt.println("DONE!")
				+		return
				+	}
				+	fmt.printf("Parse error: %v\n", err)
				+}
			
 
				+
			
 
				+/*
				+	Runs `_main` under a tracking allocator and lists every allocation that is
				+	still recorded in the tracker once `_main` has returned.
				+*/
				+main :: proc() {
				+	track: mem.Tracking_Allocator
				+	mem.tracking_allocator_init(&track, context.allocator)
				+	context.allocator = mem.tracking_allocator(&track)
				+
				+	_main()
				+
				+	if len(track.allocation_map) == 0 {
				+		return
				+	}
				+
				+	fmt.println()
				+	for _, leak in track.allocation_map {
				+		fmt.printf("%v Leaked %v bytes.\n", leak.location, leak.size)
				+	}
				+}
			
--- a/core/encoding/xml/tokenizer.odin
+++ b/core/encoding/xml/tokenizer.odin
@@ -0,0 +1,339 @@
 
				+package xml
			
 
				+
			
 
				+import "core:fmt"
			
 
				+import "core:unicode"
			
 
				+import "core:unicode/utf8"
			
 
				+
			
 
				+// Callback type used by `error`: receives the position of the problem, a format string and its arguments.
				+Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any)
			
 
				+
			
 
				+// A token as returned by `scan`: its kind, its text and the position where it starts.
				+Token :: struct {
				+	kind: Token_Kind,
				+	text: string,
				+	pos:  Pos,
				+}
			
 
				+
			
 
				+// Source position, built by `offset_to_pos` from the tokenizer's path, line count and line offset.
				+Pos :: struct {
				+	file:   string,
				+	offset: int, // starting at 0
				+	line:   int, // starting at 1
				+	column: int, // starting at 1
				+}
			
 
				+
			
 
				+/*
				+	Kinds of token the tokenizer distinguishes.
				+
				+	`Invalid` is the zero value, so a `Token_Kind` that is never assigned reads as `Invalid`.
				+*/
				+Token_Kind :: enum {
				+	Invalid,
				+
				+	Ident,
				+	Literal,
				+	Rune,
				+	String,
				+
				+	Double_Quote,  // "
				+	Single_Quote,  // '
				+	Colon,         // :
				+
				+	Eq,            // =
				+	Lt,            // <
				+	Gt,            // >
				+	Exclaim,       // !
				+	Question,      // ?
				+	Hash,          // #
				+	Slash,         // /
				+	Dash,          // -
				+
				+	Open_Bracket,  // [
				+	Close_Bracket, // ]
				+
				+	EOF,
				+}
			
 
				+
			
 
				+// Markers that open and close a CDATA section; `scan_string` uses them to track whether it is inside one.
				+CDATA_START :: "<![CDATA["
				+CDATA_END   :: "]]>"
			
 
				+
			
 
				+/*
				+	Tokenizer state. Set up with `init`, advanced one rune at a time by `advance_rune`.
				+*/
				+Tokenizer :: struct {
				+	// Immutable data
				+	path: string,
				+	src:  string,
				+	err:  Error_Handler,
				+
				+	// Tokenizing state
				+	// `ch` is the current rune (-1 once the end of `src` is reached), `offset` its byte offset,
				+	// and `read_offset` the byte offset of the rune that follows it.
				+	ch:          rune,
				+	offset:      int,
				+	read_offset: int,
				+	// `line_offset` is the byte offset at which the current line starts; `line_count` is the current line number.
				+	line_offset: int,
				+	line_count:  int,
				+
				+	// Mutable data
				+	// Incremented by every call to `error`.
				+	error_count: int,
				+}
			
 
				+
			
 
				+/*
				+	Resets `t` to tokenize `src` from the start.
				+
				+	`path` is only used for reporting positions, `err` receives error messages.
				+	Every field of the tokenizer is overwritten, the first rune is loaded and a
				+	leading byte order mark is skipped.
				+*/
				+init :: proc(t: ^Tokenizer, src: string, path: string, err: Error_Handler = default_error_handler) {
				+	// Fields not listed here (offsets and error count) start out as zero.
				+	t^ = Tokenizer{
				+		path       = path,
				+		src        = src,
				+		err        = err,
				+		ch         = ' ',
				+		line_count = 1 if len(src) > 0 else 0,
				+	}
				+
				+	advance_rune(t)
				+	if t.ch == utf8.RUNE_BOM {
				+		advance_rune(t)
				+	}
				+}
			
 
				+
			
 
				+/*
				+	Builds a `Pos` for byte `offset`, using the tokenizer's current line number and
				+	the offset at which the current line starts.
				+*/
				+@(private)
				+offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> Pos {
				+	return Pos{
				+		file   = t.path,
				+		offset = offset,
				+		line   = t.line_count,
				+		column = offset - t.line_offset + 1,
				+	}
				+}
			
 
				+
			
 
				+/*
				+	Default `Error_Handler`: writes `file(line:column) `, the formatted message and
				+	a newline to stderr.
				+*/
				+default_error_handler :: proc(pos: Pos, msg: string, args: ..any) {
				+	file, line, column := pos.file, pos.line, pos.column
				+
				+	fmt.eprintf("%s(%d:%d) ", file, line, column)
				+	fmt.eprintf(msg, ..args)
				+	fmt.eprintf("\n")
				+}
			
 
				+
			
 
				+/*
				+	Reports an error at byte `offset`.
				+
				+	The message goes to the tokenizer's error handler, if one is set. The error
				+	count is incremented whether or not a handler is installed.
				+*/
				+error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
				+	where := offset_to_pos(t, offset)
				+
				+	if handler := t.err; handler != nil {
				+		handler(where, msg, ..args)
				+	}
				+	t.error_count += 1
				+}
			
 
				+
			
 
				+/*
				+	Moves the tokenizer to the next rune of `src`.
				+
				+	Updates `offset`, and starts a new line when the rune being left behind is a
				+	newline. At the end of the input `ch` becomes -1. NUL bytes, invalid UTF-8 and a
				+	byte order mark anywhere but at offset 0 are reported through `error`; the rune
				+	is still consumed.
				+*/
				+advance_rune :: proc(using t: ^Tokenizer) {
				+	at_end := read_offset >= len(src)
				+
				+	offset = len(src) if at_end else read_offset
				+
				+	// The previous rune ended a line: the new offset is where the next line starts.
				+	if ch == '\n' {
				+		line_offset = offset
				+		line_count += 1
				+	}
				+
				+	if at_end {
				+		ch = -1
				+		return
				+	}
				+
				+	next, width := rune(src[read_offset]), 1
				+	if next == 0 {
				+		error(t, t.offset, "illegal character NUL")
				+	} else if next >= utf8.RUNE_SELF {
				+		// Not ASCII: decode the full multi-byte sequence.
				+		next, width = utf8.decode_rune_in_string(src[read_offset:])
				+		if next == utf8.RUNE_ERROR && width == 1 {
				+			error(t, t.offset, "illegal UTF-8 encoding")
				+		} else if next == utf8.RUNE_BOM && offset > 0 {
				+			error(t, t.offset, "illegal byte order mark")
				+		}
				+	}
				+
				+	read_offset += width
				+	ch = next
				+}
			
 
				+
			
 
				+/*
				+	Returns the byte `offset` positions past the tokenizer's read offset without
				+	consuming anything, or 0 when that position lies beyond the end of the source.
				+*/
				+peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
				+	index := t.read_offset + offset
				+	if index >= len(t.src) {
				+		return 0
				+	}
				+	return t.src[index]
				+}
			
 
				+
			
 
				+/*
				+	Advances past any run of spaces, tabs, carriage returns and newlines.
				+*/
				+skip_whitespace :: proc(t: ^Tokenizer) {
				+	for t.ch == ' ' || t.ch == '\t' || t.ch == '\r' || t.ch == '\n' {
				+		advance_rune(t)
				+	}
				+}
			
 
				+
			
 
				+/*
				+	Reports whether `r` may start an identifier: an underscore, an ASCII letter,
				+	or anything `unicode.is_letter` accepts.
				+*/
				+is_letter :: proc(r: rune) -> bool {
				+	switch r {
				+	case '_', 'A'..='Z', 'a'..='z':
				+		return true
				+	}
				+	return unicode.is_letter(r)
				+}
			
 
				+
			
 
				+/*
				+	Reports whether `r` may appear inside an identifier: `_`, `-`, `:`, an ASCII
				+	letter or digit, or anything `unicode.is_digit` / `unicode.is_letter` accepts.
				+*/
				+is_valid_identifier_rune :: proc(r: rune) -> bool {
				+	switch r {
				+	case '_', '-', ':':        return true
				+	case 'A'..='Z', 'a'..='z': return true
				+	// Inclusive range spelled with `..=`, matching the letter ranges above.
				+	case '0'..='9':            return true
				+	}
				+	return unicode.is_digit(r) || unicode.is_letter(r)
				+}
			
 
				+
			
 
				+/*
				+	Consumes an identifier starting at the current rune and returns its text.
				+
				+	A namespaced name has at most two parts (`namespace:ident`), so scanning stops
				+	in front of a second colon.
				+*/
				+scan_identifier :: proc(t: ^Tokenizer) -> string {
				+	start      := t.offset
				+	seen_colon := false
				+
				+	for is_valid_identifier_rune(t.ch) {
				+		advance_rune(t)
				+
				+		if t.ch != ':' {
				+			continue
				+		}
				+		if seen_colon {
				+			break
				+		}
				+		seen_colon = true
				+	}
				+	return string(t.src[start : t.offset])
				+}
			
 
				+
			
 
				+/*
				+	Scans text from the current rune up to (not including) the `close` rune and
				+	returns the source slice starting at `offset`.
				+
				+	Inputs:
				+	- offset:        byte offset at which the returned text starts.
				+	- close:         rune that ends the text; `<` for a tag body, a quote for an attribute value.
				+	- consume_close: when true, the closing rune is consumed as well.
				+
				+	Returns:
				+	- value: the scanned text. CDATA sections and escapes are passed through unchanged.
				+	- err:   `.Premature_EOF` when the input ends first, `.Invalid_Tag_Value` when a newline
				+	         is met outside a CDATA section, `.None` otherwise.
				+
				+	The `close` rune is ignored while inside a CDATA section.
				+*/
				+scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false) -> (value: string, err: Error) {
				+	err = .None
				+	in_cdata := false
				+
				+	// Reports whether the source at the current offset starts with `marker`.
				+	// The bounds check is relative to `offset`, so a marker that ends exactly at the end of the source is found.
				+	at_marker :: proc(t: ^Tokenizer, marker: string) -> bool {
				+		if t.offset + len(marker) > len(t.src) {
				+			return false
				+		}
				+		return string(t.src[t.offset:][:len(marker)]) == marker
				+	}
				+
				+	loop: for {
				+		ch := t.ch
				+
				+		switch ch {
				+		case -1:
				+			error(t, t.offset, "[scan_string] Premature end of file.\n")
				+			return "", .Premature_EOF
				+
				+		case '<':
				+			// Might be the start of a CDATA tag.
				+			if at_marker(t, CDATA_START) {
				+				in_cdata = true
				+			}
				+
				+		case ']':
				+			// Might be the end of a CDATA tag.
				+			if at_marker(t, CDATA_END) {
				+				in_cdata = false
				+			}
				+
				+		case '\n':
				+			if !in_cdata {
				+				// The scanned text is data, not a format string: a `%` in it must not be interpreted.
				+				error(t, offset, "%v", string(t.src[offset : t.offset]))
				+				error(t, offset, "[scan_string] Not terminated\n")
				+				err = .Invalid_Tag_Value
				+				break loop
				+			}
				+		}
				+
				+		if ch == close && !in_cdata {
				+			// If it's not inside a CDATA tag, it's the end of this body.
				+			break loop
				+		}
				+
				+		advance_rune(t)
				+	}
				+
				+	lit := string(t.src[offset : t.offset])
				+	if consume_close {
				+		advance_rune(t)
				+	}
				+
				+	/*
				+		TODO: Handle decoding escape characters and unboxing CDATA.
				+	*/
				+
				+	return lit, err
				+}
			
 
				+
			
 
				+/*
				+	Returns the next token without consuming it: the tokenizer state (error count
				+	included) is saved before scanning and restored afterwards.
				+*/
				+peek :: proc(t: ^Tokenizer) -> (token: Token) {
				+	saved := t^
				+	defer t^ = saved
				+
				+	return scan(t)
				+}
			
 
				+
			
 
				+/*
				+	Skips whitespace and returns the next token.
				+
				+	- A rune accepted by `is_letter` starts an `.Ident`.
				+	- A single or double quote starts a `.String`; the token text is the content between the quotes.
				+	- The punctuation handled below maps to its own kind, the end of input to `.EOF`.
				+	- Everything else yields `.Invalid` with the consumed source as text.
				+
				+	The token position is that of its first rune.
				+*/
				+scan :: proc(t: ^Tokenizer) -> Token {
				+	skip_whitespace(t)
				+
				+	offset := t.offset
				+	pos    := offset_to_pos(t, offset)
				+	ch     := t.ch
				+
				+	kind: Token_Kind
				+	lit:  string
				+
				+	if is_letter(ch) {
				+		lit  = scan_identifier(t)
				+		kind = .Ident
				+	} else {
				+		advance_rune(t)
				+		switch ch {
				+		case -1:
				+			kind = .EOF
				+
				+		case '<': kind = .Lt
				+		case '>': kind = .Gt
				+		case '!': kind = .Exclaim
				+		case '?': kind = .Question
				+		case '=': kind = .Eq
				+		case '#': kind = .Hash
				+		case '/': kind = .Slash
				+		case '-': kind = .Dash
				+		case ':': kind = .Colon
				+
				+		case '"', '\'':
				+			err: Error
				+			lit, err = scan_string(t, t.offset, ch, true)
				+			if err == .None {
				+				/*
				+					Return right away. An empty string is a valid value, and must not be
				+					mistaken for "no text set" by the fallback below, which would replace
				+					it with the raw source including both quotes.
				+				*/
				+				return Token{.String, lit, pos}
				+			}
				+			kind = .Invalid
				+
				+		case '\n':
				+			lit = "\n"
				+
				+		case '\\':
				+			token := scan(t)
				+			if token.pos.line == pos.line {
				+				error(t, token.pos.offset, "expected a newline after \\")
				+			}
				+			return token
				+
				+		case:
				+			// Unknown runes are not reported here; the caller decides what to do with `.Invalid`.
				+			kind = .Invalid
				+		}
				+	}
				+
				+	// No explicit text: fall back to the source the token covers.
				+	if lit == "" {
				+		lit = string(t.src[offset : t.offset])
				+	}
				+	return Token{kind, lit, pos}
				+}
			
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -0,0 +1,651 @@
 
				+package xml
			
 
				+/*
			
 
				+	An XML 1.0 / 1.1 parser
			
 
				+
			
 
				+	Copyright 2021 Jeroen van Rijn <[email protected]>.
			
 
				+	Made available under Odin's BSD-3 license.
			
 
				+
			
 
				+	A from-scratch XML implementation, loosely modelled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
			
 
				+
			
 
				+	Features:
			
 
				+		- Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage.
			
 
				+		- Simple to understand and use. Small.
			
 
				+
			
 
				+	Caveats:
			
 
				+		- We do NOT support HTML in this package, as that may or may not be valid XML.
			
 
				+		  If it works, great. If it doesn't, that's not considered a bug.
			
 
				+
			
 
				+		- We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences.
			
 
				+		- <!ELEMENT and <!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options.
			
 
				+
			
 
				+	TODO:
			
 
				+	- Optional CDATA unboxing.
			
 
				+	- Optional `&gt;`, `&#32;`, `&#x20;` and other escape substitution in tag bodies.
			
 
				+
			
 
				+	MAYBE:
			
 
				+	- XML writer?
			
 
				+	- Serialize/deserialize Odin types?
			
 
				+
			
 
				+	List of contributors:
			
 
				+		Jeroen van Rijn: Initial implementation.
			
 
				+*/
			
 
				+
			
 
				+import "core:strings"
			
 
				+import "core:mem"
			
 
				+import "core:os"
			
 
				+
			
 
				+// Options used by `parse_from_slice` and `parse_from_file` when the caller passes none:
				+// unsupported constructs are ignored and no particular DOCTYPE is expected.
				+DEFAULT_Options :: Options{
				+	flags            = {
				+		.Ignore_Unsupported,
				+	},
				+	expected_doctype = "",
				+}
			
 
				+
			
 
				+/*
				+	Parser option flags, combined in an `Option_Flags` bit set.
				+*/
				+Option_Flag :: enum {
				+	/*
				+		Document MUST start with `<?xml` prolog.
				+	*/
				+	Must_Have_Prolog,
				+
				+	/*
				+		Document MUST have a `<!DOCTYPE`.
				+	*/
				+	Must_Have_DocType,
				+
				+	/*
				+		By default we skip comments. Use this option to intern a comment on a parented Element.
				+	*/
				+	Intern_Comments,
				+
				+	/*
				+		How to handle unsupported parts of the specification, like <! other than <!DOCTYPE and <![CDATA[
				+		Setting both flags is rejected by `validate_options` with `.Conflicting_Options`.
				+	*/
				+	Error_on_Unsupported,
				+	Ignore_Unsupported,
				+
				+	/*
				+		By default CDATA tags are passed-through as-is.
				+		This option unwraps them when encountered.
				+		Not implemented yet: `validate_options` returns `.Unhandled_CDATA_Unboxing` when it is set.
				+	*/
				+	Unbox_CDATA,
				+
				+	/*
				+		By default SGML entities like `&gt;`, `&#32;` and `&#x20;` are passed-through as-is.
				+		This option decodes them when encountered.
				+		Not implemented yet: `validate_options` returns `.Unhandled_SGML_Entity_Decoding` when it is set.
				+	*/
				+	Decode_SGML_Entities,
				+}
			
 
				+
			
 
				+/*
				+	A parsed XML document, as returned by `parse`. Release it with `destroy`.
				+*/
				+Document :: struct {
				+	// Root element, attributes of the `<?xml` prolog and the encoding named in the prolog.
				+	root:     ^Element,
				+	prolog:   Attributes,
				+	encoding: Encoding,
				+
				+	doctype: struct {
				+		/*
				+			We only scan the <!DOCTYPE IDENT part and skip the rest.
				+			The skipped remainder of the tag is kept verbatim in `rest`.
				+		*/
				+		ident: string,
				+		rest:  string,
				+	},
				+
				+	/*
				+		Internal
				+		`tokenizer` and `allocator` are assigned by `parse_from_slice`;
				+		`intern` holds the strings of identifiers, attributes and comments.
				+	*/
				+	tokenizer: ^Tokenizer,
				+	allocator: mem.Allocator,
				+	intern:    strings.Intern,
				+}
			
 
				+
			
 
				+/*
				+	A node of the document tree: either a regular element or an interned comment.
				+*/
				+Element :: struct {
				+	// Tag name, body text (comment text for comments) and attributes.
				+	ident:   string,
				+	value:   string,
				+	attribs: Attributes,
				+
				+	// `Element` is the zero value, so a freshly allocated node is a regular element.
				+	kind: enum {
				+		Element = 0,
				+		Comment,
				+	},
				+
				+	// The parser sets the root element's `parent` to the root itself.
				+	parent:   ^Element,
				+	children: [dynamic]^Element,
				+}
			
 
				+
			
 
				+// A `key="value"` attribute; `parse_attribute` interns both strings in the document.
				+Attr :: struct {
				+	key: string,
				+	val: string,
				+}
			
 
				+
			
 
				+// Attribute list of an element or of the prolog, in the order the attributes were parsed.
				+Attributes :: [dynamic]Attr
			
 
				+
			
 
				+/*
				+	Parser options.
				+	A non-empty `expected_doctype` must match both the DOCTYPE (when present) and the root tag.
				+*/
				+Options :: struct {
				+	flags:            Option_Flags,
				+	expected_doctype: string,
				+}
				+// Set of `Option_Flag` values.
				+Option_Flags :: bit_set[Option_Flag]
			
 
				+
			
 
				+/*
				+	Document encoding, as set by `parse_prolog` from the prolog's `encoding` attribute.
				+	It stays `Unknown` when there is no such attribute or its value is not recognized.
				+*/
				+Encoding :: enum {
				+	Unknown,
				+
				+	UTF_8,
				+	ISO_8859_1,
				+
				+	/*
				+		Aliases
				+	*/
				+	LATIN_1 = ISO_8859_1,
				+}
			
 
				+
			
 
				+/*
				+	Errors returned by the parser and its helpers. `None` (zero) means success.
				+*/
				+Error :: enum {
				+	/*
				+		General return values.
				+	*/
				+	None = 0,
				+	General_Error,
				+	Unexpected_Token,
				+	Invalid_Token,
				+
				+	/*
				+		Couldn't find, open or read file.
				+	*/
				+	File_Error,
				+
				+	/*
				+		File too short.
				+	*/
				+	Premature_EOF,
				+
				+	/*
				+		XML-specific errors.
				+	*/
				+	No_Prolog,
				+	Invalid_Prolog,
				+	Too_Many_Prologs,
				+
				+	No_DocType,
				+	Too_Many_DocTypes,
				+	// Returned when a DOCTYPE follows the root element, i.e. the DOCTYPE must precede all elements.
				+	DocType_Must_Proceed_Elements,
				+
				+	/*
				+		If a DOCTYPE is present _or_ the caller
				+		asked for a specific DOCTYPE and the DOCTYPE
				+		and root tag don't match, we return `.Invalid_DocType`.
				+	*/
				+	Invalid_DocType,
				+
				+	Invalid_Tag_Value,
				+	Mismatched_Closing_Tag,
				+
				+	Unclosed_Comment,
				+	Comment_Before_Root_Element,
				+	Invalid_Sequence_In_Comment,
				+
				+	Unsupported_Version,
				+	Unsupported_Encoding,
				+
				+	/*
				+		<!FOO are usually skipped.
				+	*/
				+	Unhandled_Bang,
				+
				+	Duplicate_Attribute,
				+	Conflicting_Options,
				+
				+	/*
				+		Unhandled TODO:
				+		Returned by `validate_options` for option flags that are not implemented yet.
				+	*/
				+	Unhandled_CDATA_Unboxing,
				+	Unhandled_SGML_Entity_Decoding,
				+}
			
 
				+
			
 
				+/*
			
 
				+	Implementation starts here.
			
 
				+*/
			
 
				+/*
				+	Parses an XML document held in memory.
				+
				+	Inputs:
				+	- data:          the document, expected to be UTF-8.
				+	- options:       parser options; validated with `validate_options` first.
				+	- path:          name used when reporting positions.
				+	- error_handler: receives error messages.
				+	- allocator:     used for the document, its elements and its interned strings.
				+
				+	Returns:
				+	- doc: the document. It is nil only when the options are rejected; on any later
				+	       error the partially built document is returned, so it can always be
				+	       handed to `destroy`.
				+	- err: `.None` on success, otherwise the first error met.
				+*/
				+parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
				+	context.allocator = allocator
				+
				+	opts := validate_options(options) or_return
				+
				+	t := &Tokenizer{}
				+	init(t, string(data), path, error_handler)
				+
				+	doc = new(Document)
				+	doc.allocator = allocator
				+	doc.tokenizer = t
				+
				+	/*
				+		The tokenizer lives on this procedure's stack, and the helpers reach it through the document.
				+		Clear the pointer on the way out so the returned document never holds a dangling reference.
				+	*/
				+	defer doc.tokenizer = nil
				+
				+	strings.intern_init(&doc.intern, allocator, allocator)
				+
				+	// A bare `return` below reports `.Unexpected_Token`.
				+	err =               .Unexpected_Token
				+	element, parent:    ^Element
				+
				+	/*
				+		If a DOCTYPE is present, the root tag has to match.
				+		If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match.
				+	*/
				+	expected_doctype := options.expected_doctype
				+
				+	loop: for {
				+		tok := scan(t)
				+		#partial switch tok.kind {
				+
				+		case .Lt:
				+			open := scan(t)
				+			#partial switch open.kind {
				+
				+			case .Question:
				+				/*
				+					<?xml
				+				*/
				+				next := scan(t)
				+
				+				is_xml := next.kind == .Ident && len(next.text) == 3 && strings.to_lower(next.text, context.temp_allocator) == "xml"
				+				if !is_xml {
				+					error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", next.text)
				+					return
				+				}
				+				if len(doc.prolog) > 0 {
				+					/*
				+						We've already seen a prolog.
				+					*/
				+					return doc, .Too_Many_Prologs
				+				}
				+				parse_prolog(doc) or_return
				+
				+			case .Exclaim:
				+				/*
				+					<!
				+				*/
				+				next := scan(t)
				+				#partial switch next.kind {
				+				case .Ident:
				+					switch next.text {
				+					case "DOCTYPE":
				+						if len(doc.doctype.ident) > 0 {
				+							return doc, .Too_Many_DocTypes
				+						}
				+						if doc.root != nil {
				+							return doc, .DocType_Must_Proceed_Elements
				+						}
				+						parse_doctype(doc) or_return
				+
				+						if len(expected_doctype) > 0 && expected_doctype != doc.doctype.ident {
				+							error(t, t.offset, "Invalid DOCTYPE. Expected: %v, got: %v\n", expected_doctype, doc.doctype.ident)
				+							return doc, .Invalid_DocType
				+						}
				+						expected_doctype = doc.doctype.ident
				+
				+					case:
				+						if .Error_on_Unsupported in opts.flags {
				+							error(t, t.offset, "Unhandled: <!%v\n", next.text)
				+							err = .Unhandled_Bang
				+							return
				+						}
				+						skip_element(t) or_return
				+					}
				+
				+				case .Dash:
				+					/*
				+						Comment: <!-- -->.
				+						The grammar does not allow a comment to end in --->
				+					*/
				+					if doc.root == nil {
				+						return doc, .Comment_Before_Root_Element
				+					}
				+
				+					_ = expect(t, .Dash) or_return
				+					offset := t.offset
				+
				+					for {
				+						advance_rune(t)
				+						ch := t.ch
				+
				+						/*
				+							A comment ends when we see -->, preceded by a character that's not a dash.
				+							"For compatibility, the string "--" (double-hyphen) must not occur within comments."
				+
				+							See: https://www.w3.org/TR/2006/REC-xml11-20060816/#dt-comment
				+
				+							Thanks to the length (4) of the comment start, we also have enough lookback,
				+							and the peek at the next byte asserts that there's at least one more character
				+							that's a `>`.
				+						*/
				+						if ch < 0 {
				+							error(t, offset, "[parse] Comment was not terminated\n")
				+							return doc, .Unclosed_Comment
				+						}
				+
				+						if string(t.src[t.offset - 1:][:2]) == "--" {
				+							if peek_byte(t) == '>' {
				+								break
				+							} else {
				+								error(t, t.offset - 1, "Invalid -- sequence in comment.\n")
				+								return doc, .Invalid_Sequence_In_Comment
				+							}
				+						}
				+					}
				+
				+					if .Intern_Comments in opts.flags {
				+						// `element` is not nil here: a comment before the root element was rejected above.
				+						el := new(Element)
				+
				+						el.parent = element
				+						el.kind   = .Comment
				+						el.value  = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1]))
				+						append(&element.children, el)
				+					}
				+
				+					_ = expect(t, .Dash) or_return
				+					_ = expect(t, .Gt)   or_return
				+
				+				case:
				+					error(t, t.offset, "Invalid Token after <!. Expected .Ident, got %#v\n", next)
				+					return
				+				}
				+
				+			case .Ident:
				+				/*
				+					e.g. <odin - Start of new element.
				+				*/
				+				element = new(Element)
				+
				+				if doc.root == nil {
				+					/*
				+						First element. The root is its own parent.
				+					*/
				+					doc.root = element
				+					parent   = element
				+				} else {
				+					append(&parent.children, element)
				+				}
				+
				+				element.parent = parent
				+				element.ident  = strings.intern_get(&doc.intern, open.text)
				+
				+				parse_attributes(doc, &element.attribs) or_return
				+
				+				/*
				+					If a DOCTYPE is present _or_ the caller
				+					asked for a specific DOCTYPE and the DOCTYPE
				+					and root tag don't match, we return .Invalid_DocType.
				+				*/
				+				if element == doc.root {
				+					if len(expected_doctype) > 0 && expected_doctype != open.text {
				+						error(t, t.offset, "Root Tag doesn't match DOCTYPE. Expected: %v, got: %v\n", expected_doctype, open.text)
				+						return doc, .Invalid_DocType
				+					}
				+				}
				+
				+				/*
				+					One of these should follow:
				+					- `>`,  which means we've just opened this tag and expect a later element to close it.
				+					- `/>`, which means this is an 'empty' or self-closing tag.
				+				*/
				+				end_token := scan(t)
				+
				+				#partial switch end_token.kind {
				+				case .Gt:
				+					/*
				+						We're now the new parent.
				+					*/
				+					parent = element
				+
				+				case .Slash:
				+					/*
				+						Empty tag. It is already closed, so the enclosing element becomes
				+						the current one again. Otherwise the closing tag of the enclosing
				+						element would be compared against this tag and reported as mismatched.
				+					*/
				+					_ = expect(t, .Gt) or_return
				+					element = parent
				+
				+				case:
				+					error(t, t.offset, "Expected close tag, got: %#v\n", end_token)
				+					return
				+				}
				+
				+			case .Slash:
				+				/*
				+					Close tag.
				+				*/
				+				ident := expect(t, .Ident) or_return
				+				_      = expect(t, .Gt)    or_return
				+
				+				// `element` is nil when a closing tag shows up before any element was opened.
				+				if element == nil || element.ident != ident.text {
				+					error(t, t.offset, "Mismatched Closing Tag: %v\n", ident.text)
				+					return doc, .Mismatched_Closing_Tag
				+				}
				+				parent  = element.parent
				+				element = parent
				+
				+			case:
				+				error(t, t.offset, "Invalid Token after <: %#v\n", open)
				+				return
				+			}
				+
				+		case .EOF:
				+			break loop
				+
				+		case:
				+			/*
				+				This should be a tag's body text.
				+			*/
				+			if element == nil {
				+				error(t, tok.pos.offset, "Unexpected text before the root element.")
				+				return doc, .Unexpected_Token
				+			}
				+			element.value = scan_string(t, tok.pos.offset) or_return
				+		}
				+	}
				+
				+	if .Must_Have_Prolog in opts.flags && len(doc.prolog) == 0 {
				+		return doc, .No_Prolog
				+	}
				+
				+	if .Must_Have_DocType in opts.flags && len(doc.doctype.ident) == 0 {
				+		return doc, .No_DocType
				+	}
				+
				+	return doc, .None
				+}
			
 
				+
			
 
				+/*
				+	Reads `filename` and parses its contents with `parse_from_slice`, using the
				+	file name as the path for reported positions.
				+
				+	Returns a nil document and `.File_Error` when the file cannot be read.
				+	The file contents are released before returning.
				+*/
				+parse_from_file :: proc(filename: string, options := DEFAULT_Options, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
				+	context.allocator = allocator
				+
				+	contents, ok := os.read_entire_file(filename)
				+	defer delete(contents)
				+
				+	if ok {
				+		return parse_from_slice(contents, options, filename, error_handler, allocator)
				+	}
				+	return nil, .File_Error
				+}
			
 
				+
			
 
				+// Overload set: `parse` accepts either a file name (`parse_from_file`) or a byte slice (`parse_from_slice`).
				+parse :: proc { parse_from_file, parse_from_slice }
			
 
				+
			
 
				+/*
				+	Frees `element` together with its whole subtree: children first (recursively),
				+	then the attribute and child lists, then the element itself.
				+	A nil `element` is ignored.
				+*/
				+free_element :: proc(element: ^Element) {
				+	if element == nil {
				+		return
				+	}
				+	defer free(element)
				+
				+	// NOTE: Recursive. Could be rewritten to collect the pointers in a list and free them from there.
				+	for i in 0..<len(element.children) {
				+		free_element(element.children[i])
				+	}
				+
				+	delete(element.attribs)
				+	delete(element.children)
				+}
			
 
				+
			
 
				+/*
				+	Releases everything owned by `doc`: the element tree, the interned strings,
				+	the prolog attributes and the document itself. A nil `doc` is ignored.
				+*/
				+destroy :: proc(doc: ^Document) {
				+	if doc == nil {
				+		return
				+	}
				+	defer free(doc)
				+
				+	free_element(doc.root)
				+	strings.intern_destroy(&doc.intern)
				+	delete(doc.prolog)
				+}
			
 
				+
			
 
				+/*
			
 
				+	Helpers.
			
 
				+*/
			
 
				+
			
 
				+/*
				+	Checks `options` for flag combinations the parser cannot honor.
				+
				+	Returns the options unchanged, together with:
				+	- `.Conflicting_Options` when both `.Error_on_Unsupported` and `.Ignore_Unsupported` are set,
				+	- `.Unhandled_CDATA_Unboxing` when `.Unbox_CDATA` is set,
				+	- `.Unhandled_SGML_Entity_Decoding` when `.Decode_SGML_Entities` is set,
				+	- `.None` otherwise.
				+*/
				+validate_options :: proc(options: Options) -> (validated: Options, err: Error) {
				+	flags := options.flags
				+
				+	switch {
				+	case .Error_on_Unsupported in flags && .Ignore_Unsupported in flags:
				+		return options, .Conflicting_Options
				+
				+	case .Unbox_CDATA in flags:
				+		return options, .Unhandled_CDATA_Unboxing
				+
				+	case .Decode_SGML_Entities in flags:
				+		return options, .Unhandled_SGML_Entity_Decoding
				+	}
				+	return options, .None
				+}
			
 
				+
			
 
				+/*
				+	Scans the next token and checks that it is of the given `kind`.
				+
				+	The token is returned either way. On a mismatch an error is reported and
				+	`.Unexpected_Token` is returned.
				+*/
				+expect :: proc(t: ^Tokenizer, kind: Token_Kind) -> (tok: Token, err: Error) {
				+	tok = scan(t)
				+
				+	if tok.kind != kind {
				+		error(t, t.offset, "Expected \"%v\", got \"%v\".", kind, tok.kind)
				+		err = .Unexpected_Token
				+	}
				+	return
				+}
			
 
				+
			
 
				+/*
				+	Parses one `key="value"` attribute at the tokenizer's current position.
				+
				+	Returns the attribute with both strings interned in the document, and the
				+	byte offset at which the key starts (used for error reporting).
				+	Fails with the error of the first token that is not the expected kind.
				+*/
				+parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) {
				+	assert(doc != nil)
				+	context.allocator = doc.allocator
				+	t := doc.tokenizer
				+
				+	key_tok := expect(t, .Ident) or_return
				+	// The tokenizer now sits right behind the key, so step back by its length.
				+	offset = t.offset - len(key_tok.text)
				+
				+	_        = expect(t, .Eq)     or_return
				+	val_tok := expect(t, .String) or_return
				+
				+	attr = Attr{
				+		key = strings.intern_get(&doc.intern, key_tok.text),
				+		val = strings.intern_get(&doc.intern, val_tok.text),
				+	}
				+	return attr, offset, .None
				+}
			
 
				+
			
 
				+/*
				+	Reports an error and returns `.Duplicate_Attribute` when `attribs` already holds
				+	an attribute with the same key as `attr`. `offset` is where the error is reported.
				+*/
				+check_duplicate_attributes :: proc(t: ^Tokenizer, attribs: Attributes, attr: Attr, offset: int) -> (err: Error) {
				+	for i in 0..<len(attribs) {
				+		if attribs[i].key != attr.key {
				+			continue
				+		}
				+		error(t, offset, "Duplicate attribute: %v\n", attr.key)
				+		return .Duplicate_Attribute
				+	}
				+	return .None
				+}
			
 
				+
			
 
				+/*
				+	Parses attributes for as long as the next token is an identifier and appends
				+	them to `attribs`. Stops with an error on a malformed or duplicate attribute.
				+	Trailing whitespace is skipped afterwards.
				+*/
				+parse_attributes :: proc(doc: ^Document, attribs: ^Attributes) -> (err: Error) {
				+	assert(doc != nil)
				+	context.allocator = doc.allocator
				+	t := doc.tokenizer
				+
				+	for {
				+		if peek(t).kind != .Ident {
				+			break
				+		}
				+		attr, attr_offset := parse_attribute(doc)                  or_return
				+		check_duplicate_attributes(t, attribs^, attr, attr_offset) or_return
				+		append(attribs, attr)
				+	}
				+
				+	skip_whitespace(t)
				+	return .None
				+}
			
 
				+
			
 
				+/*
				+	Parses the remainder of a `<?xml ... ?>` prolog into `doc.prolog`.
				+
				+	Versions other than 1.0 and 1.1 and unrecognized encodings are only reported
				+	through the error handler; parsing continues. A recognized `encoding` attribute
				+	sets `doc.encoding`. The prolog has to end in `?>`.
				+*/
				+parse_prolog :: proc(doc: ^Document) -> (err: Error) {
				+	assert(doc != nil)
				+	context.allocator = doc.allocator
				+	t := doc.tokenizer
				+
				+	start := t.offset
				+	parse_attributes(doc, &doc.prolog) or_return
				+
				+	for attr in doc.prolog {
				+		switch attr.key {
				+		case "version":
				+			if attr.val != "1.0" && attr.val != "1.1" {
				+				error(t, start, "[parse_prolog] Warning: Unhandled XML version: %v\n", attr.val)
				+			}
				+
				+		case "encoding":
				+			encoding := strings.to_lower(attr.val, context.temp_allocator)
				+			switch encoding {
				+			case "utf-8", "utf8":
				+				doc.encoding = .UTF_8
				+
				+			case "latin-1", "latin1", "iso-8859-1":
				+				doc.encoding = .LATIN_1
				+
				+			case:
				+				// Unrecognized encoding, assume UTF-8.
				+				error(t, start, "[parse_prolog] Warning: Unrecognized encoding: %v\n", attr.val)
				+			}
				+
				+		case:
				+			// Other attributes are ignored.
				+		}
				+	}
				+
				+	_ = expect(t, .Question) or_return
				+	_ = expect(t, .Gt)       or_return
				+
				+	return .None
				+}
			
 
				+
			
 
				+/*
				+	Skips the rest of the tag the tokenizer is currently inside of.
				+
				+	Tokens are consumed up to and including the `>` that closes the tag; every
				+	nested `<` needs its own `>` first. Returns `.Premature_EOF` when the input
				+	ends before that.
				+*/
				+skip_element :: proc(t: ^Tokenizer) -> (err: Error) {
				+	// Number of `>` still needed; we start inside one open tag.
				+	depth := 1
				+
				+	for depth > 0 {
				+		#partial switch scan(t).kind {
				+		case .EOF:
				+			error(t, t.offset, "[skip_element] Premature EOF\n")
				+			return .Premature_EOF
				+
				+		case .Lt:
				+			depth += 1
				+
				+		case .Gt:
				+			depth -= 1
				+		}
				+	}
				+	return .None
				+}
			
 
				+
			
 
				+/*
				+	Parses a DOCTYPE declaration, the tokenizer being just past `<!DOCTYPE`.
				+
				+		<!DOCTYPE greeting SYSTEM "hello.dtd">
				+
				+		<!DOCTYPE greeting [
				+			<!ELEMENT greeting (#PCDATA)>
				+		]>
				+
				+	Only the identifier is interpreted and stored in `doc.doctype.ident`. Everything
				+	between it and the closing `>` is skipped and stored verbatim in `doc.doctype.rest`.
				+*/
				+parse_doctype :: proc(doc: ^Document) -> (err: Error) {
				+	assert(doc != nil)
				+	context.allocator = doc.allocator
				+	t := doc.tokenizer
				+
				+	ident := expect(t, .Ident) or_return
				+	doc.doctype.ident = strings.intern_get(&doc.intern, ident.text)
				+
				+	skip_whitespace(t)
				+	rest_start := t.offset
				+	skip_element(t) or_return
				+
				+	// The tokenizer is now just past the closing `>`, so the rest ends one byte earlier.
				+	rest := string(t.src[rest_start : t.offset - 1])
				+	doc.doctype.rest = strings.intern_get(&doc.intern, rest)
				+	return .None
				+}
			
--- a/tests/core/Makefile
+++ b/tests/core/Makefile
@@ -1,22 +1,29 @@
 
				 ODIN=../../odin
			
 
				 PYTHON=$(shell which python3)
			
 
				 
			
 
				-all: download_test_assets image_test compress_test strings_test hash_test crypto_test
			
 
				+all: download_test_assets image_test compress_test strings_test hash_test crypto_test encoding_test
			
 
				 
			
 
				 download_test_assets:
			
 
				 	$(PYTHON) download_assets.py
			
 
				 
			
 
				 image_test:
			
 
				-	$(ODIN) run image/test_core_image.odin
			
 
				+	$(ODIN) run image/test_core_image.odin -out=test_image -o:speed -no-bounds-check
			
 
				 
			
 
				 compress_test:
			
 
				-	$(ODIN) run compress/test_core_compress.odin
			
 
				+	$(ODIN) run compress/test_core_compress.odin -out=test_compress -o:speed -no-bounds-check
			
 
				 
			
 
				 strings_test:
			
 
				-	$(ODIN) run strings/test_core_strings.odin
			
 
				+	$(ODIN) run strings/test_core_strings.odin -out=test_strings -o:speed -no-bounds-check
			
 
				+
			
 
				+odin_test:
			
 
				+	$(ODIN) run odin -out=test_odin -o:speed -no-bounds-check
			
 
				 
			
 
				 hash_test:
			
 
				 	$(ODIN) run hash -out=test_hash -o:speed -no-bounds-check
			
 
				 
			
 
				 crypto_test:
			
 
				-	$(ODIN) run crypto -out=crypto_hash -o:speed -no-bounds-check
			
 
				+	$(ODIN) run crypto -out=crypto_hash -o:speed -no-bounds-check
			
 
				+
			
 
				+encoding_test:
			
 
				+	$(ODIN) run encoding/json -out=test_encoding_json -o:speed -no-bounds-check
			
 
				+	$(ODIN) run encoding/xml  -out=test_encoding_xml  -o:speed -no-bounds-check
			
--- a/tests/core/assets/xml/nl_NL-qt-ts.ts
+++ b/tests/core/assets/xml/nl_NL-qt-ts.ts
@@ -0,0 +1,35 @@
 
				+<?xml version="1.0" encoding="utf-8"?>

			
 
				+<!DOCTYPE TS>

			
 
				+<TS version="2.1" language="nl" sourcelanguage="en">

			
 
				+<context>

			
 
				+  <name>Page</name>

			
 
				+  <message>

			
 
				+    <source>Text for translation</source>

			
 
				+    <comment>commenting</comment>

			
 
				+    <translation type="obsolete">Tekst om te vertalen</translation>

			
 
				+  </message>

			
 
				+  <message>

			
 
				+     <source>Also text to translate</source>

			
 
				+     <extracomment>some text</extracomment>

			
 
				+    <translation>Ook tekst om te vertalen</translation>

			
 
				+  </message>

			
 
				+</context>

			
 
				+<context>

			
 
				+  <name>installscript</name>

			
 
				+  <message>

			
 
				+    <source>99 bottles of beer on the wall</source>

			
 
				+    <oldcomment>some new comments here</oldcomment>

			
 
				+    <translation>99 flessen bier op de muur</translation>

			
 
				+  </message>

			
 
				+</context>

			
 
				+<context>

			
 
				+    <name>apple_count</name>

			
 
				+    <message numerus="yes">

			
 
				+      <source>%d apple(s)</source>

			
 
				+      <translation>

			
 
				+        <numerusform>%d appel</numerusform>

			
 
				+        <numerusform>%d appels</numerusform>

			
 
				+      </translation>

			
 
				+    </message>

			
 
				+  </context>

			
 
				+</TS>

			
--- a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff
+++ b/tests/core/assets/xml/nl_NL-xliff-1.0.xliff
@@ -0,0 +1,38 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<xliff version="1.2" xmlns="urn:oasis:names:tc:xliff:document:1.2">
			
 
				+	<file id="42" original="Foozle.xml" source-language="en" target-language="nl-NL" datatype="plaintext">
			
 
				+		<body>
			
 
				+			<trans-unit id="874396" maxwidth="20" size-unit="char">
			
 
				+				<source>text</source>
			
 
				+				<target state="translated">tekst</target>
			
 
				+				<note>Context</note>
			
 
				+			</trans-unit>
			
 
				+			<trans-unit id="874397" approved="yes">
			
 
				+				<source>text 1</source>
			
 
				+				<target state="translated">tekst 1</target>
			
 
				+				<note>Context 1</note>
			
 
				+			</trans-unit>
			
 
				+			<trans-unit id="874398">
			
 
				+				<source>text 2</source>
			
 
				+				<target state="needs-translation"/>
			
 
				+				<context context-type="context">Context of the segment 2</context>
			
 
				+			</trans-unit>
			
 
				+			<trans-unit id="874399" translate="no">
			
 
				+				<source>text 3</source>
			
 
				+				<target state="final">translation 3</target>
			
 
				+				<note>Context 3</note>
			
 
				+			</trans-unit>
			
 
				+			<group restype="x-gettext-plurals">
			
 
				+				<note>Plurals</note>
			
 
				+				<trans-unit id="14343743[0]">
			
 
				+					<source>%d month</source>
			
 
				+					<target xml:lang="nl" state="translated">%d maand</target>
			
 
				+				</trans-unit>
			
 
				+				<trans-unit id="14343743[1]">
			
 
				+					<source>%d months</source>
			
 
				+					<target xml:lang="nl" state="translated">%d maanden</target>
			
 
				+				</trans-unit>
			
 
				+			</group>
			
 
				+		</body>
			
 
				+	</file>
			
 
				+</xliff>
			
--- a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff
+++ b/tests/core/assets/xml/nl_NL-xliff-2.0.xliff
@@ -0,0 +1,52 @@
 
				+<?xml version="1.0" encoding="utf-8"?>

			
 
				+<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en" trgLang="nl">

			
 
				+	<file id="f1">

			
 
				+		<notes>

			
 
				+			<note id="n1">Note for file</note>

			
 
				+		</notes>

			
 
				+		<unit id="u1">

			
 
				+			<notes>

			
 
				+				<note id="n1">Note for unit</note>

			
 
				+			</notes>

			
 
				+			<segment id="s1" state="initial">

			
 
				+				<source>text</source>

			
 
				+				<target></target>

			
 
				+			</segment>

			
 
				+		</unit>

			
 
				+		<unit id="u2">

			
 
				+			<notes>

			
 
				+				<note id="n2">Note for unit 2</note>

			
 
				+			</notes>

			
 
				+			<segment id="s2" state="translated">

			
 
				+				<source>text 2</source>

			
 
				+				<target>translation 2</target>

			
 
				+			</segment>

			
 
				+		</unit>

			
 
				+		<unit id="u3">

			
 
				+			<notes>

			
 
				+				<note id="n3">Note for unit 3</note>

			
 
				+			</notes>

			
 
				+			<segment id="s3" state="final">

			
 
				+				<source>text 3</source>

			
 
				+				<target>approved translation 3</target>

			
 
				+			</segment>

			
 
				+		</unit>

			
 
				+		<group id="90290" type="x-gettext:plurals">

			
 
				+			<unit id="90291" name="90290[0]">

			
 
				+				<notes>

			
 
				+					<note category="context">Plurals</note>

			
 
				+				</notes>

			
 
				+				<segment>

			
 
				+					<source>%d month</source>

			
 
				+					<target xml:lang="nl">%d maand</target>

			
 
				+				</segment>

			
 
				+			</unit>

			
 
				+			<unit id="90292" name="90290[1]">

			
 
				+				<segment>

			
 
				+					<source>%d months</source>

			
 
				+					<target xml:lang="nl">%d maanden</target>

			
 
				+				</segment>

			
 
				+			</unit>

			
 
				+		</group>

			
 
				+	</file>

			
 
				+</xliff>
			
--- a/tests/core/assets/xml/utf8.xml
+++ b/tests/core/assets/xml/utf8.xml
@@ -0,0 +1,8 @@
 
				+<?xml version="1.0" encoding="utf-8"?>

			
 
				+<!DOCTYPE 恥ずべきフクロウ>

			
 
				+<恥ずべきフクロウ 올빼미_id="Foozle&#32;<![CDATA[<greeting>Hello, world!"</greeting>]]>Barzle">

			
 
				+<부끄러운:barzle>

			
 
				+	<name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>

			
 
				+	<nickname>Owl of Shame</nickname>

			
 
				+	<data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] < ]]> Nonsense.</data>

			
 
				+</부끄러운:barzle>
			
--- a/tests/core/build.bat
+++ b/tests/core/build.bat
@@ -5,34 +5,35 @@ python3 download_assets.py
 
				 echo ---

			
 
				 echo Running core:image tests

			
 
				 echo ---

			
 
				-%PATH_TO_ODIN% run image    %COMMON%

			
 
				+%PATH_TO_ODIN% run image    %COMMON% -out:test_image.exe

			
 
				 

			
 
				 echo ---

			
 
				 echo Running core:compress tests

			
 
				 echo ---

			
 
				-%PATH_TO_ODIN% run compress %COMMON%

			
 
				+%PATH_TO_ODIN% run compress %COMMON% -out:test_compress.exe

			
 
				 

			
 
				 echo ---

			
 
				 echo Running core:strings tests

			
 
				 echo ---

			
 
				-%PATH_TO_ODIN% run strings %COMMON%

			
 
				+%PATH_TO_ODIN% run strings %COMMON% -out:test_strings.exe

			
 
				 

			
 
				 echo ---

			
 
				 echo Running core:hash tests

			
 
				 echo ---

			
 
				-%PATH_TO_ODIN% run hash %COMMON% -o:size

			
 
				+%PATH_TO_ODIN% run hash %COMMON% -o:size -out:test_hash.exe

			
 
				 

			
 
				 echo ---

			
 
				 echo Running core:odin tests

			
 
				 echo ---

			
 
				-%PATH_TO_ODIN% run odin %COMMON% -o:size

			
 
				+%PATH_TO_ODIN% run odin %COMMON% -o:size -out:test_odin.exe

			
 
				 

			
 
				 echo ---

			
 
				 echo Running core:crypto hash tests

			
 
				 echo ---

			
 
				-%PATH_TO_ODIN% run crypto %COMMON%

			
 
				+%PATH_TO_ODIN% run crypto %COMMON% -o:speed -out:test_crypto.exe

			
 
				 

			
 
				 echo ---

			
 
				 echo Running core:encoding tests

			
 
				 echo ---

			
 
				-%PATH_TO_ODIN% run encoding %COMMON%
			
 
				+%PATH_TO_ODIN% run encoding\json %COMMON% -out:test_json.exe

			
 
				+%PATH_TO_ODIN% run encoding\xml %COMMON% -out:test_xml.exe
			
--- a/tests/core/encoding/json/test_core_json.odin
+++ b/tests/core/encoding/json/test_core_json.odin
@@ -8,32 +8,32 @@ TEST_count := 0
 
				 TEST_fail  := 0
			
 
				 
			
 
				 when ODIN_TEST {
			
 
				-    expect  :: testing.expect
			
 
				-    log     :: testing.log
			
 
				+	expect  :: testing.expect
			
 
				+	log     :: testing.log
			
 
				 } else {
			
 
				-    expect  :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
			
 
				-        fmt.printf("[%v] ", loc)
			
 
				-        TEST_count += 1
			
 
				-        if !condition {
			
 
				-            TEST_fail += 1
			
 
				-            fmt.println(message)
			
 
				-            return
			
 
				-        }
			
 
				-        fmt.println(" PASS")
			
 
				-    }
			
 
				-    log     :: proc(t: ^testing.T, v: any, loc := #caller_location) {
			
 
				-        fmt.printf("[%v] ", loc)
			
 
				-        fmt.printf("log: %v\n", v)
			
 
				-    }
			
 
				+	expect  :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
			
 
				+		fmt.printf("[%v] ", loc)
			
 
				+		TEST_count += 1
			
 
				+		if !condition {
			
 
				+			TEST_fail += 1
			
 
				+			fmt.println(message)
			
 
				+			return
			
 
				+		}
			
 
				+		fmt.println(" PASS")
			
 
				+	}
			
 
				+	log     :: proc(t: ^testing.T, v: any, loc := #caller_location) {
			
 
				+		fmt.printf("[%v] ", loc)
			
 
				+		fmt.printf("log: %v\n", v)
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 main :: proc() {
			
 
				-    t := testing.T{}
			
 
				+	t := testing.T{}
			
 
				 	
			
 
				 	parse_json(&t)
			
 
				 	marshal_json(&t)
			
 
				 
			
 
				-    fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
			
 
				+	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
			
 
				 }
			
 
				 
			
 
				 @test
			
--- a/tests/core/encoding/xml/test_core_xml.odin
+++ b/tests/core/encoding/xml/test_core_xml.odin
@@ -0,0 +1,264 @@
 
				+package test_core_xml
			
 
				+
			
 
				+import "core:encoding/xml"
			
 
				+import "core:testing"
			
 
				+import "core:mem"
			
 
				+import "core:fmt"
			
 
				+
			
 
				+Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) {
				+	// Custom (silent) error handler: the body is intentionally empty, so whatever is reported through it is discarded. `run_tests` passes it as the third argument to `xml.parse`.
				+}
			
 
				+
			
 
				+OPTIONS :: xml.Options{ // Options shared by every `TESTS` entry that does not spell out its own.
				+	flags            = {
				+		.Ignore_Unsupported, .Intern_Comments,
				+	},
				+	expected_doctype = "", // Left empty here; the error case in `TESTS` sets "Odin" and expects `.Invalid_DocType`.
				+}
			
 
				+
			
 
				+TEST_count := 0
			
 
				+TEST_fail  := 0
			
 
				+
			
 
				+TEST :: struct { // One table-driven test case, consumed by `run_tests`.
				+	filename: string, // Passed to `xml.parse` as the file to parse.
				+	options:  xml.Options, // Passed to `xml.parse` unchanged.
				+	expected: struct { // What `run_tests` compares the parse result against.
				+		error:        xml.Error, // Compared with the error returned by `xml.parse`.
				+		xml_version:  string, // Compared with the prolog's `version` attribute; check is skipped when empty.
				+		xml_encoding: string, // Compared with the prolog's `encoding` attribute; check is skipped when empty.
				+		doctype:      string, // Compared with `doc.doctype.ident`; always checked, even when empty.
				+	},
				+}
			
 
				+
			
 
				+TESTS :: []TEST{ // Test table iterated in order by `run_tests`.
				+	/*
				+		First we test that certain files parse without error.
				+
				+		The order of these entries matters: `run_tests` selects its file-specific checks
				+		by the entry's position in this table (0, 2 and 3).
				+	*/
				+	{
				+		filename  = "assets/xml/utf8.xml",
				+		options   = OPTIONS,
				+		expected  = {
				+			error        = .None,
				+			xml_version  = "1.0",
				+			xml_encoding = "utf-8",
				+			doctype      = "恥ずべきフクロウ",
				+		},
				+	},
				+	{
				+		filename  = "assets/xml/nl_NL-qt-ts.ts",
				+		options   = OPTIONS,
				+		expected  = {
				+			error        = .None,
				+			xml_version  = "1.0",
				+			xml_encoding = "utf-8",
				+			doctype      = "TS",
				+		},
				+	},
				+	{
				+		filename  = "assets/xml/nl_NL-xliff-1.0.xliff",
				+		options   = OPTIONS,
				+		expected  = {
				+			error        = .None,
				+			xml_version  = "1.0",
				+			xml_encoding = "UTF-8",
				+			doctype      = "",
				+		},
				+	},
				+	{
				+		filename  = "assets/xml/nl_NL-xliff-2.0.xliff",
				+		options   = OPTIONS,
				+		expected  = {
				+			error        = .None,
				+			xml_version  = "1.0",
				+			xml_encoding = "utf-8",
				+			doctype      = "",
				+		},
				+	},
				+
				+	/*
				+		Then we test that certain errors are returned as expected.
				+
				+		This entry parses the same file as the first one, but asks for a DOCTYPE of "Odin",
				+		which the file does not declare.
				+	*/
				+	{
				+		filename  = "assets/xml/utf8.xml",
				+		options   = {
				+			flags            = {
				+				.Ignore_Unsupported, .Intern_Comments,
				+			},
				+			expected_doctype = "Odin",
				+		},
				+		expected  = {
				+			error        = .Invalid_DocType,
				+			xml_version  = "1.0",
				+			xml_encoding = "utf-8",
				+			doctype      = "恥ずべきフクロウ",
				+		},
				+	},
				+}
			
 
				+
			
 
				+/*
				+	Under `odin test` the helpers from `core:testing` are used directly.
				+	Otherwise minimal stand-ins are defined that print to stdout and keep count in `TEST_count` / `TEST_fail`.
				+*/
				+when ODIN_TEST {
				+	expect :: testing.expect
				+	log    :: testing.log
				+} else {
				+	// Prints the caller location, counts the expectation, then prints either " PASS" or `message`.
				+	expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
				+		fmt.printf("[%v] ", loc)
				+		TEST_count += 1
				+
				+		if condition {
				+			fmt.println(" PASS")
				+		} else {
				+			TEST_fail += 1
				+			fmt.println(message)
				+		}
				+	}
				+
				+	// Prints `v` prefixed with the caller location.
				+	log :: proc(t: ^testing.T, v: any, loc := #caller_location) {
				+		fmt.printf("[%v] log: %v\n", loc, v)
				+	}
				+}
			
 
				+
			
 
				+/*
				+	Entry point when built as a regular program instead of through `odin test`.
				+
				+	Runs the tests with a tracking allocator installed as `context.allocator`,
				+	records every allocation still present in its map afterwards as a failed expectation,
				+	and prints the final tally.
				+*/
				+main :: proc() {
				+	track: mem.Tracking_Allocator
				+	mem.tracking_allocator_init(&track, context.allocator)
				+	context.allocator = mem.tracking_allocator(&track)
				+
				+	t := testing.T{}
				+	run_tests(&t)
				+
				+	// Iterating an empty map is a no-op, so no separate length check is needed.
				+	for _, leak in track.allocation_map {
				+		expect(&t, false, fmt.tprintf("%v Leaked %v bytes.", leak.location, leak.size))
				+	}
				+
				+	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
				+}
			
 
				+
			
 
				+/*
				+	Parses every file listed in `TESTS` and checks the result against the entry's expectations:
				+	the returned error, the prolog's `version` and `encoding` attributes (when an expectation is given),
				+	and the DOCTYPE identifier. Entries 0, 2 and 3 additionally get file-specific checks on the root tag.
				+
				+	Attribute and child lookups are guarded: a missing root, attribute or child is recorded as a
				+	failed expectation instead of being indexed out of bounds.
				+*/
				+@test
				+run_tests :: proc(t: ^testing.T) {
				+	using fmt
				+
				+	/*
				+		Expects attribute number `index` of `element` to exist and to have the given key and value.
				+		`loc` is forwarded to `expect` so that failures point at the calling line.
				+	*/
				+	expect_attrib :: proc(t: ^testing.T, element: ^xml.Element, index: int, key, val: string, loc := #caller_location) {
				+		if element == nil || index >= len(element.attribs) {
				+			missing := fmt.tprintf("Expected the tag to have at least %v attribute(s).", index + 1)
				+			expect(t, false, missing, loc)
				+			return
				+		}
				+		attr := element.attribs[index]
				+
				+		attr_err := fmt.tprintf("Expected %v, got %v", key, attr.key)
				+		expect(t, attr.key == key, attr_err, loc)
				+
				+		attr_err  = fmt.tprintf("Expected %v, got %v", val, attr.val)
				+		expect(t, attr.val == val, attr_err, loc)
				+	}
				+
				+	// `index` is the entry's position in `TESTS`; the file-specific checks below key on it.
				+	for test, index in TESTS {
				+		printf("Trying to parse %v\n\n", test.filename)
				+
				+		doc, err := xml.parse(test.filename, test.options, Silent)
				+		defer xml.destroy(doc)
				+
				+		err_msg := tprintf("Expected return value %v, got %v", test.expected.error, err)
				+		expect(t, err == test.expected.error, err_msg)
				+
				+		if len(test.expected.xml_version) > 0 {
				+			xml_version := ""
				+			for attr in doc.prolog {
				+				if attr.key == "version" {
				+					xml_version = attr.val
				+				}
				+			}
				+
				+			err_msg  = tprintf("Expected XML version %v, got %v", test.expected.xml_version, xml_version)
				+			expect(t, xml_version == test.expected.xml_version, err_msg)
				+		}
				+
				+		if len(test.expected.xml_encoding) > 0 {
				+			xml_encoding := ""
				+			for attr in doc.prolog {
				+				if attr.key == "encoding" {
				+					xml_encoding = attr.val
				+				}
				+			}
				+
				+			err_msg  = tprintf("Expected XML encoding %v, got %v", test.expected.xml_encoding, xml_encoding)
				+			expect(t, xml_encoding == test.expected.xml_encoding, err_msg)
				+		}
				+
				+		err_msg  = tprintf("Expected DOCTYPE %v, got %v", test.expected.doctype, doc.doctype.ident)
				+		expect(t, doc.doctype.ident == test.expected.doctype, err_msg)
				+
				+		/*
				+			File-specific tests.
				+		*/
				+		switch index {
				+		case 0:
				+			expect_attrib(t, doc.root, 0, "올빼미_id", "Foozle&#32;<![CDATA[<greeting>Hello, world!\"</greeting>]]>Barzle")
				+
				+			// Only look at the first child if there is one.
				+			if doc.root != nil && len(doc.root.children) > 0 {
				+				child := doc.root.children[0]
				+
				+				first_child_ident := "부끄러운:barzle"
				+				child_err := tprintf("Expected first child tag's ident to be %v, got %v", first_child_ident, child.ident)
				+				expect(t, child.ident == first_child_ident, child_err)
				+			} else {
				+				expect(t, false, "Expected the root tag to have children.")
				+			}
				+
				+		case 2:
				+			expect_attrib(t, doc.root, 0, "version", "1.2")
				+			expect_attrib(t, doc.root, 1, "xmlns",   "urn:oasis:names:tc:xliff:document:1.2")
				+
				+		case 3:
				+			expect_attrib(t, doc.root, 0, "xmlns",   "urn:oasis:names:tc:xliff:document:2.0")
				+			expect_attrib(t, doc.root, 1, "version", "2.0")
				+		}
				+	}
				+}