2
0
Jeroen van Rijn 3 жил өмнө
parent
commit
5807214406

+ 40 - 29
core/encoding/xml/example/xml_example.odin

@@ -1,45 +1,55 @@
 package xml_example
 
 import "core:encoding/xml"
+import "core:os"
+import "core:path"
 import "core:mem"
-import "core:strings"
 import "core:fmt"
 
-Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {
+/*
+	Silent error handler for the parser.
+
+	It takes a position, a format string and its arguments, and discards all of
+	them: the body is empty, so nothing is printed.
+*/
+Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {}
 
-}
-
-FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff"
-DOC      :: #load(FILENAME)
-
-OPTIONS  :: xml.Options{
-	flags            = {
-		.Ignore_Unsupported, .Intern_Comments,
-	},
-	expected_doctype = "",
-}
+OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", }
 
-_main :: proc() {
+/*
+	Example: load and parse `tests/core/assets/XML/unicode.xml` (relative to `ODIN_ROOT`),
+	then check its structure.
+
+	Steps:
+	- Parse the file with `OPTIONS` and the silent `Error_Handler`.
+	- Locate the `<charlist>` child of the document root.
+	- Verify every child of `<charlist>` is a `<character>` tag carrying a `dec` attribute.
+
+	Any failure prints a message and terminates the process with exit code 1.
+*/
+example :: proc() {
 	using fmt
 
-	println("--- DOCUMENT TO PARSE  ---")
-	println(string(DOC))
-	println("--- /DOCUMENT TO PARSE ---\n")
+	filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml")
+	defer delete(filename)
 
-	doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler)
+	doc, err := xml.parse(filename, OPTIONS, Error_Handler)
 	defer xml.destroy(doc)
 
-	buf: strings.Builder
-	defer strings.destroy_builder(&buf)
-	w := strings.to_writer(&buf)
+	if err != .None {
+		printf("Load/Parse error: %v\n", err)
+		if err == .File_Error {
+			/*
+				The asset is not checked in; it is fetched by the download script,
+				which lives in `tests/core/`.
+			*/
+			printf("\"%v\" not found. Did you run \"tests/core/download_assets.py\"?\n", filename)
+		}
+		os.exit(1)
+	}
 
-	xml.print(w, doc)
-	println(strings.to_string(buf))
+	printf("\"%v\" loaded and parsed.\n", filename)
 
-	if err != .None {
-		printf("Parse error: %v\n", err)
-	} else {
-		println("DONE!")
+	charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist")
+	if !charlist_ok {
+		eprintln("Could not locate top-level `<charlist>` tag.")
+		os.exit(1)
+	}
+
+	printf("Found `<charlist>` with %v children.\n", len(charlist.children))
+
+	for char in charlist.children {
+		if char.ident != "character" {
+			eprintf("Expected `<character>`, got `<%v>`\n", char.ident)
+			os.exit(1)
+		}
+
+		if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok {
+			eprintln("`<character dec=\"...\">` attribute not found.")
+			os.exit(1)
+		}
 	}
 }
 
@@ -50,12 +60,13 @@ main :: proc() {
 	mem.tracking_allocator_init(&track, context.allocator)
 	context.allocator = mem.tracking_allocator(&track)
 
-	_main()
+	example()
 
 	if len(track.allocation_map) > 0 {
 		println()
 		for _, v in track.allocation_map {
 			printf("%v Leaked %v bytes.\n", v.location, v.size)
 		}
-	}	
+	}
+	println("Done and cleaned up!")
 }

+ 49 - 0
core/encoding/xml/helpers.odin

@@ -0,0 +1,49 @@
+package xml
+/*
+	An XML 1.0 / 1.1 parser
+
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-3 license.
+
+	This file contains helper functions.
+*/
+
+
+/*
+	Find `tag`'s nth child with a given ident.
+
+	`nth` is zero-based and counts only children of kind `.Element` whose ident
+	equals `ident`; the default of 0 returns the first such child.
+
+	Returns `(nil, false)` if `tag` is nil or no matching child exists.
+*/
+find_child_by_ident :: proc(tag: ^Element, ident: string, nth := 0) -> (res: ^Element, found: bool) {
+	if tag == nil                                 { return nil, false }
+
+	count := 0
+	for child in tag.children {
+		/*
+			Skip children that are not elements, such as comments. They have no name.
+		*/
+		if child.kind  != .Element                { continue }
+
+		/*
+			If the ident matches and it's the nth such child, return it.
+			Otherwise count the match and keep looking.
+		*/
+		if child.ident == ident {
+			if count == nth                       { return child, true }
+			count += 1
+		}
+	}
+	return nil, false
+}
+
+/*
+	Find an attribute by key.
+
+	Returns the value of the first attribute of `tag` whose key equals `key`, and `true`.
+	Returns `("", false)` if `tag` is nil or it has no attribute with that key.
+*/
+find_attribute_val_by_key :: proc(tag: ^Element, key: string) -> (val: string, found: bool) {
+	if tag == nil            { return "", false }
+
+	for attr in tag.attribs {
+		/*
+			If the key matches, we're done. There can only ever be one attribute with the same name.
+		*/
+		if attr.key == key { return attr.val, true }
+	}
+	return "", false
+}

+ 3 - 3
core/encoding/xml/tokenizer.odin

@@ -403,11 +403,11 @@ scan :: proc(t: ^Tokenizer) -> Token {
 		case ':': kind = .Colon
 
 		case '"', '\'':
+			kind = .Invalid
+
 			lit, err = scan_string(t, t.offset, ch, true, false)
 			if err == .None {
 				kind = .String
-			} else {
-				kind = .Invalid
 			}
 
 		case '\n':
@@ -418,7 +418,7 @@ scan :: proc(t: ^Tokenizer) -> Token {
 		}
 	}
 
-	if lit == "" {
+	if kind != .String && lit == "" {
 		lit = string(t.src[offset : t.offset])
 	}
 	return Token{kind, lit, pos}

+ 2 - 0
core/encoding/xml/xml_reader.odin

@@ -519,6 +519,8 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error)
 	_       = expect(t, .Eq)     or_return
 	value  := expect(t, .String) or_return
 
+	error(t, t.offset, "String: %v\n", value)
+
 	attr.key = strings.intern_get(&doc.intern, key.text)
 	attr.val = strings.intern_get(&doc.intern, value.text)
 

+ 2 - 0
tests/core/assets/XML/.gitignore

@@ -0,0 +1,2 @@
+# This file will be downloaded by download_assets.py
+unicode.xml

+ 0 - 0
tests/core/assets/xml/nl_NL-qt-ts.ts → tests/core/assets/XML/nl_NL-qt-ts.ts


+ 0 - 0
tests/core/assets/xml/nl_NL-xliff-1.0.xliff → tests/core/assets/XML/nl_NL-xliff-1.0.xliff


+ 0 - 0
tests/core/assets/xml/nl_NL-xliff-2.0.xliff → tests/core/assets/XML/nl_NL-xliff-2.0.xliff


+ 0 - 0
tests/core/assets/xml/utf8.xml → tests/core/assets/XML/utf8.xml


+ 35 - 8
tests/core/download_assets.py

@@ -50,10 +50,7 @@ def try_download_file(url, out_file):
 	 	print("Could not download", url)
 	 	return 1	
 
-def try_download_and_unpack_zip(suite):
-	url      = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite))
-	out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite)
-
+def try_download_and_unpack_zip(url, out_file, extract_path):
 	print("\tDownloading {} to {}.".format(url, out_file))
 
 	if try_download_file(url, out_file) is not None:
@@ -65,7 +62,6 @@ def try_download_and_unpack_zip(suite):
 		with zipfile.ZipFile(out_file) as z:
 			for file in z.filelist:
 				filename = file.filename
-				extract_path = DOWNLOAD_BASE_PATH.format(suite)
 
 				print("\t\tExtracting: {}".format(filename))
 				z.extract(file, extract_path)
@@ -73,25 +69,56 @@ def try_download_and_unpack_zip(suite):
 		print("Could not extract ZIP file")
 		return 2
 
+def download_png_assets():
+	"""Download the PNG asset ZIP and unpack it into the PNG download directory.
+
+	Returns None on success, or the non-None code returned by
+	try_download_and_unpack_zip when downloading or extracting fails.
+	"""
+	suite = "PNG"
+	url          = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite))
+	out_file     = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite)
+	extract_path = DOWNLOAD_BASE_PATH.format(suite)
 
-def main():
 	print("Downloading PNG assets")
 
 	# Make PNG assets path
 	try:
-		path = DOWNLOAD_BASE_PATH.format("PNG")
+		path = DOWNLOAD_BASE_PATH.format(suite)
 		os.makedirs(path)
 	except FileExistsError:
 		pass
 
 	# Try downloading and unpacking the PNG assets
-	r = try_download_and_unpack_zip("PNG")
+	r = try_download_and_unpack_zip(url, out_file, extract_path)
 	if r is not None:
 		return r
 
 	# We could fall back on downloading the PNG files individually, but it's slow
 
 	print("Done downloading PNG assets")
+
+def download_unicode_assets():
+	"""Download the W3C unicode.xml ZIP and unpack it into the XML download directory.
+
+	The archive is saved as XML.zip inside that directory before extraction.
+
+	Returns None on success, or the non-None code returned by
+	try_download_and_unpack_zip when downloading or extracting fails.
+	"""
+	suite = "XML"
+	url          = "https://www.w3.org/2003/entities/2007xml/unicode.xml.zip"
+	out_file     = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite)
+	extract_path = DOWNLOAD_BASE_PATH.format(suite)
+
+	print("Downloading {}.".format(url))
+
+	# Make XML assets path (same directory as extract_path); it may already exist
+	try:
+		path = DOWNLOAD_BASE_PATH.format(suite)
+		os.makedirs(path)
+	except FileExistsError:
+		pass
+
+	# Try downloading and unpacking the assets
+	r = try_download_and_unpack_zip(url, out_file, extract_path)
+	if r is not None:
+		return r
+
+	print("Done downloading Unicode/XML assets")
+
+def main():
+	download_png_assets()
+	download_unicode_assets()
+
 	return 0
 
 if __name__ == '__main__':

+ 5 - 5
tests/core/encoding/xml/test_core_xml.odin

@@ -35,7 +35,7 @@ TESTS :: []TEST{
 		First we test that certain files parse without error.
 	*/
 	{
-		filename  = "assets/xml/utf8.xml",
+		filename  = "assets/XML/utf8.xml",
 		options   = OPTIONS,
 		expected  = {
 			error        = .None,
@@ -45,7 +45,7 @@ TESTS :: []TEST{
 		},
 	},
 	{
-		filename  = "assets/xml/nl_NL-qt-ts.ts",
+		filename  = "assets/XML/nl_NL-qt-ts.ts",
 		options   = OPTIONS,
 		expected  = {
 			error        = .None,
@@ -55,7 +55,7 @@ TESTS :: []TEST{
 		},
 	},
 	{
-		filename  = "assets/xml/nl_NL-xliff-1.0.xliff",
+		filename  = "assets/XML/nl_NL-xliff-1.0.xliff",
 		options   = OPTIONS,
 		expected  = {
 			error        = .None,
@@ -65,7 +65,7 @@ TESTS :: []TEST{
 		},
 	},
 	{
-		filename  = "assets/xml/nl_NL-xliff-2.0.xliff",
+		filename  = "assets/XML/nl_NL-xliff-2.0.xliff",
 		options   = OPTIONS,
 		expected  = {
 			error        = .None,
@@ -79,7 +79,7 @@ TESTS :: []TEST{
 		Then we test that certain errors are returned as expected.
 	*/
 	{
-		filename  = "assets/xml/utf8.xml",
+		filename  = "assets/XML/utf8.xml",
 		options   = {
 			flags            = {
 				.Ignore_Unsupported, .Intern_Comments,