Browse Source

Merge pull request #1336 from Kelimion/i18n

[i18n] Initial i18n support.
Jeroen van Rijn 3 years ago
parent
commit
aa681932a9

+ 0 - 1
core/encoding/xml/xml_reader.odin

@@ -87,7 +87,6 @@ Option_Flag :: enum {
 		If a tag body has a comment, it will be stripped unless this option is given.
 	*/
 	Keep_Tag_Body_Comments,
-
 }
 Option_Flags :: bit_set[Option_Flag; u16]
 

+ 111 - 0
core/text/i18n/doc.odin

@@ -0,0 +1,111 @@
+//+ignore
+package i18n
+
+/*
+	The i18n package is flexible and easy to use.
+
+	It has one call to get a translation: `get`, which the user can alias into something like `T`.
+
+	`get`, referred to as `T` here, has a few different signatures.
+	All of them will return the key if the entry can't be found in the active translation catalog.
+
+	- `T(key)`              returns the translation of `key`.
+	- `T(key, n)`           returns a pluralized translation of `key` according to value `n`.
+
+	- `T(section, key)`     returns the translation of `key` in `section`.
+	- `T(section, key, n)`  returns a pluralized translation of `key` in `section` according to value `n`.
+
+	By default, lookups take place in the global `i18n.ACTIVE` catalog for ease of use.
+	If you want to override which translation to use, for example in a language preview dialog, you can use the following:
+
+	- `T(key, n, catalog)`           returns the pluralized version of `key` from the explicitly supplied catalog.
+	- `T(section, key, n, catalog)`  returns the pluralized version of `key` in `section` from the explicitly supplied catalog.
+
+	If a catalog has translation contexts or sections, then omitting the section in the above calls looks the key up in section "".
+
+	The default pluralization rule is n != 1, which is to say that passing n == 1 (or not passing n) returns the singular form.
+	Passing n != 1 returns plural form 1.
+
+	Should a language not conform to this rule, you can pass a pluralizer procedure to the catalog parser.
+	This is a procedure that maps an integer to an integer, taking a value and returning which plural slot should be used.
+
+	You can also assign it to a loaded catalog after parsing, of course.
+
+	Some code examples follow.
+*/
+
+/*
+```cpp
+import "core:fmt"
+import "core:text/i18n"
+
+T :: i18n.get
+
+mo :: proc() {
+	using fmt
+
+	err: i18n.Error
+
+	/*
+		Parse MO file and set it as the active translation so we can omit `get`'s "catalog" parameter.
+	*/
+	i18n.ACTIVE, err = i18n.parse_mo(#load("translations/nl_NL.mo"))
+	defer i18n.destroy()
+
+	if err != .None { return }
+
+	/*
+		These are in the .MO catalog.
+	*/
+	println("-----")
+	println(T(""))
+	println("-----")
+	println(T("There are 69,105 leaves here."))
+	println("-----")
+	println(T("Hellope, World!"))
+	println("-----")
+	// We pass 1 into `T` to get the singular format string, then 1 again into printf.
+	printf(T("There is %d leaf.\n", 1), 1)
+	// We pass 42 into `T` to get the plural format string, then 42 again into printf.
+	printf(T("There is %d leaf.\n", 42), 42)
+
+	/*
+		This isn't in the translation catalog, so the key is passed back untranslated.
+	*/
+	println("-----")
+	println(T("Come visit us on Discord!"))
+}
+
+qt :: proc() {
+	using fmt
+
+	err: i18n.Error
+
+	/*
+		Parse QT file and set it as the active translation so we can omit `get`'s "catalog" parameter.
+	*/
+	i18n.ACTIVE, err = i18n.parse_qt(#load("translations/nl_NL-qt-ts.ts"))
+	defer i18n.destroy()
+
+	if err != .None {
+		return
+	}
+
+	/*
+		These are in the .TS catalog. As you can see they have sections.
+	*/
+	println("--- Page section ---")
+	println("Page:Text for translation =", T("Page", "Text for translation"))
+	println("-----")
+	println("Page:Also text to translate =", T("Page", "Also text to translate"))
+	println("-----")
+	println("--- installscript section ---")
+	println("installscript:99 bottles of beer on the wall =", T("installscript", "99 bottles of beer on the wall"))
+	println("-----")
+	println("--- apple_count section ---")
+	println("apple_count:%d apple(s) =")
+	println("\t 1  =", T("apple_count", "%d apple(s)", 1))
+	println("\t 42 =", T("apple_count", "%d apple(s)", 42))
+}
+```
+*/

+ 167 - 0
core/text/i18n/gettext.odin

@@ -0,0 +1,167 @@
+package i18n
+/*
+	A parser for GNU GetText .MO files.
+
+	Copyright 2021-2022 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-3 license.
+
+	A from-scratch implementation based on the specification found here:
+		https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+	Options are ignored as they're not applicable to this format.
+	They're part of the signature for consistency with other catalog formats.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+import "core:os"
+import "core:strings"
+import "core:bytes"
+
+/*
+	Parses a GNU gettext .MO catalog from memory into a newly allocated `Translation`.
+
+	Inputs:
+	- data:       The raw contents of the .MO file.
+	- options:    Unused by this format; present for signature parity with the other catalog parsers.
+	- pluralizer: Optional procedure mapping a number to a plural slot. It is stored on the returned catalog.
+	- allocator:  Used for the catalog, its interner and the temporary split buffers.
+
+	Returns:
+	- translation: The parsed catalog. Once it has been allocated it is returned even when `err != .None`,
+	               so callers should always hand the result to `destroy` (which accepts `nil`).
+	- err:         `.None` on success, otherwise the reason parsing stopped.
+*/
+parse_mo_from_slice :: proc(data: []u8, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+	/*
+		An MO file should have at least a 4-byte magic, 2 x 2 byte version info,
+		a 4-byte number of strings value, and 2 x 4-byte offsets.
+	*/
+	if len(data) < 20 {
+		return {}, .MO_File_Invalid
+	}
+
+	/*
+		Check magic. Should be 0x950412de in native Endianness.
+		If it only matches after a byte swap, the file was written with the opposite byte order,
+		and every other multi-byte field in it has to be read swapped as well.
+	*/
+	native := true
+	magic  := read_u32(data, native) or_return
+
+	if magic != 0x950412de {
+		native = false
+		magic = read_u32(data, native) or_return
+
+		if magic != 0x950412de { return {}, .MO_File_Invalid_Signature }
+	}
+
+	/*
+		We can ignore version_minor at offset 6.
+	*/
+	version_major := read_u16(data[4:], native) or_return
+	if version_major > 1 { return {}, .MO_File_Unsupported_Version }
+
+	count             := read_u32(data[ 8:], native) or_return
+	original_offset   := read_u32(data[12:], native) or_return
+	translated_offset := read_u32(data[16:], native) or_return
+
+	if count == 0 { return {}, .Empty_Translation_Catalog }
+
+	/*
+		Initialize Translation, interner and optional pluralizer.
+	*/
+	translation = new(Translation)
+	translation.pluralize = pluralizer
+	strings.intern_init(&translation.intern, allocator, allocator)
+
+	// Gettext MO files only have one section.
+	translation.k_v[""] = {}
+	section := &translation.k_v[""]
+
+	/*
+		All bounds checks below are done in 64 bits, so that offsets and lengths taken
+		from an untrusted file can't wrap around a `u32` and slip past the check.
+	*/
+	data_len := i64(len(data))
+	zero     := []byte{0}
+
+	for n := u32(0); n < count; n += 1 {
+		/*
+			Grab the original string's length and offset from the originals table.
+		*/
+		o_entry := i64(original_offset) + 8 * i64(n)
+		if data_len < o_entry + 8 { return translation, .MO_File_Invalid }
+
+		o_length := read_u32(data[o_entry    :], native) or_return
+		o_offset := read_u32(data[o_entry + 4:], native) or_return
+
+		/*
+			Same for the translated string, from the translations table.
+		*/
+		t_entry := i64(translated_offset) + 8 * i64(n)
+		if data_len < t_entry + 8 { return translation, .MO_File_Invalid }
+
+		t_length := read_u32(data[t_entry    :], native) or_return
+		t_offset := read_u32(data[t_entry + 4:], native) or_return
+
+		/*
+			Both strings, plus the byte following each of them, have to lie within the file.
+		*/
+		o_end := i64(o_offset) + i64(o_length) + 1
+		t_end := i64(t_offset) + i64(t_length) + 1
+		if data_len < max(o_end, t_end) { return translation, .Premature_EOF }
+
+		key := data[o_offset:][:o_length]
+		val := data[t_offset:][:t_length]
+
+		/*
+			Could be a pluralized string, in which case the forms are separated by a zero byte.
+			The split results are temporary; release them at the end of this iteration,
+			including when we bail out early with an error.
+		*/
+		keys := bytes.split(key, zero)
+		defer delete(keys)
+
+		vals := bytes.split(val, zero)
+		defer delete(vals)
+
+		if len(keys) != len(vals) || len(keys) > MAX_PLURALS {
+			return translation, .MO_File_Incorrect_Plural_Count
+		}
+
+		/*
+			Every key form (singular and plural) gets its own slice holding all translated forms.
+		*/
+		for k in keys {
+			interned_key := strings.intern_get(&translation.intern, string(k))
+
+			interned_vals := make([]string, len(vals))
+			for v, i in vals {
+				interned_vals[i] = strings.intern_get(&translation.intern, string(v))
+			}
+
+			// Don't leak the previous slice should the file repeat a key.
+			if interned_key in section {
+				delete(section[interned_key])
+			}
+			section[interned_key] = interned_vals
+		}
+	}
+	return
+}
+
+/*
+	Reads `filename` in its entirety and parses it as a GNU gettext .MO catalog.
+
+	Returns `.File_Error` if the file couldn't be read, otherwise whatever `parse_mo_from_slice` returns.
+	The file contents are released again before returning.
+*/
+parse_mo_file :: proc(filename: string, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+
+	contents, read_ok := os.read_entire_file(filename)
+	defer delete(contents)
+
+	if !read_ok {
+		return nil, .File_Error
+	}
+
+	translation, err = parse_mo_from_slice(contents, options, pluralizer, allocator)
+	return
+}
+
+// Overload group: `parse_mo` accepts either a filename (`parse_mo_file`) or the file's contents (`parse_mo_from_slice`).
+parse_mo :: proc { parse_mo_file, parse_mo_from_slice }
+
+/*
+	Helpers.
+*/
+/*
+	Reads a `u32` from the start of `data`.
+
+	If `native_endian` is false, the bytes are interpreted in the byte order opposite to the host's.
+	Returns `.Premature_EOF` if `data` holds fewer than 4 bytes.
+*/
+read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) {
+	if len(data) < size_of(u32) {
+		return 0, .Premature_EOF
+	}
+
+	raw := (^u32)(raw_data(data))^
+
+	if !native_endian {
+		// Reinterpret the loaded bits using the non-host byte order.
+		when ODIN_ENDIAN == .Little {
+			raw = u32(transmute(u32be)raw)
+		} else {
+			raw = u32(transmute(u32le)raw)
+		}
+	}
+	return raw, .None
+}
+
+/*
+	Reads a `u16` from the start of `data`.
+
+	If `native_endian` is false, the bytes are interpreted in the byte order opposite to the host's.
+	Returns `.Premature_EOF` if `data` holds fewer than 2 bytes.
+*/
+read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) {
+	if len(data) < size_of(u16) {
+		return 0, .Premature_EOF
+	}
+
+	raw := (^u16)(raw_data(data))^
+
+	if !native_endian {
+		// Reinterpret the loaded bits using the non-host byte order.
+		when ODIN_ENDIAN == .Little {
+			raw = u16(transmute(u16be)raw)
+		} else {
+			raw = u16(transmute(u16le)raw)
+		}
+	}
+	return raw, .None
+}

+ 178 - 0
core/text/i18n/i18n.odin

@@ -0,0 +1,178 @@
+package i18n
+/*
+	Internationalization helpers.
+
+	Copyright 2021-2022 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-3 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+import "core:strings"
+
+/*
+	TODO:
+	- Support for more translation catalog file formats.
+*/
+
+/*
+	Currently active catalog.
+*/
+ACTIVE: ^Translation
+
+// Allow between 1 and 255 plural forms. Default: 10.
+MAX_PLURALS :: min(max(#config(ODIN_i18N_MAX_PLURAL_FORMS, 10), 1), 255)
+
+/*
+	The main data structure. This can be generated from various different file formats, as long as we have a parser for them.
+*/
+
+// A section maps a key (the untranslated source string) to its translations, one entry per plural form.
+Section :: map[string][]string
+
+// A loaded translation catalog, as produced by the `parse_*` procedures and released with `destroy`.
+Translation :: struct {
+	k_v:    map[string]Section, // k_v[section][key][plural_form] = ...
+	// String interner the parsers run translated strings through; torn down by `destroy`.
+	intern: strings.Intern,
+
+	// Optional: maps a number to the plural slot to use. When nil, `get` falls back to the `n != 1` rule.
+	pluralize: proc(number: int) -> int,
+}
+
+// Errors returned by the catalog parsers. `.None` signals success.
+Error :: enum {
+	/*
+		General return values.
+	*/
+	None = 0,
+	Empty_Translation_Catalog,
+	Duplicate_Key,
+
+	/*
+		Couldn't find, open or read file.
+	*/
+	File_Error,
+
+	/*
+		File too short.
+	*/
+	Premature_EOF,
+
+	/*
+		GNU Gettext *.MO file errors.
+	*/
+	MO_File_Invalid_Signature,
+	MO_File_Unsupported_Version,
+	MO_File_Invalid,
+	MO_File_Incorrect_Plural_Count,
+
+	/*
+		Qt Linguist *.TS file errors.
+	*/
+	TS_File_Parse_Error,
+	TS_File_Expected_Context,
+	TS_File_Expected_Context_Name,
+	TS_File_Expected_Source,
+	TS_File_Expected_Translation,
+	TS_File_Expected_NumerusForm,
+
+}
+
+// Options shared by all catalog parsers. A parser may ignore options that don't apply to its format.
+Parse_Options :: struct {
+	// When set, the Qt Linguist parser stores the messages of every context in the unnamed section ""
+	// instead of in a section named after the context.
+	merge_sections: bool,
+}
+
+// The options the parsers use when the caller doesn't supply any: sections are kept separate.
+DEFAULT_PARSE_OPTIONS :: Parse_Options{
+	merge_sections = false,
+}
+
+/*
+	Looks up `key` in the unnamed section "" and returns the plural form appropriate for `number`.
+
+	Several ways to use:
+	- get(key), which uses the default `number` and the i18n.ACTIVE catalog, or
+	- get(key, number), which returns the appropriate plural from the active catalog, or
+	- get(key, number, catalog) to grab text from a specific one.
+
+	Returns `key` itself if the catalog is nil or doesn't contain the key.
+
+	NOTE(review): `number` defaults to 0, which the default rule below maps to plural slot 1, not the singular.
+	For entries with only one form the slot is clamped, so this only shows for pluralized entries. Confirm intent.
+*/
+get_single_section :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	/*
+		A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule.
+	*/
+	plural := 1 if number != 1 else 0
+
+	/*
+		The catalog may legitimately be nil, e.g. when no translation has been loaded yet.
+		Only consult its pluralizer if there is a catalog to begin with; `get_by_slot` returns the key for a nil catalog.
+	*/
+	if catalog != nil && catalog.pluralize != nil {
+		plural = catalog.pluralize(number)
+	}
+	return get_by_slot(key, plural, catalog)
+}
+
+/*
+	Looks up `key` in `section` and returns the plural form appropriate for `number`.
+
+	Several ways to use:
+	- get(section, key), which uses the default `number` and the i18n.ACTIVE catalog, or
+	- get(section, key, number), which returns the appropriate plural from the active catalog, or
+	- get(section, key, number, catalog) to grab text from a specific one.
+
+	Returns `key` itself if the catalog is nil, or doesn't contain the section or the key.
+
+	NOTE(review): `number` defaults to 0, which the default rule below maps to plural slot 1, not the singular.
+	For entries with only one form the slot is clamped, so this only shows for pluralized entries. Confirm intent.
+*/
+get_by_section :: proc(section, key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	/*
+		A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule.
+	*/
+	plural := 1 if number != 1 else 0
+
+	/*
+		The catalog may legitimately be nil, e.g. when no translation has been loaded yet.
+		Only consult its pluralizer if there is a catalog to begin with; `get_by_slot` returns the key for a nil catalog.
+	*/
+	if catalog != nil && catalog.pluralize != nil {
+		plural = catalog.pluralize(number)
+	}
+	return get_by_slot(section, key, plural, catalog)
+}
+// Overload group: `get(key, ...)` looks in section "", `get(section, key, ...)` in the named section.
+get :: proc{get_single_section, get_by_section}
+
+/*
+	Returns the translation of `key` stored in plural slot `slot` of the unnamed section "".
+
+	- get_by_slot(key) asks for slot 0 of the i18n.ACTIVE catalog,
+	- get_by_slot(key, slot) asks for a specific slot of the active catalog,
+	- get_by_slot(key, slot, catalog) asks a specific catalog.
+
+	If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string.
+*/
+get_by_slot_single_section :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	// The single-section form is the sectioned lookup with the unnamed section "".
+	value = get_by_slot_by_section("", key, slot, catalog)
+	return
+}
+
+/*
+	Returns the translation of `key` stored in plural slot `slot` of `section`.
+
+	Several ways to use:
+	- get_by_slot(section, key), which defaults to slot 0 and the i18n.ACTIVE catalog, or
+	- get_by_slot(section, key, slot), which returns the requested plural from the active catalog, or
+	- get_by_slot(section, key, slot, catalog) to grab text from a specific one.
+
+	`slot` is clamped to the range of forms available for the key.
+	Returns `key` itself if the catalog is nil, or if the section or key is unknown, or has no forms.
+
+	If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string.
+*/
+get_by_slot_by_section :: proc(section, key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	if catalog == nil || section not_in catalog.k_v {
+		/*
+			Return the key if the catalog hasn't been initialized yet, or the section is not present.
+		*/
+		return key
+	}
+
+	/*
+		Return the translation from the requested slot if this key is known, else return the key.
+		An entry without any forms is treated as unknown, so we never index an empty slice.
+	*/
+	if translations, ok := catalog.k_v[section][key]; ok && len(translations) > 0 {
+		plural := min(max(0, slot), len(translations) - 1)
+		return translations[plural]
+	}
+	return key
+}
+// Overload group: `get_by_slot(key, ...)` looks in section "", `get_by_slot(section, key, ...)` in the named section.
+get_by_slot :: proc{get_by_slot_single_section, get_by_slot_by_section}
+
+/*
+	Releases a catalog and everything it owns:
+	- destroy(), to clean up the currently active catalog i18n.ACTIVE
+	- destroy(catalog), to clean up a specific catalog.
+
+	Passing nil is allowed and does nothing.
+	If the destroyed catalog is the active one, i18n.ACTIVE is reset to nil so later lookups
+	fall back to returning the key instead of touching freed memory.
+*/
+destroy :: proc(catalog: ^Translation = ACTIVE) {
+	if catalog == nil {
+		return
+	}
+
+	/*
+		Release the per-key slices and per-section maps first.
+		We iterate over the values directly, so no map lookup by key is needed while tearing down.
+	*/
+	for _, section in catalog.k_v {
+		for _, forms in section {
+			delete(forms)
+		}
+		delete(section)
+	}
+	delete(catalog.k_v)
+
+	/*
+		The interner backs the strings stored in the maps above, so it goes last.
+	*/
+	strings.intern_destroy(&catalog.intern)
+
+	if catalog == ACTIVE {
+		ACTIVE = nil
+	}
+	free(catalog)
+}

+ 153 - 0
core/text/i18n/qt_linguist.odin

@@ -0,0 +1,153 @@
+package i18n
+/*
+	A parser for Qt Linguist TS files.
+
+	Copyright 2022 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-3 license.
+
+	A from-scratch implementation based on the specification found here:
+		https://doc.qt.io/qt-5/linguist-ts-file-format.html
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+import "core:os"
+import "core:encoding/xml"
+import "core:strings"
+
+// XML parser options used for Qt Linguist TS files: the document must have a prolog and a `TS` doctype.
+TS_XML_Options := xml.Options{
+	flags = {
+		.Input_May_Be_Modified,
+		.Must_Have_Prolog,
+		.Must_Have_DocType,
+		.Ignore_Unsupported,
+		.Unbox_CDATA,
+		.Decode_SGML_Entities,
+	},
+	expected_doctype = "TS",
+}
+
+/*
+	Parses a Qt Linguist .TS catalog from memory into a newly allocated `Translation`.
+
+	Each `<context>` becomes a section named after its `<name>`, unless `options.merge_sections` is set,
+	in which case all messages end up in the unnamed section "".
+
+	Inputs:
+	- data:       The raw contents of the .TS file. The XML parser is allowed to modify it (`.Input_May_Be_Modified`).
+	- options:    See `Parse_Options`.
+	- pluralizer: Optional procedure mapping a number to a plural slot. It is stored on the returned catalog.
+	- allocator:  Used for the catalog, its interner and the temporary XML document.
+
+	Returns:
+	- translation: The parsed catalog. Once it has been allocated it is returned even when `err != .None`,
+	               so callers should always hand the result to `destroy` (which accepts `nil`).
+	- err:         `.None` on success, otherwise the reason parsing stopped.
+*/
+parse_qt_linguist_from_slice :: proc(data: []u8, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+
+	ts, xml_err := xml.parse(data, TS_XML_Options)
+	defer xml.destroy(ts)
+
+	if xml_err != .None || ts.element_count < 1 || ts.elements[0].ident != "TS" || len(ts.elements[0].children) == 0 {
+		return nil, .TS_File_Parse_Error
+	}
+
+	/*
+		Initialize Translation, interner and optional pluralizer.
+	*/
+	translation = new(Translation)
+	translation.pluralize = pluralizer
+	strings.intern_init(&translation.intern, allocator, allocator)
+
+	section: ^Section
+
+	for child_id in ts.elements[0].children {
+		// These should be <context>s.
+		child := ts.elements[child_id]
+		if child.ident != "context" {
+			return translation, .TS_File_Expected_Context
+		}
+
+		// Find section name.
+		section_name_id, section_name_found := xml.find_child_by_ident(ts, child_id, "name")
+		if !section_name_found {
+			return translation, .TS_File_Expected_Context_Name
+		}
+
+		/*
+			The XML document is destroyed when we return, so anything we keep as a map key
+			has to be interned into the catalog first, same as the translated strings.
+		*/
+		section_name := ""
+		if !options.merge_sections {
+			section_name = strings.intern_get(&translation.intern, ts.elements[section_name_id].value)
+		}
+
+		if section_name not_in translation.k_v {
+			translation.k_v[section_name] = {}
+		}
+		section = &translation.k_v[section_name]
+
+		// Find messages in section.
+		nth: int
+		for {
+			message_id, message_found := xml.find_child_by_ident(ts, child_id, "message", nth)
+			if !message_found {
+				break
+			}
+
+			numerus_tag, _ := xml.find_attribute_val_by_key(ts, message_id, "numerus")
+			has_plurals := numerus_tag == "yes"
+
+			// We must have a <source> = key
+			source_id, source_found := xml.find_child_by_ident(ts, message_id, "source")
+			if !source_found {
+				return translation, .TS_File_Expected_Source
+			}
+
+			// We must have a <translation>
+			translation_id, translation_found := xml.find_child_by_ident(ts, message_id, "translation")
+			if !translation_found {
+				return translation, .TS_File_Expected_Translation
+			}
+
+			xlat := ts.elements[translation_id]
+
+			// Interned, so the key stays valid after the XML document is gone.
+			source_key := strings.intern_get(&translation.intern, ts.elements[source_id].value)
+
+			if source_key in section {
+				return translation, .Duplicate_Key
+			}
+
+			if has_plurals {
+				// A pluralized <translation> carries its text in <numerusform> children only.
+				if xlat.value != "" {
+					return translation, .TS_File_Expected_NumerusForm
+				}
+
+				// First pass: count the plural forms so we can allocate the slice in one go.
+				num_plurals: int
+				for {
+					_, numerus_found := xml.find_child_by_ident(ts, translation_id, "numerusform", num_plurals)
+					if !numerus_found {
+						break
+					}
+					num_plurals += 1
+				}
+
+				if num_plurals < 2 {
+					return translation, .TS_File_Expected_NumerusForm
+				}
+
+				// Second pass: intern each form into its slot. The first pass established they all exist.
+				forms := make([]string, num_plurals)
+				for form_idx in 0..<num_plurals {
+					numerus_id, _ := xml.find_child_by_ident(ts, translation_id, "numerusform", form_idx)
+					forms[form_idx] = strings.intern_get(&translation.intern, ts.elements[numerus_id].value)
+				}
+				section[source_key] = forms
+			} else {
+				// Single translation
+				forms := make([]string, 1)
+				forms[0] = strings.intern_get(&translation.intern, xlat.value)
+				section[source_key] = forms
+			}
+
+			nth += 1
+		}
+	}
+
+	return
+}
+
+/*
+	Reads `filename` in its entirety and parses it as a Qt Linguist .TS catalog.
+
+	Returns `.File_Error` if the file couldn't be read, otherwise whatever `parse_qt_linguist_from_slice` returns.
+	The file contents are released again before returning.
+*/
+parse_qt_linguist_file :: proc(filename: string, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+
+	contents, read_ok := os.read_entire_file(filename)
+	defer delete(contents)
+
+	if !read_ok {
+		return nil, .File_Error
+	}
+
+	translation, err = parse_qt_linguist_from_slice(contents, options, pluralizer, allocator)
+	return
+}
+
+// Overload group: `parse_qt` accepts either a filename (`parse_qt_linguist_file`) or the file's contents (`parse_qt_linguist_from_slice`).
+parse_qt :: proc { parse_qt_linguist_file, parse_qt_linguist_from_slice }

+ 4 - 0
examples/all/all_main.odin

@@ -56,6 +56,7 @@ import csv            "core:encoding/csv"
 import hxa            "core:encoding/hxa"
 import json           "core:encoding/json"
 import varint         "core:encoding/varint"
+import xml            "core:encoding/xml"
 
 import fmt            "core:fmt"
 import hash           "core:hash"
@@ -100,6 +101,7 @@ import strings        "core:strings"
 import sync           "core:sync"
 import testing        "core:testing"
 import scanner        "core:text/scanner"
+import i18n           "core:text/i18n"
 import thread         "core:thread"
 import time           "core:time"
 
@@ -158,6 +160,7 @@ _ :: csv
 _ :: hxa
 _ :: json
 _ :: varint
+_ :: xml
 _ :: fmt
 _ :: hash
 _ :: image
@@ -192,6 +195,7 @@ _ :: strings
 _ :: sync
 _ :: testing
 _ :: scanner
+_ :: i18n
 _ :: thread
 _ :: time
 _ :: unicode

+ 3 - 2
tests/core/Makefile

@@ -26,9 +26,10 @@ noise_test:
 	$(ODIN) run math/noise -out:test_noise
 
 encoding_test:
-	$(ODIN) run encoding/hxa -collection:tests=.. -out:test_hxa
-	$(ODIN) run encoding/json -out:test_json
+	$(ODIN) run encoding/hxa    -out:test_hxa -collection:tests=..
+	$(ODIN) run encoding/json   -out:test_json
 	$(ODIN) run encoding/varint -out:test_varint
+	$(ODIN) run encoding/xml    -out:test_xml
 
 math_test:
 	$(ODIN) run math/test_core_math.odin -file -collection:tests=.. -out:test_core_math

+ 22 - 0
tests/core/assets/I18N/duplicate-key.ts

@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE TS>
+<TS version="2.1" language="nl" sourcelanguage="en">
+<context>
+	<name>Page</name>
+	<message>
+		<source>%d apple(s)</source>
+		<comment>commenting</comment>
+		<translation type="obsolete">Tekst om te vertalen</translation>
+	</message>
+</context>
+<context>
+		<name>apple_count</name>
+		<message numerus="yes">
+			<source>%d apple(s)</source>
+			<translation>
+				<numerusform>%d appel</numerusform>
+				<numerusform>%d appels</numerusform>
+			</translation>
+		</message>
+	</context>
+</TS>

+ 30 - 0
tests/core/assets/I18N/messages.pot

@@ -0,0 +1,30 @@
+# Odin i18n Example
+# Copyright (C) 2021 Jeroen van Rijn
+# This file is distributed under the same license as the PACKAGE package.
+# Jeroen van Rijn <[email protected]>, 2021.
+#
+#, fuzzy
+msgid   ""
+msgstr  "Project-Id-Version: Example 0.0.1\n"
+        "Report-Msgid-Bugs-To: Jeroen van Rijn <[email protected]>\n"
+        "POT-Creation-Date: 2021-11-27 19:23+0100\n"
+        "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+        "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+        "Language: en-GB\n"
+        "MIME-Version: 1.0\n"
+        "Content-Type: text/plain; charset=UTF-8\n"
+        "Content-Transfer-Encoding: 8bit\n"
+
+#: i18n_example.odin:28
+msgid "There are 69,105 leaves here."
+msgstr "Er zijn hier 69.105 bladeren."
+
+#: i18n_example.odin:30
+msgid "Hellope, World!"
+msgstr "Hallo, Wereld!"
+
+#: i18n_example.odin:36
+msgid "There is %d leaf.\n"
+msgid_plural "There are %d leaves.\n"
+msgstr[0] "Er is %d blad.\n"
+msgstr[1] "Er zijn %d bladeren.\n"

+ 35 - 0
tests/core/assets/I18N/nl_NL-qt-ts.ts

@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE TS>
+<TS version="2.1" language="nl" sourcelanguage="en">
+<context>
+	<name>Page</name>
+	<message>
+		<source>Text for translation</source>
+		<comment>commenting</comment>
+		<translation type="obsolete">Tekst om te vertalen</translation>
+	</message>
+	<message>
+		 <source>Also text to translate</source>
+		 <extracomment>some text</extracomment>
+		<translation>Ook tekst om te vertalen</translation>
+	</message>
+</context>
+<context>
+	<name>installscript</name>
+	<message>
+		<source>99 bottles of beer on the wall</source>
+		<oldcomment>some new comments here</oldcomment>
+		<translation>99 flessen bier op de muur</translation>
+	</message>
+</context>
+<context>
+		<name>apple_count</name>
+		<message numerus="yes">
+			<source>%d apple(s)</source>
+			<translation>
+				<numerusform>%d appel</numerusform>
+				<numerusform>%d appels</numerusform>
+			</translation>
+		</message>
+	</context>
+</TS>

+ 0 - 0
tests/core/assets/XML/nl_NL-xliff-1.2.xliff → tests/core/assets/I18N/nl_NL-xliff-1.2.xliff


+ 0 - 0
tests/core/assets/XML/nl_NL-xliff-2.0.xliff → tests/core/assets/I18N/nl_NL-xliff-2.0.xliff


BIN
tests/core/assets/I18N/nl_NL.mo


+ 33 - 0
tests/core/assets/I18N/nl_NL.po

@@ -0,0 +1,33 @@
+# Odin i18n Example
+# Copyright (C) 2021 Jeroen van Rijn
+# This file is distributed under the same license as the PACKAGE package.
+# Jeroen van Rijn <[email protected]>, 2021.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: Example 0.0.1\n"
+"Report-Msgid-Bugs-To: Jeroen van Rijn <[email protected]>\n"
+"POT-Creation-Date: 2021-11-27 19:23+0100\n"
+"PO-Revision-Date: 2021-11-28 02:56+0100\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Language-Team: Odin Language Team\n"
+"X-Generator: Poedit 3.0\n"
+"Last-Translator: Jeroen van Rijn\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+"Language: nl_NL\n"
+
+#: i18n_example.odin:28
+msgid "There are 69,105 leaves here."
+msgstr "Er zijn hier 69.105 bladeren."
+
+#: i18n_example.odin:30
+msgid "Hellope, World!"
+msgstr "Hallo, Wereld!"
+
+#: i18n_example.odin:36
+msgid "There is %d leaf.\n"
+msgid_plural "There are %d leaves.\n"
+msgstr[0] "Er is %d blad.\n"
+msgstr[1] "Er zijn %d bladeren.\n"

+ 0 - 35
tests/core/assets/XML/nl_NL-qt-ts.ts

@@ -1,35 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE TS>
-<TS version="2.1" language="nl" sourcelanguage="en">
-<context>
-  <name>Page</name>
-  <message>
-    <source>Text for translation</source>
-    <comment>commenting</comment>
-    <translation type="obsolete">Tekst om te vertalen</translation>
-  </message>
-  <message>
-     <source>Also text to translate</source>
-     <extracomment>some text</extracomment>
-    <translation>Ook tekst om te vertalen</translation>
-  </message>
-</context>
-<context>
-  <name>installscript</name>
-  <message>
-    <source>99 bottles of beer on the wall</source>
-    <oldcomment>some new comments here</oldcomment>
-    <translation>99 flessen bier op de muur</translation>
-  </message>
-</context>
-<context>
-    <name>apple_count</name>
-    <message numerus="yes">
-      <source>%d apple(s)</source>
-      <translation>
-        <numerusform>%d appel</numerusform>
-        <numerusform>%d appels</numerusform>
-      </translation>
-    </message>
-  </context>
-</TS>

+ 6 - 1
tests/core/build.bat

@@ -64,4 +64,9 @@ echo ---
 echo ---
 echo Running core:reflect tests
 echo ---
-%PATH_TO_ODIN% run reflect %COMMON% %COLLECTION% -out:test_core_reflect.exe
+%PATH_TO_ODIN% run reflect %COMMON% %COLLECTION% -out:test_core_reflect.exe
+
+echo ---
+echo Running core:text/i18n tests
+echo ---
+%PATH_TO_ODIN% run text\i18n %COMMON% -out:test_core_i18n.exe

+ 11 - 22
tests/core/encoding/xml/test_core_xml.odin

@@ -27,7 +27,7 @@ TEST :: struct {
 /*
 	Relative to ODIN_ROOT
 */
-TEST_FILE_PATH_PREFIX :: "tests/core/assets/XML"
+TEST_FILE_PATH_PREFIX :: "tests/core/assets"
 
 TESTS :: []TEST{
 	/*
@@ -35,23 +35,12 @@ TESTS :: []TEST{
 	*/
 
 	{
-		/*
-		<?xml version="1.0" encoding="utf-8"?>
-		<!DOCTYPE 恥ずべきフクロウ>
-		<恥ずべきフクロウ 올빼미_id="Foozle&#32;<![CDATA[<greeting>Hello, world!"</greeting>]]>Barzle">
-		<부끄러운:barzle>
-			<name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>
-			<nickname>Owl of Shame</nickname>
-			<data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] <$]]> Nonsense.</data>
-		</부끄러운:barzle>
-		*/
-
 		/*
 			Tests UTF-8 idents and values.
 			Test namespaced ident.
 			Tests that nested partial CDATA start doesn't trip up parser.
 		*/
-		filename  = "utf8.xml",
+		filename  = "XML/utf8.xml",
 		options   = {
 			flags = {
 				.Ignore_Unsupported, .Intern_Comments,
@@ -66,7 +55,7 @@ TESTS :: []TEST{
 			Same as above.
 			Unbox CDATA in data tag.
 		*/
-		filename  = "utf8.xml",
+		filename  = "XML/utf8.xml",
 		options   = {
 			flags = {
 				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA,
@@ -81,7 +70,7 @@ TESTS :: []TEST{
 			Simple Qt TS translation file.
 			`core:i18n` requires it to be parsed properly.
 		*/
-		filename  = "nl_NL-qt-ts.ts",
+		filename  = "I18N/nl_NL-qt-ts.ts",
 		options   = {
 			flags = {
 				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
@@ -96,7 +85,7 @@ TESTS :: []TEST{
 			Simple XLiff 1.2 file.
 			`core:i18n` requires it to be parsed properly.
 		*/
-		filename  = "nl_NL-xliff-1.2.xliff",
+		filename  = "I18N/nl_NL-xliff-1.2.xliff",
 		options   = {
 			flags = {
 				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
@@ -111,7 +100,7 @@ TESTS :: []TEST{
 			Simple XLiff 2.0 file.
 			`core:i18n` requires it to be parsed properly.
 		*/
-		filename  = "nl_NL-xliff-2.0.xliff",
+		filename  = "I18N/nl_NL-xliff-2.0.xliff",
 		options   = {
 			flags = {
 				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
@@ -122,7 +111,7 @@ TESTS :: []TEST{
 	},
 
 	{
-		filename  = "entities.html",
+		filename  = "XML/entities.html",
 		options   = {
 			flags = {
 				.Ignore_Unsupported, .Intern_Comments,
@@ -133,7 +122,7 @@ TESTS :: []TEST{
 	},
 
 	{
-		filename  = "entities.html",
+		filename  = "XML/entities.html",
 		options   = {
 			flags = {
 				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA,
@@ -144,7 +133,7 @@ TESTS :: []TEST{
 	},
 
 	{
-		filename  = "entities.html",
+		filename  = "XML/entities.html",
 		options   = {
 			flags = {
 				.Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
@@ -158,7 +147,7 @@ TESTS :: []TEST{
 		Then we test that certain errors are returned as expected.
 	*/
 	{
-		filename  = "utf8.xml",
+		filename  = "XML/utf8.xml",
 		options   = {
 			flags            = {
 				.Ignore_Unsupported, .Intern_Comments,
@@ -173,7 +162,7 @@ TESTS :: []TEST{
 		Parse the 8.2 MiB unicode.xml for good measure.
 	*/
 	{
-		filename  = "unicode.xml",
+		filename  = "XML/unicode.xml",
 		options   = {
 			flags            = {
 				.Ignore_Unsupported,

+ 165 - 0
tests/core/text/i18n/test_core_text_i18n.odin

@@ -0,0 +1,165 @@
+package test_core_text_i18n
+
+import "core:mem"
+import "core:fmt"
+import "core:os"
+import "core:testing"
+import "core:text/i18n"
+
+TEST_count := 0
+TEST_fail  := 0
+
+// Under `odin test` use the real testing helpers; when run as a regular program,
+// fall back to local stand-ins that count results in TEST_count / TEST_fail and print to stdout.
+when ODIN_TEST {
+	expect  :: testing.expect
+	log     :: testing.log
+} else {
+	// Counts the check, and prints `message` with the caller's location if `condition` is false.
+	expect  :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
+		TEST_count += 1
+		if !condition {
+			TEST_fail += 1
+			fmt.printf("[%v] %v\n", loc, message)
+			return
+		}
+	}
+	// Prints `v` prefixed with the caller's location.
+	log     :: proc(t: ^testing.T, v: any, loc := #caller_location) {
+		fmt.printf("[%v] ", loc)
+		fmt.printf("log: %v\n", v)
+	}
+}
+T :: i18n.get
+
+// A single lookup: fetching `key` from `section` for count `n` is expected to yield `val`.
+Test :: struct {
+	section: string,
+	key:     string,
+	val:     string,
+	n:       int,
+}
+
+// One catalog file, how to load it, the error loading is expected to return, and the lookups to run on success.
+Test_Suite :: struct {
+	file:    string,
+	loader:  proc(string, i18n.Parse_Options, proc(int) -> int, mem.Allocator) -> (^i18n.Translation, i18n.Error),
+	err:     i18n.Error,
+	options: i18n.Parse_Options,
+	tests:   []Test,
+}
+
+// All suites run by `tests`. A suite without an explicit `err` expects loading to succeed (`.None`).
+TESTS := []Test_Suite{
+	{
+		file   = "assets/I18N/nl_NL.mo",
+		loader = i18n.parse_mo_file,
+		tests  = {
+			// These are in the catalog.
+			{ "", "There are 69,105 leaves here.", "Er zijn hier 69.105 bladeren.",  1 },
+			{ "", "Hellope, World!",               "Hallo, Wereld!",                 1 },
+			{ "", "There is %d leaf.\n",           "Er is %d blad.\n",               1 },
+			{ "", "There are %d leaves.\n",        "Er is %d blad.\n",               1 },
+			{ "", "There is %d leaf.\n",           "Er zijn %d bladeren.\n",        42 },
+			{ "", "There are %d leaves.\n",        "Er zijn %d bladeren.\n",        42 },
+
+			// This isn't in the catalog, so should return the key.
+			{ "", "Come visit us on Discord!",     "Come visit us on Discord!",      1 },
+		},
+	},
+
+	// QT Linguist with default loader options.
+	{
+		file   = "assets/I18N/nl_NL-qt-ts.ts",
+		loader = i18n.parse_qt_linguist_file,
+		tests  = {
+			// These are in the catalog.
+			{ "Page",          "Text for translation",           "Tekst om te vertalen",        1},
+			{ "Page",          "Also text to translate",         "Ook tekst om te vertalen",    1},
+			{ "installscript", "99 bottles of beer on the wall", "99 flessen bier op de muur",  1},
+			{ "apple_count",   "%d apple(s)",                    "%d appel",                    1},
+			{ "apple_count",   "%d apple(s)",                    "%d appels",                  42},
+
+			// These aren't in the catalog, so should return the key.
+			{ "",              "Come visit us on Discord!",      "Come visit us on Discord!",  1 },
+			{ "Fake_Section",  "Come visit us on Discord!",      "Come visit us on Discord!",  1 },
+		},
+	},
+
+	// QT Linguist, merging sections.
+	{
+		file    = "assets/I18N/nl_NL-qt-ts.ts",
+		loader  = i18n.parse_qt_linguist_file,
+		options = {merge_sections = true},
+		tests   = {
+			// All of them are now in section "", so lookups in section "" should return the translation.
+			{ "",              "Text for translation",           "Tekst om te vertalen",        1},
+			{ "",              "Also text to translate",         "Ook tekst om te vertalen",    1},
+			{ "",              "99 bottles of beer on the wall", "99 flessen bier op de muur",  1},
+			{ "",              "%d apple(s)",                    "%d appel",                    1},
+			{ "",              "%d apple(s)",                    "%d appels",                  42},
+
+			// All of them are now in section "", lookup with original section should return the key.
+			{ "Page",          "Text for translation",           "Text for translation",            1},
+			{ "Page",          "Also text to translate",         "Also text to translate",          1},
+			{ "installscript", "99 bottles of beer on the wall", "99 bottles of beer on the wall",  1},
+			{ "apple_count",   "%d apple(s)",                    "%d apple(s)",                     1},
+			{ "apple_count",   "%d apple(s)",                    "%d apple(s)",                    42},
+		},
+	},
+
+	// QT Linguist, merging sections. Expecting .Duplicate_Key error because same key exists in more than 1 section.
+	{
+		file    = "assets/I18N/duplicate-key.ts",
+		loader  = i18n.parse_qt_linguist_file,
+		options = {merge_sections = true},
+		err     = .Duplicate_Key,
+	},
+
+	// QT Linguist, not merging sections. Shouldn't return error despite same key existing in more than 1 section.
+	{
+		file    = "assets/I18N/duplicate-key.ts",
+		loader  = i18n.parse_qt_linguist_file,
+	},
+}
+
+/*
+	Runs every suite in TESTS: loads the catalog, checks the loader returned the expected error,
+	and, if loading succeeded, checks each lookup. The catalog is destroyed again after each suite.
+*/
+@test
+tests :: proc(t: ^testing.T) {
+	for suite in TESTS {
+		catalog, load_err := suite.loader(suite.file, suite.options, nil, context.allocator)
+		// A catalog may be returned alongside an error, so always clean up at the end of the iteration.
+		defer i18n.destroy(catalog)
+
+		load_msg := fmt.tprintf("Expected loading %v to return %v, got %v", suite.file, suite.err, load_err)
+		expect(t, load_err == suite.err, load_msg)
+
+		if load_err != .None {
+			continue
+		}
+
+		for test in suite.tests {
+			got := T(test.section, test.key, test.n, catalog)
+
+			lookup_msg := fmt.tprintf("Expected key `%v` from section `%v`'s form for value `%v` to equal `%v`, got `%v`", test.key, test.section, test.n, test.val, got)
+			expect(t, got == test.val, lookup_msg)
+		}
+	}
+}
+
+/*
+	Entry point when the tests are run as a regular program (`odin run`) instead of via `odin test`.
+
+	Runs the tests under a tracking allocator, prints a summary and any leaked allocations,
+	and exits with status 1 if a test failed.
+*/
+main :: proc() {
+	using fmt
+
+	track: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track, context.allocator)
+	context.allocator = mem.tracking_allocator(&track)
+
+	t := testing.T{}
+	tests(&t)
+
+	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
+
+	// Report leaks before a possible early exit, so they are shown for failing runs too.
+	if len(track.allocation_map) > 0 {
+		println()
+		for _, v in track.allocation_map {
+			printf("%v Leaked %v bytes.\n", v.location, v.size)
+		}
+	}
+
+	if TEST_fail > 0 {
+		os.exit(1)
+	}
+}