Browse Source

[i18n] Initial i18n support.

- Add initial GetText .MO parser
- Add translation struct and helpers
- Pluralized lookup

 TODO:
- Support for more translation catalog file formats.
Jeroen van Rijn 3 years ago
parent
commit
2fae6eda23

+ 64 - 0
core/i18n/example/i18n_example.odin

@@ -0,0 +1,64 @@
+package i18n_example
+
+import "core:mem"
+import "core:fmt"
+import "core:i18n"
+
+/*
+	Short alias for `i18n.get`; every lookup in this example goes through it.
+*/
+LOC :: i18n.get
+
+/*
+	Loads the embedded Dutch catalog, makes it the active translation and prints a handful of lookups.
+*/
+_main :: proc() {
+	SEPARATOR :: "-----"
+
+	/*
+		Parse the MO file embedded at compile time and make it the active translation,
+		which lets every `LOC` call below leave out the "catalog" parameter.
+	*/
+	parse_err: i18n.Error
+	i18n.ACTIVE, parse_err = i18n.parse_mo(#load("nl_NL.mo"))
+	defer i18n.destroy()
+
+	if parse_err != .None { return }
+
+	/*
+		Keys that are present in the .MO catalog.
+	*/
+	fmt.println(SEPARATOR)
+	fmt.println(LOC(""))
+	fmt.println(SEPARATOR)
+	fmt.println(LOC("There are 69,105 leaves here."))
+	fmt.println(SEPARATOR)
+	fmt.println(LOC("Hellope, World!"))
+
+	/*
+		Pluralized lookup: the singular and the plural form are both valid keys for the same translation.
+	*/
+	fmt.println(SEPARATOR)
+	fmt.printf(LOC("There is %d leaf.\n", 1), 1)
+	fmt.printf(LOC("There is %d leaf.\n", 42), 42)
+
+	fmt.printf(LOC("There are %d leaves.\n", 1), 1)
+	fmt.printf(LOC("There are %d leaves.\n", 42), 42)
+
+	/*
+		A key that is not in the catalog.
+	*/
+	fmt.println(SEPARATOR)
+	fmt.println(LOC("Come visit us on Discord!"))
+}
+
+/*
+	Runs `_main` under a tracking allocator and reports every allocation still recorded afterwards.
+*/
+main :: proc() {
+	tracker: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&tracker, context.allocator)
+	context.allocator = mem.tracking_allocator(&tracker)
+
+	_main()
+
+	/*
+		Nothing left in the allocation map means nothing to report.
+	*/
+	if len(tracker.allocation_map) == 0 { return }
+
+	fmt.println()
+	for _, leak in tracker.allocation_map {
+		fmt.printf("%v Leaked %v bytes.\n", leak.location, leak.size)
+	}
+}

+ 30 - 0
core/i18n/example/messages.pot

@@ -0,0 +1,30 @@
+# Odin i18n Example
+# Copyright (C) 2021 Jeroen van Rijn
+# This file is distributed under the same license as the PACKAGE package.
+# Jeroen van Rijn <[email protected]>, 2021.
+#
+#, fuzzy
+msgid   ""
+msgstr  "Project-Id-Version: Example 0.0.1\n"
+        "Report-Msgid-Bugs-To: Jeroen van Rijn <[email protected]>\n"
+        "POT-Creation-Date: 2021-11-27 19:23+0100\n"
+        "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+        "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+        "Language: en-GB\n"
+        "MIME-Version: 1.0\n"
+        "Content-Type: text/plain; charset=UTF-8\n"
+        "Content-Transfer-Encoding: 8bit\n"
+
+#: i18n_example.odin:28
+msgid "There are 69,105 leaves here."
+msgstr "Er zijn hier 69.105 bladeren."
+
+#: i18n_example.odin:30
+msgid "Hellope, World!"
+msgstr "Hallo, Wereld!"
+
+#: i18n_example.odin:36
+msgid "There is %d leaf.\n"
+msgid_plural "There are %d leaves.\n"
+msgstr[0] "Er is %d blad.\n"
+msgstr[1] "Er zijn %d bladeren.\n"

BIN
core/i18n/example/nl_NL.mo


+ 33 - 0
core/i18n/example/nl_NL.po

@@ -0,0 +1,33 @@
+# Odin i18n Example
+# Copyright (C) 2021 Jeroen van Rijn
+# This file is distributed under the same license as the PACKAGE package.
+# Jeroen van Rijn <[email protected]>, 2021.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: Example 0.0.1\n"
+"Report-Msgid-Bugs-To: Jeroen van Rijn <[email protected]>\n"
+"POT-Creation-Date: 2021-11-27 19:23+0100\n"
+"PO-Revision-Date: 2021-11-28 02:56+0100\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Language-Team: Odin Language Team\n"
+"X-Generator: Poedit 3.0\n"
+"Last-Translator: Jeroen van Rijn\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+"Language: nl_NL\n"
+
+#: i18n_example.odin:28
+msgid "There are 69,105 leaves here."
+msgstr "Er zijn hier 69.105 bladeren."
+
+#: i18n_example.odin:30
+msgid "Hellope, World!"
+msgstr "Hallo, Wereld!"
+
+#: i18n_example.odin:36
+msgid "There is %d leaf.\n"
+msgid_plural "There are %d leaves.\n"
+msgstr[0] "Er is %d blad.\n"
+msgstr[1] "Er zijn %d bladeren.\n"

+ 163 - 0
core/i18n/gettext.odin

@@ -0,0 +1,163 @@
+package i18n
+/*
+	A parser for GNU GetText .MO files.
+
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-3 license.
+
+	A from-scratch implementation based on the specification found here:
+		https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+import "core:os"
+import "core:strings"
+import "core:bytes"
+
+/*
+	Parses a GNU GetText .MO catalog held in `data` into a newly allocated `Translation`.
+
+	Inputs:
+	- data:       The raw contents of the .MO file, in either byte order.
+	- pluralizer: Optional procedure stored as the catalog's `pluralize` rule.
+	- allocator:  Becomes `context.allocator` for every allocation made while parsing.
+
+	Returns:
+	- translation: The parsed catalog. Errors found in the header return nil; errors found while
+	               reading entries return the partially filled catalog so the caller can `destroy` it.
+	- err:         `.None` on success.
+*/
+parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+	/*
+		An MO file should have at least a 4-byte magic, 2 x 2 byte version info,
+		a 4-byte number of strings value, and 2 x 4-byte offsets.
+	*/
+	if len(data) < 20 {
+		return {}, .MO_File_Invalid
+	}
+
+	/*
+		Check magic. Should be 0x950412de in native Endianness.
+		If it only matches after a byte swap, the file was written with the other byte order
+		and every following integer has to be swapped as well.
+	*/
+	native := true
+	magic  := read_u32(data, native) or_return
+
+	if magic != 0x950412de {
+		native = false
+		magic = read_u32(data, native) or_return
+
+		if magic != 0x950412de { return {}, .MO_File_Invalid_Signature }
+	}
+
+	/*
+		We can ignore version_minor at offset 6.
+		The header fields are stored in the file's byte order, so pass `native` along.
+	*/
+	version_major := read_u16(data[4:], native) or_return
+	if version_major > 1 { return {}, .MO_File_Unsupported_Version }
+
+	count             := read_u32(data[ 8:], native) or_return
+	original_offset   := read_u32(data[12:], native) or_return
+	translated_offset := read_u32(data[16:], native) or_return
+
+	if count == 0 { return {}, .Empty_Translation_Catalog }
+
+	/*
+		Initialize Translation, interner and optional pluralizer.
+	*/
+	translation = new(Translation)
+	translation.pluralize = pluralizer
+	strings.intern_init(&translation.intern, allocator, allocator)
+
+	/*
+		All bounds checks are done in 64 bits, so offsets and lengths taken
+		from the file can't wrap around and slip past the check.
+	*/
+	data_len := u64(len(data))
+
+	/*
+		Plural forms within a single entry are separated by a NUL byte.
+	*/
+	zero := []byte{0}
+
+	for n := u32(0); n < count; n += 1 {
+		/*
+			Locate this string's descriptors in the original and translated tables.
+		*/
+		o_entry := u64(original_offset)   + 8 * u64(n)
+		t_entry := u64(translated_offset) + 8 * u64(n)
+		if data_len < o_entry + 8 || data_len < t_entry + 8 {
+			return translation, .MO_File_Invalid
+		}
+
+		/*
+			Grab the length and offset of both the original and the translated string.
+		*/
+		o_length := read_u32(data[int(o_entry)    :], native) or_return
+		o_offset := read_u32(data[int(o_entry) + 4:], native) or_return
+
+		t_length := read_u32(data[int(t_entry)    :], native) or_return
+		t_offset := read_u32(data[int(t_entry) + 4:], native) or_return
+
+		/*
+			Each string is followed by a NUL terminator that has to be present in the file, hence the + 1.
+		*/
+		o_end := u64(o_offset) + u64(o_length) + 1
+		t_end := u64(t_offset) + u64(t_length) + 1
+		if data_len < max(o_end, t_end) { return translation, .Premature_EOF }
+
+		key := data[o_offset:][:o_length]
+		val := data[t_offset:][:t_length]
+
+		/*
+			Could be a pluralized string.
+			The deferred deletes run at the end of every iteration, and on the early return below.
+		*/
+		keys := bytes.split(key, zero)
+		defer delete(keys)
+
+		vals := bytes.split(val, zero)
+		defer delete(vals)
+
+		/*
+			The key holds the singular and plural msgid, the value one string per plural form of
+			the target language. These counts legitimately differ, so only the slot limit is enforced.
+		*/
+		if len(keys) > MAX_PLURALS || len(vals) > MAX_PLURALS {
+			return translation, .MO_File_Incorrect_Plural_Count
+		}
+
+		for k in keys {
+			interned_key := strings.intern_get(&translation.intern, string(k))
+
+			interned_vals: [MAX_PLURALS]string = {}
+			last_val: string
+
+			i := 0
+			for v in vals {
+				interned_vals[i] = strings.intern_get(&translation.intern, string(v))
+				last_val = interned_vals[i]
+				i += 1
+			}
+			/*
+				Pad the remaining slots with the last form, so every slot yields a usable string.
+			*/
+			for ; i < MAX_PLURALS; i += 1 {
+				interned_vals[i] = last_val
+			}
+			translation.k_v[interned_key] = interned_vals
+		}
+	}
+	return
+}
+
+/*
+	Reads the whole file at `filename` and parses its contents with `parse_mo_from_slice`.
+	Returns `.File_Error` if the file could not be read.
+*/
+parse_mo_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+
+	/*
+		The file contents are released again once parsing has finished.
+	*/
+	contents, read_ok := os.read_entire_file(filename)
+	defer delete(contents)
+
+	if read_ok {
+		return parse_mo_from_slice(contents, pluralizer)
+	}
+	return {}, .File_Error
+}
+
+/*
+	Procedure group: `parse_mo` accepts either a filename (`parse_mo_file`) or the file's bytes (`parse_mo_from_slice`).
+*/
+parse_mo :: proc { parse_mo_file, parse_mo_from_slice }
+
+/*
+	Helpers.
+*/
+/*
+	Reads a `u32` from the first 4 bytes of `data`.
+	With `native_endian` set to false the bytes are interpreted in the opposite byte order of the target.
+	Returns `.Premature_EOF` when `data` is shorter than 4 bytes.
+*/
+read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) {
+	if len(data) < size_of(u32) {
+		return 0, .Premature_EOF
+	}
+
+	raw := (^u32)(raw_data(data))^
+
+	if !native_endian {
+		/*
+			Reinterpret the bits as the non-native endianness; converting back to `u32` swaps the bytes.
+		*/
+		when ODIN_ENDIAN == .Little {
+			return u32(transmute(u32be)raw), .None
+		} else {
+			return u32(transmute(u32le)raw), .None
+		}
+	}
+	return raw, .None
+}
+
+/*
+	Reads a `u16` from the first 2 bytes of `data`.
+	With `native_endian` set to false the bytes are interpreted in the opposite byte order of the target.
+	Returns `.Premature_EOF` when `data` is shorter than 2 bytes.
+*/
+read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) {
+	if len(data) < size_of(u16) {
+		return 0, .Premature_EOF
+	}
+
+	raw := (^u16)(raw_data(data))^
+
+	if !native_endian {
+		/*
+			Reinterpret the bits as the non-native endianness; converting back to `u16` swaps the bytes.
+		*/
+		when ODIN_ENDIAN == .Little {
+			return u16(transmute(u16be)raw), .None
+		} else {
+			return u16(transmute(u16le)raw), .None
+		}
+	}
+	return raw, .None
+}

+ 116 - 0
core/i18n/i18n.odin

@@ -0,0 +1,116 @@
+package i18n
+/*
+	Internationalization helpers.
+
+	Copyright 2021 Jeroen van Rijn <[email protected]>.
+	Made available under Odin's BSD-3 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+import "core:strings"
+
+/*
+	TODO:
+	- Support for more translation catalog file formats.
+*/
+
+/*
+	Number of plural slots stored for every key in `Translation.k_v`.
+*/
+MAX_PLURALS :: 10
+
+/*
+	Currently active catalog.
+	Used as the default `catalog` argument of `get`, `get_by_slot` and `destroy`.
+*/
+ACTIVE: ^Translation
+
+/*
+	The main data structure. This can be generated from various different file formats, as long as we have a parser for them.
+*/
+Translation :: struct {
+	// Maps a key to its plural slots. The MO parser pads unused trailing slots with the last form it read.
+	k_v:    map[string][MAX_PLURALS]string,
+	// String interner the MO parser uses for the keys and values stored in `k_v`.
+	intern: strings.Intern,
+
+	// Optional rule that maps a number to a plural slot. When nil, `get` picks slot 0 for 1 and slot 1 otherwise.
+	pluralize: proc(number: int) -> int,
+}
+
+/*
+	Error values returned by the catalog parsers and their helpers.
+*/
+Error :: enum {
+	/*
+		General return values.
+		`Empty_Translation_Catalog` is what the MO parser returns for a file that declares zero strings.
+	*/
+	None = 0,
+	Empty_Translation_Catalog,
+
+	/*
+		Couldn't find, open or read file.
+	*/
+	File_Error,
+
+	/*
+		File too short.
+	*/
+	Premature_EOF,
+
+	/*
+		GNU Gettext *.MO file errors.
+		`MO_File_Invalid` covers a header or string table that doesn't fit within the file.
+		`MO_File_Incorrect_Plural_Count` means an entry's plural forms were rejected, e.g. more than `MAX_PLURALS` of them.
+	*/
+	MO_File_Invalid_Signature,
+	MO_File_Unsupported_Version,
+	MO_File_Invalid,
+	MO_File_Incorrect_Plural_Count,
+}
+
+/*
+	Looks up the translation of `key`, picking the plural slot that fits `number`.
+
+	Several ways to use:
+	- get(key), which uses `number = 0` and the i18n.ACTIVE catalog, or
+	- get(key, number), which returns the appropriate plural from the active catalog, or
+	- get(key, number, catalog) to grab text from a specific one.
+
+	NOTE(review): under the default rule a `number` of 0 selects the plural slot, so `get(key)` returns
+	the plural form of a pluralized entry. Pass 1 explicitly if you need the singular form.
+
+	Returns `key` itself if the catalog is nil or doesn't contain the key.
+*/
+get :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	/*
+		No catalog loaded (yet): return the key instead of dereferencing a nil pointer below.
+	*/
+	if catalog == nil {
+		return key
+	}
+
+	/*
+		A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule.
+	*/
+	plural := 1 if number != 1 else 0
+
+	/*
+		A catalog-specific rule takes precedence over the default.
+	*/
+	if catalog.pluralize != nil {
+		plural = catalog.pluralize(number)
+	}
+	return get_by_slot(key, plural, catalog)
+}
+
+/*
+	Several ways to use:
+	- get_by_slot(key), which defaults to the singular form and i18n.ACTIVE catalog, or
+	- get_by_slot(key, slot), which returns the requested plural from the active catalog, or
+	- get_by_slot(key, slot, catalog) to grab text from a specific one.
+
+	If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string.
+*/
+get_by_slot :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	/*
+		Without an initialized catalog there is nothing to look up, so hand back the key.
+	*/
+	if catalog == nil {
+		return key
+	}
+
+	/*
+		An unknown key is returned as-is.
+	*/
+	forms, found := catalog.k_v[key]
+	if !found {
+		return key
+	}
+
+	/*
+		Clamp the requested slot into the valid range before indexing.
+	*/
+	index := clamp(slot, 0, MAX_PLURALS - 1)
+	return forms[index]
+}
+
+/*
+	Same for destroy:
+	- destroy(), to clean up the currently active catalog i18n.ACTIVE
+	- destroy(catalog), to clean up a specific catalog.
+*/
+/*
+	Releases the catalog's interner, its key/value map and the catalog itself.
+	A nil catalog is ignored. If the catalog is the active one, `ACTIVE` is reset to nil.
+*/
+destroy :: proc(catalog: ^Translation = ACTIVE) {
+	if catalog == nil {
+		return
+	}
+
+	/*
+		Don't leave the global pointing at freed memory; later lookups then see "no catalog".
+	*/
+	if catalog == ACTIVE {
+		ACTIVE = nil
+	}
+
+	strings.intern_destroy(&catalog.intern)
+	delete(catalog.k_v)
+	free(catalog)
+}