Browse Source

Begin supporting `string16` across the core library

gingerBill 2 months ago
parent
commit
ae02d3d02d

+ 1 - 0
base/intrinsics/intrinsics.odin

@@ -141,6 +141,7 @@ type_is_quaternion :: proc($T: typeid) -> bool ---
 type_is_string     :: proc($T: typeid) -> bool ---
 type_is_typeid     :: proc($T: typeid) -> bool ---
 type_is_any        :: proc($T: typeid) -> bool ---
+type_is_string16   :: proc($T: typeid) -> bool ---
 
 type_is_endian_platform       :: proc($T: typeid) -> bool ---
 type_is_endian_little         :: proc($T: typeid) -> bool ---

+ 6 - 0
base/runtime/print.odin

@@ -293,7 +293,13 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
 		print_string("quaternion")
 		print_u64(u64(8*ti.size))
 	case Type_Info_String:
+		if info.is_cstring {
+			print_byte('c')
+		}
 		print_string("string")
+		if info.is_utf16 {
+			print_string("16")
+		}
 	case Type_Info_Boolean:
 		switch ti.id {
 		case bool: print_string("bool")

+ 1 - 1
core/encoding/cbor/tags.odin

@@ -298,7 +298,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number,
 
 	#partial switch t in ti.variant {
 	case reflect.Type_Info_String:
-
+		assert(!t.is_utf16)
 		if t.is_cstring {
 			length  := base64.decoded_len(bytes)
 			builder := strings.builder_make(0, length+1)

+ 2 - 0
core/encoding/cbor/unmarshal.odin

@@ -335,6 +335,8 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a
 _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
 	#partial switch t in ti.variant {
 	case reflect.Type_Info_String:
+		assert(!t.is_utf16)
+
 		bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return
 
 		if t.is_cstring {

+ 4 - 4
core/encoding/json/marshal.odin

@@ -353,10 +353,10 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
 			#partial switch info in ti.variant {
 			case runtime.Type_Info_String:
 				switch x in v {
-				case string:
-					return x == ""
-				case cstring:
-					return x == nil || x == ""
+				case string:    return x == ""
+				case cstring:   return x == nil || x == ""
+				case string16:  return x == ""
+				case cstring16: return x == nil || x == ""
 				}
 			case runtime.Type_Info_Any:
 				return v.(any) == nil

+ 3 - 1
core/encoding/json/unmarshal.odin

@@ -570,7 +570,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
 			key_ptr: rawptr
 
 			#partial switch tk in t.key.variant {
-				case runtime.Type_Info_String:			
+				case runtime.Type_Info_String:
+					assert(!tk.is_utf16)
+
 					key_ptr = rawptr(&key)
 					key_cstr: cstring
 					if reflect.is_cstring(t.key) {

+ 2 - 0
core/flags/internal_rtti.odin

@@ -127,6 +127,8 @@ parse_and_set_pointer_by_base_type :: proc(ptr: rawptr, str: string, type_info:
 		}
 
 	case runtime.Type_Info_String:
+		assert(!specific_type_info.is_utf16)
+
 		if specific_type_info.is_cstring {
 			cstr_ptr := (^cstring)(ptr)
 			if cstr_ptr != nil {

+ 8 - 8
core/fmt/fmt.odin

@@ -2346,14 +2346,14 @@ fmt_array :: proc(fi: ^Info, data: rawptr, n: int, elem_size: int, elem: ^reflec
 		}
 
 		switch reflect.type_info_base(elem).id {
-		case byte:  fmt_string(fi, string(([^]byte)(data)[:n]), verb); return
-		case u16:   print_utf16(fi, ([^]u16)(data)[:n]);               return
-		case u16le: print_utf16(fi, ([^]u16le)(data)[:n]);             return
-		case u16be: print_utf16(fi, ([^]u16be)(data)[:n]);             return
-		case u32:   print_utf32(fi, ([^]u32)(data)[:n]);               return
-		case u32le: print_utf32(fi, ([^]u32le)(data)[:n]);             return
-		case u32be: print_utf32(fi, ([^]u32be)(data)[:n]);             return
-		case rune:  print_utf32(fi, ([^]rune)(data)[:n]);              return
+		case byte:  fmt_string(fi,   string  (([^]byte)(data)[:n]), verb); return
+		case u16:   fmt_string16(fi, string16(([^]u16) (data)[:n]), verb); return
+		case u16le: print_utf16(fi, ([^]u16le)(data)[:n]); return
+		case u16be: print_utf16(fi, ([^]u16be)(data)[:n]); return
+		case u32:   print_utf32(fi, ([^]u32)(data)[:n]);   return
+		case u32le: print_utf32(fi, ([^]u32le)(data)[:n]); return
+		case u32be: print_utf32(fi, ([^]u32be)(data)[:n]); return
+		case rune:  print_utf32(fi, ([^]rune)(data)[:n]);  return
 		}
 	}
 	if verb == 'p' {

+ 2 - 2
core/io/io.odin

@@ -319,7 +319,6 @@ write_string :: proc(s: Writer, str: string, n_written: ^int = nil) -> (n: int,
 write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: int, err: Error) {
 	for i := 0; i < len(str); i += 1 {
 		r := rune(utf16.REPLACEMENT_CHAR)
-
 		switch c := str[i]; {
 		case c < utf16._surr1, utf16._surr3 <= c:
 			r = rune(c)
@@ -329,7 +328,8 @@ write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: i
 			i += 1
 		}
 
-		w, err := write_rune(s, r, n_written)
+		w: int
+		w, err = write_rune(s, r, n_written)
 		n += w
 		if err != nil {
 			return

+ 5 - 3
core/reflect/types.odin

@@ -511,9 +511,11 @@ write_type_writer :: #force_no_inline proc(w: io.Writer, ti: ^Type_Info, n_writt
 		io.write_i64(w, i64(8*ti.size), 10, &n) or_return
 	case Type_Info_String:
 		if info.is_cstring {
-			io.write_string(w, "cstring", &n) or_return
-		} else {
-			io.write_string(w, "string", &n)  or_return
+			io.write_byte(w, 'c', &n) or_return
+		}
+		io.write_string(w, "string", &n)  or_return
+		if info.is_utf16 {
+			io.write_string(w, "16", &n) or_return
 		}
 	case Type_Info_Boolean:
 		switch ti.id {

+ 2 - 0
src/check_builtin.cpp

@@ -19,6 +19,7 @@ gb_global BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_bool
 	is_type_complex,
 	is_type_quaternion,
 	is_type_string,
+	is_type_string16,
 	is_type_typeid,
 	is_type_any,
 	is_type_endian_platform,
@@ -6139,6 +6140,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 	case BuiltinProc_type_is_complex:
 	case BuiltinProc_type_is_quaternion:
 	case BuiltinProc_type_is_string:
+	case BuiltinProc_type_is_string16:
 	case BuiltinProc_type_is_typeid:
 	case BuiltinProc_type_is_any:
 	case BuiltinProc_type_is_endian_platform:

+ 35 - 7
src/check_expr.cpp

@@ -2106,6 +2106,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i
 	} else if (is_type_boolean(type)) {
 		return in_value.kind == ExactValue_Bool;
 	} else if (is_type_string(type)) {
+		if (in_value.kind == ExactValue_String16) {
+			return is_type_string16(type) || is_type_cstring16(type);
+		}
 		return in_value.kind == ExactValue_String;
 	} else if (is_type_integer(type) || is_type_rune(type)) {
 		if (in_value.kind == ExactValue_Bool) {
@@ -2320,6 +2323,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i
 		if (in_value.kind == ExactValue_String) {
 			return false;
 		}
+		if (in_value.kind == ExactValue_String16) {
+			return false;
+		}
 		if (out_value) *out_value = in_value;
 	} else if (is_type_bit_set(type)) {
 		if (in_value.kind == ExactValue_Integer) {
@@ -4654,6 +4660,13 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar
 						break;
 					}
 				}
+			} else if (operand->value.kind == ExactValue_String16) {
+				String16 s = operand->value.value_string16;
+				if (is_type_u16_array(t)) {
+					if (s.len == t->Array.count) {
+						break;
+					}
+				}
 			}
 			operand->mode = Addressing_Invalid;
 			convert_untyped_error(c, operand, target_type);
@@ -4983,6 +4996,12 @@ gb_internal ExactValue get_constant_field_single(CheckerContext *c, ExactValue v
 		if (success_) *success_ = true;
 		if (finish_) *finish_ = true;
 		return exact_value_u64(val);
+	} else if (value.kind == ExactValue_String16) {
+		// Indexing a constant UTF-16 string yields the 16-bit code unit at `index`.
+		// The bounds check must use the String16 payload, which is the active union member here.
+		GB_ASSERT(0 <= index && index < value.value_string16.len);
+		u16 val = value.value_string16[index];
+		if (success_) *success_ = true;
+		if (finish_) *finish_ = true;
+		return exact_value_u64(val);
 	}
 	if (value.kind != ExactValue_Compound) {
 		if (success_) *success_ = true;
@@ -11124,15 +11143,21 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node,
 			o->expr = node;
 			return kind;
 		}
-
-		String s = {};
-		if (o->value.kind == ExactValue_String) {
-			s = o->value.value_string;
-		}
-
 		o->mode = Addressing_Constant;
 		o->type = t;
-		o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+
+		if (o->value.kind == ExactValue_String16) {
+			String16 s = o->value.value_string16;
+
+			o->value = exact_value_string16(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+		} else {
+			String s = {};
+			if (o->value.kind == ExactValue_String) {
+				s = o->value.value_string;
+			}
+
+			o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+		}
 	}
 	return kind;
 }
@@ -11221,6 +11246,7 @@ gb_internal ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast
 		Type *t = t_invalid;
 		switch (node->tav.value.kind) {
 		case ExactValue_String:     t = t_untyped_string;     break;
+		case ExactValue_String16:   t = t_string16;           break; // TODO(bill): determine this correctly
 		case ExactValue_Float:      t = t_untyped_float;      break;
 		case ExactValue_Complex:    t = t_untyped_complex;    break;
 		case ExactValue_Quaternion: t = t_untyped_quaternion; break;
@@ -11657,6 +11683,8 @@ gb_internal bool is_exact_value_zero(ExactValue const &v) {
 		return !v.value_bool;
 	case ExactValue_String:
 		return v.value_string.len == 0;
+	case ExactValue_String16:
+		return v.value_string16.len == 0;
 	case ExactValue_Integer:
 		return big_int_is_zero(&v.value_integer);
 	case ExactValue_Float:

+ 2 - 0
src/checker_builtin_procs.hpp

@@ -250,6 +250,7 @@ BuiltinProc__type_simple_boolean_begin,
 	BuiltinProc_type_is_complex,
 	BuiltinProc_type_is_quaternion,
 	BuiltinProc_type_is_string,
+	BuiltinProc_type_is_string16,
 	BuiltinProc_type_is_typeid,
 	BuiltinProc_type_is_any,
 
@@ -607,6 +608,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
 	{STR_LIT("type_is_complex"),           1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_quaternion"),        1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_string"),            1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
+	{STR_LIT("type_is_string16"),          1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_typeid"),            1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_any"),               1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 

+ 6 - 0
src/llvm_backend.cpp

@@ -1264,7 +1264,13 @@ String lb_get_objc_type_encoding(Type *t, isize pointer_depth = 0) {
 		case Basic_string:
 			return build_context.metrics.int_size == 4 ? str_lit("{string=*i}") : str_lit("{string=*q}");
 
+		case Basic_string16:
+			return build_context.metrics.int_size == 4 ? str_lit("{string16=*i}") : str_lit("{string16=*q}");
+
 		case Basic_cstring: return str_lit("*");
+		case Basic_cstring16: return str_lit("*");
+
+
 		case Basic_any:     return str_lit("{any=^v^v}");  // rawptr + ^Type_Info
 
 		case Basic_typeid:

+ 73 - 4
src/llvm_backend_const.cpp

@@ -122,6 +122,25 @@ gb_internal lbValue lb_const_ptr_cast(lbModule *m, lbValue value, Type *t) {
 
 
 gb_internal LLVMValueRef llvm_const_string_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) {
+	// Builds the constant struct for a (non-UTF-16) string value from its data pointer and length.
+	GB_ASSERT(!is_type_string16(t));
+
+	bool const same_or_wider_ptr = build_context.metrics.ptr_size >= build_context.metrics.int_size;
+	if (same_or_wider_ptr) {
+		// Plain two-field layout: {data, len}
+		LLVMValueRef fields[2] = {data, len};
+		return llvm_const_named_struct_internal(lb_type(m, t), fields, 2);
+	}
+
+	// Pointer is narrower than int: a zeroed i32 field sits between data and len
+	LLVMValueRef padded_fields[3] = {
+		data,
+		LLVMConstNull(lb_type(m, t_i32)),
+		len,
+	};
+	return llvm_const_named_struct_internal(lb_type(m, t), padded_fields, 3);
+}
+
+gb_internal LLVMValueRef llvm_const_string16_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) {
+	GB_ASSERT(is_type_string16(t));
 	if (build_context.metrics.ptr_size < build_context.metrics.int_size) {
 		LLVMValueRef values[3] = {
 			data,
@@ -238,6 +257,10 @@ gb_internal lbValue lb_const_string(lbModule *m, String const &value) {
 	return lb_const_value(m, t_string, exact_value_string(value));
 }
 
+// UTF-16 overload: wraps `value` in an exact value and lowers it as a `string16` constant.
+gb_internal lbValue lb_const_string(lbModule *m, String16 const &value) {
+	ExactValue exact = exact_value_string16(value);
+	return lb_const_value(m, t_string16, exact);
+}
+
 
 gb_internal lbValue lb_const_bool(lbModule *m, Type *type, bool value) {
 	lbValue res = {};
@@ -569,7 +592,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb
 			GB_ASSERT(is_type_slice(type));
 			res.value = lb_find_or_add_entity_string_byte_slice_with_type(m, value.value_string, original_type).value;
 			return res;
-		} else {
+		} else if (value.kind == ExactValue_String16) {
+			GB_ASSERT(is_type_slice(type));
+			GB_PANIC("TODO(bill): UTF-16 String");
+			return res;
+		}else {
 			ast_node(cl, CompoundLit, value.value_compound);
 
 			isize count = cl->elems.count;
@@ -751,15 +778,23 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb
 		{
 			bool custom_link_section = cc.link_section.len > 0;
 
-			LLVMValueRef ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section);
+			LLVMValueRef ptr = nullptr;
 			lbValue res = {};
 			res.type = default_type(original_type);
 
+			if (is_type_string16(res.type) || is_type_cstring16(res.type)) {
+				TEMPORARY_ALLOCATOR_GUARD();
+				String16 s16 = string_to_string16(temporary_allocator(), value.value_string);
+				ptr = lb_find_or_add_entity_string16_ptr(m, s16, custom_link_section);
+			} else {
+				ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section);
+			}
+
 			if (custom_link_section) {
 				LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section));
 			}
 
-			if (is_type_cstring(res.type)) {
+			if (is_type_cstring(res.type) || is_type_cstring16(res.type)) {
 				res.value = ptr;
 			} else {
 				if (value.value_string.len == 0) {
@@ -768,12 +803,46 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb
 				LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string.len, true);
 				GB_ASSERT(is_type_string(original_type));
 
-				res.value = llvm_const_string_internal(m, original_type, ptr, str_len);
+				if (is_type_string16(res.type)) {
+					res.value = llvm_const_string16_internal(m, original_type, ptr, str_len);
+				} else {
+					res.value = llvm_const_string_internal(m, original_type, ptr, str_len);
+				}
+			}
+
+			return res;
+		}
+
+	case ExactValue_String16:
+		{
+			// NOTE(review): the local `res` is declared further down, so this assert
+			// resolves `res` from an enclosing scope - confirm that is the intended check.
+			GB_ASSERT(is_type_string16(res.type) || is_type_cstring16(res.type));
+
+			bool custom_link_section = cc.link_section.len > 0;
+
+			LLVMValueRef ptr = lb_find_or_add_entity_string16_ptr(m, value.value_string16, custom_link_section);
+			lbValue res = {};
+			res.type = default_type(original_type);
+
+			if (custom_link_section) {
+				LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section));
+			}
+
+			if (is_type_cstring16(res.type)) {
+				// A cstring16 constant is just the pointer to the code units
+				res.value = ptr;
+			} else {
+				if (value.value_string16.len == 0) {
+					// Empty string16: null data pointer, typed as a u16 pointer to match
+					// the string16 data field (see lb_emit_struct_ep/lb_emit_struct_ev)
+					ptr = LLVMConstNull(lb_type(m, t_u16_ptr));
+				}
+				// Length is counted in UTF-16 code units
+				LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string16.len, true);
+				GB_ASSERT(is_type_string(original_type));
+
+				res.value = llvm_const_string16_internal(m, original_type, ptr, str_len);
+			}
+
+			return res;
+		}
 
+
 	case ExactValue_Integer:
 		if (is_type_pointer(type) || is_type_multi_pointer(type) || is_type_proc(type)) {
 			LLVMTypeRef t = lb_type(m, original_type);

+ 14 - 0
src/llvm_backend_debug.cpp

@@ -802,6 +802,20 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) {
 				LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("char"), 8, LLVMDWARFTypeEncoding_Unsigned);
 				return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring", 7);
 			}
+
+		case Basic_string16:
+			{
+				LLVMMetadataRef elements[2] = {};
+				elements[0] = lb_debug_struct_field(m, str_lit("data"), t_u16_ptr, 0);
+				elements[1] = lb_debug_struct_field(m, str_lit("len"),  t_int, int_bits);
+				return lb_debug_basic_struct(m, str_lit("string16"), 2*int_bits, int_bits, elements, gb_count_of(elements));
+			}
+		case Basic_cstring16:
+			{
+				// cstring16 is described as a pointer to a 16-bit unsigned character type
+				LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("wchar_t"), 16, LLVMDWARFTypeEncoding_Unsigned);
+				// The final argument is the byte length of the name: "cstring16" is 9 characters
+				return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring16", 9);
+			}
+
 		case Basic_any:
 			{
 				LLVMMetadataRef elements[2] = {};

+ 2 - 1
src/llvm_backend_expr.cpp

@@ -4354,12 +4354,13 @@ gb_internal lbAddr lb_build_addr_index_expr(lbProcedure *p, Ast *expr) {
 	}
 
 
-	case Type_Basic: { // Basic_string
+	case Type_Basic: { // Basic_string/Basic_string16
 		lbValue str;
 		lbValue elem;
 		lbValue len;
 		lbValue index;
 
+
 		str = lb_build_expr(p, ie->expr);
 		if (deref) {
 			str = lb_emit_load(p, str);

+ 37 - 0
src/llvm_backend_general.cpp

@@ -2715,6 +2715,43 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string_ptr(lbModule *m, String co
 	}
 }
 
+// Emits a new global holding `str` as an array of 16-bit code units followed by a
+// zero terminator, and returns a constant pointer to the first element of that array.
+// No de-duplication is performed (see TODO), and `custom_link_section` is not consulted here.
+gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String16 const &str, bool custom_link_section) {
+	// TODO(bill): caching for UTF-16 strings
+
+	LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)};
+
+	LLVMValueRef data = nullptr;
+	{
+		LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx);
+
+		TEMPORARY_ALLOCATOR_GUARD();
+
+		// One extra slot for the trailing zero code unit
+		LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1);
+
+		for (isize i = 0; i < str.len; i++) {
+			values[i] = LLVMConstInt(llvm_u16, str.text[i], false);
+		}
+		values[str.len] = LLVMConstInt(llvm_u16, 0, false);
+
+		data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1));
+	}
+
+
+	// Each global gets a unique name built from the module name and a running index
+	u32 id = m->global_array_index.fetch_add(1);
+	gbString name = gb_string_make(temporary_allocator(), "csbs$");
+	name = gb_string_appendc(name, m->module_name);
+	name = gb_string_append_fmt(name, "$%x", id);
+
+	LLVMTypeRef type = LLVMTypeOf(data);
+	LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name);
+	LLVMSetInitializer(global_data, data);
+	lb_make_global_private_const(global_data);
+	// The elements are 16-bit, so the global must be at least 2-byte aligned;
+	// an alignment of 1 would allow the data to land on an odd address
+	LLVMSetAlignment(global_data, 2);
+
+	LLVMValueRef ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2);
+	return ptr;
+}
+
 gb_internal lbValue lb_find_or_add_entity_string(lbModule *m, String const &str, bool custom_link_section) {
 	LLVMValueRef ptr = nullptr;
 	if (str.len != 0) {

+ 18 - 1
src/llvm_backend_utility.cpp

@@ -6,6 +6,7 @@ gb_internal bool lb_is_type_aggregate(Type *t) {
 	case Type_Basic:
 		switch (t->Basic.kind) {
 		case Basic_string:
+		case Basic_string16:
 		case Basic_any:
 			return true;
 
@@ -981,7 +982,8 @@ gb_internal i32 lb_convert_struct_index(lbModule *m, Type *t, i32 index) {
 	} else if (build_context.ptr_size != build_context.int_size) {
 		switch (t->kind) {
 		case Type_Basic:
-			if (t->Basic.kind != Basic_string) {
+			if (t->Basic.kind != Basic_string &&
+			    t->Basic.kind != Basic_string16) {
 				break;
 			}
 			/*fallthrough*/
@@ -1160,6 +1162,11 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) {
 		case 0: result_type = alloc_type_pointer(t->Slice.elem); break;
 		case 1: result_type = t_int; break;
 		}
+	} else if (is_type_string16(t)) {
+		switch (index) {
+		case 0: result_type = t_u16_ptr; break;
+		case 1: result_type = t_int;    break;
+		}
 	} else if (is_type_string(t)) {
 		switch (index) {
 		case 0: result_type = t_u8_ptr; break;
@@ -1273,6 +1280,12 @@ gb_internal lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) {
 	switch (t->kind) {
 	case Type_Basic:
 		switch (t->Basic.kind) {
+		case Basic_string16:
+			switch (index) {
+			case 0: result_type = t_u16_ptr; break;
+			case 1: result_type = t_int;    break;
+			}
+			break;
 		case Basic_string:
 			switch (index) {
 			case 0: result_type = t_u8_ptr; break;
@@ -1440,6 +1453,10 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection
 				e = lb_emit_struct_ep(p, e, index);
 				break;
 
+			case Basic_string16:
+				e = lb_emit_struct_ep(p, e, index);
+				break;
+
 			default:
 				GB_PANIC("un-gep-able type %s", type_to_string(type));
 				break;

+ 7 - 0
src/string.cpp

@@ -79,6 +79,13 @@ gb_internal String substring(String const &s, isize lo, isize hi) {
 	return make_string(s.text+lo, hi-lo);
 }
 
+// Returns the view of `s` covering code units [lo, hi); asserts lo <= hi <= s.len.
+gb_internal String16 substring(String16 const &s, isize lo, isize hi) {
+	isize const limit = s.len;
+	GB_ASSERT_MSG(lo <= hi && hi <= limit, "%td..%td..%td", lo, hi, limit);
+
+	isize const count = hi-lo;
+	return make_string16(s.text+lo, count);
+}
+
 
 gb_internal char *alloc_cstring(gbAllocator a, String s) {
 	char *c_str = gb_alloc_array(a, char, s.len+1);