Browse Source

`cstring`

gingerBill 7 years ago
parent
commit
d3ea334e7a
7 changed files with 135 additions and 20 deletions
  1. 23 9
      core/_preload.odin
  2. 9 1
      core/fmt.odin
  3. 18 0
      examples/demo.odin
  4. 14 2
      src/check_expr.cpp
  5. 28 1
      src/ir.cpp
  6. 15 0
      src/ir_print.cpp
  7. 28 7
      src/types.cpp

+ 23 - 9
core/_preload.odin

@@ -41,15 +41,15 @@ Type_Info_Enum_Value :: union {
 };
 
 // Variant Types
-Type_Info_Named   :: struct {name: string, base: ^Type_Info};
-Type_Info_Integer :: struct {signed: bool};
-Type_Info_Rune    :: struct{};
-Type_Info_Float   :: struct{};
-Type_Info_Complex :: struct{};
-Type_Info_String  :: struct{};
-Type_Info_Boolean :: struct{};
-Type_Info_Any     :: struct{};
-Type_Info_Pointer :: struct {
+Type_Info_Named    :: struct {name: string, base: ^Type_Info};
+Type_Info_Integer  :: struct {signed: bool};
+Type_Info_Rune     :: struct {};
+Type_Info_Float    :: struct {};
+Type_Info_Complex  :: struct {};
+Type_Info_String   :: struct {is_cstring: bool}; // is_cstring: true for cstring, false for string
+Type_Info_Boolean  :: struct {};
+Type_Info_Any      :: struct {};
+Type_Info_Pointer  :: struct {
 	elem: ^Type_Info // nil -> rawptr
 };
 Type_Info_Procedure :: struct {
@@ -863,6 +863,20 @@ __string_gt :: inline proc "contextless" (a, b: string) -> bool { return __strin
 __string_le :: inline proc "contextless" (a, b: string) -> bool { return __string_cmp(a, b) <= 0; }
 __string_ge :: inline proc "contextless" (a, b: string) -> bool { return __string_cmp(a, b) >= 0; }
 
+__cstring_len :: proc "contextless" (s: cstring) -> int {
+	count := 0; // bytes seen before the terminating 0 byte
+	for cursor := (^byte)(s); cursor != nil && cursor^ != 0; cursor += 1 {
+		count += 1;
+	}
+	return count; // 0 for a nil cstring
+}
+
+__cstring_to_string :: proc "contextless" (s: cstring) -> string {
+	length := __cstring_len(s); // linear scan up to the terminating 0 byte
+	data := (^byte)(s);
+	return transmute(string)raw.String{data, length}; // reuses s's bytes; terminator excluded from the length
+}
+
 
 __complex64_eq :: inline proc "contextless"  (a, b: complex64)  -> bool { return real(a) == real(b) && imag(a) == imag(b); }
 __complex64_ne :: inline proc "contextless"  (a, b: complex64)  -> bool { return real(a) != real(b) || imag(a) != imag(b); }

+ 9 - 1
core/fmt.odin

@@ -178,7 +178,11 @@ write_type :: proc(buf: ^String_Buffer, ti: ^Type_Info) {
 		write_string(buf, "complex");
 		write_i64(buf, i64(8*ti.size), 10);
 	case Type_Info_String:
-		write_string(buf, "string");
+		if info.is_cstring {
+			write_string(buf, "cstring");
+		} else {
+			write_string(buf, "string");
+		}
 	case Type_Info_Boolean:
 		a := any{type_info = ti};
 		switch _ in a {
@@ -599,6 +603,9 @@ fmt_string :: proc(fi: ^Fmt_Info, s: string, verb: rune) {
 		fmt_bad_verb(fi, verb);
 	}
 }
+fmt_cstring :: proc(fi: ^Fmt_Info, s: cstring, verb: rune) {
+	fmt_string(fi, string(s), verb); // convert to string, then format exactly like fmt_string
+}
 
 fmt_pointer :: proc(fi: ^Fmt_Info, p: rawptr, verb: rune) {
 	switch verb {
@@ -974,6 +981,7 @@ fmt_arg :: proc(fi: ^Fmt_Info, arg: any, verb: rune) {
 	case uintptr: fmt_int(fi, u64(a), false, 8*size_of(uintptr), verb);
 
 	case string:  fmt_string(fi, a, verb);
+	case cstring: fmt_cstring(fi, a, verb);
 
 	case:         fmt_value(fi, arg, verb);
 	}

+ 18 - 0
examples/demo.odin

@@ -761,6 +761,23 @@ complete_switch :: proc() {
 }
 
 
+cstring_example :: proc() {
+	W :: "Hellope";
+	X :: cstring(W); // constant converted to cstring
+	Y :: string(X); // ...and converted back to string
+
+	w := W;
+	x: cstring = X;
+	y: string = Y;
+	z := string(x); // cstring variable -> string
+	fmt.println(x, y, z);
+	fmt.println(len(x), len(y), len(z));
+	fmt.println(len(W), len(X), len(Y));
+	// IMPORTANT NOTE for cstring variables
+	// len(cstring) is O(N)
+	// cast(string)cstring (cstring -> string) is O(N)
+}
+
 main :: proc() {
 	when true {
 		general_stuff();
@@ -774,5 +791,6 @@ main :: proc() {
 		enum_export();
 		explicit_procedure_overloading();
 		complete_switch();
+		cstring_example();
 	}
 }

+ 14 - 2
src/check_expr.cpp

@@ -1786,7 +1786,19 @@ bool check_is_castable_to(Checker *c, Operand *operand, Type *y) {
 			return true;
 		// }
 	}
+	// cstring -> string
+	if (src == t_cstring && dst == t_string) {
+		return true;
+	}
+	// cstring -> ^u8
+	if (src == t_cstring && is_type_u8_ptr(dst)) {
+		return true;
+	}
 
+	// ^u8 -> cstring
+	if (is_type_u8_ptr(src) && dst == t_cstring) {
+		return true;
+	}
 	// proc <-> proc
 	if (is_type_proc(src) && is_type_proc(dst)) {
 		return true;
@@ -5005,7 +5017,7 @@ bool check_set_index_data(Operand *o, Type *type, bool indirection, i64 *max_cou
 
 	switch (t->kind) {
 	case Type_Basic:
-		if (is_type_string(t)) {
+		if (t->Basic.kind == Basic_string) {
 			if (o->mode == Addressing_Constant) {
 				*max_count = o->value.value_string.len;
 			}
@@ -5904,7 +5916,7 @@ ExprKind check_expr_base_internal(Checker *c, Operand *o, AstNode *node, Type *t
 		Type *t = base_type(type_deref(o->type));
 		switch (t->kind) {
 		case Type_Basic:
-			if (is_type_string(t)) {
+			if (t->Basic.kind == Basic_string) {
 				valid = true;
 				if (o->mode == Addressing_Constant) {
 					max_count = o->value.value_string.len;

+ 28 - 1
src/ir.cpp

@@ -2890,6 +2890,14 @@ irValue *ir_string_len(irProcedure *proc, irValue *string) {
 	return ir_emit_struct_ev(proc, string, 1);
 }
 
+irValue *ir_cstring_len(irProcedure *proc, irValue *value) {
+	GB_ASSERT(is_type_cstring(ir_type(value)));
+	auto call_args = array_make<irValue *>(proc->module->allocator, 1);
+	call_args[0] = ir_emit_conv(proc, value, t_cstring); // pass the value as a plain cstring
+	return ir_emit_global_call(proc, "__cstring_len", call_args); // length comes from the runtime helper
+}
+
+
 
 void ir_fill_slice(irProcedure *proc, irValue *slice_ptr, irValue *data, irValue *len) {
 	Type *t = ir_type(slice_ptr);
@@ -3122,6 +3130,18 @@ irValue *ir_emit_conv(irProcedure *proc, irValue *value, Type *t) {
 		return ir_emit(proc, ir_instr_conv(proc, irConv_zext, b, t_llvm_bool, t));
 	}
 
+	if (src == t_cstring && is_type_u8_ptr(dst)) {
+		return ir_emit_bitcast(proc, value, dst);
+	}
+
+	if (src == t_cstring && dst == t_string) {
+		irValue *c = ir_emit_conv(proc, value, t_cstring);
+		auto args = array_make<irValue *>(proc->module->allocator, 1);
+		args[0] = c;
+		irValue *s = ir_emit_global_call(proc, "__cstring_to_string", args);
+		return ir_emit_conv(proc, s, dst);
+	}
+
 
 	// integer -> boolean
 	if (is_type_integer(src) && is_type_boolean(dst)) {
@@ -4171,7 +4191,9 @@ irValue *ir_build_builtin_proc(irProcedure *proc, AstNode *expr, TypeAndValue tv
 			v = ir_emit_load(proc, v);
 			t = type_deref(t);
 		}
-		if (is_type_string(t)) {
+		if (is_type_cstring(t)) {
+			return ir_cstring_len(proc, v);
+		} else if (is_type_string(t)) {
 			return ir_string_len(proc, v);
 		} else if (is_type_array(t)) {
 			GB_PANIC("Array lengths are constant");
@@ -7902,6 +7924,11 @@ void ir_setup_type_info_data(irProcedure *proc) { // NOTE(bill): Setup type_info
 				tag = ir_emit_conv(proc, variant_ptr, t_type_info_string_ptr);
 				break;
 
+			case Basic_cstring:
+				tag = ir_emit_conv(proc, variant_ptr, t_type_info_string_ptr);
+				ir_emit_store(proc, ir_emit_struct_ep(proc, tag, 0), v_true); // is_cstring
+				break;
+
 			case Basic_any:
 				tag = ir_emit_conv(proc, variant_ptr, t_type_info_any_ptr);
 				break;

+ 15 - 0
src/ir_print.cpp

@@ -340,6 +340,7 @@ void ir_print_type(irFileBuffer *f, irModule *m, Type *t, bool in_struct) {
 		case Basic_any:     ir_write_str_lit(f, "%..any");              return;
 		case Basic_rawptr:  ir_write_str_lit(f, "%..rawptr");           return;
 		case Basic_string:  ir_write_str_lit(f, "%..string");           return;
+		case Basic_cstring: ir_write_str_lit(f, "i8*");                 return;
 
 		}
 		break;
@@ -551,11 +552,25 @@ void ir_print_exact_value(irFileBuffer *f, irModule *m, ExactValue value, Type *
 			ir_write_str_lit(f, "zeroinitializer");
 			break;
 		}
+		Type *t = core_type(type);
 		if (!is_type_string(type)) {
 			GB_ASSERT(is_type_array(type));
 			ir_write_str_lit(f, "c\"");
 			ir_print_escape_string(f, str, false, false);
 			ir_write_str_lit(f, "\\00\"");
+		} else if (t == t_cstring) {
+			// HACK NOTE(bill): This is a hack but it works because strings are created at the very end
+			// of the .ll file
+			irValue *str_array = ir_add_global_string_array(m, str);
+			ir_write_str_lit(f, "getelementptr inbounds (");
+			ir_print_type(f, m, str_array->Global.entity->type);
+			ir_write_str_lit(f, ", ");
+			ir_print_type(f, m, str_array->Global.entity->type);
+			ir_write_str_lit(f, "* ");
+			ir_print_encoded_global(f, str_array->Global.entity->token.string, false);
+			ir_write_str_lit(f, ", ");
+			ir_print_type(f, m, t_int);
+			ir_write_str_lit(f, " 0, i32 0)");
 		} else {
 			// HACK NOTE(bill): This is a hack but it works because strings are created at the very end
 			// of the .ll file

+ 28 - 7
src/types.cpp

@@ -34,8 +34,9 @@ enum BasicKind {
 	Basic_uint,
 	Basic_uintptr,
 	Basic_rawptr,
-	Basic_string, // ^u8 + int
-	Basic_any,    // rawptr + ^Type_Info
+	Basic_string,  // ^u8 + int
+	Basic_cstring, // ^u8
+	Basic_any,     // rawptr + ^Type_Info
 
 	Basic_UntypedBool,
 	Basic_UntypedInteger,
@@ -277,6 +278,7 @@ gb_global Type basic_types[] = {
 
 	{Type_Basic, {Basic_rawptr,            BasicFlag_Pointer,                         -1, STR_LIT("rawptr")}},
 	{Type_Basic, {Basic_string,            BasicFlag_String,                          -1, STR_LIT("string")}},
+	{Type_Basic, {Basic_cstring,           BasicFlag_String,                          -1, STR_LIT("cstring")}},
 	{Type_Basic, {Basic_any,               0,                                         -1, STR_LIT("any")}},
 
 	{Type_Basic, {Basic_UntypedBool,       BasicFlag_Boolean    | BasicFlag_Untyped,   0, STR_LIT("untyped bool")}},
@@ -322,6 +324,7 @@ gb_global Type *t_uintptr         = &basic_types[Basic_uintptr];
 
 gb_global Type *t_rawptr          = &basic_types[Basic_rawptr];
 gb_global Type *t_string          = &basic_types[Basic_string];
+gb_global Type *t_cstring         = &basic_types[Basic_cstring];
 gb_global Type *t_any             = &basic_types[Basic_any];
 
 gb_global Type *t_untyped_bool       = &basic_types[Basic_UntypedBool];
@@ -690,6 +693,13 @@ bool is_type_string(Type *t) {
 	}
 	return false;
 }
+bool is_type_cstring(Type *t) { // true iff t's base type is the basic type cstring
+	t = base_type(t);
+	if (t->kind != Type_Basic) {
+		return false;
+	}
+	return t->Basic.kind == Basic_cstring;
+}
 bool is_type_typed(Type *t) {
 	t = base_type(t);
 	if (t == nullptr) {
@@ -802,6 +812,13 @@ bool is_type_u8_slice(Type *t) {
 	}
 	return false;
 }
+bool is_type_u8_ptr(Type *t) { // true iff t's base type is a pointer whose element type is u8
+	t = base_type(t);
+	if (t->kind == Type_Pointer) {
+		return is_type_u8(t->Pointer.elem); // read the Pointer variant (not Slice) for a Type_Pointer
+	}
+	return false;
+}
 bool is_type_proc(Type *t) {
 	t = base_type(t);
 	return t->kind == Type_Proc;
@@ -933,7 +950,7 @@ bool is_type_indexable(Type *t) {
 	Type *bt = base_type(t);
 	switch (bt->kind) {
 	case Type_Basic:
-		return is_type_string(bt);
+		return bt->Basic.kind == Basic_string;
 	case Type_Array:
 	case Type_Slice:
 	case Type_DynamicArray:
@@ -1101,6 +1118,8 @@ bool is_type_comparable(Type *t) {
 			return false;
 		case Basic_rune:
 			return true;
+		case Basic_cstring:
+			return false;
 		}
 		return true;
 	case Type_Pointer:
@@ -1849,8 +1868,9 @@ i64 type_align_of_internal(gbAllocator allocator, Type *t, TypePath *path) {
 	case Type_Basic: {
 		GB_ASSERT(is_type_typed(t));
 		switch (t->Basic.kind) {
-		case Basic_string: return build_context.word_size;
-		case Basic_any:    return build_context.word_size;
+		case Basic_string:  return build_context.word_size;
+		case Basic_cstring: return build_context.word_size;
+		case Basic_any:     return build_context.word_size;
 
 		case Basic_int: case Basic_uint: case Basic_uintptr: case Basic_rawptr:
 			return build_context.word_size;
@@ -2048,8 +2068,9 @@ i64 type_size_of_internal(gbAllocator allocator, Type *t, TypePath *path) {
 			return size;
 		}
 		switch (kind) {
-		case Basic_string: return 2*build_context.word_size;
-		case Basic_any:    return 2*build_context.word_size;
+		case Basic_string:  return 2*build_context.word_size;
+		case Basic_cstring: return build_context.word_size;
+		case Basic_any:     return 2*build_context.word_size;
 
 		case Basic_int: case Basic_uint: case Basic_uintptr: case Basic_rawptr:
 			return build_context.word_size;