Browse Source

Change `typeid` definition to be based around the canonical type hash

`typeid` used to be a fancy index with extra metadata stored on it. Now it is a direct hash of the type.

This is safe to do in practice since any possible collisions are checked at compile time AND the chances of having a 1% collision are around 1 in 600K (see the Birthday Paradox).

Therefore accessing a `^Type_Info` is now a hash table lookup with linear probing. The table is twice the size necessary, so as to prevent too much probing due to an overly dense hash table.
gingerBill 6 months ago
parent
commit
5489a88983
7 changed files with 64 additions and 111 deletions
  1. 9 47
      base/runtime/core.odin
  2. BIN
      odin.rdi
  3. 29 16
      src/checker.cpp
  4. 1 0
      src/checker.hpp
  5. 3 2
      src/llvm_backend.cpp
  6. 14 38
      src/llvm_backend_type.cpp
  7. 8 8
      src/types.cpp

+ 9 - 47
base/runtime/core.odin

@@ -239,47 +239,6 @@ Type_Info :: struct {
 	},
 	},
 }
 }
 
 
-// NOTE(bill): This must match the compiler's
-Typeid_Kind :: enum u8 {
-	Invalid,
-	Integer,
-	Rune,
-	Float,
-	Complex,
-	Quaternion,
-	String,
-	Boolean,
-	Any,
-	Type_Id,
-	Pointer,
-	Multi_Pointer,
-	Procedure,
-	Array,
-	Enumerated_Array,
-	Dynamic_Array,
-	Slice,
-	Tuple,
-	Struct,
-	Union,
-	Enum,
-	Map,
-	Bit_Set,
-	Simd_Vector,
-	Matrix,
-	Soa_Pointer,
-	Bit_Field,
-}
-#assert(len(Typeid_Kind) < 32)
-
-Typeid_Bit_Field :: bit_field uintptr {
-	index:    uintptr     | 8*size_of(uintptr) - 8,
-	kind:     Typeid_Kind | 5, // Typeid_Kind
-	named:    bool        | 1,
-	special:  bool        | 1, // signed, cstring, etc
-	reserved: bool        | 1,
-}
-#assert(size_of(Typeid_Bit_Field) == size_of(uintptr))
-
 // NOTE(bill): only the ones that are needed (not all types)
 // NOTE(bill): only the ones that are needed (not all types)
 // This will be set by the compiler
 // This will be set by the compiler
 type_table: []^Type_Info
 type_table: []^Type_Info
@@ -686,13 +645,16 @@ type_info_core :: proc "contextless" (info: ^Type_Info) -> ^Type_Info {
 type_info_base_without_enum :: type_info_core
 type_info_base_without_enum :: type_info_core
 
 
 __type_info_of :: proc "contextless" (id: typeid) -> ^Type_Info #no_bounds_check {
 __type_info_of :: proc "contextless" (id: typeid) -> ^Type_Info #no_bounds_check {
-	MASK :: 1<<(8*size_of(typeid) - 8) - 1
-	data := transmute(uintptr)id
-	n := int(data & MASK)
-	if n < 0 || n >= len(type_table) {
-		n = 0
+	n := u64(len(type_table))
+	i := transmute(u64)id % n
+	for k in 0..<n {
+		ptr := type_table[i]
+		if ptr != nil && ptr.id == id {
+			return ptr
+		}
+		i = i+1 if i+1 < n else 0
 	}
 	}
-	return type_table[n]
+	return type_table[0]
 }
 }
 
 
 when !ODIN_NO_RTTI {
 when !ODIN_NO_RTTI {

BIN
odin.rdi


+ 29 - 16
src/checker.cpp

@@ -6740,30 +6740,43 @@ gb_internal void check_parsed_files(Checker *c) {
 		}
 		}
 		array_sort(c->info.type_info_types, type_info_pair_cmp);
 		array_sort(c->info.type_info_types, type_info_pair_cmp);
 
 
+		array_init(&c->info.type_info_types_hash_map, heap_allocator(), c->info.type_info_types.count*2 + 1);
 		map_reserve(&c->info.minimum_dependency_type_info_index_map, c->info.type_info_types.count);
 		map_reserve(&c->info.minimum_dependency_type_info_index_map, c->info.type_info_types.count);
 
 
-		for_array(i, c->info.type_info_types) {
-			auto const &tt = c->info.type_info_types[i];
-			bool exists = map_set_if_not_previously_exists(&c->info.minimum_dependency_type_info_index_map, tt.hash, i);
-			if (!exists) {
-				continue;
-			}
-			for (auto const &entry : c->info.minimum_dependency_type_info_index_map) {
-				if (entry.key != tt.hash) {
+		isize hash_map_len = c->info.type_info_types_hash_map.count;
+		for (auto const &tt : c->info.type_info_types) {
+			isize index = tt.hash % hash_map_len;
+			// NOTE(bill): no need for a sanity check since there
+			// will always be enough space for the entries
+			for (;;) {
+				if (index == 0 || c->info.type_info_types_hash_map[index].hash != 0) {
+					index = (index+1) % hash_map_len;
 					continue;
 					continue;
 				}
 				}
-				auto const &other = c->info.type_info_types[entry.value];
-				if (are_types_identical_unique_tuples(tt.type, other.type)) {
-					continue;
+				break;
+			}
+			c->info.type_info_types_hash_map[index] = tt;
+
+			bool exists = map_set_if_not_previously_exists(&c->info.minimum_dependency_type_info_index_map, tt.hash, index);
+			if (exists) {
+				for (auto const &entry : c->info.minimum_dependency_type_info_index_map) {
+					if (entry.key != tt.hash) {
+						continue;
+					}
+					auto const &other = c->info.type_info_types[entry.value];
+					if (are_types_identical_unique_tuples(tt.type, other.type)) {
+						continue;
+					}
+					gbString t = temp_canonical_string(tt.type);
+					gbString o = temp_canonical_string(other.type);
+					GB_PANIC("%s (%s) %llu vs %s (%s) %llu",
+					         type_to_string(tt.type, false),    t, cast(unsigned long long)tt.hash,
+					         type_to_string(other.type, false), o, cast(unsigned long long)other.hash);
 				}
 				}
-				gbString t = temp_canonical_string(tt.type);
-				gbString o = temp_canonical_string(other.type);
-				GB_PANIC("%s (%s) %llu vs %s (%s) %llu",
-				         type_to_string(tt.type, false),    t, cast(unsigned long long)tt.hash,
-				         type_to_string(other.type, false), o, cast(unsigned long long)other.hash);
 			}
 			}
 		}
 		}
 
 
+
 		GB_ASSERT(c->info.minimum_dependency_type_info_index_map.count <= c->info.type_info_types.count);
 		GB_ASSERT(c->info.minimum_dependency_type_info_index_map.count <= c->info.type_info_types.count);
 	}
 	}
 
 

+ 1 - 0
src/checker.hpp

@@ -432,6 +432,7 @@ struct CheckerInfo {
 	PtrMap</*type info hash*/u64, /*min dep index*/isize> minimum_dependency_type_info_index_map;
 	PtrMap</*type info hash*/u64, /*min dep index*/isize> minimum_dependency_type_info_index_map;
 	TypeSet min_dep_type_info_set;
 	TypeSet min_dep_type_info_set;
 	Array<TypeInfoPair> type_info_types; // sorted after filled
 	Array<TypeInfoPair> type_info_types; // sorted after filled
+	Array<TypeInfoPair> type_info_types_hash_map; // 2 * type_info_types.count
 
 
 
 
 	Array<Entity *> testing_procedures;
 	Array<Entity *> testing_procedures;

+ 3 - 2
src/llvm_backend.cpp

@@ -3154,9 +3154,10 @@ gb_internal bool lb_generate_code(lbGenerator *gen) {
 		lbModule *m = default_module;
 		lbModule *m = default_module;
 
 
 		{ // Add type info data
 		{ // Add type info data
-			GB_ASSERT_MSG(info->minimum_dependency_type_info_index_map.count == info->type_info_types.count, "%tu vs %tu", info->minimum_dependency_type_info_index_map.count, info->type_info_types.count);
+			// GB_ASSERT_MSG(info->minimum_dependency_type_info_index_map.count == info->type_info_types.count, "%tu vs %tu", info->minimum_dependency_type_info_index_map.count, info->type_info_types.count);
 
 
-			isize max_type_info_count = info->minimum_dependency_type_info_index_map.count+1;
+			// isize max_type_info_count = info->minimum_dependency_type_info_index_map.count+1;
+			isize max_type_info_count = info->type_info_types_hash_map.count;
 			Type *t = alloc_type_array(t_type_info_ptr, max_type_info_count);
 			Type *t = alloc_type_array(t_type_info_ptr, max_type_info_count);
 
 
 			// IMPORTANT NOTE(bill): As LLVM does not have a union type, an array of unions cannot be initialized
 			// IMPORTANT NOTE(bill): As LLVM does not have a union type, an array of unions cannot be initialized

+ 14 - 38
src/llvm_backend_type.cpp

@@ -2,13 +2,13 @@
 gb_internal isize lb_type_info_index(CheckerInfo *info, TypeInfoPair pair, bool err_on_not_found=true) {
 gb_internal isize lb_type_info_index(CheckerInfo *info, TypeInfoPair pair, bool err_on_not_found=true) {
 	isize index = type_info_index(info, pair, err_on_not_found);
 	isize index = type_info_index(info, pair, err_on_not_found);
 	if (index >= 0) {
 	if (index >= 0) {
-		return index+1;
+		return index;
 	}
 	}
 	if (err_on_not_found) {
 	if (err_on_not_found) {
 		gb_printf_err("NOT FOUND lb_type_info_index:\n\t%s\n\t@ index %td\n\tmax count: %u\nFound:\n", type_to_string(pair.type), index, info->minimum_dependency_type_info_index_map.count);
 		gb_printf_err("NOT FOUND lb_type_info_index:\n\t%s\n\t@ index %td\n\tmax count: %u\nFound:\n", type_to_string(pair.type), index, info->minimum_dependency_type_info_index_map.count);
 		for (auto const &entry : info->minimum_dependency_type_info_index_map) {
 		for (auto const &entry : info->minimum_dependency_type_info_index_map) {
 			isize type_info_index = entry.key;
 			isize type_info_index = entry.key;
-			gb_printf_err("\t%s\n", type_to_string(info->type_info_types[type_info_index].type));
+			gb_printf_err("\t%s\n", type_to_string(info->type_info_types_hash_map[type_info_index].type));
 		}
 		}
 		GB_PANIC("NOT FOUND");
 		GB_PANIC("NOT FOUND");
 	}
 	}
@@ -73,37 +73,8 @@ gb_internal lbValue lb_typeid(lbModule *m, Type *type) {
 
 
 	type = default_type(type);
 	type = default_type(type);
 
 
-	u64 id = cast(u64)lb_type_info_index(m->info, type);
-	GB_ASSERT(id >= 0);
-
-	u64 kind = lb_typeid_kind(m, type, id);
-	u64 named = is_type_named(type) && type->kind != Type_Basic;
-	u64 special = 0;
-	u64 reserved = 0;
-
-	if (is_type_cstring(type)) {
-		special = 1;
-	} else if (is_type_integer(type) && !is_type_unsigned(type)) {
-		special = 1;
-	}
-
-	u64 data = 0;
-	if (build_context.ptr_size == 4) {
-		GB_ASSERT(id <= (1u<<24u));
-		data |= (id       &~ (1u<<24)) << 0u;  // index
-		data |= (kind     &~ (1u<<5))  << 24u; // kind
-		data |= (named    &~ (1u<<1))  << 29u; // named
-		data |= (special  &~ (1u<<1))  << 30u; // special
-		data |= (reserved &~ (1u<<1))  << 31u; // reserved
-	} else {
-		GB_ASSERT(build_context.ptr_size == 8);
-		GB_ASSERT(id <= (1ull<<56u));
-		data |= (id       &~ (1ull<<56)) << 0ul;  // index
-		data |= (kind     &~ (1ull<<5))  << 56ull; // kind
-		data |= (named    &~ (1ull<<1))  << 61ull; // named
-		data |= (special  &~ (1ull<<1))  << 62ull; // special
-		data |= (reserved &~ (1ull<<1))  << 63ull; // reserved
-	}
+	u64 data = type_hash_canonical_type(type);
+	GB_ASSERT(data != 0);
 
 
 	lbValue res = {};
 	lbValue res = {};
 	res.value = LLVMConstInt(lb_type(m, t_typeid), data, false);
 	res.value = LLVMConstInt(lb_type(m, t_typeid), data, false);
@@ -279,8 +250,8 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ
 
 
 	LLVMTypeRef *modified_types = lb_setup_modified_types_for_type_info(m, global_type_info_data_entity_count);
 	LLVMTypeRef *modified_types = lb_setup_modified_types_for_type_info(m, global_type_info_data_entity_count);
 	defer (gb_free(heap_allocator(), modified_types));
 	defer (gb_free(heap_allocator(), modified_types));
-	for_array(type_info_type_index, info->type_info_types) {
-		auto const &tt = info->type_info_types[type_info_type_index];
+	for_array(type_info_type_index, info->type_info_types_hash_map) {
+		auto const &tt = info->type_info_types_hash_map[type_info_type_index];
 		Type *t = tt.type;
 		Type *t = tt.type;
 		if (t == nullptr || t == t_invalid) {
 		if (t == nullptr || t == t_invalid) {
 			continue;
 			continue;
@@ -343,8 +314,8 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ
 		return giant_const_values[index];
 		return giant_const_values[index];
 	};
 	};
 
 
-	for_array(type_info_type_index, info->type_info_types) {
-		Type *t = info->type_info_types[type_info_type_index].type;
+	for_array(type_info_type_index, info->type_info_types_hash_map) {
+		Type *t = info->type_info_types_hash_map[type_info_type_index].type;
 		if (t == nullptr || t == t_invalid) {
 		if (t == nullptr || t == t_invalid) {
 			continue;
 			continue;
 		}
 		}
@@ -1072,7 +1043,12 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ
 		LLVMSetInitializer(giant_const_values[entry_index], LLVMConstNamedStruct(stype, small_const_values, variant_index+1));
 		LLVMSetInitializer(giant_const_values[entry_index], LLVMConstNamedStruct(stype, small_const_values, variant_index+1));
 	}
 	}
 	for (isize i = 0; i < global_type_info_data_entity_count; i++) {
 	for (isize i = 0; i < global_type_info_data_entity_count; i++) {
-		giant_const_values[i] = LLVMConstPointerCast(giant_const_values[i], lb_type(m, t_type_info_ptr));
+		auto *ptr = &giant_const_values[i];
+		if (*ptr != nullptr) {
+			*ptr = LLVMConstPointerCast(*ptr, lb_type(m, t_type_info_ptr));
+		} else {
+			*ptr = LLVMConstNull(lb_type(m, t_type_info_ptr));
+		}
 	}
 	}
 
 
 
 

+ 8 - 8
src/types.cpp

@@ -503,9 +503,9 @@ gb_global Type basic_types[] = {
 	{Type_Basic, {Basic_rawptr,            BasicFlag_Pointer,                         -1, STR_LIT("rawptr")}},
 	{Type_Basic, {Basic_rawptr,            BasicFlag_Pointer,                         -1, STR_LIT("rawptr")}},
 	{Type_Basic, {Basic_string,            BasicFlag_String,                          -1, STR_LIT("string")}},
 	{Type_Basic, {Basic_string,            BasicFlag_String,                          -1, STR_LIT("string")}},
 	{Type_Basic, {Basic_cstring,           BasicFlag_String,                          -1, STR_LIT("cstring")}},
 	{Type_Basic, {Basic_cstring,           BasicFlag_String,                          -1, STR_LIT("cstring")}},
-	{Type_Basic, {Basic_any,               0,                                         -1, STR_LIT("any")}},
+	{Type_Basic, {Basic_any,               0,                                         16, STR_LIT("any")}},
 
 
-	{Type_Basic, {Basic_typeid,            0,                                         -1, STR_LIT("typeid")}},
+	{Type_Basic, {Basic_typeid,            0,                                          8, STR_LIT("typeid")}},
 
 
 	// Endian
 	// Endian
 	{Type_Basic, {Basic_i16le,  BasicFlag_Integer |                      BasicFlag_EndianLittle,  2, STR_LIT("i16le")}},
 	{Type_Basic, {Basic_i16le,  BasicFlag_Integer |                      BasicFlag_EndianLittle,  2, STR_LIT("i16le")}},
@@ -3700,7 +3700,7 @@ gb_internal i64 type_size_of(Type *t) {
 		switch (t->Basic.kind) {
 		switch (t->Basic.kind) {
 		case Basic_string:  size = 2*build_context.int_size; break;
 		case Basic_string:  size = 2*build_context.int_size; break;
 		case Basic_cstring: size = build_context.ptr_size;   break;
 		case Basic_cstring: size = build_context.ptr_size;   break;
-		case Basic_any:     size = 2*build_context.ptr_size; break;
+		case Basic_any:     size = 16;                       break;
 		case Basic_typeid:  size = build_context.ptr_size;   break;
 		case Basic_typeid:  size = build_context.ptr_size;   break;
 
 
 		case Basic_int: case Basic_uint:
 		case Basic_int: case Basic_uint:
@@ -3763,7 +3763,7 @@ gb_internal i64 type_align_of_internal(Type *t, TypePath *path) {
 		switch (t->Basic.kind) {
 		switch (t->Basic.kind) {
 		case Basic_string:  return build_context.int_size;
 		case Basic_string:  return build_context.int_size;
 		case Basic_cstring: return build_context.ptr_size;
 		case Basic_cstring: return build_context.ptr_size;
-		case Basic_any:     return build_context.ptr_size;
+		case Basic_any:     return 8;
 		case Basic_typeid:  return build_context.ptr_size;
 		case Basic_typeid:  return build_context.ptr_size;
 
 
 		case Basic_int: case Basic_uint:
 		case Basic_int: case Basic_uint:
@@ -4014,7 +4014,7 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
 		switch (kind) {
 		switch (kind) {
 		case Basic_string:  return 2*build_context.int_size;
 		case Basic_string:  return 2*build_context.int_size;
 		case Basic_cstring: return build_context.ptr_size;
 		case Basic_cstring: return build_context.ptr_size;
-		case Basic_any:     return 2*build_context.ptr_size;
+		case Basic_any:     return 16;
 		case Basic_typeid:  return build_context.ptr_size;
 		case Basic_typeid:  return build_context.ptr_size;
 
 
 		case Basic_int: case Basic_uint:
 		case Basic_int: case Basic_uint:
@@ -4251,7 +4251,7 @@ gb_internal i64 type_offset_of(Type *t, i64 index, Type **field_type_) {
 				return 0;                      // data
 				return 0;                      // data
 			case 1:
 			case 1:
 				if (field_type_) *field_type_ = t_typeid;
 				if (field_type_) *field_type_ = t_typeid;
-				return build_context.ptr_size; // id
+				return 8; // id
 			}
 			}
 		}
 		}
 		break;
 		break;
@@ -4322,8 +4322,8 @@ gb_internal i64 type_offset_of_from_selection(Type *type, Selection sel) {
 					}
 					}
 				} else if (t->Basic.kind == Basic_any) {
 				} else if (t->Basic.kind == Basic_any) {
 					switch (index) {
 					switch (index) {
-					case 0: t = t_type_info_ptr; break;
-					case 1: t = t_rawptr;        break;
+					case 0: t = t_rawptr; break;
+					case 1: t = t_typeid; break;
 					}
 					}
 				}
 				}
 				break;
 				break;