Browse Source

Simplify hashing approach `map`

gingerBill 4 years ago
parent
commit
1dfe0cdd1d
6 changed files with 51 additions and 68 deletions
  1. 4 2
      core/runtime/core.odin
  2. 10 0
      core/runtime/dynamic_map_internal.odin
  3. 3 3
      src/check_type.cpp
  4. 16 34
      src/ir.cpp
  5. 15 29
      src/llvm_backend.cpp
  6. 3 0
      src/types.cpp

+ 4 - 2
core/runtime/core.odin

@@ -42,8 +42,10 @@ Platform_Endianness :: enum u8 {
 	Big      = 2,
 }
 
-Equal_Proc  :: distinct proc "contextless" (rawptr, rawptr) -> bool;
-Hasher_Proc :: distinct proc "contextless" (data: rawptr, seed: uintptr) -> uintptr;
+// Procedure type to test whether two values of the same type are equal
+Equal_Proc :: distinct proc "contextless" (rawptr, rawptr) -> bool;
+// Procedure type to hash a value, default seed value is 0
+Hasher_Proc :: distinct proc "contextless" (data: rawptr, seed: uintptr = 0) -> uintptr;
 
 Type_Info_Struct_Soa_Kind :: enum u8 {
 	None    = 0,

+ 10 - 0
core/runtime/dynamic_map_internal.odin

@@ -110,6 +110,16 @@ default_hasher_string :: proc "contextless" (data: rawptr, seed: uintptr) -> uin
 	}
 	return uintptr(h);
 }
+default_hasher_cstring :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { // Hashes the zero-terminated bytes reached through `data`, mixing in `seed`; returns the hash as a uintptr.
+	h := u64(seed) + 0xcbf29ce484222325; // Start from seed plus a fixed 64-bit constant (appears to be the FNV-1a offset basis; confirm).
+	ptr := (^uintptr)(data)^; // `data` is read as a pointer to an address-sized value; that address is where the bytes start.
+	for (^byte)(ptr)^ != 0 { // Walk until a 0 byte. NOTE(review): if the stored address is 0 (nil cstring) this dereferences it; confirm callers never pass one.
+		b := (^byte)(ptr)^; // Current byte of the string.
+		h = (h ~ u64(b)) * 0x100000001b3; // XOR the byte in, then multiply (looks like the FNV-1a step with the 64-bit FNV prime; confirm).
+		ptr += 1; // Advance the address by one byte.
+	}
+	return uintptr(h); // Convert the 64-bit accumulator to the pointer-sized return type.
+}
 
 
 

+ 3 - 3
src/check_type.cpp

@@ -2849,11 +2849,11 @@ void init_map_internal_types(Type *type) {
 }
 
 void add_map_key_type_dependencies(CheckerContext *ctx, Type *key) {
-	if (is_type_string(key)) {
-		add_package_dependency(ctx, "runtime", "default_hash_string");
+	if (is_type_cstring(key)) {
+		add_package_dependency(ctx, "runtime", "default_hasher_cstring");
+	} else if (is_type_string(key)) {
 		add_package_dependency(ctx, "runtime", "default_hasher_string");
 	} else if (!is_type_polymorphic(key)) {
-		add_package_dependency(ctx, "runtime", "default_hash_ptr");
 		GB_ASSERT_MSG(is_type_simple_compare(key), "%s", type_to_string(key));
 
 		i64 sz = type_size_of(key);

+ 16 - 34
src/ir.cpp

@@ -532,6 +532,7 @@ irValue *ir_gen_anonymous_proc_lit(irModule *m, String prefix_name, Ast *expr, i
 void ir_begin_procedure_body(irProcedure *proc);
 void ir_end_procedure_body(irProcedure *proc);
 irValue *ir_get_equal_proc_for_type(irModule *m, Type *type);
+irValue *ir_get_hasher_proc_for_type(irModule *m, Type *type);
 
 
 irAddr ir_addr(irValue *addr) {
@@ -3620,43 +3621,18 @@ irValue *ir_gen_map_hash(irProcedure *proc, irValue *key, Type *key_type) {
 	Type *t = base_type(ir_type(key));
 	key = ir_emit_conv(proc, key, key_type);
 
-	if (is_type_string(t)) {
-		irValue *str = ir_emit_conv(proc, key, t_string);
-		irValue *hashed_str = nullptr;
-
-		if (str->kind == irValue_Constant) {
-			ExactValue ev = str->Constant.value;
-			GB_ASSERT(ev.kind == ExactValue_String);
-			u64 hs = fnv64a(ev.value_string.text, ev.value_string.len);
-			if (build_context.word_size == 4) {
-				hs &= 0xffffffff;
-			}
-			hashed_str = ir_value_constant(t_uintptr, exact_value_u64(hs));
-		} else {
-			auto args = array_make<irValue *>(ir_allocator(), 1);
-			args[0] = str;
-			hashed_str = ir_emit_runtime_call(proc, "default_hash_string", args);
-		}
-		ir_emit_store(proc, ir_emit_struct_ep(proc, v, 0), hashed_str);
-	} else {
-		i64 sz = type_size_of(t);
-		GB_ASSERT(sz <= 8);
-		if (sz != 0) {
-			auto args = array_make<irValue *>(ir_allocator(), 2);
-			args[0] = ir_address_from_load_or_generate_local(proc, key);
-			args[1] = ir_const_int(sz);
-			irValue *hash = ir_emit_runtime_call(proc, "default_hash_ptr", args);
-
-			irValue *hash_ptr = ir_emit_struct_ep(proc, v, 0);
-			ir_emit_store(proc, hash_ptr, hash);
-		}
-	}
-
 	irValue *key_ptr = ir_address_from_load_or_generate_local(proc, key);
 	key_ptr = ir_emit_conv(proc, key_ptr, t_rawptr);
 
-	irValue *key_data = ir_emit_struct_ep(proc, v, 1);
-	ir_emit_store(proc, key_data, key_ptr);
+	irValue *hasher = ir_get_hasher_proc_for_type(proc->module, key_type);
+
+	auto args = array_make<irValue *>(permanent_allocator(), 2);
+	args[0] = key_ptr;
+	args[1] = ir_value_constant(t_uintptr, exact_value_i64(0));
+	irValue *hashed_key = ir_emit_call(proc, hasher, args);
+
+	ir_emit_store(proc, ir_emit_struct_ep(proc, v, 0), hashed_key);
+	ir_emit_store(proc, ir_emit_struct_ep(proc, v, 1), key_ptr);
 
 	return ir_emit_load(proc, v);
 }
@@ -5007,6 +4983,12 @@ irValue *ir_get_hasher_proc_for_type(irModule *m, Type *type) {
 		type_set_offsets(type);
 
 		GB_PANIC("Type_Struct");
+	} else if (is_type_cstring(type)) {
+		auto args = array_make<irValue *>(permanent_allocator(), 2);
+		args[0] = data;
+		args[1] = seed;
+		irValue *res = ir_emit_runtime_call(proc, "default_hasher_cstring", args);
+		ir_emit(proc, ir_instr_return(proc, res));
 	} else if (is_type_string(type)) {
 		auto args = array_make<irValue *>(permanent_allocator(), 2);
 		args[0] = data;

+ 15 - 29
src/llvm_backend.cpp

@@ -9286,6 +9286,12 @@ lbValue lb_get_hasher_proc_for_type(lbModule *m, Type *type) {
 		type_set_offsets(type);
 
 		GB_PANIC("Type_Struct");
+	} else if (is_type_cstring(type)) {
+		auto args = array_make<lbValue>(permanent_allocator(), 2);
+		args[0] = data;
+		args[1] = seed;
+		lbValue res = lb_emit_runtime_call(p, "default_hasher_cstring", args);
+		LLVMBuildRet(p->builder, res.value);
 	} else if (is_type_string(type)) {
 		auto args = array_make<lbValue>(permanent_allocator(), 2);
 		args[0] = data;
@@ -10375,37 +10381,17 @@ lbValue lb_gen_map_key(lbProcedure *p, lbValue key, Type *key_type) {
 	Type *t = base_type(key.type);
 	key = lb_emit_conv(p, key, key_type);
 
-	if (is_type_string(t)) {
-		lbValue str = lb_emit_conv(p, key, t_string);
-		lbValue hashed_str = {};
-
-		if (lb_is_const(str)) {
-			String v = lb_get_const_string(p->module, str);
-			u64 hs = fnv64a(v.text, v.len);
-			if (build_context.word_size == 4) {
-				hs &= 0xffffffff;
-			}
-			hashed_str = lb_const_int(p->module, t_uintptr, hs);
-		} else {
-			auto args = array_make<lbValue>(permanent_allocator(), 1);
-			args[0] = str;
-			hashed_str = lb_emit_runtime_call(p, "default_hash_string", args);
-		}
-		lb_emit_store(p, lb_emit_struct_ep(p, vp, 0), hashed_str);
-	} else {
-		i64 sz = type_size_of(t);
-		GB_ASSERT(sz <= 8);
-		if (sz != 0) {
-			auto args = array_make<lbValue>(permanent_allocator(), 2);
-			args[0] = lb_address_from_load_or_generate_local(p, key);
-			args[1] = lb_const_int(p->module, t_int, sz);
-			lbValue hash = lb_emit_runtime_call(p, "default_hash_ptr", args);
-			lb_emit_store(p, lb_emit_struct_ep(p, vp, 0), hash);
-		}
-	}
-
 	lbValue key_ptr = lb_address_from_load_or_generate_local(p, key);
 	key_ptr = lb_emit_conv(p, key_ptr, t_rawptr);
+
+	lbValue hasher = lb_get_hasher_proc_for_type(p->module, key_type);
+
+	auto args = array_make<lbValue>(permanent_allocator(), 2);
+	args[0] = key_ptr;
+	args[1] = lb_const_int(p->module, t_uintptr, 0);
+	lbValue hashed_key = lb_emit_call(p, hasher, args);
+
+	lb_emit_store(p, lb_emit_struct_ep(p, vp, 0), hashed_key);
 	lb_emit_store(p, lb_emit_struct_ep(p, vp, 1), key_ptr);
 
 	return lb_addr_load(p, v);

+ 3 - 0
src/types.cpp

@@ -1922,6 +1922,9 @@ bool is_type_comparable(Type *t) {
 		return is_type_comparable(t->Opaque.elem);
 
 	case Type_Struct:
+		if (type_size_of(t) == 0) {
+			return false;
+		}
 		if (t->Struct.is_raw_union) {
 			return is_type_simple_compare(t);
 		}