Browse Source

Improve the performance of simple array comparisons

gingerBill 6 years ago
parent
commit
77734ea967
4 changed files with 88 additions and 14 deletions
  1. 39 1
      core/runtime/internal.odin
  2. 2 0
      src/checker.cpp
  3. 24 13
      src/ir.cpp
  4. 23 0
      src/types.cpp

+ 39 - 1
core/runtime/internal.odin

@@ -243,6 +243,44 @@ print_type :: proc(fd: os.Handle, ti: ^Type_Info) {
 	}
 }
 
+// Compares the first `n` bytes at `a` and `b`; returns -1, 0 or +1, decided by the first differing byte.
+// Whole machine words are compared first; only a mismatching word is re-scanned byte by byte.
+memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
+	x := uintptr(a);
+	y := uintptr(b);
+	n := uintptr(n);
+
+	SU :: size_of(uintptr);
+	// Only words lying entirely inside the `n` bytes are loaded, so nothing past the end is read.
+	fast := n/SU;
+	offset := fast*SU;
+
+	for curr_block := uintptr(0); curr_block < fast; curr_block += 1 {
+		va := (^uintptr)(x + curr_block*SU)^;
+		vb := (^uintptr)(y + curr_block*SU)^;
+		if va ~ vb != 0 {
+			for pos := curr_block*SU; pos < n; pos += 1 {
+				a := (^byte)(x+pos)^;
+				b := (^byte)(y+pos)^;
+				if a ~ b != 0 {
+					return (int(a) - int(b)) < 0 ? -1 : +1;
+				}
+			}
+		}
+	}
+
+	// Trailing bytes that do not fill a whole word (all of the input when n < SU).
+	for /**/; offset < n; offset += 1 {
+		a := (^byte)(x+offset)^;
+		b := (^byte)(y+offset)^;
+		if a ~ b != 0 {
+			return (int(a) - int(b)) < 0 ? -1 : +1;
+		}
+	}
+
+	return 0;
+}
+
 string_eq :: proc "contextless" (a, b: string) -> bool {
 	switch {
 	case len(a) != len(b): return false;
@@ -253,7 +291,7 @@ string_eq :: proc "contextless" (a, b: string) -> bool {
 }
 
 string_cmp :: proc "contextless" (a, b: string) -> int {
-	return mem.compare_byte_ptrs(&a[0], &b[0], min(len(a), len(b)));
+	return memory_compare(&a[0], &b[0], min(len(a), len(b)));
 }
 
 string_ne :: inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b); }

+ 2 - 0
src/checker.cpp

@@ -1613,6 +1613,8 @@ void generate_minimum_dependency_set(Checker *c, Entity *start) {
 
 		str_lit("umodti3"),
 		str_lit("udivti3"),
+
+		str_lit("memory_compare"),
 	};
 	for (isize i = 0; i < gb_count_of(required_runtime_entities); i++) {
 		add_dependency_to_set(c, scope_lookup(c->info.runtime_package->scope, required_runtime_entities[i]));

+ 24 - 13
src/ir.cpp

@@ -4126,20 +4126,31 @@ irValue *ir_emit_comp(irProcedure *proc, TokenKind op_kind, irValue *left, irVal
 
 			return ir_emit_load(proc, val);
 		} else {
-			irValue *val = ir_add_local_generated(proc, t_bool, false);
-			ir_emit_store(proc, val, res);
-			auto loop_data = ir_loop_start(proc, count, t_i32);
-			{
-				irValue *i = loop_data.idx;
-				irValue *x = ir_emit_load(proc, ir_emit_array_ep(proc, lhs, i));
-				irValue *y = ir_emit_load(proc, ir_emit_array_ep(proc, rhs, i));
-				irValue *cmp = ir_emit_comp(proc, op_kind, x, y);
-				irValue *new_res = ir_emit_arith(proc, cmp_op, ir_emit_load(proc, val), cmp, t_bool);
-				ir_emit_store(proc, val, ir_emit_conv(proc, new_res, t_bool));
-			}
-			ir_loop_end(proc, loop_data);
+			if (is_type_simple_compare(tl) && (op_kind == Token_CmpEq || op_kind == Token_NotEq)) {
+				// TODO(bill): Test to see if this is actually faster!!!!
+				auto args = array_make<irValue *>(heap_allocator(), 3);
+				args[0] = ir_emit_conv(proc, lhs, t_rawptr);
+				args[1] = ir_emit_conv(proc, rhs, t_rawptr);
+				args[2] = ir_const_int(type_size_of(tl));
+				irValue *val = ir_emit_runtime_call(proc, "memory_compare", args);
+				irValue *res = ir_emit_comp(proc, op_kind, val, v_zero);
+				return ir_emit_conv(proc, res, t_bool);
+			} else {
+				irValue *val = ir_add_local_generated(proc, t_bool, false);
+				ir_emit_store(proc, val, res);
+				auto loop_data = ir_loop_start(proc, count, t_i32);
+				{
+					irValue *i = loop_data.idx;
+					irValue *x = ir_emit_load(proc, ir_emit_array_ep(proc, lhs, i));
+					irValue *y = ir_emit_load(proc, ir_emit_array_ep(proc, rhs, i));
+					irValue *cmp = ir_emit_comp(proc, op_kind, x, y);
+					irValue *new_res = ir_emit_arith(proc, cmp_op, ir_emit_load(proc, val), cmp, t_bool);
+					ir_emit_store(proc, val, ir_emit_conv(proc, new_res, t_bool));
+				}
+				ir_loop_end(proc, loop_data);
 
-			return ir_emit_load(proc, val);
+				return ir_emit_load(proc, val);
+			}
 		}
 	}
 

+ 23 - 0
src/types.cpp

@@ -1037,6 +1037,29 @@ Type *core_array_type(Type *t) {
 	return t;
 }
 
+// NOTE(bill): type can be easily compared using memcmp
+// Returns true only when byte-wise equality of two values of `t` matches `==` on them.
+// Floats and complex numbers are excluded: under IEEE 754, +0.0 == -0.0 although the
+// bit patterns differ, and NaN != NaN although the bit patterns can be identical.
+bool is_type_simple_compare(Type *t) {
+	t = core_type(t);
+	switch (t->kind) {
+	case Type_Array:
+		return is_type_simple_compare(t->Array.elem);
+
+	case Type_Basic:
+		return (t->Basic.flags & (BasicFlag_Integer|BasicFlag_Rune|BasicFlag_Pointer)) != 0;
+
+	case Type_Pointer:
+	case Type_Proc:
+	case Type_BitSet:
+	case Type_BitField:
+		return true;
+	}
+
+	return false;
+}
+
 Type *base_complex_elem_type(Type *t) {
 	t = core_type(t);
 	if (is_type_complex(t)) {