Browse Source

Improve matrix->matrix casting implementation

gingerBill 3 years ago
parent
commit
23f0fbc376
5 changed files with 59 additions and 32 deletions
  1. 26 15
      src/build_settings.cpp
  2. 4 0
      src/llvm_backend.hpp
  3. 19 14
      src/llvm_backend_expr.cpp
  4. 2 2
      src/llvm_backend_proc.cpp
  5. 8 1
      src/types.cpp

+ 26 - 15
src/build_settings.cpp

@@ -18,6 +18,7 @@ enum TargetOsKind {
 	TargetOs_freebsd,
 	TargetOs_freebsd,
 	
 	
 	TargetOs_wasi,
 	TargetOs_wasi,
+	TargetOs_js,
 
 
 	TargetOs_freestanding,
 	TargetOs_freestanding,
 
 
@@ -54,6 +55,7 @@ String target_os_names[TargetOs_COUNT] = {
 	str_lit("freebsd"),
 	str_lit("freebsd"),
 	
 	
 	str_lit("wasi"),
 	str_lit("wasi"),
+	str_lit("js"),
 
 
 	str_lit("freestanding"),
 	str_lit("freestanding"),
 };
 };
@@ -344,12 +346,12 @@ gb_global TargetMetrics target_freestanding_wasm32 = {
 	str_lit(""),
 	str_lit(""),
 };
 };
 
 
-gb_global TargetMetrics target_freestanding_wasm64 = {
-	TargetOs_freestanding,
-	TargetArch_wasm64,
+gb_global TargetMetrics target_js_wasm32 = {
+	TargetOs_js,
+	TargetArch_wasm32,
+	4,
 	8,
 	8,
-	16,
-	str_lit("wasm64-freestanding-js"),
+	str_lit("wasm32-js-js"),
 	str_lit(""),
 	str_lit(""),
 };
 };
 
 
@@ -363,6 +365,14 @@ gb_global TargetMetrics target_wasi_wasm32 = {
 };
 };
 
 
 
 
+// gb_global TargetMetrics target_freestanding_wasm64 = {
+// 	TargetOs_freestanding,
+// 	TargetArch_wasm64,
+// 	8,
+// 	16,
+// 	str_lit("wasm64-freestanding-js"),
+// 	str_lit(""),
+// };
 
 
 
 
 
 
@@ -372,18 +382,19 @@ struct NamedTargetMetrics {
 };
 };
 
 
 gb_global NamedTargetMetrics named_targets[] = {
 gb_global NamedTargetMetrics named_targets[] = {
-	{ str_lit("darwin_amd64"),   &target_darwin_amd64   },
-	{ str_lit("darwin_arm64"),   &target_darwin_arm64   },
-	{ str_lit("essence_amd64"),  &target_essence_amd64  },
-	{ str_lit("linux_386"),      &target_linux_386      },
-	{ str_lit("linux_amd64"),    &target_linux_amd64    },
-	{ str_lit("windows_386"),    &target_windows_386    },
-	{ str_lit("windows_amd64"),  &target_windows_amd64  },
-	{ str_lit("freebsd_386"),    &target_freebsd_386    },
-	{ str_lit("freebsd_amd64"),  &target_freebsd_amd64  },
+	{ str_lit("darwin_amd64"),        &target_darwin_amd64   },
+	{ str_lit("darwin_arm64"),        &target_darwin_arm64   },
+	{ str_lit("essence_amd64"),       &target_essence_amd64  },
+	{ str_lit("linux_386"),           &target_linux_386      },
+	{ str_lit("linux_amd64"),         &target_linux_amd64    },
+	{ str_lit("windows_386"),         &target_windows_386    },
+	{ str_lit("windows_amd64"),       &target_windows_amd64  },
+	{ str_lit("freebsd_386"),         &target_freebsd_386    },
+	{ str_lit("freebsd_amd64"),       &target_freebsd_amd64  },
 	{ str_lit("freestanding_wasm32"), &target_freestanding_wasm32 },
 	{ str_lit("freestanding_wasm32"), &target_freestanding_wasm32 },
+	{ str_lit("wasi_wasm32"),         &target_wasi_wasm32 },
+	{ str_lit("js_wasm32"),           &target_js_wasm32 },
 	// { str_lit("freestanding_wasm64"), &target_freestanding_wasm64 },
 	// { str_lit("freestanding_wasm64"), &target_freestanding_wasm64 },
-	{ str_lit("wasi_wasm32"), &target_wasi_wasm32 },
 };
 };
 
 
 NamedTargetMetrics *selected_target_metrics;
 NamedTargetMetrics *selected_target_metrics;

+ 4 - 0
src/llvm_backend.hpp

@@ -473,6 +473,10 @@ LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align);
 
 
 LLVMValueRef llvm_basic_shuffle(lbProcedure *p, LLVMValueRef vector, LLVMValueRef mask);
 LLVMValueRef llvm_basic_shuffle(lbProcedure *p, LLVMValueRef vector, LLVMValueRef mask);
 
 
+void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false);
+void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false);
+
+
 #define LB_STARTUP_RUNTIME_PROC_NAME   "__$startup_runtime"
 #define LB_STARTUP_RUNTIME_PROC_NAME   "__$startup_runtime"
 #define LB_STARTUP_TYPE_INFO_PROC_NAME "__$startup_type_info"
 #define LB_STARTUP_TYPE_INFO_PROC_NAME "__$startup_type_info"
 #define LB_TYPE_INFO_DATA_NAME       "__$type_info_data"
 #define LB_TYPE_INFO_DATA_NAME       "__$type_info_data"

+ 19 - 14
src/llvm_backend_expr.cpp

@@ -490,15 +490,11 @@ bool lb_is_matrix_simdable(Type *t) {
 	}
 	}
 	
 	
 	switch (build_context.metrics.arch) {
 	switch (build_context.metrics.arch) {
+	default:
+		return false;
 	case TargetArch_amd64:
 	case TargetArch_amd64:
 	case TargetArch_arm64:
 	case TargetArch_arm64:
-		// possible
 		break;
 		break;
-	case TargetArch_386:
-	case TargetArch_wasm32:
-	case TargetArch_wasm64:
-		// nope
-		return false;
 	}
 	}
 	
 	
 	if (elem->kind == Type_Basic) {
 	if (elem->kind == Type_Basic) {
@@ -2018,14 +2014,23 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
 			i64 src_count = src->Matrix.row_count*src->Matrix.column_count;
 			i64 src_count = src->Matrix.row_count*src->Matrix.column_count;
 			GB_ASSERT(dst_count == src_count);
 			GB_ASSERT(dst_count == src_count);
 			
 			
-			for (i64 j = 0; j < src->Matrix.column_count; j++) {
-				for (i64 i = 0; i < src->Matrix.row_count; i++) {
-					lbValue s = lb_emit_matrix_ev(p, value, i, j);
-					i64 index = i + j*src->Matrix.row_count;					
-					i64 dst_i = index%dst->Matrix.row_count;
-					i64 dst_j = index/dst->Matrix.row_count;
-					lbValue d = lb_emit_matrix_epi(p, v.addr, dst_i, dst_j);
-					lb_emit_store(p, d, s);
+			lbValue pdst = v.addr;
+			lbValue psrc = lb_address_from_load_or_generate_local(p, value);
+			
+			bool same_elem_base_types = are_types_identical(
+				base_type(dst->Matrix.elem),
+				base_type(src->Matrix.elem)
+			);
+			
+			if (same_elem_base_types && type_size_of(dst) == type_size_of(src)) {
+				lb_mem_copy_overlapping(p, v.addr, psrc, lb_const_int(p->module, t_int, type_size_of(dst)));
+			} else {
+				for (i64 i = 0; i < src_count; i++) {
+					lbValue dp = lb_emit_array_epi(p, v.addr, matrix_column_major_index_to_offset(dst, i));
+					lbValue sp = lb_emit_array_epi(p, psrc,   matrix_column_major_index_to_offset(src, i));
+					lbValue s = lb_emit_load(p, sp);
+					s = lb_emit_conv(p, s, dst->Matrix.elem);
+					lb_emit_store(p, dp, s);
 				}
 				}
 			}
 			}
 		}
 		}

+ 2 - 2
src/llvm_backend_proc.cpp

@@ -1,4 +1,4 @@
-void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false) {
+void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile) {
 	dst = lb_emit_conv(p, dst, t_rawptr);
 	dst = lb_emit_conv(p, dst, t_rawptr);
 	src = lb_emit_conv(p, src, t_rawptr);
 	src = lb_emit_conv(p, src, t_rawptr);
 	len = lb_emit_conv(p, len, t_int);
 	len = lb_emit_conv(p, len, t_int);
@@ -27,7 +27,7 @@ void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue l
 	args[3] = LLVMConstInt(LLVMInt1TypeInContext(p->module->ctx), 0, is_volatile);
 	args[3] = LLVMConstInt(LLVMInt1TypeInContext(p->module->ctx), 0, is_volatile);
 	LLVMBuildCall(p->builder, ip, args, gb_count_of(args), "");
 	LLVMBuildCall(p->builder, ip, args, gb_count_of(args), "");
 }
 }
-void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false) {
+void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile) {
 	dst = lb_emit_conv(p, dst, t_rawptr);
 	dst = lb_emit_conv(p, dst, t_rawptr);
 	src = lb_emit_conv(p, src, t_rawptr);
 	src = lb_emit_conv(p, src, t_rawptr);
 	len = lb_emit_conv(p, len, t_int);
 	len = lb_emit_conv(p, len, t_int);

+ 8 - 1
src/types.cpp

@@ -1417,7 +1417,14 @@ i64 matrix_row_major_index_to_offset(Type *t, i64 index) {
 	i64 row_index    = index/t->Matrix.column_count;
 	i64 row_index    = index/t->Matrix.column_count;
 	return matrix_indices_to_offset(t, row_index, column_index);
 	return matrix_indices_to_offset(t, row_index, column_index);
 }
 }
-
+i64 matrix_column_major_index_to_offset(Type *t, i64 index) { // flat element index -> storage offset, reading `index` in column-major order (per the name)
+	t = base_type(t); // NOTE(review): presumably strips named/alias wrappers so the Matrix fields are reachable — confirm base_type semantics
+	GB_ASSERT(t->kind == Type_Matrix); // only matrix types are accepted
+	
+	i64 row_index    = index%t->Matrix.row_count; // row varies fastest: consecutive indices walk down a column
+	i64 column_index = index/t->Matrix.row_count; // advances by one after every row_count indices
+	return matrix_indices_to_offset(t, row_index, column_index); // the offset computation itself is delegated to matrix_indices_to_offset
+}
 
 
 
 
 bool is_matrix_square(Type *t) {
 bool is_matrix_square(Type *t) {