
Merge pull request #1245 from odin-lang/new-matrix-type

`matrix` type
gingerBill 3 years ago
parent
commit
c4d2aae0ed

+ 3 - 0
core/encoding/json/marshal.odin

@@ -160,6 +160,9 @@ marshal_to_writer :: proc(w: io.Writer, v: any) -> (err: Marshal_Error) {
 
 	case runtime.Type_Info_Relative_Slice:
 		return .Unsupported_Type
+		
+	case runtime.Type_Info_Matrix:
+		return .Unsupported_Type
 
 	case runtime.Type_Info_Array:
 		io.write_byte(w, '[') or_return

+ 40 - 0
core/fmt/fmt.odin

@@ -1953,6 +1953,46 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) {
 			}
 		}
 
+	case runtime.Type_Info_Matrix:
+		io.write_string(fi.writer, "matrix[")
+		defer io.write_byte(fi.writer, ']')
+		
+		fi.indent += 1
+		
+		if fi.hash { 
+			io.write_byte(fi.writer, '\n')
+			// TODO(bill): Should this render it like the written form? e.g. transposed
+			for row in 0..<info.row_count {
+				fmt_write_indent(fi)
+				for col in 0..<info.column_count {
+					if col > 0 { io.write_string(fi.writer, ", ") }
+					
+					offset := (row + col*info.elem_stride)*info.elem_size
+					
+					data := uintptr(v.data) + uintptr(offset)
+					fmt_arg(fi, any{rawptr(data), info.elem.id}, verb)
+				}
+				io.write_string(fi.writer, ";\n")
+			}
+		} else {
+			for row in 0..<info.row_count {
+				if row > 0 { io.write_string(fi.writer, "; ") }
+				for col in 0..<info.column_count {
+					if col > 0 { io.write_string(fi.writer, ", ") }
+					
+					offset := (row + col*info.elem_stride)*info.elem_size
+					
+					data := uintptr(v.data) + uintptr(offset)
+					fmt_arg(fi, any{rawptr(data), info.elem.id}, verb)
+				}
+			}
+		}
+		
+		fi.indent -= 1
+		
+		if fi.hash { 
+			fmt_write_indent(fi)
+		}
 	}
 }
 

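For reference, a minimal sketch of how the new formatting path renders a matrix, with the row/column separators as fixed above; the exact output spacing is illustrative, not taken from the commit:

```odin
package main

import "core:fmt"

main :: proc() {
	m := matrix[2, 2]f32{
		1, 2,
		3, 4,
	}
	fmt.printf("%v\n", m)  // compact form, e.g. matrix[1, 2; 3, 4]
	fmt.printf("%#v\n", m) // multi-line form: one indented row per line, each ending in ';'
}
```
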
+ 2 - 8
core/mem/mem.odin

@@ -128,14 +128,8 @@ compare_ptrs :: proc "contextless" (a, b: rawptr, n: int) -> int {
 	return compare_byte_ptrs((^byte)(a), (^byte)(b), n)
 }
 
-ptr_offset :: proc "contextless" (ptr: $P/^$T, n: int) -> P {
-	new := int(uintptr(ptr)) + size_of(T)*n
-	return P(uintptr(new))
-}
-
-ptr_sub :: proc "contextless" (a, b: $P/^$T) -> int {
-	return (int(uintptr(a)) - int(uintptr(b)))/size_of(T)
-}
+ptr_offset :: intrinsics.ptr_offset
+ptr_sub :: intrinsics.ptr_sub
 
 slice_ptr :: proc "contextless" (ptr: ^$T, len: int) -> []T {
 	return ([^]T)(ptr)[:len]

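The removed procedures keep their public names but now forward to compiler intrinsics, so call sites are unaffected. A minimal sketch:

```odin
package example

import "core:mem"

// Same API as before, now backed by intrinsics.ptr_offset:
next_int :: proc(p: ^int) -> ^int {
	return mem.ptr_offset(p, 1) // advances by size_of(int) bytes
}
```
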
+ 5 - 1
core/reflect/reflect.odin

@@ -33,6 +33,7 @@ Type_Info_Bit_Set          :: runtime.Type_Info_Bit_Set
 Type_Info_Simd_Vector      :: runtime.Type_Info_Simd_Vector
 Type_Info_Relative_Pointer :: runtime.Type_Info_Relative_Pointer
 Type_Info_Relative_Slice   :: runtime.Type_Info_Relative_Slice
+Type_Info_Matrix           :: runtime.Type_Info_Matrix
 
 Type_Info_Enum_Value :: runtime.Type_Info_Enum_Value
 
@@ -66,6 +67,7 @@ Type_Kind :: enum {
 	Simd_Vector,
 	Relative_Pointer,
 	Relative_Slice,
+	Matrix,
 }
 
 
@@ -99,6 +101,7 @@ type_kind :: proc(T: typeid) -> Type_Kind {
 		case Type_Info_Simd_Vector:      return .Simd_Vector
 		case Type_Info_Relative_Pointer: return .Relative_Pointer
 		case Type_Info_Relative_Slice:   return .Relative_Slice
+		case Type_Info_Matrix:           return .Matrix
 		}
 
 	}
@@ -1401,7 +1404,8 @@ equal :: proc(a, b: any, including_indirect_array_recursion := false, recursion_
 		Type_Info_Bit_Set,
 		Type_Info_Enum,
 		Type_Info_Simd_Vector,
-		Type_Info_Relative_Pointer:
+		Type_Info_Relative_Pointer,
+		Type_Info_Matrix:
 		return mem.compare_byte_ptrs((^byte)(a.data), (^byte)(b.data), t.size) == 0
 		
 	case Type_Info_String:

+ 14 - 0
core/reflect/types.odin

@@ -164,6 +164,12 @@ are_types_identical :: proc(a, b: ^Type_Info) -> bool {
 	case Type_Info_Relative_Slice:
 		y := b.variant.(Type_Info_Relative_Slice) or_return
 		return x.base_integer == y.base_integer && x.slice == y.slice
+		
+	case Type_Info_Matrix:
+		y := b.variant.(Type_Info_Matrix) or_return
+		if x.row_count != y.row_count { return false }
+		if x.column_count != y.column_count { return false }
+		return are_types_identical(x.elem, y.elem)
 	}
 
 	return false
@@ -584,6 +590,14 @@ write_type_writer :: proc(w: io.Writer, ti: ^Type_Info, n_written: ^int = nil) -
 		write_type(w, info.base_integer, &n) or_return
 		io.write_string(w, ") ",         &n) or_return
 		write_type(w, info.slice,        &n) or_return
+		
+	case Type_Info_Matrix:
+		io.write_string(w, "matrix[",               &n) or_return
+		io.write_i64(w, i64(info.row_count), 10,    &n) or_return
+		io.write_string(w, ", ",                    &n) or_return
+		io.write_i64(w, i64(info.column_count), 10, &n) or_return
+		io.write_string(w, "]",                     &n) or_return
+		write_type(w, info.elem,                    &n) or_return
 	}
 
 	return

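With the `write_type` case above, a matrix type name renders as `matrix[R, C]Elem`. A small sketch:

```odin
package main

import "core:fmt"

main :: proc() {
	M :: matrix[4, 4]f32
	fmt.println(typeid_of(M)) // sketch: prints "matrix[4, 4]f32"
}
```
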
+ 10 - 0
core/runtime/core.odin

@@ -162,6 +162,14 @@ Type_Info_Relative_Slice :: struct {
 	slice:        ^Type_Info,
 	base_integer: ^Type_Info,
 }
+Type_Info_Matrix :: struct {
+	elem:         ^Type_Info,
+	elem_size:    int,
+	elem_stride:  int, // elem_stride >= row_count
+	row_count:    int,
+	column_count: int,
+	// Total element count = column_count * elem_stride
+}
 
 Type_Info_Flag :: enum u8 {
 	Comparable     = 0,
@@ -202,6 +210,7 @@ Type_Info :: struct {
 		Type_Info_Simd_Vector,
 		Type_Info_Relative_Pointer,
 		Type_Info_Relative_Slice,
+		Type_Info_Matrix,
 	},
 }
 
@@ -233,6 +242,7 @@ Typeid_Kind :: enum u8 {
 	Simd_Vector,
 	Relative_Pointer,
 	Relative_Slice,
+	Matrix,
 }
 #assert(len(Typeid_Kind) < 32)
 

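`Type_Info_Matrix` describes column-major storage whose columns may be padded: `elem_stride` counts the elements between the starts of adjacent columns. A sketch of the offset computation the `fmt` code above relies on:

```odin
package example

import "core:runtime"

// Byte offset of element (row, col), mirroring the indexing in fmt.odin:
// columns are contiguous, spaced elem_stride elements apart.
matrix_elem_offset :: proc(info: ^runtime.Type_Info_Matrix, row, col: int) -> int {
	return (row + col*info.elem_stride)*info.elem_size
}
```
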
+ 274 - 0
core/runtime/core_builtin_matrix.odin

@@ -0,0 +1,274 @@
+package runtime
+
+import "core:intrinsics"
+_ :: intrinsics
+
+
+@(builtin)
+determinant :: proc{
+	matrix1x1_determinant,
+	matrix2x2_determinant,
+	matrix3x3_determinant,
+	matrix4x4_determinant,
+}
+
+@(builtin)
+adjugate :: proc{
+	matrix1x1_adjugate,
+	matrix2x2_adjugate,
+	matrix3x3_adjugate,
+	matrix4x4_adjugate,
+}
+
+@(builtin)
+inverse_transpose :: proc{
+	matrix1x1_inverse_transpose,
+	matrix2x2_inverse_transpose,
+	matrix3x3_inverse_transpose,
+	matrix4x4_inverse_transpose,
+}
+
+
+@(builtin)
+inverse :: proc{
+	matrix1x1_inverse,
+	matrix2x2_inverse,
+	matrix3x3_inverse,
+	matrix4x4_inverse,
+}
+
+@(builtin)
+hermitian_adjoint :: proc(m: $M/matrix[$N, N]$T) -> M where intrinsics.type_is_complex(T), N >= 1 {
+	return conj(transpose(m))
+}
+
+@(builtin)
+matrix_trace :: proc(m: $M/matrix[$N, N]$T) -> (trace: T) {
+	for i in 0..<N {
+		trace += m[i, i]
+	}
+	return
+}
+
+@(builtin)
+matrix_minor :: proc(m: $M/matrix[$N, N]$T, row, column: int) -> (minor: T) where N > 1 {
+	K :: N-1
+	cut_down: matrix[K, K]T
+	for col_idx in 0..<K {
+		j := col_idx + int(col_idx >= column)
+		for row_idx in 0..<K {
+			i := row_idx + int(row_idx >= row)
+			cut_down[row_idx, col_idx] = m[i, j]
+		}
+	}
+	return determinant(cut_down)
+}
+
+
+
+@(builtin)
+matrix1x1_determinant :: proc(m: $M/matrix[1, 1]$T) -> (det: T) {
+	return m[0, 0]
+}
+
+@(builtin)
+matrix2x2_determinant :: proc(m: $M/matrix[2, 2]$T) -> (det: T) {
+	return m[0, 0]*m[1, 1] - m[0, 1]*m[1, 0]
+}
+@(builtin)
+matrix3x3_determinant :: proc(m: $M/matrix[3, 3]$T) -> (det: T) {
+	a := +m[0, 0] * (m[1, 1] * m[2, 2] - m[1, 2] * m[2, 1])
+	b := -m[0, 1] * (m[1, 0] * m[2, 2] - m[1, 2] * m[2, 0])
+	c := +m[0, 2] * (m[1, 0] * m[2, 1] - m[1, 1] * m[2, 0])
+	return a + b + c
+}
+@(builtin)
+matrix4x4_determinant :: proc(m: $M/matrix[4, 4]$T) -> (det: T) {
+	a := adjugate(m)
+	#no_bounds_check for i in 0..<4 {
+		det += m[0, i] * a[0, i]
+	}
+	return
+}
+
+
+
+
+@(builtin)
+matrix1x1_adjugate :: proc(x: $M/matrix[1, 1]$T) -> (y: M) {
+	y = x
+	return
+}
+
+@(builtin)
+matrix2x2_adjugate :: proc(x: $M/matrix[2, 2]$T) -> (y: M) {
+	y[0, 0] = +x[1, 1]
+	y[0, 1] = -x[1, 0]
+	y[1, 0] = -x[0, 1]
+	y[1, 1] = +x[0, 0]
+	return
+}
+
+@(builtin)
+matrix3x3_adjugate :: proc(m: $M/matrix[3, 3]$T) -> (y: M) {
+	y[0, 0] = +(m[1, 1] * m[2, 2] - m[2, 1] * m[1, 2])
+	y[0, 1] = -(m[1, 0] * m[2, 2] - m[2, 0] * m[1, 2])
+	y[0, 2] = +(m[1, 0] * m[2, 1] - m[2, 0] * m[1, 1])
+	y[1, 0] = -(m[0, 1] * m[2, 2] - m[2, 1] * m[0, 2])
+	y[1, 1] = +(m[0, 0] * m[2, 2] - m[2, 0] * m[0, 2])
+	y[1, 2] = -(m[0, 0] * m[2, 1] - m[2, 0] * m[0, 1])
+	y[2, 0] = +(m[0, 1] * m[1, 2] - m[1, 1] * m[0, 2])
+	y[2, 1] = -(m[0, 0] * m[1, 2] - m[1, 0] * m[0, 2])
+	y[2, 2] = +(m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1])
+	return
+}
+
+
+@(builtin)
+matrix4x4_adjugate :: proc(x: $M/matrix[4, 4]$T) -> (y: M) {
+	for i in 0..<4 {
+		for j in 0..<4 {
+			sign: T = 1 if (i + j) % 2 == 0 else -1
+			y[i, j] = sign * matrix_minor(x, i, j)
+		}
+	}
+	return
+}
+
+@(builtin)
+matrix1x1_inverse_transpose :: proc(x: $M/matrix[1, 1]$T) -> (y: M) {
+	y[0, 0] = 1/x[0, 0]
+	return
+}
+
+@(builtin)
+matrix2x2_inverse_transpose :: proc(x: $M/matrix[2, 2]$T) -> (y: M) {
+	d := x[0, 0]*x[1, 1] - x[0, 1]*x[1, 0]
+	when intrinsics.type_is_integer(T) {
+		y[0, 0] = +x[1, 1] / d
+		y[1, 0] = -x[0, 1] / d
+		y[0, 1] = -x[1, 0] / d
+		y[1, 1] = +x[0, 0] / d
+	} else {
+		id := 1 / d
+		y[0, 0] = +x[1, 1] * id
+		y[1, 0] = -x[0, 1] * id
+		y[0, 1] = -x[1, 0] * id
+		y[1, 1] = +x[0, 0] * id
+	}
+	return
+}
+
+@(builtin)
+matrix3x3_inverse_transpose :: proc(x: $M/matrix[3, 3]$T) -> (y: M) #no_bounds_check {
+	a := adjugate(x)
+	d := determinant(x)
+	when intrinsics.type_is_integer(T) {
+		for i in 0..<3 {
+			for j in 0..<3 {
+				y[i, j] = a[i, j] / d
+			}
+		}
+	} else {
+		id := 1/d
+		for i in 0..<3 {
+			for j in 0..<3 {
+				y[i, j] = a[i, j] * id
+			}
+		}
+	}
+	return
+}
+
+@(builtin)
+matrix4x4_inverse_transpose :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check {
+	a := adjugate(x)
+	d: T
+	for i in 0..<4 {
+		d += x[0, i] * a[0, i]
+	}
+	when intrinsics.type_is_integer(T) {
+		for i in 0..<4 {
+			for j in 0..<4 {
+				y[i, j] = a[i, j] / d
+			}
+		}
+	} else {
+		id := 1/d
+		for i in 0..<4 {
+			for j in 0..<4 {
+				y[i, j] = a[i, j] * id
+			}
+		}
+	}
+	return
+}
+
+@(builtin)
+matrix1x1_inverse :: proc(x: $M/matrix[1, 1]$T) -> (y: M) {
+	y[0, 0] = 1/x[0, 0]
+	return
+}
+
+@(builtin)
+matrix2x2_inverse :: proc(x: $M/matrix[2, 2]$T) -> (y: M) {
+	d := x[0, 0]*x[1, 1] - x[0, 1]*x[1, 0]
+	when intrinsics.type_is_integer(T) {
+		y[0, 0] = +x[1, 1] / d
+		y[0, 1] = -x[0, 1] / d
+		y[1, 0] = -x[1, 0] / d
+		y[1, 1] = +x[0, 0] / d
+	} else {
+		id := 1 / d
+		y[0, 0] = +x[1, 1] * id
+		y[0, 1] = -x[0, 1] * id
+		y[1, 0] = -x[1, 0] * id
+		y[1, 1] = +x[0, 0] * id
+	}
+	return
+}
+
+@(builtin)
+matrix3x3_inverse :: proc(x: $M/matrix[3, 3]$T) -> (y: M) #no_bounds_check {
+	a := adjugate(x)
+	d := determinant(x)
+	when intrinsics.type_is_integer(T) {
+		for i in 0..<3 {
+			for j in 0..<3 {
+				y[i, j] = a[j, i] / d
+			}
+		}
+	} else {
+		id := 1/d
+		for i in 0..<3 {
+			for j in 0..<3 {
+				y[i, j] = a[j, i] * id
+			}
+		}
+	}
+	return
+}
+
+@(builtin)
+matrix4x4_inverse :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check {
+	a := adjugate(x)
+	d: T
+	for i in 0..<4 {
+		d += x[0, i] * a[0, i]
+	}
+	when intrinsics.type_is_integer(T) {
+		for i in 0..<4 {
+			for j in 0..<4 {
+				y[i, j] = a[j, i] / d
+			}
+		}
+	} else {
+		id := 1/d
+		for i in 0..<4 {
+			for j in 0..<4 {
+				y[i, j] = a[j, i] * id
+			}
+		}
+	}
+	return
+}

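A minimal usage sketch of the new builtins (results worked by hand for this particular matrix):

```odin
package main

import "core:fmt"

main :: proc() {
	m := matrix[2, 2]f32{
		2, 1,
		1, 1,
	}
	fmt.println(determinant(m))  // 2*1 - 1*1 = 1
	fmt.println(inverse(m))      // rows: [1, -1], [-1, 2] for this m
	fmt.println(matrix_trace(m)) // 2 + 1 = 3
}
```
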
+ 23 - 0
core/runtime/error_checks.odin

@@ -96,6 +96,29 @@ dynamic_array_expr_error :: proc "contextless" (file: string, line, column: i32,
 }
 
 
+matrix_bounds_check_error :: proc "contextless" (file: string, line, column: i32, row_index, column_index, row_count, column_count: int) {
+	if 0 <= row_index && row_index < row_count && 
+	   0 <= column_index && column_index < column_count {
+		return
+	}
+	handle_error :: proc "contextless" (file: string, line, column: i32, row_index, column_index, row_count, column_count: int) {
+		print_caller_location(Source_Code_Location{file, line, column, ""})
+		print_string(" Matrix indices [")
+		print_i64(i64(row_index))
+		print_string(", ")
+		print_i64(i64(column_index))
+		print_string("] are out of bounds, range [0..<")
+		print_i64(i64(row_count))
+		print_string(", 0..<")
+		print_i64(i64(column_count))
+		print_string("]")
+		print_byte('\n')
+		bounds_trap()
+	}
+	handle_error(file, line, column, row_index, column_index, row_count, column_count)
+}
+
+
 type_assertion_check :: proc "contextless" (ok: bool, file: string, line, column: i32, from, to: typeid) {
 	if ok {
 		return

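A hypothetical out-of-bounds access that would route through `matrix_bounds_check_error`; the diagnostic in the comment is a sketch of the message assembled above (file and position are made up):

```odin
package main

main :: proc() {
	m: matrix[2, 2]f32
	i := 2 // runtime value, so the access compiles but trips the bounds check
	_ = m[i, 0]
	// sketch: main.odin(6:6) Matrix indices [2, 0] are out of bounds, range [0..<2, 0..<2]
}
```
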
+ 9 - 13
core/runtime/internal.odin

@@ -2,15 +2,15 @@ package runtime
 
 import "core:intrinsics"
 
-bswap_16 :: proc "none" (x: u16) -> u16 {
+bswap_16 :: proc "contextless" (x: u16) -> u16 {
 	return x>>8 | x<<8
 }
 
-bswap_32 :: proc "none" (x: u32) -> u32 {
+bswap_32 :: proc "contextless" (x: u32) -> u32 {
 	return x>>24 | (x>>8)&0xff00 | (x<<8)&0xff0000 | x<<24
 }
 
-bswap_64 :: proc "none" (x: u64) -> u64 {
+bswap_64 :: proc "contextless" (x: u64) -> u64 {
 	z := x
 	z = (z & 0x00000000ffffffff) << 32 | (z & 0xffffffff00000000) >> 32
 	z = (z & 0x0000ffff0000ffff) << 16 | (z & 0xffff0000ffff0000) >> 16
@@ -18,7 +18,7 @@ bswap_64 :: proc "none" (x: u64) -> u64 {
 	return z
 }
 
-bswap_128 :: proc "none" (x: u128) -> u128 {
+bswap_128 :: proc "contextless" (x: u128) -> u128 {
 	z := transmute([4]u32)x
 	z[0] = bswap_32(z[3])
 	z[1] = bswap_32(z[2])
@@ -27,33 +27,27 @@ bswap_128 :: proc "none" (x: u128) -> u128 {
 	return transmute(u128)z
 }
 
-bswap_f16 :: proc "none" (f: f16) -> f16 {
+bswap_f16 :: proc "contextless" (f: f16) -> f16 {
 	x := transmute(u16)f
 	z := bswap_16(x)
 	return transmute(f16)z
 
 }
 
-bswap_f32 :: proc "none" (f: f32) -> f32 {
+bswap_f32 :: proc "contextless" (f: f32) -> f32 {
 	x := transmute(u32)f
 	z := bswap_32(x)
 	return transmute(f32)z
 
 }
 
-bswap_f64 :: proc "none" (f: f64) -> f64 {
+bswap_f64 :: proc "contextless" (f: f64) -> f64 {
 	x := transmute(u64)f
 	z := bswap_64(x)
 	return transmute(f64)z
 }
 
 
-
-ptr_offset :: #force_inline proc "contextless" (ptr: $P/^$T, n: int) -> P {
-	new := int(uintptr(ptr)) + size_of(T)*n
-	return P(uintptr(new))
-}
-
 is_power_of_two_int :: #force_inline proc(x: int) -> bool {
 	if x <= 0 {
 		return false
@@ -828,12 +822,14 @@ floattidf_unsigned :: proc "c" (a: u128) -> f64 {
 
 @(link_name="__fixunsdfti")
 fixunsdfti :: #force_no_inline proc "c" (a: f64) -> u128 {
+	// TODO(bill): implement `fixunsdfti` correctly
 	x := u64(a)
 	return u128(x)
 }
 
 @(link_name="__fixunsdfdi")
 fixunsdfdi :: #force_no_inline proc "c" (a: f64) -> i128 {
+	// TODO(bill): implement `fixunsdfdi` correctly
 	x := i64(a)
 	return i128(x)
 }

+ 8 - 0
core/runtime/print.odin

@@ -370,5 +370,13 @@ print_type :: proc "contextless" (ti: ^Type_Info) {
 		print_type(info.base_integer)
 		print_string(") ")
 		print_type(info.slice)
+		
+	case Type_Info_Matrix:
+		print_string("matrix[")
+		print_u64(u64(info.row_count))
+		print_string(", ")
+		print_u64(u64(info.column_count))
+		print_string("]")
+		print_type(info.elem)
 	}
 }

+ 1 - 1
core/sys/windows/types.odin

@@ -916,7 +916,7 @@ USER_INFO_1 :: struct #packed {
 	flags: USER_INFO_FLAGS,
 	script_path: LPWSTR,
 }
-#assert(size_of(USER_INFO_1) == 50)
+// #assert(size_of(USER_INFO_1) == 50)
 
 LOCALGROUP_MEMBERS_INFO_0 :: struct #packed {
 	sid: ^SID,

+ 188 - 6
src/check_builtin.cpp

@@ -25,6 +25,7 @@ BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_boolean_end -
 	is_type_simple_compare,
 	is_type_dereferenceable,
 	is_type_valid_for_keys,
+	is_type_valid_for_matrix_elems,
 
 	is_type_named,
 	is_type_pointer,
@@ -40,6 +41,7 @@ BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_boolean_end -
 	is_type_proc,
 	is_type_bit_set,
 	is_type_simd_vector,
+	is_type_matrix,
 
 	is_type_polymorphic_record_specialized,
 	is_type_polymorphic_record_unspecialized,
@@ -1266,7 +1268,10 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 	case BuiltinProc_conj: {
 		// conj :: proc(x: type) -> type
 		Operand *x = operand;
-		if (is_type_complex(x->type)) {
+		Type *t = x->type;
+		Type *elem = core_array_type(t);
+		
+		if (is_type_complex(t)) {
 			if (x->mode == Addressing_Constant) {
 				ExactValue v = exact_value_to_complex(x->value);
 				f64 r = v.value_complex->real;
@@ -1276,7 +1281,7 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 			} else {
 				x->mode = Addressing_Value;
 			}
-		} else if (is_type_quaternion(x->type)) {
+		} else if (is_type_quaternion(t)) {
 			if (x->mode == Addressing_Constant) {
 				ExactValue v = exact_value_to_quaternion(x->value);
 				f64 r = +v.value_quaternion->real;
@@ -1288,7 +1293,11 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 			} else {
 				x->mode = Addressing_Value;
 			}
-		} else {
+		} else if (is_type_array_like(t) && (is_type_complex(elem) || is_type_quaternion(elem))) {
+			x->mode = Addressing_Value;
+		} else if (is_type_matrix(t) && (is_type_complex(elem) || is_type_quaternion(elem))) {
+			x->mode = Addressing_Value;
+		} else {
 			gbString s = type_to_string(x->type);
 			error(call, "Expected a complex or quaternion, got '%s'", s);
 			gb_string_free(s);
@@ -1966,13 +1975,13 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 			return false;
 		}
 		if (!is_operand_value(x)) {
-			error(call, "'soa_unzip' expects an #soa slice");
+			error(call, "'%.*s' expects an #soa slice", LIT(builtin_name));
 			return false;
 		}
 		Type *t = base_type(x.type);
 		if (!is_type_soa_struct(t) || t->Struct.soa_kind != StructSoa_Slice) {
 			gbString s = type_to_string(x.type);
-			error(call, "'soa_unzip' expects an #soa slice, got %s", s);
+			error(call, "'%.*s' expects an #soa slice, got %s", LIT(builtin_name), s);
 			gb_string_free(s);
 			return false;
 		}
@@ -1987,7 +1996,180 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 		operand->mode = Addressing_Value;
 		break;
 	}
-
+	
+	case BuiltinProc_transpose: {
+		Operand x = {};
+		check_expr(c, &x, ce->args[0]);
+		if (x.mode == Addressing_Invalid) {
+			return false;
+		}
+		if (!is_operand_value(x)) {
+			error(call, "'%.*s' expects a matrix or array", LIT(builtin_name));
+			return false;
+		}
+		Type *t = base_type(x.type);
+		if (!is_type_matrix(t) && !is_type_array(t)) {
+			gbString s = type_to_string(x.type);
+			error(call, "'%.*s' expects a matrix or array, got %s", LIT(builtin_name), s);
+			gb_string_free(s);
+			return false;
+		}
+		
+		operand->mode = Addressing_Value;
+		if (is_type_array(t)) {
+			// Do nothing
+			operand->type = x.type;			
+		} else {
+			GB_ASSERT(t->kind == Type_Matrix);
+			operand->type = alloc_type_matrix(t->Matrix.elem, t->Matrix.column_count, t->Matrix.row_count);
+		}
+		operand->type = check_matrix_type_hint(operand->type, type_hint);
+		break;
+	}
+	
+	case BuiltinProc_outer_product: {
+		Operand x = {};
+		Operand y = {};
+		check_expr(c, &x, ce->args[0]);
+		if (x.mode == Addressing_Invalid) {
+			return false;
+		}
+		check_expr(c, &y, ce->args[1]);
+		if (y.mode == Addressing_Invalid) {
+			return false;
+		}
+		if (!is_operand_value(x) || !is_operand_value(y)) {
+			error(call, "'%.*s' expects only arrays", LIT(builtin_name));
+			return false;
+		}
+		
+		if (!is_type_array(x.type) || !is_type_array(y.type)) {
+			gbString s1 = type_to_string(x.type);
+			gbString s2 = type_to_string(y.type);
+			error(call, "'%.*s' expects only arrays, got %s and %s", LIT(builtin_name), s1, s2);
+			gb_string_free(s2);
+			gb_string_free(s1);
+			return false;
+		}
+		
+		Type *xt = base_type(x.type);
+		Type *yt = base_type(y.type);
+		GB_ASSERT(xt->kind == Type_Array);
+		GB_ASSERT(yt->kind == Type_Array);
+		if (!are_types_identical(xt->Array.elem, yt->Array.elem)) {
+			gbString s1 = type_to_string(xt->Array.elem);
+			gbString s2 = type_to_string(yt->Array.elem);
+			error(call, "'%.*s' mismatched element types, got %s vs %s", LIT(builtin_name), s1, s2);
+			gb_string_free(s2);
+			gb_string_free(s1);
+			return false;
+		}
+		
+		Type *elem = xt->Array.elem;
+		
+		if (!is_type_valid_for_matrix_elems(elem)) {
+			gbString s = type_to_string(elem);
+			error(call, "Matrix element types are limited to integers, floats, and complex numbers, got %s", s);
+			gb_string_free(s);
+		}
+		
+		if (xt->Array.count == 0 || yt->Array.count == 0) {
+			gbString s1 = type_to_string(x.type);
+			gbString s2 = type_to_string(y.type);
+			error(call, "'%.*s' expects only arrays of non-zero length, got %s and %s", LIT(builtin_name), s1, s2);
+			gb_string_free(s2);
+			gb_string_free(s1);
+			return false;
+		}
+		
+		i64 max_count = xt->Array.count*yt->Array.count;
+		if (max_count > MATRIX_ELEMENT_COUNT_MAX) {
+			error(call, "Product of the array lengths exceeds the maximum matrix element count, got %d, expected a maximum of %d", cast(int)max_count, MATRIX_ELEMENT_COUNT_MAX);
+			return false;
+		}
+		
+		operand->mode = Addressing_Value;
+		operand->type = alloc_type_matrix(elem, xt->Array.count, yt->Array.count);	
+		operand->type = check_matrix_type_hint(operand->type, type_hint);
+		break;
+	}
+	
+	case BuiltinProc_hadamard_product: {
+		Operand x = {};
+		Operand y = {};
+		check_expr(c, &x, ce->args[0]);
+		if (x.mode == Addressing_Invalid) {
+			return false;
+		}
+		check_expr(c, &y, ce->args[1]);
+		if (y.mode == Addressing_Invalid) {
+			return false;
+		}
+		if (!is_operand_value(x) || !is_operand_value(y)) {
+			error(call, "'%.*s' expects matrix or array values", LIT(builtin_name));
+			return false;
+		}
+		if (!is_type_matrix(x.type) && !is_type_array(x.type)) {
+			gbString s1 = type_to_string(x.type);
+			gbString s2 = type_to_string(y.type);
+			error(call, "'%.*s' expects matrix or array values, got %s and %s", LIT(builtin_name), s1, s2);
+			gb_string_free(s2);
+			gb_string_free(s1);
+			return false;
+		}
+		
+		if (!are_types_identical(x.type, y.type)) {
+			gbString s1 = type_to_string(x.type);
+			gbString s2 = type_to_string(y.type);
+			error(call, "'%.*s' expects values of the same type, got %s and %s", LIT(builtin_name), s1, s2);
+			gb_string_free(s2);
+			gb_string_free(s1);
+			return false;
+		}
+		
+		Type *elem = core_array_type(x.type);
+		if (!is_type_valid_for_matrix_elems(elem)) {
+			gbString s = type_to_string(elem);
+			error(call, "'%.*s' expects element types limited to integers, floats, and complex numbers, got %s", LIT(builtin_name), s);
+			gb_string_free(s);
+		}
+		
+		operand->mode = Addressing_Value;
+		operand->type = x.type;
+		operand->type = check_matrix_type_hint(operand->type, type_hint);
+		break;
+	}
+	
+	case BuiltinProc_matrix_flatten: {
+		Operand x = {};
+		check_expr(c, &x, ce->args[0]);
+		if (x.mode == Addressing_Invalid) {
+			return false;
+		}
+		if (!is_operand_value(x)) {
+			error(call, "'%.*s' expects a matrix or array", LIT(builtin_name));
+			return false;
+		}
+		Type *t = base_type(x.type);
+		if (!is_type_matrix(t) && !is_type_array(t)) {
+			gbString s = type_to_string(x.type);
+			error(call, "'%.*s' expects a matrix or array, got %s", LIT(builtin_name), s);
+			gb_string_free(s);
+			return false;
+		}
+		
+		operand->mode = Addressing_Value;
+		if (is_type_array(t)) {
+			// Do nothing
+			operand->type = x.type;			
+		} else {
+			GB_ASSERT(t->kind == Type_Matrix);
+			operand->type = alloc_type_array(t->Matrix.elem, t->Matrix.row_count*t->Matrix.column_count);
+		}
+		operand->type = check_matrix_type_hint(operand->type, type_hint);
+		break;
+	}
+	
 	case BuiltinProc_simd_vector: {
 		Operand x = {};
 		Operand y = {};

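A usage sketch of the four new builtin procedures checked above; the commented result types follow directly from the checker logic:

```odin
package main

import "core:fmt"

main :: proc() {
	a := [3]f32{1, 2, 3}
	b := [2]f32{4, 5}

	m := outer_product(a, b)    // matrix[3, 2]f32; element (i, j) is a[i]*b[j]
	t := transpose(m)           // matrix[2, 3]f32
	h := hadamard_product(m, m) // element-wise product, same type as m
	v := matrix_flatten(m)      // [6]f32: row_count*column_count elements
	fmt.println(m, t, h, v)
}
```
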
+ 357 - 28
src/check_expr.cpp

@@ -657,6 +657,14 @@ i64 check_distance_between_types(CheckerContext *c, Operand *operand, Type *type
 			return distance + 6;
 		}
 	}
+	
+	if (is_type_matrix(dst)) {
+		Type *elem = base_array_type(dst);
+		i64 distance = check_distance_between_types(c, operand, elem);
+		if (distance >= 0) {
+			return distance + 7;
+		}
+	}
 
 	if (is_type_any(dst)) {
 		if (!is_type_polymorphic(src)) {
@@ -897,6 +905,34 @@ void check_assignment(CheckerContext *c, Operand *operand, Type *type, String co
 	}
 }
 
+bool polymorphic_assign_index(Type **gt_, i64 *dst_count, i64 source_count) {
+	Type *gt = *gt_;
+	
+	GB_ASSERT(gt->kind == Type_Generic);
+	Entity *e = scope_lookup(gt->Generic.scope, gt->Generic.name);
+	GB_ASSERT(e != nullptr);
+	if (e->kind == Entity_TypeName) {
+		*gt_ = nullptr;
+		*dst_count = source_count;
+
+		e->kind = Entity_Constant;
+		e->Constant.value = exact_value_i64(source_count);
+		e->type = t_untyped_integer;
+		return true;
+	} else if (e->kind == Entity_Constant) {
+		*gt_ = nullptr;
+		if (e->Constant.value.kind != ExactValue_Integer) {
+			return false;
+		}
+		i64 count = big_int_to_i64(&e->Constant.value.value_integer);
+		if (count != source_count) {
+			return false;
+		}
+		*dst_count = source_count;
+		return true;
+	}
+	return false;
+}
 
 bool is_polymorphic_type_assignable(CheckerContext *c, Type *poly, Type *source, bool compound, bool modify_type) {
 	Operand o = {Addressing_Value};
@@ -951,28 +987,7 @@ bool is_polymorphic_type_assignable(CheckerContext *c, Type *poly, Type *source,
 	case Type_Array:
 		if (source->kind == Type_Array) {
 			if (poly->Array.generic_count != nullptr) {
-				Type *gt = poly->Array.generic_count;
-				GB_ASSERT(gt->kind == Type_Generic);
-				Entity *e = scope_lookup(gt->Generic.scope, gt->Generic.name);
-				GB_ASSERT(e != nullptr);
-				if (e->kind == Entity_TypeName) {
-					poly->Array.generic_count = nullptr;
-					poly->Array.count = source->Array.count;
-
-					e->kind = Entity_Constant;
-					e->Constant.value = exact_value_i64(source->Array.count);
-					e->type = t_untyped_integer;
-				} else if (e->kind == Entity_Constant) {
-					poly->Array.generic_count = nullptr;
-					if (e->Constant.value.kind != ExactValue_Integer) {
-						return false;
-					}
-					i64 count = big_int_to_i64(&e->Constant.value.value_integer);
-					if (count != source->Array.count) {
-						return false;
-					}
-					poly->Array.count = source->Array.count;
-				} else {
+				if (!polymorphic_assign_index(&poly->Array.generic_count, &poly->Array.count, source->Array.count)) {
 					return false;
 				}
 			}
@@ -1165,6 +1180,27 @@ bool is_polymorphic_type_assignable(CheckerContext *c, Type *poly, Type *source,
 			return key || value;
 		}
 		return false;
+		
+	case Type_Matrix:
+		if (source->kind == Type_Matrix) {
+			if (poly->Matrix.generic_row_count != nullptr) {
+				poly->Matrix.stride_in_bytes = 0;
+				if (!polymorphic_assign_index(&poly->Matrix.generic_row_count, &poly->Matrix.row_count, source->Matrix.row_count)) {
+					return false;
+				}
+			}
+			if (poly->Matrix.generic_column_count != nullptr) {
+				poly->Matrix.stride_in_bytes = 0;
+				if (!polymorphic_assign_index(&poly->Matrix.generic_column_count, &poly->Matrix.column_count, source->Matrix.column_count)) {
+					return false;
+				}
+			}
+			if (poly->Matrix.row_count == source->Matrix.row_count &&
+			    poly->Matrix.column_count == source->Matrix.column_count) {
+				return is_polymorphic_type_assignable(c, poly->Matrix.elem, source->Matrix.elem, true, modify_type);
+			}
+		} 
+		return false;
 	}
 	return false;
 }
@@ -1400,8 +1436,9 @@ bool check_unary_op(CheckerContext *c, Operand *o, Token op) {
 }
 
 bool check_binary_op(CheckerContext *c, Operand *o, Token op) {
+	Type *main_type = o->type;
 	// TODO(bill): Handle errors correctly
-	Type *type = base_type(core_array_type(o->type));
+	Type *type = base_type(core_array_type(main_type));
 	Type *ct = core_type(type);
 	switch (op.kind) {
 	case Token_Sub:
@@ -1414,10 +1451,15 @@ bool check_binary_op(CheckerContext *c, Operand *o, Token op) {
 		}
 		break;
 
-	case Token_Mul:
 	case Token_Quo:
-	case Token_MulEq:
 	case Token_QuoEq:
+		if (is_type_matrix(main_type)) {
+			error(op, "Operator '%.*s' is not allowed with matrix types", LIT(op.string));
+			return false;
+		}
+		/*fallthrough*/
+	case Token_Mul:
+	case Token_MulEq:
 	case Token_AddEq:
 		if (is_type_bit_set(type)) {
 			return true;
@@ -1458,6 +1500,10 @@ bool check_binary_op(CheckerContext *c, Operand *o, Token op) {
 	case Token_ModMod:
 	case Token_ModEq:
 	case Token_ModModEq:
+		if (is_type_matrix(main_type)) {
+			error(op, "Operator '%.*s' is not allowed with matrix types", LIT(op.string));
+			return false;
+		}
 		if (!is_type_integer(type)) {
 			error(op, "Operator '%.*s' is only allowed with integers", LIT(op.string));
 			return false;
@@ -2414,6 +2460,26 @@ bool check_is_castable_to(CheckerContext *c, Operand *operand, Type *y) {
 	if (is_type_quaternion(src) && is_type_quaternion(dst)) {
 		return true;
 	}
+	
+	if (is_type_matrix(src) && is_type_matrix(dst)) {
+		GB_ASSERT(src->kind == Type_Matrix);
+		GB_ASSERT(dst->kind == Type_Matrix);
+		if (!are_types_identical(src->Matrix.elem, dst->Matrix.elem)) {
+			return false;
+		}
+		
+		if (src->Matrix.row_count != src->Matrix.column_count) {
+			i64 src_count = src->Matrix.row_count*src->Matrix.column_count;
+			i64 dst_count = dst->Matrix.row_count*dst->Matrix.column_count;
+			return src_count == dst_count;
+		}
+		
+		if (dst->Matrix.row_count != dst->Matrix.column_count) {
+			return false;
+		}
+		
+		return true;
+	}
 
 
 	// Cast between pointers
@@ -2670,6 +2736,127 @@ bool can_use_other_type_as_type_hint(bool use_lhs_as_type_hint, Type *other_type
 	return false;
 }
 
+Type *check_matrix_type_hint(Type *matrix, Type *type_hint) {
+	Type *xt = base_type(matrix);
+	if (type_hint != nullptr) {
+		Type *th = base_type(type_hint);
+		if (are_types_identical(th, xt)) {
+			return type_hint;
+		} else if (xt->kind == Type_Matrix && th->kind == Type_Array) {
+			if (!are_types_identical(xt->Matrix.elem, th->Array.elem)) {
+				// ignore
+			} else if (xt->Matrix.row_count == 1 && xt->Matrix.column_count == th->Array.count) {
+				return type_hint;
+			} else if (xt->Matrix.column_count == 1 && xt->Matrix.row_count == th->Array.count) {
+				return type_hint;
+			}
+		}
+	}
+	return matrix;
+}
+
+
+void check_binary_matrix(CheckerContext *c, Token const &op, Operand *x, Operand *y, Type *type_hint, bool use_lhs_as_type_hint) {
+	if (!check_binary_op(c, x, op)) {
+		x->mode = Addressing_Invalid;
+		return;
+	}
+		
+	Type *xt = base_type(x->type);
+	Type *yt = base_type(y->type);
+	
+	if (is_type_matrix(x->type)) {
+		GB_ASSERT(xt->kind == Type_Matrix);
+		if (op.kind == Token_Mul) {
+			if (yt->kind == Type_Matrix) {
+				if (!are_types_identical(xt->Matrix.elem, yt->Matrix.elem)) {
+					goto matrix_error;
+				}
+				
+				if (xt->Matrix.column_count != yt->Matrix.row_count) {
+					goto matrix_error;
+				}
+				x->mode = Addressing_Value;
+				x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, yt->Matrix.column_count);
+				goto matrix_success;
+			} else if (yt->kind == Type_Array) {
+				if (!are_types_identical(xt->Matrix.elem, yt->Array.elem)) {
+					goto matrix_error;
+				}
+				
+				if (xt->Matrix.column_count != yt->Array.count) {
+					goto matrix_error;
+				}
+				
+				// Treat arrays as column vectors
+				x->mode = Addressing_Value;
+				if (type_hint == nullptr && xt->Matrix.row_count == yt->Array.count) {
+					x->type = y->type;
+				} else {
+					x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, 1);
+				}
+				goto matrix_success;
+			}
+		}
+		if (!are_types_identical(xt, yt)) {
+			goto matrix_error;
+		}
+		x->mode = Addressing_Value;
+		x->type = xt;
+		goto matrix_success;
+	} else {
+		GB_ASSERT(is_type_matrix(yt));
+		GB_ASSERT(!is_type_matrix(xt));
+		
+		if (op.kind == Token_Mul) {
+			// NOTE(bill): no need to handle the matrix case here since it should be handled above
+			if (xt->kind == Type_Array) {
+				if (!are_types_identical(yt->Matrix.elem, xt->Array.elem)) {
+					goto matrix_error;
+				}
+				
+				if (xt->Array.count != yt->Matrix.row_count) {
+					goto matrix_error;
+				}
+				
+				// Treat arrays as row vectors
+				x->mode = Addressing_Value;
+				if (type_hint == nullptr && yt->Matrix.column_count == xt->Array.count) {
+					// keep x->type: the operand is already the correct array type
+				} else {
+					x->type = alloc_type_matrix(yt->Matrix.elem, 1, yt->Matrix.column_count);
+				}
+				goto matrix_success;
+			}
+		}
+		if (!are_types_identical(xt, yt)) {
+			goto matrix_error;
+		}
+		x->mode = Addressing_Value;
+		x->type = xt;
+		goto matrix_success;
+	}
+
+matrix_success:
+	x->type = check_matrix_type_hint(x->type, type_hint);
+	
+	return;
+	
+	
+matrix_error:
+	gbString xts = type_to_string(x->type);
+	gbString yts = type_to_string(y->type);
+	gbString expr_str = expr_to_string(x->expr);
+	error(op, "Mismatched types in binary matrix expression '%s' for operator '%.*s' : '%s' vs '%s'", expr_str, LIT(op.string), xts, yts);
+	gb_string_free(expr_str);
+	gb_string_free(yts);
+	gb_string_free(xts);
+	x->type = t_invalid;
+	x->mode = Addressing_Invalid;
+	return;
+	
+}
+
 
 void check_binary_expr(CheckerContext *c, Operand *x, Ast *node, Type *type_hint, bool use_lhs_as_type_hint=false) {
 	GB_ASSERT(node->kind == Ast_BinaryExpr);
@@ -2874,6 +3061,13 @@ void check_binary_expr(CheckerContext *c, Operand *x, Ast *node, Type *type_hint
 		x->type = y->type;
 		return;
 	}
+	if (is_type_matrix(x->type) || is_type_matrix(y->type)) {
+		check_binary_matrix(c, op, x, y, type_hint, use_lhs_as_type_hint);
+		x->expr = node;
+		return;
+	}
+
+	
 	if (!are_types_identical(x->type, y->type)) {
 		if (x->type != t_invalid &&
 		    y->type != t_invalid) {
@@ -3262,6 +3456,29 @@ void convert_to_typed(CheckerContext *c, Operand *operand, Type *target_type) {
 
 		break;
 	}
+	
+	case Type_Matrix: {
+		Type *elem = base_array_type(t);
+		if (check_is_assignable_to(c, operand, elem)) {
+			if (t->Matrix.row_count != t->Matrix.column_count) {
+				operand->mode = Addressing_Invalid;
+				begin_error_block();
+				defer (end_error_block());
+				
+				convert_untyped_error(c, operand, target_type);
+				error_line("\tNote: Only square matrix types can be initialized with a scalar value\n");
+				return;
+			} else {
+				operand->mode = Addressing_Value;
+			}
+		} else {
+			operand->mode = Addressing_Invalid;
+			convert_untyped_error(c, operand, target_type);
+			return;
+		}
+		break;
+	}
+		
 
 	case Type_Union:
 		if (!is_operand_nil(*operand) && !is_operand_undef(*operand)) {
@@ -6219,6 +6436,16 @@ bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 *max_count,
 		}
 		o->type = t->EnumeratedArray.elem;
 		return true;
+		
+	case Type_Matrix:
+		*max_count = t->Matrix.column_count;
+		if (indirection) {
+			o->mode = Addressing_Variable;
+		} else if (o->mode != Addressing_Variable) {
+			o->mode = Addressing_Value;
+		}
+		o->type = alloc_type_array(t->Matrix.elem, t->Matrix.row_count);
+		return true;
 
 	case Type_Slice:
 		o->type = t->Slice.elem;
@@ -6517,6 +6744,72 @@ void check_promote_optional_ok(CheckerContext *c, Operand *x, Type **val_type_,
 }
 
 
+void check_matrix_index_expr(CheckerContext *c, Operand *o, Ast *node, Type *type_hint) {
+	ast_node(ie, MatrixIndexExpr, node);
+	
+	check_expr(c, o, ie->expr);
+	node->viral_state_flags |= ie->expr->viral_state_flags;
+	if (o->mode == Addressing_Invalid) {
+		o->expr = node;
+		return;
+	}
+	
+	Type *t = base_type(type_deref(o->type));
+	bool is_ptr = is_type_pointer(o->type);
+	bool is_const = o->mode == Addressing_Constant;
+	
+	if (t->kind != Type_Matrix) {
+		gbString str = expr_to_string(o->expr);
+		gbString type_str = type_to_string(o->type);
+		defer (gb_string_free(str));
+		defer (gb_string_free(type_str));
+		if (is_const) {
+			error(o->expr, "Cannot use matrix indexing on constant '%s' of type '%s'", str, type_str);
+		} else {
+			error(o->expr, "Cannot use matrix indexing on '%s' of type '%s'", str, type_str);
+		}
+		o->mode = Addressing_Invalid;
+		o->expr = node;
+		return;
+	}
+	o->type = t->Matrix.elem;
+	if (is_ptr) {
+		o->mode = Addressing_Variable;
+	} else if (o->mode != Addressing_Variable) {
+		o->mode = Addressing_Value;
+	}
+	
+	if (ie->row_index == nullptr) {
+		gbString str = expr_to_string(o->expr);
+		error(o->expr, "Missing row index for '%s'", str);
+		gb_string_free(str);
+		o->mode = Addressing_Invalid;
+		o->expr = node;
+		return;
+	}
+	if (ie->column_index == nullptr) {
+		gbString str = expr_to_string(o->expr);
+		error(o->expr, "Missing column index for '%s'", str);
+		gb_string_free(str);
+		o->mode = Addressing_Invalid;
+		o->expr = node;
+		return;
+	}
+	
+	i64 row_count = t->Matrix.row_count;
+	i64 column_count = t->Matrix.column_count;
+	
+	i64 row_index = 0;
+	i64 column_index = 0;
+	bool row_ok = check_index_value(c, t, false, ie->row_index, row_count, &row_index, nullptr);
+	bool column_ok = check_index_value(c, t, false, ie->column_index, column_count, &column_index, nullptr);
+	
+	
+	gb_unused(row_ok);
+	gb_unused(column_ok);
+}
+
+
 ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type *type_hint) {
 	u32 prev_state_flags = c->state_flags;
 	defer (c->state_flags = prev_state_flags);
@@ -7150,6 +7443,7 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type
 		case Type_Array:
 		case Type_DynamicArray:
 		case Type_SimdVector:
+		case Type_Matrix:
 		{
 			Type *elem_type = nullptr;
 			String context_name = {};
@@ -7176,6 +7470,10 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type
 				elem_type = t->SimdVector.elem;
 				context_name = str_lit("simd vector literal");
 				max_type_count = t->SimdVector.count;
+			} else if (t->kind == Type_Matrix) {
+				elem_type = t->Matrix.elem;
+				context_name = str_lit("matrix literal");
+				max_type_count = t->Matrix.row_count*t->Matrix.column_count;
 			} else {
 				GB_PANIC("unreachable");
 			}
@@ -8214,6 +8512,8 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type
 				// Okay
 			} else if (is_type_relative_slice(t)) {
 				// Okay
+			} else if (is_type_matrix(t)) {
+				// Okay
 			} else {
 				valid = false;
 			}
@@ -8278,10 +8578,14 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type
 				}
 			}
 		}
+		
+		if (type_hint != nullptr && is_type_matrix(t)) {
+			// TODO(bill): allow matrix columns to be assignable to other types which are the same internally
+			// if a type hint exists
+		}
+		
 	case_end;
 
-
-
 	case_ast_node(se, SliceExpr, node);
 		check_expr(c, o, se->expr);
 		node->viral_state_flags |= se->expr->viral_state_flags;
@@ -8454,7 +8758,12 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type
 		}
 
 	case_end;
-
+	
+	case_ast_node(mie, MatrixIndexExpr, node);
+		check_matrix_index_expr(c, o, node, type_hint);
+		o->expr = node;
+		return Expr_Expr;
+	case_end;
 
 	case_ast_node(ce, CallExpr, node);
 		return check_call_expr(c, o, node, ce->proc, ce->args, ce->inlining, type_hint);
@@ -8561,6 +8870,7 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type
 	case Ast_EnumType:
 	case Ast_MapType:
 	case Ast_BitSetType:
+	case Ast_MatrixType:
 		o->mode = Addressing_Type;
 		o->type = check_type(c, node);
 		break;
@@ -8964,6 +9274,15 @@ gbString write_expr_to_string(gbString str, Ast *node, bool shorthand) {
 		str = gb_string_append_rune(str, ']');
 	case_end;
 
+	case_ast_node(mie, MatrixIndexExpr, node);
+		str = write_expr_to_string(str, mie->expr, shorthand);
+		str = gb_string_append_rune(str, '[');
+		str = write_expr_to_string(str, mie->row_index, shorthand);
+		str = gb_string_appendc(str, ", ");
+		str = write_expr_to_string(str, mie->column_index, shorthand);
+		str = gb_string_append_rune(str, ']');
+	case_end;
+	
 	case_ast_node(e, Ellipsis, node);
 		str = gb_string_appendc(str, "..");
 		str = write_expr_to_string(str, e->expr, shorthand);
@@ -9035,6 +9354,16 @@ gbString write_expr_to_string(gbString str, Ast *node, bool shorthand) {
 		str = gb_string_append_rune(str, ']');
 		str = write_expr_to_string(str, mt->value, shorthand);
 	case_end;
+	
+	case_ast_node(mt, MatrixType, node);
+		str = gb_string_appendc(str, "matrix[");
+		str = write_expr_to_string(str, mt->row_count, shorthand);
+		str = gb_string_appendc(str, ", ");
+		str = write_expr_to_string(str, mt->column_count, shorthand);
+		str = gb_string_append_rune(str, ']');
+		str = write_expr_to_string(str, mt->elem, shorthand);
+	case_end;
+
 
 	case_ast_node(f, Field, node);
 		if (f->flags&FieldFlag_using) {

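A sketch of the expression semantics introduced here: an array acts as a column vector on the right of `*`, `m[row, col]` is the new `MatrixIndexExpr`, and a single index yields a column per `check_set_index_data`. Result types in the comments follow the checker; with a `[2]f32` type hint, `check_matrix_type_hint` would collapse the column result to an array.

```odin
package main

import "core:fmt"

main :: proc() {
	m := matrix[2, 3]f32{
		1, 2, 3,
		4, 5, 6,
	}
	v := [3]f32{1, 0, 1}

	fmt.println(m * v)   // 2x3 * len-3 column vector -> matrix[2, 1]f32 (no type hint here)
	fmt.println(m[1, 2]) // MatrixIndexExpr: row 1, column 2 -> 6
	fmt.println(m[0])    // single index: column 0 as a [2]f32
}
```
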
+ 65 - 3
src/check_type.cpp

@@ -997,8 +997,8 @@ void check_bit_set_type(CheckerContext *c, Type *type, Type *named_type, Ast *no
 
 				GB_ASSERT(lower <= upper);
 
-				i64 bits = MAX_BITS;
-				if (bs->underlying != nullptr) {
+				i64 bits = MAX_BITS;
+				if (bs->underlying != nullptr) {
 					Type *u = check_type(c, bs->underlying);
 					if (!is_type_integer(u)) {
 						gbString ts = type_to_string(u);
@@ -1154,7 +1154,11 @@ Type *determine_type_from_polymorphic(CheckerContext *ctx, Type *poly_type, Oper
 	bool show_error = modify_type && !ctx->hide_polymorphic_errors;
 	if (!is_operand_value(operand)) {
 		if (show_error) {
-			error(operand.expr, "Cannot determine polymorphic type from parameter");
+			gbString pts = type_to_string(poly_type);
+			gbString ots = type_to_string(operand.type);
+			defer (gb_string_free(pts));
+			defer (gb_string_free(ots));
+			error(operand.expr, "Cannot determine polymorphic type from parameter: '%s' to '%s'", ots, pts);
 		}
 		return t_invalid;
 	}
@@ -2200,6 +2204,57 @@ void check_map_type(CheckerContext *ctx, Type *type, Ast *node) {
 	// error(node, "'map' types are not yet implemented");
 }
 
+void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) {
+	ast_node(mt, MatrixType, node);
+	
+	Operand row = {};
+	Operand column = {};
+	
+	i64 row_count = check_array_count(ctx, &row, mt->row_count);
+	i64 column_count = check_array_count(ctx, &column, mt->column_count);
+	
+	Type *elem = check_type_expr(ctx, mt->elem, nullptr);
+	
+	Type *generic_row = nullptr;
+	Type *generic_column = nullptr;
+	
+	if (row.mode == Addressing_Type && row.type->kind == Type_Generic) {
+		generic_row = row.type;
+	}
+	
+	if (column.mode == Addressing_Type && column.type->kind == Type_Generic) {
+		generic_column = column.type;
+	}
+	
+	if (row_count < MATRIX_ELEMENT_COUNT_MIN && generic_row == nullptr) {
+		gbString s = expr_to_string(row.expr);
+		error(row.expr, "Invalid matrix row count, expected %d+ rows, got %s", MATRIX_ELEMENT_COUNT_MIN, s);
+		gb_string_free(s);
+	}
+	
+	if (column_count < MATRIX_ELEMENT_COUNT_MIN && generic_column == nullptr) {
+		gbString s = expr_to_string(column.expr);
+		error(column.expr, "Invalid matrix column count, expected %d+ columns, got %s", MATRIX_ELEMENT_COUNT_MIN, s);
+		gb_string_free(s);
+	}
+	
+	if (row_count*column_count > MATRIX_ELEMENT_COUNT_MAX) {
+		i64 element_count = row_count*column_count;
+		error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MATRIX_ELEMENT_COUNT_MAX, cast(long long)element_count);
+	}
+	
+	if (!is_type_valid_for_matrix_elems(elem)) {
+		gbString s = type_to_string(elem);
+		error(column.expr, "Matrix element types are limited to integers, floats, and complex numbers, got %s", s);
+		gb_string_free(s);
+	}
+	
+	*type = alloc_type_matrix(elem, row_count, column_count, generic_row, generic_column);
+	
+	return;
+}
+
+
 
 Type *make_soa_struct_internal(CheckerContext *ctx, Ast *array_typ_expr, Ast *elem_expr, Type *elem, i64 count, Type *generic_type, StructSoaKind soa_kind) {
 	Type *bt_elem = base_type(elem);
@@ -2785,6 +2840,13 @@ bool check_type_internal(CheckerContext *ctx, Ast *e, Type **type, Type *named_t
 			return true;
 		}
 	case_end;
+	
+	
+	case_ast_node(mt, MatrixType, e);
+		check_matrix_type(ctx, type, e);
+		set_base_type(named_type, *type);
+		return true;
+	case_end;
 	}
 
 	*type = t_invalid;

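`check_matrix_type` enforces element-type and size limits (`MATRIX_ELEMENT_COUNT_MIN`/`MAX` are compiler constants) and allows generic dimensions. A sketch, assuming polymorphic dimension syntax mirrors fixed-size arrays:

```odin
package example

// Valid: integer, float, and complex element types within the size limits.
Mat2x3 :: matrix[2, 3]f32

// Generic row/column counts are inferred via polymorphic_assign_index;
// the multiplication rule yields matrix[R, C] from matrix[R, K] * matrix[K, C].
mat_mul :: proc(a: matrix[$R, $K]$T, b: matrix[K, $C]T) -> matrix[R, C]T {
	return a * b
}
```
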
+ 11 - 0
src/checker.cpp

@@ -1668,6 +1668,10 @@ void add_type_info_type_internal(CheckerContext *c, Type *t) {
 		add_type_info_type_internal(c, bt->RelativeSlice.slice_type);
 		add_type_info_type_internal(c, bt->RelativeSlice.base_integer);
 		break;
+		
+	case Type_Matrix:
+		add_type_info_type_internal(c, bt->Matrix.elem);
+		break;
 
 	default:
 		GB_PANIC("Unhandled type: %*.s %d", LIT(type_strings[bt->kind]), bt->kind);
@@ -1879,6 +1883,10 @@ void add_min_dep_type_info(Checker *c, Type *t) {
 		add_min_dep_type_info(c, bt->RelativeSlice.slice_type);
 		add_min_dep_type_info(c, bt->RelativeSlice.base_integer);
 		break;
+		
+	case Type_Matrix:
+		add_min_dep_type_info(c, bt->Matrix.elem);
+		break;
 
 	default:
 		GB_PANIC("Unhandled type: %*.s", LIT(type_strings[bt->kind]));
@@ -2023,6 +2031,7 @@ void generate_minimum_dependency_set(Checker *c, Entity *start) {
 		String bounds_check_entities[] = {
 			// Bounds checking related procedures
 			str_lit("bounds_check_error"),
+			str_lit("matrix_bounds_check_error"),
 			str_lit("slice_expr_error_hi"),
 			str_lit("slice_expr_error_lo_hi"),
 			str_lit("multi_pointer_slice_expr_error"),
@@ -2467,6 +2476,7 @@ void init_core_type_info(Checker *c) {
 	t_type_info_simd_vector      = find_core_type(c, str_lit("Type_Info_Simd_Vector"));
 	t_type_info_relative_pointer = find_core_type(c, str_lit("Type_Info_Relative_Pointer"));
 	t_type_info_relative_slice   = find_core_type(c, str_lit("Type_Info_Relative_Slice"));
+	t_type_info_matrix           = find_core_type(c, str_lit("Type_Info_Matrix"));
 
 	t_type_info_named_ptr            = alloc_type_pointer(t_type_info_named);
 	t_type_info_integer_ptr          = alloc_type_pointer(t_type_info_integer);
@@ -2494,6 +2504,7 @@ void init_core_type_info(Checker *c) {
 	t_type_info_simd_vector_ptr      = alloc_type_pointer(t_type_info_simd_vector);
 	t_type_info_relative_pointer_ptr = alloc_type_pointer(t_type_info_relative_pointer);
 	t_type_info_relative_slice_ptr   = alloc_type_pointer(t_type_info_relative_slice);
+	t_type_info_matrix_ptr           = alloc_type_pointer(t_type_info_matrix);
 }
 
 void init_mem_allocator(Checker *c) {

+ 14 - 0
src/checker_builtin_procs.hpp

@@ -34,6 +34,11 @@ enum BuiltinProcId {
 
 	BuiltinProc_soa_zip,
 	BuiltinProc_soa_unzip,
+	
+	BuiltinProc_transpose,
+	BuiltinProc_outer_product,
+	BuiltinProc_hadamard_product,
+	BuiltinProc_matrix_flatten,
 
 	BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures
 
@@ -194,6 +199,7 @@ BuiltinProc__type_simple_boolean_begin,
 	BuiltinProc_type_is_simple_compare, // easily compared using memcmp
 	BuiltinProc_type_is_dereferenceable,
 	BuiltinProc_type_is_valid_map_key,
+	BuiltinProc_type_is_valid_matrix_elements,
 
 	BuiltinProc_type_is_named,
 	BuiltinProc_type_is_pointer,
@@ -210,6 +216,7 @@ BuiltinProc__type_simple_boolean_begin,
 	BuiltinProc_type_is_bit_field_value,
 	BuiltinProc_type_is_bit_set,
 	BuiltinProc_type_is_simd_vector,
+	BuiltinProc_type_is_matrix,
 
 	BuiltinProc_type_is_specialized_polymorphic_record,
 	BuiltinProc_type_is_unspecialized_polymorphic_record,
@@ -277,6 +284,11 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
 
 	{STR_LIT("soa_zip"),          1, true,  Expr_Expr, BuiltinProcPkg_builtin},
 	{STR_LIT("soa_unzip"),        1, false, Expr_Expr, BuiltinProcPkg_builtin},
+	
+	{STR_LIT("transpose"),        1, false, Expr_Expr, BuiltinProcPkg_builtin},
+	{STR_LIT("outer_product"),    2, false, Expr_Expr, BuiltinProcPkg_builtin},
+	{STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin},
+	{STR_LIT("matrix_flatten"),   1, false, Expr_Expr, BuiltinProcPkg_builtin},
 
 	{STR_LIT(""),                 0, true,  Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE
 
@@ -437,6 +449,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
 	{STR_LIT("type_is_simple_compare"),    1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_dereferenceable"),   1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_valid_map_key"),     1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
+	{STR_LIT("type_is_valid_matrix_elements"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 
 	{STR_LIT("type_is_named"),             1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_pointer"),           1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
@@ -453,6 +466,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
 	{STR_LIT("type_is_bit_field_value"),   1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_bit_set"),           1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_simd_vector"),       1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
+	{STR_LIT("type_is_matrix"),            1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 
 	{STR_LIT("type_is_specialized_polymorphic_record"),   1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
 	{STR_LIT("type_is_unspecialized_polymorphic_record"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},

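A sketch of the new intrinsics used as `where`-clause constraints on a polymorphic procedure:

```odin
package example

import "core:intrinsics"

zero_diagonal :: proc(m: $M/matrix[$N, N]$T) -> M
	where intrinsics.type_is_matrix(M),
	      intrinsics.type_is_valid_matrix_elements(T) {
	m := m // make the parameter assignable
	for i in 0..<N {
		m[i, i] = 0
	}
	return m
}
```
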
+ 12 - 1
src/llvm_abi.cpp

@@ -153,7 +153,18 @@ void lb_add_function_type_attributes(LLVMValueRef fn, lbFunctionType *ft, ProcCa
 	// TODO(bill): Clean up this logic
 	if (!is_arch_wasm()) {
 		cc_kind = lb_calling_convention_map[calling_convention];
-	}
+	} 
+	// if (build_context.metrics.arch == TargetArch_amd64) {
+	// 	if (build_context.metrics.os == TargetOs_windows) {
+	// 		if (cc_kind == lbCallingConvention_C) {
+	// 			cc_kind = lbCallingConvention_Win64;
+	// 		}
+	// 	} else {
+	// 		if (cc_kind == lbCallingConvention_C) {
+	// 			cc_kind = lbCallingConvention_X86_64_SysV;
+	// 		}
+	// 	}
+	// } 
 	LLVMSetFunctionCallConv(fn, cc_kind);
 	if (calling_convention == ProcCC_Odin) {
 		unsigned context_index = offset+arg_count;

+ 51 - 14
src/llvm_backend.cpp

@@ -21,12 +21,6 @@
 #include "llvm_backend_stmt.cpp"
 #include "llvm_backend_proc.cpp"
 
-#if LLVM_VERSION_MAJOR < 11
-#error "LLVM Version 11 is the minimum required"
-#elif LLVM_VERSION_MAJOR == 12 && !(LLVM_VERSION_MINOR > 0 || LLVM_VERSION_PATCH > 0)
-#error "If LLVM Version 12.x.y is wanted, at least LLVM 12.0.1 is required"
-#endif
-
 
 void lb_add_foreign_library_path(lbModule *m, Entity *e) {
 	if (e == nullptr) {
@@ -1135,13 +1129,46 @@ void lb_generate_code(lbGenerator *gen) {
 
 	auto *min_dep_set = &info->minimum_dependency_set;
 
-	LLVMInitializeAllTargetInfos();
-	LLVMInitializeAllTargets();
-	LLVMInitializeAllTargetMCs();
-	LLVMInitializeAllAsmPrinters();
-	LLVMInitializeAllAsmParsers();
-	LLVMInitializeAllDisassemblers();
-	LLVMInitializeNativeTarget();
+	switch (build_context.metrics.arch) {
+	case TargetArch_amd64: 
+	case TargetArch_386:
+		LLVMInitializeX86TargetInfo();
+		LLVMInitializeX86Target();
+		LLVMInitializeX86TargetMC();
+		LLVMInitializeX86AsmPrinter();
+		LLVMInitializeX86AsmParser();
+		LLVMInitializeX86Disassembler();
+		break;
+	case TargetArch_arm64:
+		LLVMInitializeAArch64TargetInfo();
+		LLVMInitializeAArch64Target();
+		LLVMInitializeAArch64TargetMC();
+		LLVMInitializeAArch64AsmPrinter();
+		LLVMInitializeAArch64AsmParser();
+		LLVMInitializeAArch64Disassembler();
+		break;
+	case TargetArch_wasm32:
+		LLVMInitializeWebAssemblyTargetInfo();
+		LLVMInitializeWebAssemblyTarget();
+		LLVMInitializeWebAssemblyTargetMC();
+		LLVMInitializeWebAssemblyAsmPrinter();
+		LLVMInitializeWebAssemblyAsmParser();
+		LLVMInitializeWebAssemblyDisassembler();
+		break;
+	default:
+		LLVMInitializeAllTargetInfos();
+		LLVMInitializeAllTargets();
+		LLVMInitializeAllTargetMCs();
+		LLVMInitializeAllAsmPrinters();
+		LLVMInitializeAllAsmParsers();
+		LLVMInitializeAllDisassemblers();
+		break;
+	}
+
+	
+	if (build_context.microarch == "native") {
+		LLVMInitializeNativeTarget();
+	}
 
 	char const *target_triple = alloc_cstring(permanent_allocator(), build_context.metrics.target_triplet);
 	for_array(i, gen->modules.entries) {
@@ -1174,6 +1201,16 @@ void lb_generate_code(lbGenerator *gen) {
 		if (gb_strcmp(llvm_cpu, host_cpu_name) == 0) {
 			llvm_features = LLVMGetHostCPUFeatures();
 		}
+	} else if (build_context.metrics.arch == TargetArch_amd64) {
+		// NOTE(bill): x86-64-v2 is more than enough for everyone
+		//
+		// x86-64: CMOV, CMPXCHG8B, FPU, FXSR, MMX, FXSR, SCE, SSE, SSE2
+		// x86-64-v2: (close to Nehalem) CMPXCHG16B, LAHF-SAHF, POPCNT, SSE3, SSE4.1, SSE4.2, SSSE3
+		// x86-64-v3: (close to Haswell) AVX, AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE, XSAVE
+		// x86-64-v4: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
+		if (ODIN_LLVM_MINIMUM_VERSION_12) {
+			llvm_cpu = "x86-64-v2";
+		}
 	}
 
 	// GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target));
@@ -1640,6 +1677,7 @@ void lb_generate_code(lbGenerator *gen) {
 		code_gen_file_type = LLVMAssemblyFile;
 	}
 
+
 	for_array(j, gen->modules.entries) {
 		lbModule *m = gen->modules.entries[j].value;
 		if (LLVMVerifyModule(m->mod, LLVMReturnStatusAction, &llvm_error)) {
@@ -1684,7 +1722,6 @@ void lb_generate_code(lbGenerator *gen) {
 		}
 	}
 
-
 	TIME_SECTION("LLVM Add Foreign Library Paths");
 
 	for_array(j, gen->modules.entries) {

+ 22 - 1
src/llvm_backend.hpp

@@ -30,6 +30,18 @@
 #include <llvm-c/Transforms/Vectorize.h>
 #endif
 
+#if LLVM_VERSION_MAJOR < 11
+#error "LLVM Version 11 is the minimum required"
+#elif LLVM_VERSION_MAJOR == 12 && !(LLVM_VERSION_MINOR > 0 || LLVM_VERSION_PATCH > 0)
+#error "If LLVM Version 12.x.y is wanted, at least LLVM 12.0.1 is required"
+#endif
+
+#if LLVM_VERSION_MAJOR > 12 || (LLVM_VERSION_MAJOR == 12 && LLVM_VERSION_MINOR >= 0 && LLVM_VERSION_PATCH > 0)
+#define ODIN_LLVM_MINIMUM_VERSION_12 1
+#else
+#define ODIN_LLVM_MINIMUM_VERSION_12 0
+#endif
+
 struct lbProcedure;
 
 struct lbValue {
@@ -333,6 +345,11 @@ lbValue lb_emit_array_ep(lbProcedure *p, lbValue s, lbValue index);
 lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection sel);
 lbValue lb_emit_deep_field_ev(lbProcedure *p, lbValue e, Selection sel);
 
+lbValue lb_emit_matrix_ep(lbProcedure *p, lbValue s, lbValue row, lbValue column);
+lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isize column);
+lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isize column);
+
+
 lbValue lb_emit_arith(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type);
 lbValue lb_emit_byte_swap(lbProcedure *p, lbValue value, Type *end_type);
 void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block);
@@ -388,6 +405,8 @@ lbValue lb_soa_struct_len(lbProcedure *p, lbValue value);
 void lb_emit_increment(lbProcedure *p, lbValue addr);
 lbValue lb_emit_select(lbProcedure *p, lbValue cond, lbValue x, lbValue y);
 
+lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValue c, Type *t);
+
 void lb_fill_slice(lbProcedure *p, lbAddr const &slice, lbValue base_elem, lbValue len);
 
 lbValue lb_type_info(lbModule *m, Type *type);
@@ -465,7 +484,7 @@ LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align);
 
 
 
-enum lbCallingConventionKind {
+enum lbCallingConventionKind : unsigned {
 	lbCallingConvention_C = 0,
 	lbCallingConvention_Fast = 8,
 	lbCallingConvention_Cold = 9,
@@ -510,6 +529,8 @@ enum lbCallingConventionKind {
 	lbCallingConvention_AMDGPU_LS = 95,
 	lbCallingConvention_AMDGPU_ES = 96,
 	lbCallingConvention_AArch64_VectorCall = 97,
+	lbCallingConvention_AArch64_SVE_VectorCall = 98,
+	lbCallingConvention_WASM_EmscriptenInvoke = 99,
 	lbCallingConvention_MaxID = 1023,
 };
 

+ 101 - 0
src/llvm_backend_const.cpp

@@ -512,6 +512,31 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc
 
 		res.value = llvm_const_array(lb_type(m, elem), elems, cast(unsigned)count);
 		return res;
+	} else if (is_type_matrix(type) &&
+	    value.kind != ExactValue_Invalid &&
+	    value.kind != ExactValue_Compound) {
+		i64 row = type->Matrix.row_count;
+		i64 column = type->Matrix.column_count;
+		GB_ASSERT(row == column);
+		
+		Type *elem = type->Matrix.elem;
+		
+		lbValue single_elem = lb_const_value(m, elem, value, allow_local);
+		single_elem.value = llvm_const_cast(single_elem.value, lb_type(m, elem));
+				
+		i64 total_elem_count = matrix_type_total_internal_elems(type);
+		LLVMValueRef *elems = gb_alloc_array(permanent_allocator(), LLVMValueRef, cast(isize)total_elem_count);		
+		for (i64 i = 0; i < row; i++) {
+			elems[matrix_indices_to_offset(type, i, i)] = single_elem.value;
+		}
+		for (i64 i = 0; i < total_elem_count; i++) {
+			if (elems[i] == nullptr) {
+				elems[i] = LLVMConstNull(lb_type(m, elem));
+			}
+		}
+		
+		res.value = LLVMConstArray(lb_type(m, elem), elems, cast(unsigned)total_elem_count);
+		return res;
 	}
 
 	switch (value.kind) {
@@ -956,6 +981,82 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc
 
 			res.value = LLVMConstInt(lb_type(m, original_type), bits, false);
 			return res;
+		} else if (is_type_matrix(type)) {
+			ast_node(cl, CompoundLit, value.value_compound);
+			Type *elem_type = type->Matrix.elem;
+			isize elem_count = cl->elems.count;
+			if (elem_count == 0 || !elem_type_can_be_constant(elem_type)) {
+				return lb_const_nil(m, original_type);
+			}
+			
+			i64 max_count = type->Matrix.row_count*type->Matrix.column_count;
+			i64 total_count = matrix_type_total_internal_elems(type);
+			
+			LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, cast(isize)total_count);
+			if (cl->elems[0]->kind == Ast_FieldValue) {
+				for_array(j, cl->elems) {
+					Ast *elem = cl->elems[j];
+					ast_node(fv, FieldValue, elem);
+					if (is_ast_range(fv->field)) {
+						ast_node(ie, BinaryExpr, fv->field);
+						TypeAndValue lo_tav = ie->left->tav;
+						TypeAndValue hi_tav = ie->right->tav;
+						GB_ASSERT(lo_tav.mode == Addressing_Constant);
+						GB_ASSERT(hi_tav.mode == Addressing_Constant);
+
+						TokenKind op = ie->op.kind;
+						i64 lo = exact_value_to_i64(lo_tav.value);
+						i64 hi = exact_value_to_i64(hi_tav.value);
+						if (op != Token_RangeHalf) {
+							hi += 1;
+						}
+						TypeAndValue tav = fv->value->tav;
+						LLVMValueRef val = lb_const_value(m, elem_type, tav.value, allow_local).value;
+						for (i64 k = lo; k < hi; k++) {
+							i64 offset = matrix_index_to_offset(type, k);
+							GB_ASSERT(values[offset] == nullptr);
+							values[offset] = val;
+						}
+					} else {
+						TypeAndValue index_tav = fv->field->tav;
+						GB_ASSERT(index_tav.mode == Addressing_Constant);
+						i64 index = exact_value_to_i64(index_tav.value);
+						TypeAndValue tav = fv->value->tav;
+						LLVMValueRef val = lb_const_value(m, elem_type, tav.value, allow_local).value;
+						i64 offset = matrix_index_to_offset(type, index);
+						GB_ASSERT(values[offset] == nullptr);
+						values[offset] = val;
+					}
+				}
+				
+				for (i64 i = 0; i < total_count; i++) {
+					if (values[i] == nullptr) {
+						values[i] = LLVMConstNull(lb_type(m, elem_type));
+					}
+				}
+
+				res.value = lb_build_constant_array_values(m, type, elem_type, cast(isize)total_count, values, allow_local);
+				return res;
+			} else {
+				GB_ASSERT_MSG(elem_count == max_count, "%td != %td", elem_count, max_count);
+
+				for_array(i, cl->elems) {
+					TypeAndValue tav = cl->elems[i]->tav;
+					GB_ASSERT(tav.mode != Addressing_Invalid);
+					i64 offset = matrix_index_to_offset(type, i);
+					values[offset] = lb_const_value(m, elem_type, tav.value, allow_local).value;
+				}
+				for (isize i = 0; i < total_count; i++) {
+					if (values[i] == nullptr) {
+						values[i] = LLVMConstNull(lb_type(m, elem_type));
+					}
+				}
+
+				res.value = lb_build_constant_array_values(m, type, elem_type, cast(isize)total_count, values, allow_local);
+				return res;
+			}
 		} else {
 			return lb_const_nil(m, original_type);
 		}

+ 748 - 1
src/llvm_backend_expr.cpp

@@ -331,7 +331,7 @@ bool lb_try_direct_vector_arith(lbProcedure *p, TokenKind op, lbValue lhs, lbVal
 				z = LLVMBuildFRem(p->builder, x, y, "");
 				break;
 			default:
-				GB_PANIC("Unsupported vector operation");
+				GB_PANIC("Unsupported vector operation %.*s", LIT(token_strings[op]));
 				break;
 			}
 
@@ -476,11 +476,545 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r
 	}
 }
 
+bool lb_is_matrix_simdable(Type *t) {
+	Type *mt = base_type(t);
+	GB_ASSERT(mt->kind == Type_Matrix);
+	
+	Type *elem = core_type(mt->Matrix.elem);
+	if (is_type_complex(elem)) {
+		return false;
+	}
+	
+	if (is_type_different_to_arch_endianness(elem)) {
+		return false;
+	}
+	
+	switch (build_context.metrics.arch) {
+	case TargetArch_amd64:
+	case TargetArch_arm64:
+		// possible
+		break;
+	case TargetArch_386:
+	case TargetArch_wasm32:
+		// nope
+		return false;
+	}
+	
+	if (elem->kind == Type_Basic) {
+		switch (elem->Basic.kind) {
+		case Basic_f16:
+		case Basic_f16le:
+		case Basic_f16be:
+			switch (build_context.metrics.arch) {
+			case TargetArch_amd64:
+				return false;
+			case TargetArch_arm64:
+				// TODO(bill): determine when this is fine
+				return true;
+			case TargetArch_386:
+			case TargetArch_wasm32:
+				return false;
+			}
+		}
+	}
+	
+	return true;
+}
+
+
+LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) {
+	Type *mt = base_type(matrix.type);
+	GB_ASSERT(mt->kind == Type_Matrix);
+	LLVMTypeRef elem_type = lb_type(p->module, mt->Matrix.elem);
+	
+	unsigned total_count = cast(unsigned)matrix_type_total_internal_elems(mt);
+	LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count);
+	
+#if 1
+	LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value;
+	LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(total_matrix_type, 0), "");
+	LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, "");
+	LLVMSetAlignment(matrix_vector, cast(unsigned)type_align_of(mt));
+	return matrix_vector;
+#else
+	LLVMValueRef matrix_vector = LLVMBuildBitCast(p->builder, matrix.value, total_matrix_type, "");
+	return matrix_vector;
+#endif
+}
+
+LLVMValueRef lb_matrix_trimmed_vector_mask(lbProcedure *p, Type *mt) {
+	mt = base_type(mt);
+	GB_ASSERT(mt->kind == Type_Matrix);
+	
+	unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
+	unsigned row_count = cast(unsigned)mt->Matrix.row_count;
+	unsigned column_count = cast(unsigned)mt->Matrix.column_count;
+	unsigned mask_elems_index = 0;
+	auto mask_elems = slice_make<LLVMValueRef>(permanent_allocator(), row_count*column_count);
+	for (unsigned j = 0; j < column_count; j++) {
+		for (unsigned i = 0; i < row_count; i++) {
+			unsigned offset = stride*j + i;
+			mask_elems[mask_elems_index++] = lb_const_int(p->module, t_u32, offset).value;
+		}
+	}
+	
+	LLVMValueRef mask = LLVMConstVector(mask_elems.data, cast(unsigned)mask_elems.count);
+	return mask;
+}
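The mask enumerates `stride*j + i` for every (row i, column j), skipping the padding elements between columns. A worked example, assuming a 2-row, 3-column matrix stored with a 4-element stride:

	// mask = <0, 1, 4, 5, 8, 9>   (columns start at offsets 0, 4, 8; rows 0..1 within each)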
+
+LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) {
+	LLVMValueRef vector = lb_matrix_to_vector(p, m);
+	
+	Type *mt = base_type(m.type);
+	GB_ASSERT(mt->kind == Type_Matrix);
+	
+	unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
+	unsigned row_count = cast(unsigned)mt->Matrix.row_count;
+	if (stride == row_count) {
+		return vector;
+	}
+	
+	LLVMValueRef mask = lb_matrix_trimmed_vector_mask(p, mt);
+	LLVMValueRef trimmed_vector = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, "");
+	return trimmed_vector;
+}
+
+
+lbValue lb_emit_matrix_transpose(lbProcedure *p, lbValue m, Type *type) {
+	if (is_type_array(m.type)) {
+		// no-op
+		m.type = type;
+		return m;
+	}
+	Type *mt = base_type(m.type);
+	GB_ASSERT(mt->kind == Type_Matrix);
+	
+	if (lb_is_matrix_simdable(mt)) {
+		unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
+		unsigned row_count    = cast(unsigned)mt->Matrix.row_count;
+		unsigned column_count = cast(unsigned)mt->Matrix.column_count;
+		
+		auto rows = slice_make<LLVMValueRef>(permanent_allocator(), row_count);
+		auto mask_elems = slice_make<LLVMValueRef>(permanent_allocator(), column_count);
+		
+		LLVMValueRef vector = lb_matrix_to_vector(p, m);
+		for (unsigned i = 0; i < row_count; i++) {
+			for (unsigned j = 0; j < column_count; j++) {
+				unsigned offset = stride*j + i;
+				mask_elems[j] = lb_const_int(p->module, t_u32, offset).value;
+			}
+			
+			// transpose mask
+			LLVMValueRef mask = LLVMConstVector(mask_elems.data, column_count);
+			LLVMValueRef row = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, "");
+			rows[i] = row;
+		}
+		
+		lbAddr res = lb_add_local_generated(p, type, true);
+		for_array(i, rows) {
+			LLVMValueRef row = rows[i];
+			lbValue dst_row_ptr = lb_emit_matrix_epi(p, res.addr, 0, i);
+			LLVMValueRef ptr = dst_row_ptr.value;
+			ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(LLVMTypeOf(row), 0), "");
+			LLVMBuildStore(p->builder, row, ptr);
+		}
+		
+		return lb_addr_load(p, res);
+	}
+	
+	lbAddr res = lb_add_local_generated(p, type, true);
+	
+	i64 row_count = mt->Matrix.row_count;
+	i64 column_count = mt->Matrix.column_count;
+	for (i64 j = 0; j < column_count; j++) {
+		for (i64 i = 0; i < row_count; i++) {
+			lbValue src = lb_emit_matrix_ev(p, m, i, j);
+			lbValue dst = lb_emit_matrix_epi(p, res.addr, j, i);
+			lb_emit_store(p, dst, src);
+		}
+	}
+	return lb_addr_load(p, res);
+}
+
+lbValue lb_matrix_cast_vector_to_type(lbProcedure *p, LLVMValueRef vector, Type *type) {
+	lbAddr res = lb_add_local_generated(p, type, true);
+	LLVMValueRef res_ptr = res.addr.value;
+	unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(LLVMTypeOf(vector)));
+	LLVMSetAlignment(res_ptr, alignment);
+	
+	res_ptr = LLVMBuildPointerCast(p->builder, res_ptr, LLVMPointerType(LLVMTypeOf(vector), 0), "");
+	LLVMBuildStore(p->builder, vector, res_ptr);
+	
+	return lb_addr_load(p, res);
+}
+
+lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type) {
+	if (is_type_array(m.type)) {
+		// no-op
+		m.type = type;
+		return m;
+	}
+	Type *mt = base_type(m.type);
+	GB_ASSERT(mt->kind == Type_Matrix);
+	
+	if (lb_is_matrix_simdable(mt)) {
+		LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m);
+		return lb_matrix_cast_vector_to_type(p, vector, type);
+	}
+	
+	lbAddr res = lb_add_local_generated(p, type, true);
+	
+	i64 row_count = mt->Matrix.row_count;
+	i64 column_count = mt->Matrix.column_count;
+	for (i64 j = 0; j < column_count; j++) {
+		for (i64 i = 0; i < row_count; i++) {
+			lbValue src = lb_emit_matrix_ev(p, m, i, j);
+			lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j);
+			lb_emit_store(p, dst, src);
+		}
+	}
+	return lb_addr_load(p, res);
+}
+
+
+lbValue lb_emit_outer_product(lbProcedure *p, lbValue a, lbValue b, Type *type) {
+	Type *mt = base_type(type);
+	Type *at = base_type(a.type);
+	Type *bt = base_type(b.type);
+	GB_ASSERT(mt->kind == Type_Matrix);
+	GB_ASSERT(at->kind == Type_Array);
+	GB_ASSERT(bt->kind == Type_Array);
+	
+	
+	i64 row_count = mt->Matrix.row_count;
+	i64 column_count = mt->Matrix.column_count;
+	
+	GB_ASSERT(row_count == at->Array.count);
+	GB_ASSERT(column_count == bt->Array.count);
+	
+	
+	lbAddr res = lb_add_local_generated(p, type, true);
+	
+	for (i64 j = 0; j < column_count; j++) {
+		for (i64 i = 0; i < row_count; i++) {
+			lbValue x = lb_emit_struct_ev(p, a, cast(i32)i);
+			lbValue y = lb_emit_struct_ev(p, b, cast(i32)j);
+			lbValue src = lb_emit_arith(p, Token_Mul, x, y, mt->Matrix.elem);
+			lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j);
+			lb_emit_store(p, dst, src);
+		}
+	}
+	return lb_addr_load(p, res);
+
+}
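The scalar definition implemented by the loop above is `res[i, j] = a[i] * b[j]`: the result's row count comes from `a` and its column count from `b`.

	// e.g. outer product of a 2-vector with a 3-vector fills a matrix[2, 3]:
	// res[i, j] = a[i] * b[j]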
+
+lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) {
+	// TODO(bill): Handle edge case for f16 types on x86(-64) platforms
+	
+	Type *xt = base_type(lhs.type);
+	Type *yt = base_type(rhs.type);
+	
+	GB_ASSERT(is_type_matrix(type));
+	GB_ASSERT(is_type_matrix(xt));
+	GB_ASSERT(is_type_matrix(yt));
+	GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count);
+	GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem));
+		
+	Type *elem = xt->Matrix.elem;
+	
+	unsigned outer_rows    = cast(unsigned)xt->Matrix.row_count;
+	unsigned inner         = cast(unsigned)xt->Matrix.column_count;
+	unsigned outer_columns = cast(unsigned)yt->Matrix.column_count;
+		
+	if (lb_is_matrix_simdable(xt)) {
+		unsigned x_stride = cast(unsigned)matrix_type_stride_in_elems(xt);
+		unsigned y_stride = cast(unsigned)matrix_type_stride_in_elems(yt);
+		
+		auto x_rows    = slice_make<LLVMValueRef>(permanent_allocator(), outer_rows);
+		auto y_columns = slice_make<LLVMValueRef>(permanent_allocator(), outer_columns);
+		
+		LLVMValueRef x_vector = lb_matrix_to_vector(p, lhs);
+		LLVMValueRef y_vector = lb_matrix_to_vector(p, rhs);
+		
+		auto mask_elems = slice_make<LLVMValueRef>(permanent_allocator(), inner);
+		for (unsigned i = 0; i < outer_rows; i++) {
+			for (unsigned j = 0; j < inner; j++) {
+				unsigned offset = x_stride*j + i;
+				mask_elems[j] = lb_const_int(p->module, t_u32, offset).value;
+			}
+			
+			// transpose mask
+			LLVMValueRef mask = LLVMConstVector(mask_elems.data, inner);
+			LLVMValueRef row = LLVMBuildShuffleVector(p->builder, x_vector, LLVMGetUndef(LLVMTypeOf(x_vector)), mask, "");
+			x_rows[i] = row;
+		}
+		
+		for (unsigned i = 0; i < outer_columns; i++) {
+			LLVMValueRef mask = llvm_mask_iota(p->module, y_stride*i, inner);
+			LLVMValueRef column = LLVMBuildShuffleVector(p->builder, y_vector, LLVMGetUndef(LLVMTypeOf(y_vector)), mask, "");
+			y_columns[i] = column;
+		}
+		
+		lbAddr res = lb_add_local_generated(p, type, true);
+		for_array(i, x_rows) {
+			LLVMValueRef x_row = x_rows[i];
+			for_array(j, y_columns) {
+				LLVMValueRef y_column = y_columns[j];
+				LLVMValueRef elem = llvm_vector_dot(p, x_row, y_column);
+				lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j);
+				LLVMBuildStore(p->builder, elem, dst.value);
+			}
+		}		
+		return lb_addr_load(p, res);
+	}
+	
+	{
+		lbAddr res = lb_add_local_generated(p, type, true);
+		
+		auto inners = slice_make<lbValue[2]>(permanent_allocator(), inner);
+		
+		for (unsigned j = 0; j < outer_columns; j++) {
+			for (unsigned i = 0; i < outer_rows; i++) {
+				lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j);
+				for (unsigned k = 0; k < inner; k++) {
+					inners[k][0] = lb_emit_matrix_ev(p, lhs, i, k);
+					inners[k][1] = lb_emit_matrix_ev(p, rhs, k, j);
+				}
+				
+				lbValue sum = lb_const_nil(p->module, elem);
+				for (unsigned k = 0; k < inner; k++) {
+					lbValue a = inners[k][0];
+					lbValue b = inners[k][1];
+					sum = lb_emit_mul_add(p, a, b, sum, elem);
+				}
+				lb_emit_store(p, dst, sum);
+			}
+		}
+		
+		return lb_addr_load(p, res);
+	}
+}
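In the SIMD path, each `C[i, j]` becomes a vector dot product of row i of `lhs` (gathered with a transpose mask) and column j of `rhs` (gathered with an iota mask). A worked example, assuming 2x2 operands with a 2-element stride:

	// x row masks:    i = 0 -> <0, 2>,  i = 1 -> <1, 3>   (x_stride*j + i)
	// y column masks: j = 0 -> <0, 1>,  j = 1 -> <2, 3>   (iota from y_stride*j)
	// C[i, j] = llvm_vector_dot(x_rows[i], y_columns[j])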
+
+lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) {
+	// TODO(bill): Handle edge case for f16 types on x86(-64) platforms
+	
+	Type *mt = base_type(lhs.type);
+	Type *vt = base_type(rhs.type);
+	
+	GB_ASSERT(is_type_matrix(mt));
+	GB_ASSERT(is_type_array_like(vt));
+	
+	i64 vector_count = get_array_type_count(vt);
+	
+	GB_ASSERT(mt->Matrix.column_count == vector_count);
+	GB_ASSERT(are_types_identical(mt->Matrix.elem, base_array_type(vt)));
+	
+	Type *elem = mt->Matrix.elem;
+	
+	if (lb_is_matrix_simdable(mt)) {
+		unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
+		
+		unsigned row_count = cast(unsigned)mt->Matrix.row_count;
+		unsigned column_count = cast(unsigned)mt->Matrix.column_count;
+		auto m_columns = slice_make<LLVMValueRef>(permanent_allocator(), column_count);
+		auto v_rows = slice_make<LLVMValueRef>(permanent_allocator(), column_count);
+		
+		LLVMValueRef matrix_vector = lb_matrix_to_vector(p, lhs);		
+		
+		for (unsigned column_index = 0; column_index < column_count; column_index++) {
+			LLVMValueRef mask = llvm_mask_iota(p->module, stride*column_index, row_count);
+			LLVMValueRef column = LLVMBuildShuffleVector(p->builder, matrix_vector, LLVMGetUndef(LLVMTypeOf(matrix_vector)), mask, "");
+			m_columns[column_index] = column;
+		}
+		
+		for (unsigned row_index = 0; row_index < column_count; row_index++) {
+			LLVMValueRef value = lb_emit_struct_ev(p, rhs, row_index).value;
+			LLVMValueRef row = llvm_vector_broadcast(p, value, row_count);
+			v_rows[row_index] = row;
+		}
+		
+		GB_ASSERT(column_count > 0);
+		
+		LLVMValueRef vector = nullptr;
+		for (i64 i = 0; i < column_count; i++) {
+			if (i == 0) {
+				vector = llvm_vector_mul(p, m_columns[i], v_rows[i]);
+			} else {
+				vector = llvm_vector_mul_add(p, m_columns[i], v_rows[i], vector);
+			}
+		}
+		
+		return lb_matrix_cast_vector_to_type(p, vector, type);
+	}
+	
+	lbAddr res = lb_add_local_generated(p, type, true);
+	
+	for (i64 i = 0; i < mt->Matrix.row_count; i++) {
+		for (i64 j = 0; j < mt->Matrix.column_count; j++) {
+			lbValue dst = lb_emit_matrix_epi(p, res.addr, i, 0);
+			lbValue d0 = lb_emit_load(p, dst);
+			
+			lbValue a = lb_emit_matrix_ev(p, lhs, i, j);
+			lbValue b = lb_emit_struct_ev(p, rhs, cast(i32)j);
+			lbValue c = lb_emit_mul_add(p, a, b, d0, elem);
+			lb_emit_store(p, dst, c);
+		}
+	}
+	
+	return lb_addr_load(p, res);
+}
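The SIMD branch evaluates the matrix-vector product column by column: each vector element is broadcast and accumulated against the matching matrix column, fusing the multiply-add where the helper allows.

	// result = sum over j of m_columns[j] * broadcast(v[j]),
	// accumulated with llvm_vector_mul_add (llvm.fmuladd when possible)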
+
+lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) {
+	// TODO(bill): Handle edge case for f16 types on x86(-64) platforms
+	
+	Type *mt = base_type(rhs.type);
+	Type *vt = base_type(lhs.type);
+	
+	GB_ASSERT(is_type_matrix(mt));
+	GB_ASSERT(is_type_array_like(vt));
+	
+	i64 vector_count = get_array_type_count(vt);
+	
+	GB_ASSERT(vector_count == mt->Matrix.row_count);
+	GB_ASSERT(are_types_identical(mt->Matrix.elem, base_array_type(vt)));
+	
+	Type *elem = mt->Matrix.elem;
+	
+	if (lb_is_matrix_simdable(mt)) {
+		unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
+		
+		unsigned row_count = cast(unsigned)mt->Matrix.row_count;
+		unsigned column_count = cast(unsigned)mt->Matrix.column_count;
+		auto m_columns = slice_make<LLVMValueRef>(permanent_allocator(), row_count);
+		auto v_rows = slice_make<LLVMValueRef>(permanent_allocator(), row_count);
+		
+		LLVMValueRef matrix_vector = lb_matrix_to_vector(p, rhs);
+
+		auto mask_elems = slice_make<LLVMValueRef>(permanent_allocator(), column_count);
+		for (unsigned row_index = 0; row_index < row_count; row_index++) {
+			for (unsigned column_index = 0; column_index < column_count; column_index++) {
+				unsigned offset = row_index + column_index*stride;
+				mask_elems[column_index] = lb_const_int(p->module, t_u32, offset).value;
+			}
+			
+			// transpose mask
+			LLVMValueRef mask = LLVMConstVector(mask_elems.data, column_count);
+			LLVMValueRef column = LLVMBuildShuffleVector(p->builder, matrix_vector, LLVMGetUndef(LLVMTypeOf(matrix_vector)), mask, "");
+			m_columns[row_index] = column;
+		}
+		
+		for (unsigned column_index = 0; column_index < row_count; column_index++) {
+			LLVMValueRef value = lb_emit_struct_ev(p, lhs, column_index).value;
+			LLVMValueRef row = llvm_vector_broadcast(p, value, column_count);
+			v_rows[column_index] = row;
+		}
+		
+		GB_ASSERT(row_count > 0);
+		
+		LLVMValueRef vector = nullptr;
+		for (i64 i = 0; i < row_count; i++) {
+			if (i == 0) {
+				vector = llvm_vector_mul(p, v_rows[i], m_columns[i]);
+			} else {
+				vector = llvm_vector_mul_add(p, v_rows[i], m_columns[i], vector);
+			}
+		}
+
+		lbAddr res = lb_add_local_generated(p, type, true);
+		LLVMValueRef res_ptr = res.addr.value;
+		unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(LLVMTypeOf(vector)));
+		LLVMSetAlignment(res_ptr, alignment);
+		
+		res_ptr = LLVMBuildPointerCast(p->builder, res_ptr, LLVMPointerType(LLVMTypeOf(vector), 0), "");
+		LLVMBuildStore(p->builder, vector, res_ptr);
+		
+		return lb_addr_load(p, res);
+	}
+	
+	lbAddr res = lb_add_local_generated(p, type, true);
+		
+	for (i64 j = 0; j < mt->Matrix.column_count; j++) {
+		for (i64 k = 0; k < mt->Matrix.row_count; k++) {
+			lbValue dst = lb_emit_matrix_epi(p, res.addr, 0, j);
+			lbValue d0 = lb_emit_load(p, dst);
+			
+			lbValue a = lb_emit_struct_ev(p, lhs, cast(i32)k);
+			lbValue b = lb_emit_matrix_ev(p, rhs, k, j);
+			lbValue c = lb_emit_mul_add(p, a, b, d0, elem);
+			lb_emit_store(p, dst, c);
+		}
+	}
+	
+	return lb_addr_load(p, res);
+}
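Symmetrically, the vector-matrix form gathers the rows of `rhs` with transpose masks, then accumulates the broadcast vector elements against them:

	// result = sum over k of broadcast(v[k]) * m_row[k],
	// i.e. a row vector multiplied on the left of the matrix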
+
+
+
+
+lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type, bool component_wise=false) {
+	GB_ASSERT(is_type_matrix(lhs.type) || is_type_matrix(rhs.type));
+	
+	
+	if (op == Token_Mul && !component_wise) {
+		Type *xt = base_type(lhs.type);
+		Type *yt = base_type(rhs.type);
+		
+		if (xt->kind == Type_Matrix) {
+			if (yt->kind == Type_Matrix) {
+				return lb_emit_matrix_mul(p, lhs, rhs, type);
+			} else if (is_type_array_like(yt)) {
+				return lb_emit_matrix_mul_vector(p, lhs, rhs, type);
+			}
+		} else if (is_type_array_like(xt)) {
+			GB_ASSERT(yt->kind == Type_Matrix);
+			return lb_emit_vector_mul_matrix(p, lhs, rhs, type);
+		}
+		
+	} else {
+		if (is_type_matrix(lhs.type)) {
+			rhs = lb_emit_conv(p, rhs, lhs.type);
+		} else {
+			lhs = lb_emit_conv(p, lhs, rhs.type);
+		}
+		
+		Type *xt = base_type(lhs.type);
+		Type *yt = base_type(rhs.type);
+		
+		GB_ASSERT_MSG(are_types_identical(xt, yt), "%s %.*s %s", type_to_string(lhs.type), LIT(token_strings[op]), type_to_string(rhs.type));
+		GB_ASSERT(xt->kind == Type_Matrix);
+		// element-wise arithmetic
+		// pretend it is an array
+		lbValue array_lhs = lhs;
+		lbValue array_rhs = rhs;
+		Type *array_type = alloc_type_array(xt->Matrix.elem, matrix_type_total_internal_elems(xt));
+		GB_ASSERT(type_size_of(array_type) == type_size_of(xt));
+		
+		array_lhs.type = array_type; 
+		array_rhs.type = array_type;
+
+		if (token_is_comparison(op)) {
+			lbValue res = lb_emit_comp(p, op, array_lhs, array_rhs);
+			return lb_emit_conv(p, res, type);
+		} else {
+			lbValue array = lb_emit_arith(p, op, array_lhs, array_rhs, array_type);
+			array.type = type;
+			return array;
+		}
+
+	}
+	
+	GB_PANIC("TODO: lb_emit_arith_matrix");
+
+	return {};
+}
+
 
 
 lbValue lb_emit_arith(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type) {
 	if (is_type_array_like(lhs.type) || is_type_array_like(rhs.type)) {
 		return lb_emit_arith_array(p, op, lhs, rhs, type);
+	} else if (is_type_matrix(lhs.type) || is_type_matrix(rhs.type)) {
+		return lb_emit_arith_matrix(p, op, lhs, rhs, type);
 	} else if (is_type_complex(type)) {
 		lhs = lb_emit_conv(p, lhs, type);
 		rhs = lb_emit_conv(p, rhs, type);
@@ -749,6 +1283,13 @@ lbValue lb_build_binary_expr(lbProcedure *p, Ast *expr) {
 	ast_node(be, BinaryExpr, expr);
 
 	TypeAndValue tv = type_and_value_of_expr(expr);
+	
+	if (is_type_matrix(be->left->tav.type) || is_type_matrix(be->right->tav.type)) {
+		lbValue left = lb_build_expr(p, be->left);
+		lbValue right = lb_build_expr(p, be->right);
+		return lb_emit_arith_matrix(p, be->op.kind, left, right, default_type(tv.type));
+	}
+	
 
 	switch (be->op.kind) {
 	case Token_Add:
@@ -1417,6 +1958,62 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
 		}
 		return lb_addr_load(p, v);
 	}
+	
+	if (is_type_matrix(dst) && !is_type_matrix(src)) {
+		GB_ASSERT_MSG(dst->Matrix.row_count == dst->Matrix.column_count, "%s <- %s", type_to_string(dst), type_to_string(src));
+		
+		Type *elem = base_array_type(dst);
+		lbValue e = lb_emit_conv(p, value, elem);
+		lbAddr v = lb_add_local_generated(p, t, false);
+		for (i64 i = 0; i < dst->Matrix.row_count; i++) {
+			isize j = cast(isize)i;
+			lbValue ptr = lb_emit_matrix_epi(p, v.addr, j, j);
+			lb_emit_store(p, ptr, e);
+		}
+		
+		
+		return lb_addr_load(p, v);
+	}
+	
+	if (is_type_matrix(dst) && is_type_matrix(src)) {
+		GB_ASSERT(dst->kind == Type_Matrix);
+		GB_ASSERT(src->kind == Type_Matrix);
+		lbAddr v = lb_add_local_generated(p, t, true);
+		
+		if (is_matrix_square(dst) && is_matrix_square(src)) {
+			for (i64 j = 0; j < dst->Matrix.column_count; j++) {
+				for (i64 i = 0; i < dst->Matrix.row_count; i++) {
+					if (i < src->Matrix.row_count && j < src->Matrix.column_count) {
+						lbValue d = lb_emit_matrix_epi(p, v.addr, i, j);
+						lbValue s = lb_emit_matrix_ev(p, value, i, j);
+						lb_emit_store(p, d, s);
+					} else if (i == j) {
+						lbValue d = lb_emit_matrix_epi(p, v.addr, i, j);
+						lbValue s = lb_const_value(p->module, dst->Matrix.elem, exact_value_i64(1), true);
+						lb_emit_store(p, d, s);
+					}
+				}
+			}
+		} else {
+			i64 dst_count = dst->Matrix.row_count*dst->Matrix.column_count;
+			i64 src_count = src->Matrix.row_count*src->Matrix.column_count;
+			GB_ASSERT(dst_count == src_count);
+			
+			for (i64 j = 0; j < src->Matrix.column_count; j++) {
+				for (i64 i = 0; i < src->Matrix.row_count; i++) {
+					lbValue s = lb_emit_matrix_ev(p, value, i, j);
+					i64 index = i + j*src->Matrix.row_count;					
+					i64 dst_i = index%dst->Matrix.row_count;
+					i64 dst_j = index/dst->Matrix.row_count;
+					lbValue d = lb_emit_matrix_epi(p, v.addr, dst_i, dst_j);
+					lb_emit_store(p, d, s);
+				}
+			}
+		}
+		return lb_addr_load(p, v);
+	}
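Two conversion shapes are handled above. Square-to-square embeds the source in the top-left block and stores 1 into any new diagonal entries (the destination local is zero-initialized), for example:

	// matrix[2, 2] -> matrix[3, 3], same element type:
	//   [a c]        [a c 0]
	//   [b d]   ->   [b d 0]
	//                [0 0 1]

Non-square conversions require equal logical element counts and remap flat column-major indices from source to destination.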
 
 	if (is_type_any(dst)) {
 		if (is_type_untyped_nil(src)) {
@@ -2481,6 +3078,10 @@ lbValue lb_build_expr(lbProcedure *p, Ast *expr) {
 	case_ast_node(ie, IndexExpr, expr);
 		return lb_addr_load(p, lb_build_addr(p, expr));
 	case_end;
+	
+	case_ast_node(ie, MatrixIndexExpr, expr);
+		return lb_addr_load(p, lb_build_addr(p, expr));
+	case_end;
 
 	case_ast_node(ia, InlineAsmExpr, expr);
 		Type *t = type_of_expr(expr);
@@ -2976,6 +3577,25 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
 			lbValue v = lb_emit_ptr_offset(p, elem, index);
 			return lb_addr(v);
 		}
+		
+		case Type_Matrix: {
+			lbValue matrix = lb_build_addr_ptr(p, ie->expr);
+			if (deref) {
+				matrix = lb_emit_load(p, matrix);
+			}
+			lbValue index = lb_build_expr(p, ie->index);
+			index = lb_emit_conv(p, index, t_int);
+			lbValue elem = lb_emit_matrix_ep(p, matrix, lb_const_int(p->module, t_int, 0), index);
+			elem = lb_emit_conv(p, elem, alloc_type_pointer(type_of_expr(expr)));
+
+			auto index_tv = type_and_value_of_expr(ie->index);
+			if (index_tv.mode != Addressing_Constant) {
+				lbValue len = lb_const_int(p->module, t_int, t->Matrix.column_count);
+				lb_emit_bounds_check(p, ast_token(ie->index), index, len);
+			}
+			return lb_addr(elem);
+		}
 
 
 		case Type_Basic: { // Basic_string
@@ -2998,6 +3618,35 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
 		}
 		}
 	case_end;
+	
+	case_ast_node(ie, MatrixIndexExpr, expr);
+		Type *t = base_type(type_of_expr(ie->expr));
+
+		bool deref = is_type_pointer(t);
+		t = base_type(type_deref(t));
+		
+		lbValue m = lb_build_addr_ptr(p, ie->expr);
+		if (deref) {
+			m = lb_emit_load(p, m);
+		}
+		lbValue row_index = lb_build_expr(p, ie->row_index);
+		lbValue column_index = lb_build_expr(p, ie->column_index);
+		row_index = lb_emit_conv(p, row_index, t_int);
+		column_index = lb_emit_conv(p, column_index, t_int);
+		lbValue elem = lb_emit_matrix_ep(p, m, row_index, column_index);
+
+		auto row_index_tv = type_and_value_of_expr(ie->row_index);
+		auto column_index_tv = type_and_value_of_expr(ie->column_index);
+		if (row_index_tv.mode != Addressing_Constant || column_index_tv.mode != Addressing_Constant) {
+			lbValue row_count = lb_const_int(p->module, t_int, t->Matrix.row_count);
+			lbValue column_count = lb_const_int(p->module, t_int, t->Matrix.column_count);
+			lb_emit_matrix_bounds_check(p, ast_token(ie->row_index), row_index, column_index, row_count, column_count);
+		}
+		return lb_addr(elem);
+		
+		
+	case_end;
 
 	case_ast_node(se, SliceExpr, expr);
 
@@ -3246,6 +3895,7 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
 		case Type_Slice:           et = bt->Slice.elem;           break;
 		case Type_BitSet:          et = bt->BitSet.elem;          break;
 		case Type_SimdVector:      et = bt->SimdVector.elem;      break;
+		case Type_Matrix:          et = bt->Matrix.elem;          break;
 		}
 
 		String proc_name = {};
@@ -3777,7 +4427,104 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
 			}
 			break;
 		}
+		
+		case Type_Matrix: {
+			if (cl->elems.count > 0) {
+				lb_addr_store(p, v, lb_const_value(p->module, type, exact_value_compound(expr)));
 
+				auto temp_data = array_make<lbCompoundLitElemTempData>(temporary_allocator(), 0, cl->elems.count);
+
+				// NOTE(bill): Separate value, gep, store into their own chunks
+				for_array(i, cl->elems) {
+					Ast *elem = cl->elems[i];
+					
+					if (elem->kind == Ast_FieldValue) {
+						ast_node(fv, FieldValue, elem);
+						if (lb_is_elem_const(fv->value, et)) {
+							continue;
+						}
+						if (is_ast_range(fv->field)) {
+							ast_node(ie, BinaryExpr, fv->field);
+							TypeAndValue lo_tav = ie->left->tav;
+							TypeAndValue hi_tav = ie->right->tav;
+							GB_ASSERT(lo_tav.mode == Addressing_Constant);
+							GB_ASSERT(hi_tav.mode == Addressing_Constant);
+
+							TokenKind op = ie->op.kind;
+							i64 lo = exact_value_to_i64(lo_tav.value);
+							i64 hi = exact_value_to_i64(hi_tav.value);
+							if (op != Token_RangeHalf) {
+								hi += 1;
+							}
+
+							lbValue value = lb_build_expr(p, fv->value);
+
+							for (i64 k = lo; k < hi; k++) {
+								lbCompoundLitElemTempData data = {};
+								data.value = value;
+								
+								data.elem_index = cast(i32)matrix_index_to_offset(bt, k);
+								array_add(&temp_data, data);
+							}
+
+						} else {
+							auto tav = fv->field->tav;
+							GB_ASSERT(tav.mode == Addressing_Constant);
+							i64 index = exact_value_to_i64(tav.value);
+
+							lbValue value = lb_build_expr(p, fv->value);
+							lbCompoundLitElemTempData data = {};
+							data.value = lb_emit_conv(p, value, et);
+							data.expr = fv->value;
+							
+							data.elem_index = cast(i32)matrix_index_to_offset(bt, index);
+							array_add(&temp_data, data);
+						}
+
+					} else {
+						if (lb_is_elem_const(elem, et)) {
+							continue;
+						}
+						lbCompoundLitElemTempData data = {};
+						data.expr = elem;
+						data.elem_index = cast(i32)matrix_index_to_offset(bt, i);
+						array_add(&temp_data, data);
+					}
+				}
+
+				for_array(i, temp_data) {
+					temp_data[i].gep = lb_emit_array_epi(p, lb_addr_get_ptr(p, v), temp_data[i].elem_index);
+				}
+
+				for_array(i, temp_data) {
+					lbValue field_expr = temp_data[i].value;
+					Ast *expr = temp_data[i].expr;
+
+					auto prev_hint = lb_set_copy_elision_hint(p, lb_addr(temp_data[i].gep), expr);
+
+					if (field_expr.value == nullptr) {
+						field_expr = lb_build_expr(p, expr);
+					}
+					Type *t = field_expr.type;
+					GB_ASSERT(t->kind != Type_Tuple);
+					lbValue ev = lb_emit_conv(p, field_expr, et);
+
+					if (!p->copy_elision_hint.used) {
+						temp_data[i].value = ev;
+					}
+
+					lb_reset_copy_elision_hint(p, prev_hint);
+				}
+
+				for_array(i, temp_data) {
+					if (temp_data[i].value.value != nullptr) {
+						lb_emit_store(p, temp_data[i].gep, temp_data[i].value);
+					}
+				}
+			}
+			break;
+		}
+		
 		}
 
 		return v;

+ 59 - 5
src/llvm_backend_general.cpp

@@ -419,6 +419,36 @@ void lb_emit_bounds_check(lbProcedure *p, Token token, lbValue index, lbValue le
 	lb_emit_runtime_call(p, "bounds_check_error", args);
 }
 
+void lb_emit_matrix_bounds_check(lbProcedure *p, Token token, lbValue row_index, lbValue column_index, lbValue row_count, lbValue column_count) {
+	if (build_context.no_bounds_check) {
+		return;
+	}
+	if ((p->state_flags & StateFlag_no_bounds_check) != 0) {
+		return;
+	}
+
+	row_index = lb_emit_conv(p, row_index, t_int);
+	column_index = lb_emit_conv(p, column_index, t_int);
+	row_count = lb_emit_conv(p, row_count, t_int);
+	column_count = lb_emit_conv(p, column_count, t_int);
+
+	lbValue file = lb_find_or_add_entity_string(p->module, get_file_path_string(token.pos.file_id));
+	lbValue line = lb_const_int(p->module, t_i32, token.pos.line);
+	lbValue column = lb_const_int(p->module, t_i32, token.pos.column);
+
+	auto args = array_make<lbValue>(permanent_allocator(), 7);
+	args[0] = file;
+	args[1] = line;
+	args[2] = column;
+	args[3] = row_index;
+	args[4] = column_index;
+	args[5] = row_count;
+	args[6] = column_count;
+
+	lb_emit_runtime_call(p, "matrix_bounds_check_error", args);
+}
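Only the name and arity of the runtime procedure are visible in this hunk; the parameter meaning below is inferred from the order the arguments are built:

	// assumed: matrix_bounds_check_error(file, line, column,
	//          row_index, column_index, row_count, column_count)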
+
+
 void lb_emit_multi_pointer_slice_bounds_check(lbProcedure *p, Token token, lbValue low, lbValue high) {
 	if (build_context.no_bounds_check) {
 		return;
@@ -482,8 +512,7 @@ void lb_emit_slice_bounds_check(lbProcedure *p, Token token, lbValue low, lbValu
 	}
 }
 
-bool lb_try_update_alignment(lbValue ptr, unsigned alignment)  {
-	LLVMValueRef addr_ptr = ptr.value;
+bool lb_try_update_alignment(LLVMValueRef addr_ptr, unsigned alignment) {
 	if (LLVMIsAGlobalValue(addr_ptr) || LLVMIsAAllocaInst(addr_ptr) || LLVMIsALoadInst(addr_ptr)) {
 		if (LLVMGetAlignment(addr_ptr) < alignment) {
 			if (LLVMIsAAllocaInst(addr_ptr) || LLVMIsAGlobalValue(addr_ptr)) {
@@ -495,6 +524,11 @@ bool lb_try_update_alignment(lbValue ptr, unsigned alignment)  {
 	return false;
 }
 
+bool lb_try_update_alignment(lbValue ptr, unsigned alignment) {
+	return lb_try_update_alignment(ptr.value, alignment);
+}
+
+
 bool lb_try_vector_cast(lbModule *m, lbValue ptr, LLVMTypeRef *vector_type_) {
 	Type *array_type = base_type(type_deref(ptr.type));
 	GB_ASSERT(is_type_array_like(array_type));
@@ -1930,6 +1964,24 @@ LLVMTypeRef lb_type_internal(lbModule *m, Type *type) {
 			fields[1] = base_integer;
 			return LLVMStructTypeInContext(ctx, fields, field_count, false);
 		}
+		
+	case Type_Matrix:
+		{
+			i64 size = type_size_of(type);
+			i64 elem_size = type_size_of(type->Matrix.elem);
+			GB_ASSERT(elem_size > 0);
+			i64 elem_count = size/elem_size;
+			GB_ASSERT_MSG(elem_count > 0, "%s", type_to_string(type));
+			
+			m->internal_type_level -= 1;
+			
+			LLVMTypeRef elem = lb_type(m, type->Matrix.elem);
+			LLVMTypeRef t = LLVMArrayType(elem, cast(unsigned)elem_count);
+			
+			m->internal_type_level += 1;
+			return t;
+		}
+	
 	}
 
 	GB_PANIC("Invalid type %s", type_to_string(type));
@@ -2013,7 +2065,7 @@ LLVMAttributeRef lb_create_enum_attribute_with_type(LLVMContextRef ctx, char con
 	unsigned kind = 0;
 	String s = make_string_c(name);
 
-	#if (LLVM_VERSION_MAJOR > 12 || (LLVM_VERSION_MAJOR == 12 && (LLVM_VERSION_MINOR > 0 || LLVM_VERSION_PATCH >= 1)))
+	#if ODIN_LLVM_MINIMUM_VERSION_12
 		kind = LLVMGetEnumAttributeKindForName(name, s.len);
 		GB_ASSERT_MSG(kind != 0, "unknown attribute: %s", name);
 		return LLVMCreateTypeAttribute(ctx, kind, type);
@@ -2593,8 +2645,10 @@ lbAddr lb_add_local(lbProcedure *p, Type *type, Entity *e, bool zero_init, i32 p
 	LLVMTypeRef llvm_type = lb_type(p->module, type);
 	LLVMValueRef ptr = LLVMBuildAlloca(p->builder, llvm_type, name);
 
-	// unsigned alignment = 16; // TODO(bill): Make this configurable
-	unsigned alignment = cast(unsigned)lb_alignof(llvm_type);
+	unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(llvm_type));
+	if (is_type_matrix(type)) {
+		alignment *= 2; // NOTE(bill): Just in case
+	}
 	LLVMSetAlignment(ptr, alignment);
 
 	LLVMPositionBuilderAtEnd(p->builder, p->curr_block->block);

+ 83 - 36
src/llvm_backend_proc.cpp

@@ -127,16 +127,7 @@ lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool ignore_body)
 
 	lb_ensure_abi_function_type(m, p);
 	lb_add_function_type_attributes(p->value, p->abi_function_type, p->abi_function_type->calling_convention);
-	if (false) {
-		lbCallingConventionKind cc_kind = lbCallingConvention_C;
-		// TODO(bill): Clean up this logic
-		if (!is_arch_wasm()) {
-			cc_kind = lb_calling_convention_map[pt->Proc.calling_convention];
-		}
-		LLVMSetFunctionCallConv(p->value, cc_kind);
-	}
-
-
+	
 	if (pt->Proc.diverging) {
 		lb_add_attribute_to_proc(m, p->value, "noreturn");
 	}
@@ -784,6 +775,57 @@ lbValue lb_emit_runtime_call(lbProcedure *p, char const *c_name, Array<lbValue>
 	return lb_emit_call(p, proc, args);
 }
 
+lbValue lb_emit_conjugate(lbProcedure *p, lbValue val, Type *type) {
+	lbValue res = {};
+	Type *t = val.type;
+	if (is_type_complex(t)) {
+		res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, false));
+		lbValue real = lb_emit_struct_ev(p, val, 0);
+		lbValue imag = lb_emit_struct_ev(p, val, 1);
+		imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type);
+		lb_emit_store(p, lb_emit_struct_ep(p, res, 0), real);
+		lb_emit_store(p, lb_emit_struct_ep(p, res, 1), imag);
+	} else if (is_type_quaternion(t)) {
+		// @QuaternionLayout
+		res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, false));
+		lbValue real = lb_emit_struct_ev(p, val, 3);
+		lbValue imag = lb_emit_struct_ev(p, val, 0);
+		lbValue jmag = lb_emit_struct_ev(p, val, 1);
+		lbValue kmag = lb_emit_struct_ev(p, val, 2);
+		imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type);
+		jmag = lb_emit_unary_arith(p, Token_Sub, jmag, jmag.type);
+		kmag = lb_emit_unary_arith(p, Token_Sub, kmag, kmag.type);
+		lb_emit_store(p, lb_emit_struct_ep(p, res, 3), real);
+		lb_emit_store(p, lb_emit_struct_ep(p, res, 0), imag);
+		lb_emit_store(p, lb_emit_struct_ep(p, res, 1), jmag);
+		lb_emit_store(p, lb_emit_struct_ep(p, res, 2), kmag);
+	} else if (is_type_array_like(t)) {
+		res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, true));
+		Type *elem_type = base_array_type(t);
+		i64 count = get_array_type_count(t);
+		for (i64 i = 0; i < count; i++) {
+			lbValue dst = lb_emit_array_epi(p, res, i);
+			lbValue elem = lb_emit_struct_ev(p, val, cast(i32)i);
+			elem = lb_emit_conjugate(p, elem, elem_type);
+			lb_emit_store(p, dst, elem);
+		}
+	} else if (is_type_matrix(t)) {
+		Type *mt = base_type(t);
+		GB_ASSERT(mt->kind == Type_Matrix);
+		Type *elem_type = mt->Matrix.elem;
+		res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, true));
+		for (i64 j = 0; j < mt->Matrix.column_count; j++) {
+			for (i64 i = 0; i < mt->Matrix.row_count; i++) {
+				lbValue dst = lb_emit_matrix_epi(p, res, i, j);
+				lbValue elem = lb_emit_matrix_ev(p, val, i, j);
+				elem = lb_emit_conjugate(p, elem, elem_type);
+				lb_emit_store(p, dst, elem);
+			}
+		}
+	}
+	return lb_emit_load(p, res);
+}
+
 lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> const &args, ProcInlining inlining, bool use_copy_elision_hint) {
 	lbModule *m = p->module;
 
@@ -1176,31 +1218,7 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv,
 
 	case BuiltinProc_conj: {
 		lbValue val = lb_build_expr(p, ce->args[0]);
-		lbValue res = {};
-		Type *t = val.type;
-		if (is_type_complex(t)) {
-			res = lb_addr_get_ptr(p, lb_add_local_generated(p, tv.type, false));
-			lbValue real = lb_emit_struct_ev(p, val, 0);
-			lbValue imag = lb_emit_struct_ev(p, val, 1);
-			imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type);
-			lb_emit_store(p, lb_emit_struct_ep(p, res, 0), real);
-			lb_emit_store(p, lb_emit_struct_ep(p, res, 1), imag);
-		} else if (is_type_quaternion(t)) {
-			// @QuaternionLayout
-			res = lb_addr_get_ptr(p, lb_add_local_generated(p, tv.type, false));
-			lbValue real = lb_emit_struct_ev(p, val, 3);
-			lbValue imag = lb_emit_struct_ev(p, val, 0);
-			lbValue jmag = lb_emit_struct_ev(p, val, 1);
-			lbValue kmag = lb_emit_struct_ev(p, val, 2);
-			imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type);
-			jmag = lb_emit_unary_arith(p, Token_Sub, jmag, jmag.type);
-			kmag = lb_emit_unary_arith(p, Token_Sub, kmag, kmag.type);
-			lb_emit_store(p, lb_emit_struct_ep(p, res, 3), real);
-			lb_emit_store(p, lb_emit_struct_ep(p, res, 0), imag);
-			lb_emit_store(p, lb_emit_struct_ep(p, res, 1), jmag);
-			lb_emit_store(p, lb_emit_struct_ep(p, res, 2), kmag);
-		}
-		return lb_emit_load(p, res);
+		return lb_emit_conjugate(p, val, tv.type);
 	}
 
 	case BuiltinProc_expand_to_tuple: {
@@ -1316,7 +1334,36 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv,
 		return lb_soa_zip(p, ce, tv);
 	case BuiltinProc_soa_unzip:
 		return lb_soa_unzip(p, ce, tv);
-
+		
+	case BuiltinProc_transpose:
+		{
+			lbValue m = lb_build_expr(p, ce->args[0]);
+			return lb_emit_matrix_transpose(p, m, tv.type);
+		}
+		
+	case BuiltinProc_outer_product:
+		{
+			lbValue a = lb_build_expr(p, ce->args[0]);
+			lbValue b = lb_build_expr(p, ce->args[1]);
+			return lb_emit_outer_product(p, a, b, tv.type);
+		}
+	case BuiltinProc_hadamard_product:
+		{
+			lbValue a = lb_build_expr(p, ce->args[0]);
+			lbValue b = lb_build_expr(p, ce->args[1]);
+			if (is_type_array(tv.type)) {
+				return lb_emit_arith(p, Token_Mul, a, b, tv.type);
+			}
+			GB_ASSERT(is_type_matrix(tv.type));
+			return lb_emit_arith_matrix(p, Token_Mul, a, b, tv.type, true);
+		}
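`hadamard_product` is the component-wise product, so the array case reuses ordinary array arithmetic and the matrix case takes the `component_wise` branch of lb_emit_arith_matrix:

	// hadamard_product(a, b)[i, j] == a[i, j] * b[i, j]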
+		
+	case BuiltinProc_matrix_flatten:
+		{
+			lbValue m = lb_build_expr(p, ce->args[0]);
+			return lb_emit_matrix_flatten(p, m, tv.type);
+		}
+		
 	// "Intrinsics"
 
 	case BuiltinProc_alloca:

+ 19 - 0
src/llvm_backend_type.cpp

@@ -42,6 +42,7 @@ lbValue lb_typeid(lbModule *m, Type *type) {
 	case Type_Pointer:         kind = Typeid_Pointer;          break;
 	case Type_MultiPointer:    kind = Typeid_Multi_Pointer;    break;
 	case Type_Array:           kind = Typeid_Array;            break;
+	case Type_Matrix:          kind = Typeid_Matrix;           break;
 	case Type_EnumeratedArray: kind = Typeid_Enumerated_Array; break;
 	case Type_Slice:           kind = Typeid_Slice;            break;
 	case Type_DynamicArray:    kind = Typeid_Dynamic_Array;    break;
@@ -868,7 +869,25 @@ void lb_setup_type_info_data(lbProcedure *p) { // NOTE(bill): Setup type_info da
 				lb_emit_store(p, tag, res);
 			}
 			break;
+		case Type_Matrix: 
+			{
+				tag = lb_const_ptr_cast(m, variant_ptr, t_type_info_matrix_ptr);
+				i64 ez = type_size_of(t->Matrix.elem);
+
+				LLVMValueRef vals[5] = {
+					lb_get_type_info_ptr(m, t->Matrix.elem).value,
+					lb_const_int(m, t_int, ez).value,
+					lb_const_int(m, t_int, matrix_type_stride_in_elems(t)).value,
+					lb_const_int(m, t_int, t->Matrix.row_count).value,
+					lb_const_int(m, t_int, t->Matrix.column_count).value,
+				};
 
+				lbValue res = {};
+				res.type = type_deref(tag.type);
+				res.value = llvm_const_named_struct(m, res.type, vals, gb_count_of(vals));
+				lb_emit_store(p, tag, res);
+			}
+			break;
 		}
 
 

+ 295 - 1
src/llvm_backend_utility.cpp

@@ -1221,6 +1221,109 @@ lbValue lb_emit_ptr_offset(lbProcedure *p, lbValue ptr, lbValue index) {
 	return res;
 }
 
+lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isize column) {
+	Type *t = s.type;
+	GB_ASSERT(is_type_pointer(t));
+	Type *mt = base_type(type_deref(t));
+	
+	Type *ptr = base_array_type(mt);
+	
+	if (column == 0) {
+		GB_ASSERT_MSG(is_type_matrix(mt) || is_type_array_like(mt), "%s", type_to_string(mt));
+		
+		LLVMValueRef indices[2] = {
+			LLVMConstInt(lb_type(p->module, t_int), 0, false),
+			LLVMConstInt(lb_type(p->module, t_int), cast(unsigned)row, false),
+		};
+		
+		lbValue res = {};
+		if (lb_is_const(s)) {
+			res.value = LLVMConstGEP(s.value, indices, gb_count_of(indices));
+		} else {
+			res.value = LLVMBuildGEP(p->builder, s.value, indices, gb_count_of(indices), "");
+		}
+		
+		res.type = alloc_type_pointer(ptr);
+		return res;
+	} else if (row == 0 && is_type_array_like(mt)) {
+		LLVMValueRef indices[2] = {
+			LLVMConstInt(lb_type(p->module, t_int), 0, false),
+			LLVMConstInt(lb_type(p->module, t_int), cast(unsigned)column, false),
+		};
+		
+		lbValue res = {};
+		if (lb_is_const(s)) {
+			res.value = LLVMConstGEP(s.value, indices, gb_count_of(indices));
+		} else {
+			res.value = LLVMBuildGEP(p->builder, s.value, indices, gb_count_of(indices), "");
+		}
+		
+		res.type = alloc_type_pointer(ptr);
+		return res;
+	}
+	
+	
+	GB_ASSERT_MSG(is_type_matrix(mt), "%s", type_to_string(mt));
+	
+	isize offset = matrix_indices_to_offset(mt, row, column);
+
+	LLVMValueRef indices[2] = {
+		LLVMConstInt(lb_type(p->module, t_int), 0, false),
+		LLVMConstInt(lb_type(p->module, t_int), cast(unsigned)offset, false),
+	};
+
+	lbValue res = {};
+	if (lb_is_const(s)) {
+		res.value = LLVMConstGEP(s.value, indices, gb_count_of(indices));
+	} else {
+		res.value = LLVMBuildGEP(p->builder, s.value, indices, gb_count_of(indices), "");
+	}
+	res.type = alloc_type_pointer(ptr);
+	return res;
+}
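The three GEP shapes above can be summarized by the effective flat element index:

	// column == 0:                    index = row    (also accepts array-like values)
	// row == 0 on an array-like type: index = column
	// general matrix:                 index = matrix_indices_to_offset(mt, row, column)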
+
+lbValue lb_emit_matrix_ep(lbProcedure *p, lbValue s, lbValue row, lbValue column) {
+	Type *t = s.type;
+	GB_ASSERT(is_type_pointer(t));
+	Type *mt = base_type(type_deref(t));
+	GB_ASSERT_MSG(is_type_matrix(mt), "%s", type_to_string(mt));
+
+	Type *ptr = base_array_type(mt);
+	
+	LLVMValueRef stride_elems = lb_const_int(p->module, t_int, matrix_type_stride_in_elems(mt)).value;
+	
+	row = lb_emit_conv(p, row, t_int);
+	column = lb_emit_conv(p, column, t_int);
+	
+	LLVMValueRef index = LLVMBuildAdd(p->builder, row.value, LLVMBuildMul(p->builder, column.value, stride_elems, ""), "");
+
+	LLVMValueRef indices[2] = {
+		LLVMConstInt(lb_type(p->module, t_int), 0, false),
+		index,
+	};
+
+	lbValue res = {};
+	if (lb_is_const(s)) {
+		res.value = LLVMConstGEP(s.value, indices, gb_count_of(indices));
+	} else {
+		res.value = LLVMBuildGEP(p->builder, s.value, indices, gb_count_of(indices), "");
+	}
+	res.type = alloc_type_pointer(ptr);
+	return res;
+}
+
+
+lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isize column) {
+	Type *st = base_type(s.type);
+	GB_ASSERT_MSG(is_type_matrix(st), "%s", type_to_string(st));
+	
+	lbValue value = lb_address_from_load_or_generate_local(p, s);
+	lbValue ptr = lb_emit_matrix_epi(p, value, row, column);
+	return lb_emit_load(p, ptr);
+}
+
 
 void lb_fill_slice(lbProcedure *p, lbAddr const &slice, lbValue base_elem, lbValue len) {
 	Type *t = lb_addr_type(slice);
@@ -1380,6 +1483,198 @@ lbValue lb_soa_struct_cap(lbProcedure *p, lbValue value) {
 	return lb_emit_struct_ev(p, value, cast(i32)n);
 }
 
+lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValue c, Type *t) {
+	lbModule *m = p->module;
+	
+	a = lb_emit_conv(p, a, t);
+	b = lb_emit_conv(p, b, t);
+	c = lb_emit_conv(p, c, t);
+	
+	bool is_possible = !is_type_different_to_arch_endianness(t) && is_type_float(t);
+	
+	if (is_possible) {
+		switch (build_context.metrics.arch) {
+		case TargetArch_amd64:
+			if (type_size_of(t) == 2) {
+				is_possible = false;
+			}
+			break;
+		case TargetArch_arm64:
+			// possible
+			break;
+		case TargetArch_386:
+		case TargetArch_wasm32:
+			is_possible = false;
+			break;
+		}
+	}
+
+	if (is_possible) {
+		char const *name = "llvm.fma";
+		unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name));
+		GB_ASSERT_MSG(id != 0, "Unable to find %s", name);
+		
+		LLVMTypeRef types[1] = {};
+		types[0] = lb_type(m, t);
+		
+		LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types));
+		LLVMValueRef values[3] = {};
+		values[0] = a.value;
+		values[1] = b.value;
+		values[2] = c.value;
+		LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), "");
+		return {call, t};
+	} else {
+		lbValue x = lb_emit_arith(p, Token_Mul, a, b, t);
+		lbValue y = lb_emit_arith(p, Token_Add, x, c, t);
+		return y;
+	}
+}
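`llvm.fma` is LLVM's fused multiply-add intrinsic; when it is not profitable on the target, the helper falls back to a separate multiply and add. The scalar matrix-multiply fallback earlier in this diff uses it to accumulate dot products:

	// sum = lb_emit_mul_add(p, a, b, sum, elem);   // sum = a*b + sum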
+
+LLVMValueRef llvm_mask_iota(lbModule *m, unsigned start, unsigned count) {
+	auto iota = slice_make<LLVMValueRef>(temporary_allocator(), count);
+	for (unsigned i = 0; i < count; i++) {
+		iota[i] = lb_const_int(m, t_u32, start+i).value;
+	}
+	return LLVMConstVector(iota.data, count);
+}
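`llvm_mask_iota` builds a consecutive shuffle mask; the SIMD paths use it to slice one column out of a flattened matrix vector:

	// llvm_mask_iota(m, 4, 2) -> <4, 5>
	// (used above as llvm_mask_iota(module, stride*column_index, row_count))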
+
+LLVMValueRef llvm_mask_zero(lbModule *m, unsigned count) {
+	return LLVMConstNull(LLVMVectorType(lb_type(m, t_u32), count));
+}
+
+LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned count) {
+	GB_ASSERT(count > 0);
+	if (LLVMIsConstant(value)) {
+		LLVMValueRef single = LLVMConstVector(&value, 1);
+		if (count == 1) {
+			return single;
+		}
+		LLVMValueRef mask = llvm_mask_zero(p->module, count);
+		return LLVMConstShuffleVector(single, LLVMGetUndef(LLVMTypeOf(single)), mask);
+	}
+	
+	LLVMTypeRef single_type = LLVMVectorType(LLVMTypeOf(value), 1);
+	LLVMValueRef single = LLVMBuildBitCast(p->builder, value, single_type, "");
+	if (count == 1) {
+		return single;
+	}
+	LLVMValueRef mask = llvm_mask_zero(p->module, count);
+	return LLVMBuildShuffleVector(p->builder, single, LLVMGetUndef(LLVMTypeOf(single)), mask, "");
+}
+
+LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) {
+	LLVMTypeRef type = LLVMTypeOf(value);
+	GB_ASSERT(LLVMGetTypeKind(type) == LLVMVectorTypeKind);
+	LLVMTypeRef elem = LLVMGetElementType(type);
+	
+	char const *name = nullptr;
+	i32 value_offset = 0;
+	i32 value_count  = 0;
+	
+	switch (LLVMGetTypeKind(elem)) {
+	case LLVMHalfTypeKind:
+	case LLVMFloatTypeKind:
+	case LLVMDoubleTypeKind:
+		name = "llvm.vector.reduce.fadd";
+		value_offset = 0;
+		value_count = 2;
+		break;
+	case LLVMIntegerTypeKind:
+		name = "llvm.vector.reduce.add";
+		value_offset = 1;
+		value_count = 1;
+		break;
+	default:
+		GB_PANIC("invalid vector type %s", LLVMPrintTypeToString(type));
+		break;
+	}
+	
+	unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name));
+	GB_ASSERT_MSG(id != 0, "Unable to find %s", name);
+	
+	LLVMTypeRef types[1] = {};
+	types[0] = type;
+	
+	LLVMValueRef ip = LLVMGetIntrinsicDeclaration(p->module->mod, id, types, gb_count_of(types));
+	LLVMValueRef values[2] = {};
+	values[0] = LLVMConstNull(elem);
+	values[1] = value;
+	LLVMValueRef call = LLVMBuildCall(p->builder, ip, values+value_offset, value_count, "");
+	return call;
+}
+
+LLVMValueRef llvm_vector_add(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) {
+	GB_ASSERT(LLVMTypeOf(a) == LLVMTypeOf(b));
+	
+	LLVMTypeRef elem = LLVMGetElementType(LLVMTypeOf(a));
+	
+	if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind) {
+		return LLVMBuildAdd(p->builder, a, b, "");
+	}
+	return LLVMBuildFAdd(p->builder, a, b, "");
+}
+
+LLVMValueRef llvm_vector_mul(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) {
+	GB_ASSERT(LLVMTypeOf(a) == LLVMTypeOf(b));
+	
+	LLVMTypeRef elem = LLVMGetElementType(LLVMTypeOf(a));
+	
+	if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind) {
+		return LLVMBuildMul(p->builder, a, b, "");
+	}
+	return LLVMBuildFMul(p->builder, a, b, "");
+}
+
+
+LLVMValueRef llvm_vector_dot(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) {
+	return llvm_vector_reduce_add(p, llvm_vector_mul(p, a, b));
+}
+
+LLVMValueRef llvm_vector_mul_add(lbProcedure *p, LLVMValueRef a, LLVMValueRef b, LLVMValueRef c) {
+	lbModule *m = p->module;
+	
+	LLVMTypeRef t = LLVMTypeOf(a);
+	GB_ASSERT(t == LLVMTypeOf(b));
+	GB_ASSERT(t == LLVMTypeOf(c));
+	GB_ASSERT(LLVMGetTypeKind(t) == LLVMVectorTypeKind);
+	
+	LLVMTypeRef elem = LLVMGetElementType(t);
+	
+	bool is_possible = false;
+	
+	switch (LLVMGetTypeKind(elem)) {
+	case LLVMHalfTypeKind:
+		is_possible = true;
+		break;
+	case LLVMFloatTypeKind:
+	case LLVMDoubleTypeKind:
+		is_possible = true;
+		break;
+	}
+
+	if (is_possible) {
+		char const *name = "llvm.fmuladd";
+		unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name));
+		GB_ASSERT_MSG(id != 0, "Unable to find %s", name);
+		
+		LLVMTypeRef types[1] = {};
+		types[0] = t;
+		
+		LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types));
+		LLVMValueRef values[3] = {};
+		values[0] = a;
+		values[1] = b;
+		values[2] = c;
+		LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), "");
+		return call;
+	} else {
+		LLVMValueRef x = llvm_vector_mul(p, a, b);
+		LLVMValueRef y = llvm_vector_add(p, x, c);
+		return y;
+	}
+}
+
 LLVMValueRef llvm_get_inline_asm(LLVMTypeRef func_type, String const &str, String const &clobbers, bool has_side_effects=true, bool is_align_stack=false, LLVMInlineAsmDialect dialect=LLVMInlineAsmDialectATT) {
 	return LLVMGetInlineAsm(func_type,
 		cast(char *)str.text, cast(size_t)str.len,
@@ -1391,4 +1686,3 @@ LLVMValueRef llvm_get_inline_asm(LLVMTypeRef func_type, String const &str, Strin
 	#endif
 	);
 }
-

+ 55 - 1
src/parser.cpp

@@ -159,6 +159,11 @@ Ast *clone_ast(Ast *node) {
 		n->IndexExpr.expr  = clone_ast(n->IndexExpr.expr);
 		n->IndexExpr.index = clone_ast(n->IndexExpr.index);
 		break;
+	case Ast_MatrixIndexExpr:
+		n->MatrixIndexExpr.expr  = clone_ast(n->MatrixIndexExpr.expr);
+		n->MatrixIndexExpr.row_index = clone_ast(n->MatrixIndexExpr.row_index);
+		n->MatrixIndexExpr.column_index = clone_ast(n->MatrixIndexExpr.column_index);
+		break;
 	case Ast_DerefExpr:
 		n->DerefExpr.expr = clone_ast(n->DerefExpr.expr);
 		break;
@@ -371,6 +376,11 @@ Ast *clone_ast(Ast *node) {
 		n->MapType.key   = clone_ast(n->MapType.key);
 		n->MapType.value = clone_ast(n->MapType.value);
 		break;
+	case Ast_MatrixType:
+		n->MatrixType.row_count    = clone_ast(n->MatrixType.row_count);
+		n->MatrixType.column_count = clone_ast(n->MatrixType.column_count);
+		n->MatrixType.elem         = clone_ast(n->MatrixType.elem);
+		break;
 	}
 
 	return n;
@@ -574,6 +584,15 @@ Ast *ast_deref_expr(AstFile *f, Ast *expr, Token op) {
 }
 
 
+Ast *ast_matrix_index_expr(AstFile *f, Ast *expr, Token open, Token close, Token interval, Ast *row, Ast *column) {
+	Ast *result = alloc_ast_node(f, Ast_MatrixIndexExpr);
+	result->MatrixIndexExpr.expr         = expr;
+	result->MatrixIndexExpr.row_index    = row;
+	result->MatrixIndexExpr.column_index = column;
+	result->MatrixIndexExpr.open         = open;
+	result->MatrixIndexExpr.close        = close;
+	return result;
+}
 
 
 Ast *ast_ident(AstFile *f, Token token) {
@@ -1066,6 +1085,14 @@ Ast *ast_map_type(AstFile *f, Token token, Ast *key, Ast *value) {
 	return result;
 }
 
+Ast *ast_matrix_type(AstFile *f, Token token, Ast *row_count, Ast *column_count, Ast *elem) {
+	Ast *result = alloc_ast_node(f, Ast_MatrixType);
+	result->MatrixType.token = token;
+	result->MatrixType.row_count = row_count;
+	result->MatrixType.column_count = column_count;
+	result->MatrixType.elem = elem;
+	return result;
+}
 
 Ast *ast_foreign_block_decl(AstFile *f, Token token, Ast *foreign_library, Ast *body,
                             CommentGroup *docs) {
@@ -2214,6 +2241,7 @@ Ast *parse_operand(AstFile *f, bool lhs) {
 			count_expr = parse_expr(f, false);
 			f->expr_level--;
 		}
+		
 		expect_token(f, Token_CloseBracket);
 		return ast_array_type(f, token, count_expr, parse_type(f));
 	} break;
@@ -2231,6 +2259,23 @@ Ast *parse_operand(AstFile *f, bool lhs) {
 
 		return ast_map_type(f, token, key, value);
 	} break;
+	
+	case Token_matrix: {
+		Token token = expect_token(f, Token_matrix);
+		Ast *row_count = nullptr;
+		Ast *column_count = nullptr;
+		Ast *type = nullptr;
+		Token open, close;
+		
+		open  = expect_token_after(f, Token_OpenBracket, "matrix");
+		row_count = parse_expr(f, true);
+		expect_token(f, Token_Comma);
+		column_count = parse_expr(f, true);
+		close = expect_token(f, Token_CloseBracket);
+		type = parse_type(f);
+		
+		return ast_matrix_type(f, token, row_count, column_count, type);
+	} break;
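The surface syntax accepted here is `matrix` `[` row-count `,` column-count `]` element-type:

	// matrix[2, 3]f32   ->   ast_matrix_type(token, row_count = 2, column_count = 3, elem = f32)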
 
 	case Token_struct: {
 		Token    token = expect_token(f, Token_struct);
@@ -2524,6 +2569,7 @@ bool is_literal_type(Ast *node) {
 	case Ast_DynamicArrayType:
 	case Ast_MapType:
 	case Ast_BitSetType:
+	case Ast_MatrixType:
 	case Ast_CallExpr:
 		return true;
 	case Ast_MultiPointerType:
@@ -2679,6 +2725,7 @@ Ast *parse_atom_expr(AstFile *f, Ast *operand, bool lhs) {
 			case Token_RangeHalf:
 				syntax_error(f->curr_token, "Expected a colon, not a range");
 				/* fallthrough */
+			case Token_Comma:  // matrix index
 			case Token_Colon:
 				interval = advance_token(f);
 				is_interval = true;
@@ -2694,7 +2741,14 @@ Ast *parse_atom_expr(AstFile *f, Ast *operand, bool lhs) {
 			close = expect_token(f, Token_CloseBracket);
 
 			if (is_interval) {
-				operand = ast_slice_expr(f, operand, open, close, interval, indices[0], indices[1]);
+				if (interval.kind == Token_Comma) {
+					if (indices[0] == nullptr || indices[1] == nullptr) {
+						syntax_error(open, "Matrix index expressions require both row and column indices");
+					}
+					operand = ast_matrix_index_expr(f, operand, open, close, interval, indices[0], indices[1]);
+				} else {
+					operand = ast_slice_expr(f, operand, open, close, interval, indices[0], indices[1]);
+				}
 			} else {
 				operand = ast_index_expr(f, operand, indices[0], open, close);
 			}
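Inside an index expression, a comma now produces a MatrixIndexExpr (both indices are mandatory, per the error above), while a colon still produces a SliceExpr:

	// m[1, 2]   ->   MatrixIndexExpr(expr = m, row_index = 1, column_index = 2)
	// s[1:2]    ->   SliceExpr (unchanged)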

+ 7 - 0
src/parser.hpp

@@ -407,6 +407,7 @@ AST_KIND(_ExprBegin,  "",  bool) \
 		bool is_align_stack; \
 		InlineAsmDialectKind dialect; \
 	}) \
+	AST_KIND(MatrixIndexExpr, "matrix index expression",       struct { Ast *expr, *row_index, *column_index; Token open, close; }) \
 AST_KIND(_ExprEnd,       "", bool) \
 AST_KIND(_StmtBegin,     "", bool) \
 	AST_KIND(BadStmt,    "bad statement",                 struct { Token begin, end; }) \
@@ -657,6 +658,12 @@ AST_KIND(_TypeBegin, "", bool) \
 		Ast *key; \
 		Ast *value; \
 	}) \
+	AST_KIND(MatrixType, "matrix type", struct { \
+		Token token;       \
+		Ast *row_count;    \
+		Ast *column_count; \
+		Ast *elem;         \
+	}) \
 AST_KIND(_TypeEnd,  "", bool)
 
 enum AstKind {

+ 4 - 0
src/parser_pos.cpp

@@ -35,6 +35,7 @@ Token ast_token(Ast *node) {
 		}
 		return node->ImplicitSelectorExpr.token;
 	case Ast_IndexExpr:          return node->IndexExpr.open;
+	case Ast_MatrixIndexExpr:    return node->MatrixIndexExpr.open;
 	case Ast_SliceExpr:          return node->SliceExpr.open;
 	case Ast_Ellipsis:           return node->Ellipsis.token;
 	case Ast_FieldValue:         return node->FieldValue.eq;
@@ -103,6 +104,7 @@ Token ast_token(Ast *node) {
 	case Ast_EnumType:         return node->EnumType.token;
 	case Ast_BitSetType:       return node->BitSetType.token;
 	case Ast_MapType:          return node->MapType.token;
+	case Ast_MatrixType:       return node->MatrixType.token;
 	}
 
 	return empty_token;
@@ -168,6 +170,7 @@ Token ast_end_token(Ast *node) {
 		}
 		return node->ImplicitSelectorExpr.token;
 	case Ast_IndexExpr:          return node->IndexExpr.close;
+	case Ast_MatrixIndexExpr:    return node->MatrixIndexExpr.close;
 	case Ast_SliceExpr:          return node->SliceExpr.close;
 	case Ast_Ellipsis:
 		if (node->Ellipsis.expr) {
@@ -345,6 +348,7 @@ Token ast_end_token(Ast *node) {
 		}
 		return ast_end_token(node->BitSetType.elem);
 	case Ast_MapType:          return ast_end_token(node->MapType.value);
+	case Ast_MatrixType:       return ast_end_token(node->MatrixType.elem);
 	}
 
 	return empty_token;

+ 1 - 0
src/tokenizer.cpp

@@ -117,6 +117,7 @@ TOKEN_KIND(Token__KeywordBegin, ""), \
 	TOKEN_KIND(Token_or_else,     "or_else"),     \
 	TOKEN_KIND(Token_or_return,   "or_return"),   \
 	TOKEN_KIND(Token_asm,         "asm"),         \
+	TOKEN_KIND(Token_matrix,      "matrix"),      \
 TOKEN_KIND(Token__KeywordEnd, ""), \
 	TOKEN_KIND(Token_Count, "")
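
Because Token_matrix sits between Token__KeywordBegin and Token__KeywordEnd, matrix becomes a reserved word, and any existing identifier with that name stops parsing. That is what forces the vendor/vulkan rename at the end of this diff:

	// before: no longer compiles, `matrix` is now a keyword
	// matrix: [3][4]f32,
	// after:
	mat: [3][4]f32,
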
 

+ 283 - 73
src/types.cpp

@@ -270,6 +270,14 @@ struct TypeProc {
 	TYPE_KIND(RelativeSlice, struct {                         \
 		Type *slice_type;                                 \
 		Type *base_integer;                               \
+	})                                                        \
+	TYPE_KIND(Matrix, struct {                                \
+		Type *elem;                                       \
+		i64   row_count;                                  \
+		i64   column_count;                               \
+		Type *generic_row_count;                          \
+		Type *generic_column_count;                       \
+		i64   stride_in_bytes;                            \
 	})
 
 
@@ -341,6 +349,7 @@ enum Typeid_Kind : u8 {
 	Typeid_Simd_Vector,
 	Typeid_Relative_Pointer,
 	Typeid_Relative_Slice,
+	Typeid_Matrix,
 };
 
 // IMPORTANT NOTE(bill): This must match the same as the in core.odin
@@ -349,6 +358,13 @@ enum TypeInfoFlag : u32 {
 	TypeInfoFlag_Simple_Compare = 1<<1,
 };
 
+
+enum : int {
+	MATRIX_ELEMENT_COUNT_MIN = 1,
+	MATRIX_ELEMENT_COUNT_MAX = 16,
+};
+
+
 bool is_type_comparable(Type *t);
 bool is_type_simple_compare(Type *t);
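
The bounds above cap a matrix type at 16 elements in total. Assuming they are enforced where the dimension expressions are resolved (the enforcement site is outside this excerpt), the largest square form is 4x4:

	a: matrix[4, 4]f32 // 16 elements: allowed
	b: matrix[2, 8]f64 // 16 elements: allowed
	// matrix[4, 5]f32 would exceed MATRIX_ELEMENT_COUNT_MAX (20 > 16)
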
 
@@ -622,6 +638,7 @@ gb_global Type *t_type_info_bit_set              = nullptr;
 gb_global Type *t_type_info_simd_vector          = nullptr;
 gb_global Type *t_type_info_relative_pointer     = nullptr;
 gb_global Type *t_type_info_relative_slice       = nullptr;
+gb_global Type *t_type_info_matrix               = nullptr;
 
 gb_global Type *t_type_info_named_ptr            = nullptr;
 gb_global Type *t_type_info_integer_ptr          = nullptr;
@@ -649,6 +666,7 @@ gb_global Type *t_type_info_bit_set_ptr          = nullptr;
 gb_global Type *t_type_info_simd_vector_ptr      = nullptr;
 gb_global Type *t_type_info_relative_pointer_ptr = nullptr;
 gb_global Type *t_type_info_relative_slice_ptr   = nullptr;
+gb_global Type *t_type_info_matrix_ptr           = nullptr;
 
 gb_global Type *t_allocator                      = nullptr;
 gb_global Type *t_allocator_ptr                  = nullptr;
@@ -667,11 +685,13 @@ gb_global Type *t_hasher_proc = nullptr;
 
 gb_global RecursiveMutex g_type_mutex;
 
+struct TypePath;
 
-i64      type_size_of               (Type *t);
-i64      type_align_of              (Type *t);
-i64      type_offset_of             (Type *t, i32 index);
-gbString type_to_string             (Type *type);
+i64      type_size_of         (Type *t);
+i64      type_align_of        (Type *t);
+i64      type_offset_of       (Type *t, i32 index);
+gbString type_to_string       (Type *type);
+i64      type_size_of_internal(Type *t, TypePath *path);
 void     init_map_internal_types(Type *type);
 Type *   bit_set_to_int(Type *t);
 bool are_types_identical(Type *x, Type *y);
@@ -680,6 +700,74 @@ bool is_type_pointer(Type *t);
 bool is_type_slice(Type *t);
 bool is_type_integer(Type *t);
 bool type_set_offsets(Type *t);
+Type *base_type(Type *t);
+
+i64 type_size_of_internal(Type *t, TypePath *path);
+i64 type_align_of_internal(Type *t, TypePath *path);
+
+
+// IMPORTANT TODO(bill): Should this TypePath code be removed since type cycle checking is handled much earlier on?
+
+struct TypePath {
+	Array<Entity *> path; // Entity_TypeName;
+	bool failure;
+};
+
+
+void type_path_init(TypePath *tp) {
+	tp->path.allocator = heap_allocator();
+}
+
+void type_path_free(TypePath *tp) {
+	array_free(&tp->path);
+}
+
+void type_path_print_illegal_cycle(TypePath *tp, isize start_index) {
+	GB_ASSERT(tp != nullptr);
+
+	GB_ASSERT(start_index < tp->path.count);
+	Entity *e = tp->path[start_index];
+	GB_ASSERT(e != nullptr);
+	error(e->token, "Illegal type declaration cycle of `%.*s`", LIT(e->token.string));
+	// NOTE(bill): Print cycle, if it's deep enough
+	for (isize j = start_index; j < tp->path.count; j++) {
+		Entity *e = tp->path[j];
+		error(e->token, "\t%.*s refers to", LIT(e->token.string));
+	}
+	// NOTE(bill): This will only print if the path count > 1
+	error(e->token, "\t%.*s", LIT(e->token.string));
+	tp->failure = true;
+	e->type->failure = true;
+	base_type(e->type)->failure = true;
+}
+
+bool type_path_push(TypePath *tp, Type *t) {
+	GB_ASSERT(tp != nullptr);
+	if (t->kind != Type_Named) {
+		return false;
+	}
+	Entity *e = t->Named.type_name;
+
+	for (isize i = 0; i < tp->path.count; i++) {
+		Entity *p = tp->path[i];
+		if (p == e) {
+			type_path_print_illegal_cycle(tp, i);
+		}
+	}
+
+	array_add(&tp->path, e);
+	return true;
+}
+
+void type_path_pop(TypePath *tp) {
+	if (tp != nullptr && tp->path.count > 0) {
+		array_pop(&tp->path);
+	}
+}
+
+
+#define FAILURE_SIZE      0
+#define FAILURE_ALIGNMENT 0
 
 void init_type_mutex(void) {
 	mutex_init(&g_type_mutex);
@@ -804,6 +892,24 @@ Type *alloc_type_array(Type *elem, i64 count, Type *generic_count = nullptr) {
 	return t;
 }
 
+Type *alloc_type_matrix(Type *elem, i64 row_count, i64 column_count, Type *generic_row_count = nullptr, Type *generic_column_count = nullptr) {
+	Type *t = alloc_type(Type_Matrix);
+	t->Matrix.elem                 = elem;
+	t->Matrix.row_count            = row_count;
+	t->Matrix.column_count         = column_count;
+	t->Matrix.generic_row_count    = generic_row_count;
+	t->Matrix.generic_column_count = generic_column_count;
+	return t;
+}
+
+
 Type *alloc_type_enumerated_array(Type *elem, Type *index, ExactValue const *min_value, ExactValue const *max_value, TokenKind op) {
 	Type *t = alloc_type(Type_EnumeratedArray);
 	t->EnumeratedArray.elem = elem;
@@ -1208,6 +1314,132 @@ bool is_type_enumerated_array(Type *t) {
 	t = base_type(t);
 	return t->kind == Type_EnumeratedArray;
 }
+bool is_type_matrix(Type *t) {
+	t = base_type(t);
+	return t->kind == Type_Matrix;
+}
+
+i64 matrix_align_of(Type *t, struct TypePath *tp) {
+	t = base_type(t);
+	GB_ASSERT(t->kind == Type_Matrix);
+	
+	Type *elem = t->Matrix.elem;
+	i64 row_count = gb_max(t->Matrix.row_count, 1);
+
+	bool pop = type_path_push(tp, elem);
+	if (tp->failure) {
+		return FAILURE_ALIGNMENT;
+	}
+
+	i64 elem_align = type_align_of_internal(elem, tp);
+	if (pop) type_path_pop(tp);
+	
+	i64 elem_size = type_size_of(elem);
+	
+
+	// NOTE(bill, 2021-10-25): The alignment strategy here is to have zero padding.
+	// It would be better for performance to pad each column so that each column
+	// could be maximally aligned, but as a compromise, having no padding is
+	// beneficial to third-party libraries that assume no padding.
+	
+	i64 total_expected_size = row_count*t->Matrix.column_count*elem_size;
+	// i64 min_alignment = prev_pow2(elem_align * row_count);
+	i64 min_alignment = prev_pow2(total_expected_size);
+	while ((total_expected_size % min_alignment) != 0) {
+		min_alignment >>= 1;
+	}
+	GB_ASSERT(min_alignment >= elem_align);
+	
+	i64 align = gb_min(min_alignment, build_context.max_align);
+	return align;
+}
+
+
+i64 matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) {
+	t = base_type(t);
+	GB_ASSERT(t->kind == Type_Matrix);
+	if (t->Matrix.stride_in_bytes != 0) {
+		return t->Matrix.stride_in_bytes;
+	} else if (t->Matrix.row_count == 0) {
+		return 0;
+	}
+	
+	i64 elem_size;
+	if (tp != nullptr) {
+		elem_size = type_size_of_internal(t->Matrix.elem, tp);
+	} else {
+		elem_size = type_size_of(t->Matrix.elem);
+	}
+
+	i64 stride_in_bytes = 0;
+	
+	// NOTE(bill, 2021-10-25): The alignment strategy here is to have zero padding.
+	// It would be better for performance to pad each column so that each column
+	// could be maximally aligned, but as a compromise, having no padding is
+	// beneficial to third-party libraries that assume no padding.
+	i64 row_count = t->Matrix.row_count;
+	stride_in_bytes = elem_size*row_count;
+	
+	t->Matrix.stride_in_bytes = stride_in_bytes;
+	return stride_in_bytes;
+}
+
+i64 matrix_type_stride_in_elems(Type *t) {
+	t = base_type(t);
+	GB_ASSERT(t->kind == Type_Matrix);
+	i64 stride = matrix_type_stride_in_bytes(t, nullptr);
+	return stride/gb_max(1, type_size_of(t->Matrix.elem));
+}
+
+
+i64 matrix_type_total_internal_elems(Type *t) {
+	t = base_type(t);
+	GB_ASSERT(t->kind == Type_Matrix);
+	i64 size = type_size_of(t);
+	i64 elem_size = type_size_of(t->Matrix.elem);
+	return size/gb_max(elem_size, 1);
+}
+
+i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) {
+	t = base_type(t);
+	GB_ASSERT(t->kind == Type_Matrix);
+	GB_ASSERT(0 <= row_index && row_index < t->Matrix.row_count);
+	GB_ASSERT(0 <= column_index && column_index < t->Matrix.column_count);
+	i64 stride_elems = matrix_type_stride_in_elems(t);
+	return stride_elems*column_index + row_index;
+}
+i64 matrix_index_to_offset(Type *t, i64 index) {
+	t = base_type(t);
+	GB_ASSERT(t->kind == Type_Matrix);
+	
+	i64 row_index    = index%t->Matrix.row_count;
+	i64 column_index = index/t->Matrix.row_count;
+	return matrix_indices_to_offset(t, row_index, column_index);
+}
+
+
+
+bool is_matrix_square(Type *t) {
+	t = base_type(t);
+	GB_ASSERT(t->kind == Type_Matrix);
+	return t->Matrix.row_count == t->Matrix.column_count;
+}
+
+bool is_type_valid_for_matrix_elems(Type *t) {
+	t = base_type(t);
+	if (is_type_integer(t)) {
+		return true;
+	} else if (is_type_float(t)) {
+		return true;
+	} else if (is_type_complex(t)) {
+		return true;
+	} else if (t->kind == Type_Generic) {
+		return true;
+	}
+	return false;
+}
+
 bool is_type_dynamic_array(Type *t) {
 	t = base_type(t);
 	return t->kind == Type_DynamicArray;
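
A worked example of the column-major, zero-padding layout these helpers compute, for matrix[2, 3]f32 (elem_size = 4, elem_align = 4), following matrix_type_stride_in_bytes, matrix_indices_to_offset, and matrix_align_of above, and assuming build_context.max_align >= 8:

	m: matrix[2, 3]f32
	// stride_in_bytes = elem_size * row_count       = 4 * 2 = 8 (one column)
	// size            = stride_in_bytes * col_count = 8 * 3 = 24
	// matrix_indices_to_offset for m[1, 2]: stride_elems*2 + 1 = 2*2 + 1 = 5 elements (byte offset 20)
	// alignment: prev_pow2(24) = 16; 24 % 16 != 0, halve to 8; 24 % 8 == 0 -> align = 8
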
@@ -1241,6 +1473,8 @@ Type *base_array_type(Type *t) {
 		return bt->EnumeratedArray.elem;
 	} else if (is_type_simd_vector(bt)) {
 		return bt->SimdVector.elem;
+	} else if (is_type_matrix(bt)) {
+		return bt->Matrix.elem;
 	}
 	return t;
 }
@@ -1315,11 +1549,16 @@ i64 get_array_type_count(Type *t) {
 Type *core_array_type(Type *t) {
 	for (;;) {
 		t = base_array_type(t);
-		if (t->kind != Type_Array && t->kind != Type_EnumeratedArray && t->kind != Type_SimdVector) {
+		switch (t->kind) {
+		case Type_Array:
+		case Type_EnumeratedArray:
+		case Type_SimdVector:
+		case Type_Matrix:
 			break;
+		default:
+			return t;
 		}
 	}
-	return t;
 }
 
 
@@ -1651,6 +1890,8 @@ bool is_type_indexable(Type *t) {
 		return true;
 	case Type_RelativeSlice:
 		return true;
+	case Type_Matrix:
+		return true;
 	}
 	return false;
 }
@@ -1668,6 +1909,8 @@ bool is_type_sliceable(Type *t) {
 		return false;
 	case Type_RelativeSlice:
 		return true;
+	case Type_Matrix:
+		return false;
 	}
 	return false;
 }
@@ -1934,6 +2177,8 @@ bool is_type_comparable(Type *t) {
 		return is_type_comparable(t->Array.elem);
 	case Type_Proc:
 		return true;
+	case Type_Matrix:
+		return is_type_comparable(t->Matrix.elem);
 
 	case Type_BitSet:
 		return true;
@@ -1995,6 +2240,9 @@ bool is_type_simple_compare(Type *t) {
 	case Type_Proc:
 	case Type_BitSet:
 		return true;
+		
+	case Type_Matrix:
+		return is_type_simple_compare(t->Matrix.elem);
 
 	case Type_Struct:
 		for_array(i, t->Struct.fields) {
@@ -2107,6 +2355,14 @@ bool are_types_identical(Type *x, Type *y) {
 			return (x->Array.count == y->Array.count) && are_types_identical(x->Array.elem, y->Array.elem);
 		}
 		break;
+		
+	case Type_Matrix:
+		if (y->kind == Type_Matrix) {
+			return x->Matrix.row_count == y->Matrix.row_count &&
+			       x->Matrix.column_count == y->Matrix.column_count &&
+			       are_types_identical(x->Matrix.elem, y->Matrix.elem);
+		}
+		break;
 
 	case Type_DynamicArray:
 		if (y->kind == Type_DynamicArray) {
@@ -2812,71 +3068,6 @@ Slice<i32> struct_fields_index_by_increasing_offset(gbAllocator allocator, Type
 
 
 
-
-// IMPORTANT TODO(bill): SHould this TypePath code be removed since type cycle checking is handled much earlier on?
-
-struct TypePath {
-	Array<Entity *> path; // Entity_TypeName;
-	bool failure;
-};
-
-
-void type_path_init(TypePath *tp) {
-	tp->path.allocator = heap_allocator();
-}
-
-void type_path_free(TypePath *tp) {
-	array_free(&tp->path);
-}
-
-void type_path_print_illegal_cycle(TypePath *tp, isize start_index) {
-	GB_ASSERT(tp != nullptr);
-
-	GB_ASSERT(start_index < tp->path.count);
-	Entity *e = tp->path[start_index];
-	GB_ASSERT(e != nullptr);
-	error(e->token, "Illegal type declaration cycle of `%.*s`", LIT(e->token.string));
-	// NOTE(bill): Print cycle, if it's deep enough
-	for (isize j = start_index; j < tp->path.count; j++) {
-		Entity *e = tp->path[j];
-		error(e->token, "\t%.*s refers to", LIT(e->token.string));
-	}
-	// NOTE(bill): This will only print if the path count > 1
-	error(e->token, "\t%.*s", LIT(e->token.string));
-	tp->failure = true;
-	e->type->failure = true;
-	base_type(e->type)->failure = true;
-}
-
-bool type_path_push(TypePath *tp, Type *t) {
-	GB_ASSERT(tp != nullptr);
-	if (t->kind != Type_Named) {
-		return false;
-	}
-	Entity *e = t->Named.type_name;
-
-	for (isize i = 0; i < tp->path.count; i++) {
-		Entity *p = tp->path[i];
-		if (p == e) {
-			type_path_print_illegal_cycle(tp, i);
-		}
-	}
-
-	array_add(&tp->path, e);
-	return true;
-}
-
-void type_path_pop(TypePath *tp) {
-	if (tp != nullptr && tp->path.count > 0) {
-		array_pop(&tp->path);
-	}
-}
-
-
-#define FAILURE_SIZE      0
-#define FAILURE_ALIGNMENT 0
-
-
 i64 type_size_of_internal (Type *t, TypePath *path);
 i64 type_align_of_internal(Type *t, TypePath *path);
 i64 type_size_of(Type *t);
@@ -2982,7 +3173,7 @@ i64 type_align_of_internal(Type *t, TypePath *path) {
 		if (path->failure) {
 			return FAILURE_ALIGNMENT;
 		}
-		i64 align = type_align_of_internal(t->Array.elem, path);
+		i64 align = type_align_of_internal(elem, path);
 		if (pop) type_path_pop(path);
 		return align;
 	}
@@ -2993,7 +3184,7 @@ i64 type_align_of_internal(Type *t, TypePath *path) {
 		if (path->failure) {
 			return FAILURE_ALIGNMENT;
 		}
-		i64 align = type_align_of_internal(t->EnumeratedArray.elem, path);
+		i64 align = type_align_of_internal(elem, path);
 		if (pop) type_path_pop(path);
 		return align;
 	}
@@ -3102,6 +3293,9 @@ i64 type_align_of_internal(Type *t, TypePath *path) {
 		// IMPORTANT TODO(bill): Figure out the alignment of vector types
 		return gb_clamp(next_pow2(type_size_of_internal(t, path)), 1, build_context.max_align);
 	}
+	
+	case Type_Matrix: 
+		return matrix_align_of(t, path);
 
 	case Type_RelativePointer:
 		return type_align_of_internal(t->RelativePointer.base_integer, path);
@@ -3369,6 +3563,17 @@ i64 type_size_of_internal(Type *t, TypePath *path) {
 		Type *elem = t->SimdVector.elem;
 		return count * type_size_of_internal(elem, path);
 	}
+	
+	case Type_Matrix: {
+		bool pop = type_path_push(path, t->Matrix.elem);
+		if (path->failure) {
+			return FAILURE_SIZE;
+		}
+		i64 stride_in_bytes = matrix_type_stride_in_bytes(t, path);
+		if (pop) type_path_pop(path);
+
+		return stride_in_bytes * t->Matrix.column_count;
+	}
 
 	case Type_RelativePointer:
 		return type_size_of_internal(t->RelativePointer.base_integer, path);
@@ -3830,6 +4035,11 @@ gbString write_type_to_string(gbString str, Type *type) {
 		str = gb_string_append_fmt(str, ") ");
 		str = write_type_to_string(str, type->RelativeSlice.slice_type);
 		break;
+		
+	case Type_Matrix:
+		str = gb_string_appendc(str, gb_bprintf("matrix[%d, %d]", cast(int)type->Matrix.row_count, cast(int)type->Matrix.column_count));
+		str = write_type_to_string(str, type->Matrix.elem);
+		break;
 	}
 
 	return str;
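
Tying together the checker-facing pieces of this file: two matrix types are identical only when both dimension counts and the element type match, comparability follows the element type, and write_type_to_string prints the dimensions ahead of the element type. A sketch:

	a: matrix[2, 3]f32 // printed as "matrix[2, 3]f32"
	b: matrix[3, 2]f32 // a distinct type: row/column counts differ
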

+ 6 - 2
vendor/vulkan/_gen/create_vulkan_odin_wrapper.py

@@ -262,6 +262,7 @@ def parse_constants(f):
 
 
 def parse_enums(f):
+    f.write("import \"core:c\"\n\n")
     f.write("// Enums\n")
 
     data = re.findall(r"typedef enum Vk(\w+) {(.+?)} \w+;", src, re.S)
@@ -405,8 +406,8 @@ def parse_structs(f):
             if "Flag_Bits" in type_:
                 comment = " // only single bit set"
             t = do_type(type_, prev_name, fname)
-            if t == "Structure_Type" and n == "type":
-                n = "s_type"
+            if n == "matrix":
+                n = "mat"
 
             ffields.append(tuple([n, t, comment]))
             prev_name = fname
@@ -467,6 +468,7 @@ def parse_procedures(f):
 
     max_len = max(len(n) for n, t in ff)
 
+    f.write("import \"core:c\"\n\n")
     f.write("// Procedure Types\n\n");
     for n, t in ff:
         f.write("{} :: #type {}\n".format(n.ljust(max_len), t.replace('"c"', '"system"')))
@@ -587,6 +589,8 @@ MAX_GLOBAL_PRIORITY_SIZE_EXT  :: 16
     with open("../structs.odin", 'w', encoding='utf-8') as f:
         f.write(BASE)
         f.write("""
+import "core:c"
+
 when ODIN_OS == "windows" {
 \timport win32 "core:sys/windows"
 

+ 1 - 1
vendor/vulkan/structs.odin

@@ -3750,7 +3750,7 @@ PhysicalDeviceRayTracingPropertiesNV :: struct {
 }
 
 TransformMatrixKHR :: struct {
-	matrix: [3][4]f32,
+	mat: [3][4]f32,
 }
 
 AabbPositionsKHR :: struct {