|
@@ -648,18 +648,23 @@ slow_form:
|
|
i64 inner = xt->Matrix.column_count;
|
|
i64 inner = xt->Matrix.column_count;
|
|
i64 outer_columns = yt->Matrix.column_count;
|
|
i64 outer_columns = yt->Matrix.column_count;
|
|
|
|
|
|
|
|
+ auto inners = slice_make<lbValue[2]>(permanent_allocator(), inner);
|
|
|
|
+
|
|
for (i64 j = 0; j < outer_columns; j++) {
|
|
for (i64 j = 0; j < outer_columns; j++) {
|
|
for (i64 i = 0; i < outer_rows; i++) {
|
|
for (i64 i = 0; i < outer_rows; i++) {
|
|
|
|
+ lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j);
|
|
for (i64 k = 0; k < inner; k++) {
|
|
for (i64 k = 0; k < inner; k++) {
|
|
- lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j);
|
|
|
|
- lbValue d0 = lb_emit_load(p, dst);
|
|
|
|
-
|
|
|
|
- lbValue a = lb_emit_matrix_ev(p, lhs, i, k);
|
|
|
|
- lbValue b = lb_emit_matrix_ev(p, rhs, k, j);
|
|
|
|
- lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem);
|
|
|
|
- lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem);
|
|
|
|
- lb_emit_store(p, dst, d);
|
|
|
|
|
|
+ inners[k][0] = lb_emit_matrix_ev(p, lhs, i, k);
|
|
|
|
+ inners[k][1] = lb_emit_matrix_ev(p, rhs, k, j);
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ lbValue sum = lb_emit_load(p, dst);
|
|
|
|
+ for (i64 k = 0; k < inner; k++) {
|
|
|
|
+ lbValue a = inners[k][0];
|
|
|
|
+ lbValue b = inners[k][1];
|
|
|
|
+ sum = lb_emit_mul_add(p, a, b, sum, elem);
|
|
|
|
+ }
|
|
|
|
+ lb_emit_store(p, dst, sum);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -3626,6 +3631,7 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
|
|
case Type_Slice: et = bt->Slice.elem; break;
|
|
case Type_Slice: et = bt->Slice.elem; break;
|
|
case Type_BitSet: et = bt->BitSet.elem; break;
|
|
case Type_BitSet: et = bt->BitSet.elem; break;
|
|
case Type_SimdVector: et = bt->SimdVector.elem; break;
|
|
case Type_SimdVector: et = bt->SimdVector.elem; break;
|
|
|
|
+ case Type_Matrix: et = bt->Matrix.elem; break;
|
|
}
|
|
}
|
|
|
|
|
|
String proc_name = {};
|
|
String proc_name = {};
|
|
@@ -4157,7 +4163,104 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
|
|
}
|
|
}
|
|
break;
|
|
break;
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ case Type_Matrix: {
|
|
|
|
+ if (cl->elems.count > 0) {
|
|
|
|
+ lb_addr_store(p, v, lb_const_value(p->module, type, exact_value_compound(expr)));
|
|
|
|
+
|
|
|
|
+ auto temp_data = array_make<lbCompoundLitElemTempData>(temporary_allocator(), 0, cl->elems.count);
|
|
|
|
|
|
|
|
+ // NOTE(bill): Separate value, gep, store into their own chunks
|
|
|
|
+ for_array(i, cl->elems) {
|
|
|
|
+ Ast *elem = cl->elems[i];
|
|
|
|
+
|
|
|
|
+ if (elem->kind == Ast_FieldValue) {
|
|
|
|
+ ast_node(fv, FieldValue, elem);
|
|
|
|
+ if (lb_is_elem_const(fv->value, et)) {
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ if (is_ast_range(fv->field)) {
|
|
|
|
+ ast_node(ie, BinaryExpr, fv->field);
|
|
|
|
+ TypeAndValue lo_tav = ie->left->tav;
|
|
|
|
+ TypeAndValue hi_tav = ie->right->tav;
|
|
|
|
+ GB_ASSERT(lo_tav.mode == Addressing_Constant);
|
|
|
|
+ GB_ASSERT(hi_tav.mode == Addressing_Constant);
|
|
|
|
+
|
|
|
|
+ TokenKind op = ie->op.kind;
|
|
|
|
+ i64 lo = exact_value_to_i64(lo_tav.value);
|
|
|
|
+ i64 hi = exact_value_to_i64(hi_tav.value);
|
|
|
|
+ if (op != Token_RangeHalf) {
|
|
|
|
+ hi += 1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ lbValue value = lb_build_expr(p, fv->value);
|
|
|
|
+
|
|
|
|
+ for (i64 k = lo; k < hi; k++) {
|
|
|
|
+ lbCompoundLitElemTempData data = {};
|
|
|
|
+ data.value = value;
|
|
|
|
+
|
|
|
|
+ data.elem_index = cast(i32)matrix_index_to_offset(bt, k);
|
|
|
|
+ array_add(&temp_data, data);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ } else {
|
|
|
|
+ auto tav = fv->field->tav;
|
|
|
|
+ GB_ASSERT(tav.mode == Addressing_Constant);
|
|
|
|
+ i64 index = exact_value_to_i64(tav.value);
|
|
|
|
+
|
|
|
|
+ lbValue value = lb_build_expr(p, fv->value);
|
|
|
|
+ lbCompoundLitElemTempData data = {};
|
|
|
|
+ data.value = lb_emit_conv(p, value, et);
|
|
|
|
+ data.expr = fv->value;
|
|
|
|
+
|
|
|
|
+ data.elem_index = cast(i32)matrix_index_to_offset(bt, index);
|
|
|
|
+ array_add(&temp_data, data);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ } else {
|
|
|
|
+ if (lb_is_elem_const(elem, et)) {
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ lbCompoundLitElemTempData data = {};
|
|
|
|
+ data.expr = elem;
|
|
|
|
+ data.elem_index = cast(i32)matrix_index_to_offset(bt, i);
|
|
|
|
+ array_add(&temp_data, data);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for_array(i, temp_data) {
|
|
|
|
+ temp_data[i].gep = lb_emit_array_epi(p, lb_addr_get_ptr(p, v), temp_data[i].elem_index);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for_array(i, temp_data) {
|
|
|
|
+ lbValue field_expr = temp_data[i].value;
|
|
|
|
+ Ast *expr = temp_data[i].expr;
|
|
|
|
+
|
|
|
|
+ auto prev_hint = lb_set_copy_elision_hint(p, lb_addr(temp_data[i].gep), expr);
|
|
|
|
+
|
|
|
|
+ if (field_expr.value == nullptr) {
|
|
|
|
+ field_expr = lb_build_expr(p, expr);
|
|
|
|
+ }
|
|
|
|
+ Type *t = field_expr.type;
|
|
|
|
+ GB_ASSERT(t->kind != Type_Tuple);
|
|
|
|
+ lbValue ev = lb_emit_conv(p, field_expr, et);
|
|
|
|
+
|
|
|
|
+ if (!p->copy_elision_hint.used) {
|
|
|
|
+ temp_data[i].value = ev;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ lb_reset_copy_elision_hint(p, prev_hint);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for_array(i, temp_data) {
|
|
|
|
+ if (temp_data[i].value.value != nullptr) {
|
|
|
|
+ lb_emit_store(p, temp_data[i].gep, temp_data[i].value);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+
|
|
}
|
|
}
|
|
|
|
|
|
return v;
|
|
return v;
|