Browse Source

ARM64: Fuse FP multiply-add/sub.

Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Mike Pall 8 years ago
parent
commit
2772cbc36e
2 changed files with 30 additions and 2 deletions
  1. 25 2
      src/lj_asm_arm64.h
  2. 5 0
      src/lj_emit_arm64.h

+ 25 - 2
src/lj_asm_arm64.h

@@ -327,6 +327,27 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
   emit_lso(as, ai, (rd & 31), base, ofs);
 }
 
+/* Fuse FP multiply-add/sub. Returns 1 if a fused instruction was emitted, else 0. */
+static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+{  /* ai is the opcode used when ir->op1 is the MUL, air when ir->op2 is. */
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;  /* The IR_MUL operand selected for fusion. */
+  if (lref != rref &&  /* Never fuse when both operands are the same ref. */
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+       ra_noreg(irm->r)) ||  /* Left operand is the MUL: keep ai and rref. */
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+       (rref = lref, ai = air, ra_noreg(irm->r))))) {  /* Right is the MUL: swap. */
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);  /* The operand not fused. */
+    Reg left = ra_alloc2(as, irm,
			 rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    Reg right = (left >> 8); left &= 255;  /* Unpack: low 8 bits left, rest right. */
+    emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
+    return 1;
+  }
+  return 0;  /* No fusable MUL operand: the caller emits the plain FP op. */
+}
+
 /* -- Calls --------------------------------------------------------------- */
 
 /* Generate a call to a C function. */
@@ -1308,7 +1329,8 @@ static void asm_intmul(ASMState *as, IRIns *ir)
 static void asm_add(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
-    asm_fparith(as, ir, A64I_FADDd);
+    if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
+      asm_fparith(as, ir, A64I_FADDd);
     return;
   }
   asm_intop_s(as, ir, A64I_ADDw);
@@ -1317,7 +1339,8 @@ static void asm_add(ASMState *as, IRIns *ir)
 static void asm_sub(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
-    asm_fparith(as, ir, A64I_FSUBd);
+    if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
+      asm_fparith(as, ir, A64I_FSUBd);
     return;
   }
   asm_intop_s(as, ir, A64I_SUBw);

+ 5 - 0
src/lj_emit_arm64.h

@@ -74,6 +74,11 @@ static uint32_t emit_isfpk64(uint64_t n)
 
 /* -- Emit basic instructions --------------------------------------------- */
 
+static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra)
+{  /* Emit one instruction word: ai ORed with the D, N, M and A fields. */
+  *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra);  /* Pre-decrement as->mcp, then store. */
+}
+
 static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
 {
   *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);