Browse Source

Use SSE variants for IRFPM_FLOOR/CEIL/TRUNC unless SSE4.1 is available.

Mike Pall 16 years ago
parent
commit
44a9d7b00c
2 changed files with 21 additions and 0 deletions
  1. 18 0
      src/lj_asm.c
  2. 3 0
      src/lj_vm.h

+ 18 - 0
src/lj_asm.c

@@ -2020,6 +2020,16 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
       as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f;  /* Swap 0F and REX. */
       as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f;  /* Swap 0F and REX. */
     }
     }
     *--as->mcp = 0x66;  /* 1st byte of ROUNDSD opcode. */
     *--as->mcp = 0x66;  /* 1st byte of ROUNDSD opcode. */
+  } else if (fpm <= IRFPM_TRUNC) {
+    /* The modified regs must match with the *.dasc implementation. */
+    RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+    if (ra_hasreg(ir->r))
+      rset_clear(drop, ir->r);  /* Dest reg handled below. */
+    ra_evictset(as, drop);
+    ra_destreg(as, ir, RID_XMM0);
+    emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse :
+		  fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
+    ra_left(as, RID_XMM0, ir->op1);
   } else {
   } else {
     int32_t ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
     int32_t ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
     Reg dest = ir->r;
     Reg dest = ir->r;
@@ -3275,6 +3285,14 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       if (inloop)
       if (inloop)
 	as->modset = RSET_SCRATCH;
 	as->modset = RSET_SCRATCH;
       break;
       break;
+    case IR_FPMATH:
+      if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
+	ir->prev = REGSP_HINT(RID_XMM0);
+	if (inloop)
+	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+	continue;
+      }
+      break;
     /* Non-constant shift counts need to be in RID_ECX. */
     /* Non-constant shift counts need to be in RID_ECX. */
     case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
     case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))

+ 3 - 0
src/lj_vm.h

@@ -37,6 +37,9 @@ LJ_ASMF void lj_vm_exit_interp(void);
 LJ_ASMF void lj_vm_floor(void);
 LJ_ASMF void lj_vm_floor(void);
 LJ_ASMF void lj_vm_ceil(void);
 LJ_ASMF void lj_vm_ceil(void);
 LJ_ASMF void lj_vm_trunc(void);
 LJ_ASMF void lj_vm_trunc(void);
+LJ_ASMF void lj_vm_floor_sse(void);
+LJ_ASMF void lj_vm_ceil_sse(void);
+LJ_ASMF void lj_vm_trunc_sse(void);
 LJ_ASMF void lj_vm_exp(void);
 LJ_ASMF void lj_vm_exp(void);
 LJ_ASMF void lj_vm_exp2(void);
 LJ_ASMF void lj_vm_exp2(void);
 LJ_ASMF void lj_vm_pow(void);
 LJ_ASMF void lj_vm_pow(void);