浏览代码

Remove obsolete non-truncating number to integer conversions.

Mike Pall 13 年之前
父节点
当前提交
b359ce804b
共有 12 个文件被更改，包括 31 次插入和 55 次删除
  1. 1 2
      src/jit/dump.lua
  2. 1 1
      src/lj_asm_mips.h
  3. 2 5
      src/lj_asm_x86.h
  4. 5 6
      src/lj_crecord.c
  5. 0 1
      src/lj_ir.h
  6. 0 4
      src/lj_obj.h
  7. 10 17
      src/lj_opt_fold.c
  8. 1 2
      src/lj_opt_narrow.c
  9. 0 4
      src/lj_target_arm.h
  10. 0 2
      src/lj_target_x86.h
  11. 6 6
      src/vm_mips.dasc
  12. 5 5
      src/vm_x86.dasc

+ 1 - 2
src/jit/dump.lua

@@ -269,8 +269,7 @@ local litname = {
   ["CONV  "] = setmetatable({}, { __index = function(t, mode)
     local s = irtype[band(mode, 31)]
     s = irtype[band(shr(mode, 5), 31)].."."..s
-    if band(mode, 0x400) ~= 0 then s = s.." trunc"
-    elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
+    if band(mode, 0x800) ~= 0 then s = s.." sext" end
     local c = shr(mode, 14)
     if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
     t[mode] = s

+ 1 - 1
src/lj_asm_mips.h

@@ -1000,7 +1000,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
       if (irt_isint(t)) {
 	Reg tmp = ra_scratch(as, RSET_FPR);
 	emit_tg(as, MIPSI_MFC1, dest, tmp);
-	emit_fg(as, MIPSI_CVT_W_D, tmp, tmp);
+	emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
 	dest = tmp;
 	t.irt = IRT_NUM;  /* Check for original type. */
       } else {

+ 2 - 5
src/lj_asm_x86.h

@@ -726,9 +726,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -850,7 +848,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -1457,7 +1454,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
       emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }

+ 5 - 6
src/lj_crecord.c

@@ -446,7 +446,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
     /* fallthrough */
   case CCX(I, F):
     if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
-    sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY);
+    sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
     goto xstore;
   case CCX(I, P):
   case CCX(I, A):
@@ -522,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
     if (st == IRT_CDATA) goto err_nyi;
     /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
     sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
-		  st, IRCONV_TRUNC|IRCONV_ANY);
+		  st, IRCONV_ANY);
     goto xstore;
 
   /* Destination is an array. */
@@ -1229,7 +1229,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
     for (i = 0; i < 2; i++) {
       IRType st = tref_type(sp[i]);
       if (st == IRT_NUM || st == IRT_FLOAT)
-	sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY);
+	sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
       else if (!(st == IRT_I64 || st == IRT_U64))
 	sp[i] = emitconv(sp[i], dt, IRT_INT,
 			 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1297,15 +1297,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
     CTypeID id;
 #if LJ_64
     if (t == IRT_NUM || t == IRT_FLOAT)
-      tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY);
+      tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
     else if (!(t == IRT_I64 || t == IRT_U64))
       tr = emitconv(tr, IRT_INTP, IRT_INT,
 		    ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
 #else
     if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
       tr = emitconv(tr, IRT_INTP, t,
-		    (t == IRT_NUM || t == IRT_FLOAT) ?
-		    IRCONV_TRUNC|IRCONV_ANY : 0);
+		    (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
     }
 #endif
     tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));

+ 0 - 1
src/lj_ir.h

@@ -227,7 +227,6 @@ IRFLDEF(FLENUM)
 #define IRCONV_DSH		5
 #define IRCONV_NUM_INT		((IRT_NUM<<IRCONV_DSH)|IRT_INT)
 #define IRCONV_INT_NUM		((IRT_INT<<IRCONV_DSH)|IRT_NUM)
-#define IRCONV_TRUNC		0x0400	/* Truncate number to integer. */
 #define IRCONV_SEXT		0x0800	/* Sign-extend integer to integer. */
 #define IRCONV_MODEMASK		0x0fff
 #define IRCONV_CONVMASK		0xf000

+ 0 - 4
src/lj_obj.h

@@ -810,11 +810,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
 #endif
 }
 
-#if LJ_TARGET_X86 && !defined(__SSE2__)
-#define lj_num2int(n)   lj_num2bit((n))
-#else
 #define lj_num2int(n)   ((int32_t)(n))
-#endif
 
 static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
 {

+ 10 - 17
src/lj_opt_fold.c

@@ -647,27 +647,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
 LJFOLDF(kfold_conv_knum_int_num)
 {
   lua_Number n = knumleft;
-  if (!(fins->op2 & IRCONV_TRUNC)) {
-    int32_t k = lj_num2int(n);
-    if (irt_isguard(fins->t) && n != (lua_Number)k) {
-      /* We're about to create a guard which always fails, like CONV +1.5.
-      ** Some pathological loops cause this during LICM, e.g.:
-      **   local x,k,t = 0,1.5,{1,[1.5]=2}
-      **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
-      **   assert(x == 300)
-      */
-      return FAILFOLD;
-    }
-    return INTFOLD(k);
-  } else {
-    return INTFOLD((int32_t)n);
+  int32_t k = lj_num2int(n);
+  if (irt_isguard(fins->t) && n != (lua_Number)k) {
+    /* We're about to create a guard which always fails, like CONV +1.5.
+    ** Some pathological loops cause this during LICM, e.g.:
+    **   local x,k,t = 0,1.5,{1,[1.5]=2}
+    **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
+    **   assert(x == 300)
+    */
+    return FAILFOLD;
   }
+  return INTFOLD(k);
 }
 
 LJFOLD(CONV KNUM IRCONV_U32_NUM)
 LJFOLDF(kfold_conv_knum_u32_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
 #ifdef _MSC_VER
   {  /* Workaround for MSVC bug. */
     volatile uint32_t u = (uint32_t)knumleft;
@@ -681,14 +676,12 @@ LJFOLDF(kfold_conv_knum_u32_num)
 LJFOLD(CONV KNUM IRCONV_I64_NUM)
 LJFOLDF(kfold_conv_knum_i64_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
   return INT64FOLD((uint64_t)(int64_t)knumleft);
 }
 
 LJFOLD(CONV KNUM IRCONV_U64_NUM)
 LJFOLDF(kfold_conv_knum_u64_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
   return INT64FOLD(lj_num2u64(knumleft));
 }
 

+ 1 - 2
src/lj_opt_narrow.c

@@ -496,8 +496,7 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
 {
   lua_assert(tref_isnumber(tr));
   if (tref_isnum(tr))
-    return emitir(IRT(IR_CONV, IRT_INTP), tr,
-		  (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
+    return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
   /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
   return narrow_stripov(J, tr, IR_MULOV,
 			LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :

+ 0 - 4
src/lj_target_arm.h

@@ -243,10 +243,6 @@ typedef enum ARMIns {
   ARMI_VCVT_S32_F64 = 0xeebd0bc0,
   ARMI_VCVT_U32_F32 = 0xeebc0ac0,
   ARMI_VCVT_U32_F64 = 0xeebc0bc0,
-  ARMI_VCVTR_S32_F32 = 0xeebd0a40,
-  ARMI_VCVTR_S32_F64 = 0xeebd0b40,
-  ARMI_VCVTR_U32_F32 = 0xeebc0a40,
-  ARMI_VCVTR_U32_F64 = 0xeebc0b40,
   ARMI_VCVT_F32_S32 = 0xeeb80ac0,
   ARMI_VCVT_F64_S32 = 0xeeb80bc0,
   ARMI_VCVT_F32_U32 = 0xeeb80a40,

+ 0 - 2
src/lj_target_x86.h

@@ -277,10 +277,8 @@ typedef enum {
   XO_ROUNDSD =	0x0b3a0ffc,  /* Really 66 0f 3a 0b. See asm_fpmath. */
   XO_UCOMISD =	XO_660f(2e),
   XO_CVTSI2SD =	XO_f20f(2a),
-  XO_CVTSD2SI =	XO_f20f(2d),
   XO_CVTTSD2SI=	XO_f20f(2c),
   XO_CVTSI2SS =	XO_f30f(2a),
-  XO_CVTSS2SI =	XO_f30f(2d),
   XO_CVTTSS2SI=	XO_f30f(2c),
   XO_CVTSS2SD =	XO_f30f(5a),
   XO_CVTSD2SS =	XO_f20f(5a),

+ 6 - 6
src/vm_mips.dasc

@@ -1188,7 +1188,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   mtc1 TMP0, FARG1
   |  beqz AT, ->fff_fallback
   |.  lw PC, FRAME_PC(BASE)
-  |   cvt.w.d FRET1, FARG2
+  |   trunc.w.d FRET1, FARG2
   |  cvt.d.w FARG1, FARG1
   |   lw TMP0, TAB:CARG1->asize
   |   lw TMP1, TAB:CARG1->array
@@ -1522,7 +1522,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.  nop
   |
   |.ffunc_nn math_ldexp
-  |  cvt.w.d FARG2, FARG2
+  |  trunc.w.d FARG2, FARG2
   |  load_got ldexp
   |  mfc1 CARG3, FARG2
   |  call_extern
@@ -1622,7 +1622,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.  sltiu AT, CARG3, LJ_TISNUM
   |  beqz AT, ->fff_fallback
   |.  li CARG3, 1
-  |   cvt.w.d FARG1, FARG1
+  |   trunc.w.d FARG1, FARG1
   |  addiu CARG2, sp, ARG5_OFS
   |  sltiu AT, TMP0, 256
   |   mfc1 TMP0, FARG1
@@ -1652,7 +1652,7 @@ static void build_subroutines(BuildCtx *ctx)
   |    ldc1 f2, 8(BASE)
   |  beqz AT, >1
   |.  li CARG4, -1
-  |   cvt.w.d f0, f0
+  |   trunc.w.d f0, f0
   |  sltiu AT, CARG3, LJ_TISNUM
   |  beqz AT, ->fff_fallback
   |.  mfc1 CARG4, f0
@@ -1660,7 +1660,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  sltiu AT, CARG2, LJ_TISNUM
   |  beqz AT, ->fff_fallback
   |.  li AT, LJ_TSTR
-  |  cvt.w.d f2, f2
+  |  trunc.w.d f2, f2
   |  bne TMP0, AT, ->fff_fallback
   |.  lw CARG2, STR:CARG1->len
   |  mfc1 CARG3, f2
@@ -1700,7 +1700,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  or AT, AT, TMP0
   |  bnez AT, ->fff_fallback
   |.  sltiu AT, CARG4, LJ_TISNUM
-  |   cvt.w.d f0, f0
+  |   trunc.w.d f0, f0
   |  beqz AT, ->fff_fallback
   |.  lw TMP0, STR:CARG1->len
   |   mfc1 CARG3, f0

+ 5 - 5
src/vm_x86.dasc

@@ -1622,7 +1622,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movsd xmm0, qword [BASE+8]
   |  sseconst_1 xmm1, RBa
   |  addsd xmm0, xmm1
-  |  cvtsd2si RD, xmm0
+  |  cvttsd2si RD, xmm0
   |  movsd qword [BASE-8], xmm0
   |.endif
   |  mov TAB:RB, [BASE]
@@ -1975,7 +1975,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movsd xmm0, qword [BASE]
   |  call ->vm_ .. func .. _sse
   |.if DUALNUM
-  |  cvtsd2si RB, xmm0
+  |  cvttsd2si RB, xmm0
   |  cmp RB, 0x80000000
   |  jne ->fff_resi
   |  cvtsi2sd xmm1, RB
@@ -2968,7 +2968,7 @@ static void build_subroutines(BuildCtx *ctx)
   |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
   |// Needs 16 byte scratch area for x86. Also called from JIT code.
   |->vm_pow_sse:
-  |  cvtsd2si eax, xmm1
+  |  cvttsd2si eax, xmm1
   |  cvtsi2sd xmm2, eax
   |  ucomisd xmm1, xmm2
   |  jnz >8				// Branch for FP exponents.
@@ -4376,7 +4376,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // Convert number to int and back and compare.
     |  checknum RC, >5
     |  movsd xmm0, qword [BASE+RC*8]
-    |  cvtsd2si RC, xmm0
+    |  cvttsd2si RC, xmm0
     |  cvtsi2sd xmm1, RC
     |  ucomisd xmm0, xmm1
     |  jne ->vmeta_tgetv		// Generic numeric key? Use fallback.
@@ -4516,7 +4516,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // Convert number to int and back and compare.
     |  checknum RC, >5
     |  movsd xmm0, qword [BASE+RC*8]
-    |  cvtsd2si RC, xmm0
+    |  cvttsd2si RC, xmm0
     |  cvtsi2sd xmm1, RC
     |  ucomisd xmm0, xmm1
     |  jne ->vmeta_tsetv		// Generic numeric key? Use fallback.