Browse Source

Add IR_TOI64.

Mike Pall 14 years ago
parent
commit
159f51d811
5 changed files with 56 additions and 4 deletions
  1. 4 2
      lib/dump.lua
  2. 19 0
      src/lj_asm.c
  3. 6 2
      src/lj_ir.h
  4. 26 0
      src/lj_opt_fold.c
  5. 1 0
      src/lj_target_x86.h

+ 4 - 2
lib/dump.lua

@@ -219,7 +219,8 @@ span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b04
 
 
 local colorize, irtype
 local colorize, irtype
 
 
--- Lookup table to convert some literals into names.
+-- Lookup tables to convert some literals into names.
+local tointname = { [0] = "check", "index", "", "Z", "S", "T", } -- Mode names indexed 0..5 by the op2 literal; shared by the TOINT and TOI64 entries of litname.
 local litname = {
 local litname = {
   ["SLOAD "] = setmetatable({}, { __index = function(t, mode)
   ["SLOAD "] = setmetatable({}, { __index = function(t, mode)
     local s = ""
     local s = ""
@@ -233,7 +234,8 @@ local litname = {
     return s
     return s
   end}),
   end}),
   ["XLOAD "] = { [0] = "", "R", "U", "RU", },
   ["XLOAD "] = { [0] = "", "R", "U", "RU", },
-  ["TOINT "] = { [0] = "check", "index", "", },
+  ["TOINT "] = tointname,
+  ["TOI64 "] = tointname,
   ["FLOAD "] = vmdef.irfield,
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
   ["FPMATH"] = vmdef.irfpm,

+ 19 - 0
src/lj_asm.c

@@ -1595,6 +1595,24 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   ra_left(as, tmp, ir->op1);
   ra_left(as, tmp, ir->op1);
 }
 }
 
 
+static void asm_toi64(ASMState *as, IRIns *ir)  /* Assemble IR_TOI64; op2 holds the IRTOINT_* mode. */
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);  /* Result register is taken from the GPR set. */
+  IRRef lref = ir->op1;  /* Operand to convert. */
+  lua_assert(LJ_64);  /* NYI: 32 bit register pairs. */
+  if (ir->op2 == IRTOINT_TRUNCI64) {  /* Number -> int64_t (truncating). */
+    Reg left = asm_fuseload(as, lref, RSET_FPR);
+    emit_mrm(as, XO_CVTTSD2SI, dest|REX_64, left);
+  } else if (ir->op2 == IRTOINT_ZEXT64) {  /* uint32_t -> int64_t. */
+    /* Nothing to do. This assumes 32 bit regs are already zero-extended. */
+    ra_left(as, dest, lref);  /* But may need to move regs. */
+  } else {  /* Remaining mode: int32_t -> int64_t (asserted below). */
+    Reg left = asm_fuseload(as, lref, RSET_GPR);
+    emit_mrm(as, XO_MOVSXd, dest|REX_64, left);
+    lua_assert(ir->op2 == IRTOINT_SEXT64);
+  }
+}
+
 static void asm_strto(ASMState *as, IRIns *ir)
 static void asm_strto(ASMState *as, IRIns *ir)
 {
 {
   /* Force a spill slot for the destination register (if any). */
   /* Force a spill slot for the destination register (if any). */
@@ -3531,6 +3549,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
       asm_toint(as, ir); break;
       asm_toint(as, ir); break;
     break;
     break;
   case IR_TOBIT: asm_tobit(as, ir); break;
   case IR_TOBIT: asm_tobit(as, ir); break;
+  case IR_TOI64: asm_toi64(as, ir); break;
   case IR_TOSTR: asm_tostr(as, ir); break;
   case IR_TOSTR: asm_tostr(as, ir); break;
   case IR_STRTO: asm_strto(as, ir); break;
   case IR_STRTO: asm_strto(as, ir); break;
 
 

+ 6 - 2
src/lj_ir.h

@@ -119,6 +119,7 @@
   _(TONUM,	N , ref, ___) \
   _(TONUM,	N , ref, ___) \
   _(TOINT,	N , ref, lit) \
   _(TOINT,	N , ref, lit) \
   _(TOBIT,	N , ref, ref) \
   _(TOBIT,	N , ref, ref) \
+  _(TOI64,	N , ref, lit) \
   _(TOSTR,	N , ref, ___) \
   _(TOSTR,	N , ref, ___) \
   _(STRTO,	N , ref, ___) \
   _(STRTO,	N , ref, ___) \
   \
   \
@@ -204,11 +205,14 @@ IRFLDEF(FLENUM)
 #define IRXLOAD_READONLY	1	/* Load from read-only data. */
 #define IRXLOAD_READONLY	1	/* Load from read-only data. */
 #define IRXLOAD_UNALIGNED	2	/* Unaligned load. */
 #define IRXLOAD_UNALIGNED	2	/* Unaligned load. */
 
 
-/* TOINT mode, stored in op2. Ordered by strength of the checks. */
+/* TOINT/TOI64 mode, stored in op2. Ordered by strength of the checks. */
 #define IRTOINT_CHECK		0	/* Number checked for integerness. */
 #define IRTOINT_CHECK		0	/* Number checked for integerness. */
 #define IRTOINT_INDEX		1	/* Checked + special backprop rules. */
 #define IRTOINT_INDEX		1	/* Checked + special backprop rules. */
 #define IRTOINT_ANY		2	/* Any FP number is ok. */
 #define IRTOINT_ANY		2	/* Any FP number is ok. */
-#define IRTOINT_TOBIT		3	/* Cache only: TOBIT conversion. */
+#define IRTOINT_ZEXT64		3	/* Convert uint32_t to int64_t. */
+#define IRTOINT_SEXT64		4	/* Convert int32_t to int64_t. */
+#define IRTOINT_TRUNCI64	5	/* Truncate number to int64_t. */
+#define IRTOINT_TOBIT		6	/* Cache only: TOBIT conversion. */
 
 
 /* C call info for CALL* instructions. */
 /* C call info for CALL* instructions. */
 typedef struct CCallInfo {
 typedef struct CCallInfo {

+ 26 - 0
src/lj_opt_fold.c

@@ -270,6 +270,23 @@ LJFOLDF(kfold_toint)
   return INTFOLD(k);
   return INTFOLD(k);
 }
 }
 
 
+LJFOLD(TOI64 KINT any)
+LJFOLDF(kfold_toi64_kint)
+{  /* Constant-fold TOI64 of an integer constant into a 64 bit integer constant. */
+  lua_assert(fins->op2 == IRTOINT_ZEXT64 || fins->op2 == IRTOINT_SEXT64);
+  if (fins->op2 == IRTOINT_ZEXT64)
+    return lj_ir_kint64(J, (int64_t)(uint32_t)fleft->i);  /* Zero-extend from 32 bits. */
+  else
+    return lj_ir_kint64(J, (int64_t)(int32_t)fleft->i);  /* Sign-extend from 32 bits. */
+}
+
+LJFOLD(TOI64 KNUM any)
+LJFOLDF(kfold_toi64_knum)
+{  /* Constant-fold TOI64 of a number constant into a 64 bit integer constant. */
+  lua_assert(fins->op2 == IRTOINT_TRUNCI64);
+  return lj_ir_kint64(J, (int64_t)knumleft);  /* NOTE(review): the C cast is undefined for values outside int64_t range -- confirm such constants cannot reach here. */
+}
+
 LJFOLD(TOSTR KNUM)
 LJFOLD(TOSTR KNUM)
 LJFOLDF(kfold_tostr_knum)
 LJFOLDF(kfold_tostr_knum)
 {
 {
@@ -471,6 +488,15 @@ LJFOLDF(shortcut_leftleft_across_phi)
   return fleft->op1;  /* f(g(x)) ==> x */
   return fleft->op1;  /* f(g(x)) ==> x */
 }
 }
 
 
+LJFOLD(TOI64 TONUM any)
+LJFOLDF(shortcut_leftleft_toint64)
+{  /* Bypass the intermediate TONUM and convert its operand directly. */
+  /* Fold even across PHI to avoid expensive int->num->int64 conversions. */
+  fins->op1 = fleft->op1;   /* (int64_t)(double)(int)x ==> (int64_t)x */
+  fins->op2 = IRTOINT_SEXT64;  /* The new operand is an int, so the mode becomes sign-extension. */
+  return RETRYFOLD;  /* Presumably re-runs folding on the modified instruction -- confirm. */
+}
+
 /* -- FP algebraic simplifications ---------------------------------------- */
 /* -- FP algebraic simplifications ---------------------------------------- */
 
 
 /* FP arithmetic is tricky -- there's not much to simplify.
 /* FP arithmetic is tricky -- there's not much to simplify.

+ 1 - 0
src/lj_target_x86.h

@@ -230,6 +230,7 @@ typedef enum {
   XO_MOVZXw =	XO_0f(b7),
   XO_MOVZXw =	XO_0f(b7),
   XO_MOVSXb =	XO_0f(be),
   XO_MOVSXb =	XO_0f(be),
   XO_MOVSXw =	XO_0f(bf),
   XO_MOVSXw =	XO_0f(bf),
+  XO_MOVSXd =	XO_(63),
   XO_BSWAP =	XO_0f(c8),
   XO_BSWAP =	XO_0f(c8),
 
 
   XO_MOVSD =	XO_f20f(10),
   XO_MOVSD =	XO_f20f(10),