@@ -1,6 +1,6 @@
 /*
 ** x86/x64 IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Guard handling ------------------------------------------------------ */
@@ -325,11 +325,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       as->mrm.base = as->mrm.idx = RID_NONE;
       return RID_MRM;
     }
-  } else if (ir->o == IR_KINT64) {
+  } else if (ref == REF_BASE || ir->o == IR_KINT64) {
     RegSet avail = as->freeset & ~as->modset & RSET_GPR;
     lua_assert(allow != RSET_EMPTY);
     if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
-      as->mrm.ofs = ptr2addr(ir_kint64(ir));
+      as->mrm.ofs = ptr2addr(ref == REF_BASE ? (void *)&J2G(as->J)->jit_base : (void *)ir_kint64(ir));
       as->mrm.base = as->mrm.idx = RID_NONE;
       return RID_MRM;
     }
@@ -369,7 +369,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
     }
   }
-  if (!(as->freeset & allow) && !irref_isk(ref) &&
+  if (!(as->freeset & allow) && !emit_canremat(ref) &&
       (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
     goto fusespill;
   return ra_allocref(as, ref, allow);
@@ -531,7 +531,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
-  int hiop = (LJ_32 && (ir+1)->o == IR_HIOP);
+  int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
   if ((ci->flags & CCI_NOFPRCLOBBER))
     drop &= ~RSET_FPR;
   if (ra_hasreg(ir->r))
@@ -1215,7 +1215,6 @@ static void asm_newref(ASMState *as, IRIns *ir)
 
 static void asm_uref(ASMState *as, IRIns *ir)
 {
-  /* NYI: Check that UREFO is still open and not aliasing a slot. */
   Reg dest = ra_dest(as, ir, RSET_GPR);
   if (irref_isk(ir->op1)) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
@@ -2776,6 +2775,106 @@ static void asm_setup_target(ASMState *as)
 
 /* -- Trace patching ------------------------------------------------------ */
 
+static const uint8_t map_op1[256] = {
+0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20,
+0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,
+0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
+0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
+#if LJ_64
+0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14,
+#else
+0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
+#endif
+0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
+0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51,
+0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
+0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51,
+#if LJ_64
+0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
+#else
+0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
+#endif
+0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
+0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51,
+0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
+0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51,
+0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92
+};
+
+static const uint8_t map_op2[256] = {
+0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93,
+0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52
+};
+
+static uint32_t asm_x86_inslen(const uint8_t* p)
+{
+  uint32_t result = 0;
+  uint32_t prefixes = 0;
+  uint32_t x = map_op1[*p];
+  for (;;) {
+    switch (x >> 4) {
+    case 0: return result + x + (prefixes & 4);
+    case 1: prefixes |= x; x = map_op1[*++p]; result++; break;
+    case 2: x = map_op2[*++p]; break;
+    case 3: p++; goto mrm;
+    case 4: result -= (prefixes & 2);  /* fallthrough */
+    case 5: return result + (x & 15);
+    case 6: /* Group 3. */
+      if (p[1] & 0x38) x = 2;
+      else if ((prefixes & 2) && (x == 0x66)) x = 4;
+      goto mrm;
+    case 7: /* VEX c4/c5. */
+      if (LJ_32 && p[1] < 0xc0) {
+        x = 2;
+        goto mrm;
+      }
+      if (x == 0x70) {
+        x = *++p & 0x1f;
+        result++;
+        if (x >= 2) {
+          p += 2;
+          result += 2;
+          goto mrm;
+        }
+      }
+      p++;
+      result++;
+      x = map_op2[*++p];
+      break;
+    case 8: result -= (prefixes & 2);  /* fallthrough */
+    case 9: mrm:  /* ModR/M and possibly SIB. */
+      result += (x & 15);
+      x = *++p;
+      switch (x >> 6) {
+      case 0: if ((x & 7) == 5) return result + 4; break;
+      case 1: result++; break;
+      case 2: result += 4; break;
+      case 3: return result;
+      }
+      if ((x & 7) == 4) {
+        result++;
+        if (x < 0x40 && (p[1] & 7) == 5) result += 4;
+      }
+      return result;
+    }
+  }
+}
+
 /* Patch exit jumps of existing machine code to a new target. */
 void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
 {
@@ -2788,18 +2887,13 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
   if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
     *(int32_t *)(p+len-4) = jmprel(p+len, target);
   /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
-  for (; p < pe; p++)
-    if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) {
-      p += LJ_64 ? 11 : 10;
+  for (; p < pe; p += asm_x86_inslen(p))
+    if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi)
       break;
-    }
   lua_assert(p < pe);
-  for (; p < pe; p++) {
-    if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) {
+  for (; p < pe; p += asm_x86_inslen(p))
+    if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
       *(int32_t *)(p+2) = jmprel(p+6, target);
-      p += 5;
-    }
-  }
   lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
   lj_mcode_patch(J, mcarea, 1);
 }