2
0
Эх сурвалжийг харах

MIPS64, part 2: Add MIPS64 hard-float JIT compiler backend.

Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
Mike Pall 8 жил өмнө
parent
commit
a25c0b99b8

+ 2 - 1
Makefile

@@ -87,7 +87,8 @@ FILE_PC= luajit.pc
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
 FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
 	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
-	      dis_ppc.lua dis_mips.lua dis_mipsel.lua vmdef.lua
+	      dis_ppc.lua dis_mips.lua dis_mipsel.lua dis_mips64.lua \
+	      dis_mips64el.lua vmdef.lua
 
 ifeq (,$(findstring Windows,$(OS)))
   HOST_SYS:= $(shell uname -s)

+ 30 - 15
src/jit/dis_mips.lua

@@ -34,15 +34,17 @@ local map_special = {
   "jrS",	"jalrD1S",	"movzDST",	"movnDST",
   "syscallY",	"breakY",	false,		"sync",
   "mfhiD",	"mthiS",	"mfloD",	"mtloS",
-  false,	false,		false,		false,
+  "dsllvDST",	false,		"dsrlvDST",	"dsravDST",
   "multST",	"multuST",	"divST",	"divuST",
-  false,	false,		false,		false,
+  "dmultST",	"dmultuST",	"ddivST",	"ddivuST",
   "addDST",	"addu|moveDST0", "subDST",	"subu|neguDS0T",
   "andDST",	"or|moveDST0",	"xorDST",	"nor|notDST0",
   false,	false,		"sltDST",	"sltuDST",
-  false,	false,		false,		false,
+  "daddDST",	"dadduDST",	"dsubDST",	"dsubuDST",
   "tgeSTZ",	"tgeuSTZ",	"tltSTZ",	"tltuSTZ",
-  "teqSTZ",	false,		"tneSTZ",
+  "teqSTZ",	false,		"tneSTZ",	false,
+  "dsllDTA",	false,		"dsrlDTA",	"dsraDTA",
+  "dsll32DTA",	false,		"dsrl32DTA",	"dsra32DTA",
 }
 
 local map_special2 = {
@@ -60,11 +62,17 @@ local map_bshfl = {
   [24] = "sehDT",
 }
 
+local map_dbshfl = {
+  shift = 6, mask = 31,
+  [2] = "dsbhDT",
+  [5] = "dshdDT",
+}
+
 local map_special3 = {
   shift = 0, mask = 63,
-  [0] = "extTSAK", [4] = "insTSAL",
-  [32] = map_bshfl,
-  [59] = "rdhwrTD",
+  [0]  = "extTSAK", [1]  = "dextmTSAP", [3]  = "dextTSAK",
+  [4]  = "insTSAL", [6]  = "dinsuTSEQ", [7]  = "dinsTSAL",
+  [32] = map_bshfl, [36] = map_dbshfl,  [59] = "rdhwrTD",
 }
 
 local map_regimm = {
@@ -178,8 +186,8 @@ local map_cop1bc = {
 
 local map_cop1 = {
   shift = 21, mask = 31,
-  [0] = "mfc1TG", false,	"cfc1TG",	"mfhc1TG",
-  "mtc1TG",	false,		"ctc1TG",	"mthc1TG",
+  [0] = "mfc1TG", "dmfc1TG",	"cfc1TG",	"mfhc1TG",
+  "mtc1TG",	"dmtc1TG",	"ctc1TG",	"mthc1TG",
   map_cop1bc,	false,		false,		false,
   false,	false,		false,		false,
   map_cop1s,	map_cop1d,	false,		false,
@@ -213,16 +221,16 @@ local map_pri = {
   "andiTSU",	"ori|liTS0U",	"xoriTSU",	"luiTU",
   map_cop0,	map_cop1,	false,		map_cop1x,
   "beql|beqzlST0B",	"bnel|bnezlST0B",	"blezlSB",	"bgtzlSB",
-  false,	false,		false,		false,
+  "daddiTSI",	"daddiuTSI",	false,		false,
   map_special2,	"jalxJ",	false,		map_special3,
   "lbTSO",	"lhTSO",	"lwlTSO",	"lwTSO",
   "lbuTSO",	"lhuTSO",	"lwrTSO",	false,
   "sbTSO",	"shTSO",	"swlTSO",	"swTSO",
   false,	false,		"swrTSO",	"cacheNSO",
   "llTSO",	"lwc1HSO",	"lwc2TSO",	"prefNSO",
-  false,	"ldc1HSO",	"ldc2TSO",	false,
+  false,	"ldc1HSO",	"ldc2TSO",	"ldTSO",
   "scTSO",	"swc1HSO",	"swc2TSO",	false,
-  false,	"sdc1HSO",	"sdc2TSO",	false,
+  false,	"sdc1HSO",	"sdc2TSO",	"sdTSO",
 }
 
 ------------------------------------------------------------------------------
@@ -306,6 +314,8 @@ local function disass_ins(ctx)
       x = "f"..band(rshift(op, 21), 31)
     elseif p == "A" then
       x = band(rshift(op, 6), 31)
+    elseif p == "E" then
+      x = band(rshift(op, 6), 31) + 32
     elseif p == "M" then
       x = band(rshift(op, 11), 31)
     elseif p == "N" then
@@ -315,8 +325,12 @@ local function disass_ins(ctx)
       if x == 0 then x = nil end
     elseif p == "K" then
       x = band(rshift(op, 11), 31) + 1
+    elseif p == "P" then
+      x = band(rshift(op, 11), 31) + 33
     elseif p == "L" then
       x = band(rshift(op, 11), 31) - last + 1
+    elseif p == "Q" then
+      x = band(rshift(op, 11), 31) - last + 33
     elseif p == "I" then
       x = arshift(lshift(op, 16), 16)
     elseif p == "U" then
@@ -330,11 +344,12 @@ local function disass_ins(ctx)
     elseif p == "B" then
       x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
       ctx.rel = x
-      x = "0x"..tohex(x)
+      x = format("0x%08x", x)
     elseif p == "J" then
-      x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4
+      local a = ctx.addr + ctx.pos
+      x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
       ctx.rel = x
-      x = "0x"..tohex(x)
+      x = format("0x%08x", x)
     elseif p == "V" then
       x = band(rshift(op, 8), 7)
       if x == 0 then x = nil end

+ 17 - 0
src/jit/dis_mips64.lua

@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the big-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+  create = dis_mips.create,
+  disass = dis_mips.disass,
+  regname = dis_mips.regname
+}
+

+ 17 - 0
src/jit/dis_mips64el.lua

@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64EL disassembler wrapper module.
+--
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the little-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+  create = dis_mips.create_el,
+  disass = dis_mips.disass_el,
+  regname = dis_mips.regname
+}
+

+ 1 - 1
src/jit/dump.lua

@@ -85,7 +85,7 @@ local nexitsym = 0
 local function fillsymtab_tr(tr, nexit)
   local t = {}
   symtabmt.__index = t
-  if jit.arch == "mips" or jit.arch == "mipsel" then
+  if jit.arch:sub(1, 4) == "mips" then
     t[traceexitstub(tr, 0)] = "exit"
     return
   end

+ 3 - 1
src/lj_arch.h

@@ -332,10 +332,12 @@
 #define LJ_ARCH_BITS		32
 #define LJ_TARGET_MIPS32	1
 #else
+#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU
+#define LJ_ARCH_NOJIT		1	/* NYI */
+#endif
 #define LJ_ARCH_BITS		64
 #define LJ_TARGET_MIPS64	1
 #define LJ_TARGET_GC64		1
-#define LJ_ARCH_NOJIT		1	/* NYI */
 #endif
 #define LJ_TARGET_MIPS		1
 #define LJ_TARGET_EHRETREG	4

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 428 - 98
src/lj_asm_mips.h


+ 93 - 14
src/lj_emit_mips.h

@@ -3,6 +3,28 @@
 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
+#if LJ_64
+static intptr_t get_k64val(IRIns *ir)
+{
+  if (ir->o == IR_KINT64) {
+    return (intptr_t)ir_kint64(ir)->u64;
+  } else if (ir->o == IR_KGC) {
+    return (intptr_t)ir_kgc(ir);
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    return (intptr_t)ir_kptr(ir);
+  } else {
+    lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
+    return ir->i;  /* Sign-extended. */
+  }
+}
+#endif
+
+#if LJ_64
+#define get_kval(ir)		get_k64val(ir)
+#else
+#define get_kval(ir)		((ir)->i)
+#endif
+
 /* -- Emit basic instructions --------------------------------------------- */
 
 static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt)
@@ -35,7 +57,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
 
 static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
 {
-  if ((as->flags & JIT_F_MIPSXXR2)) {
+  if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
     emit_dta(as, MIPSI_ROTR, dest, src, shift);
   } else {
     emit_dst(as, MIPSI_OR, dest, dest, tmp);
@@ -44,13 +66,21 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
   }
 }
 
+#if LJ_64
+static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
+		      uint32_t lsb)
+{
+  *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb);
+}
+#endif
+
 /* -- Emit loads/stores --------------------------------------------------- */
 
 /* Prefer rematerialization of BASE/L from global_State over spills. */
 #define emit_canremat(ref)	((ref) <= REF_BASE)
 
 /* Try to find a one step delta relative to another constant. */
-static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
+static int emit_kdelta1(ASMState *as, Reg t, intptr_t i)
 {
   RegSet work = ~as->freeset & RSET_GPR;
   while (work) {
@@ -58,9 +88,10 @@ static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
     IRRef ref = regcost_ref(as->cost[r]);
     lua_assert(r != t);
     if (ref < ASMREF_L) {
-      int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
+      intptr_t delta = (intptr_t)((uintptr_t)i -
+	(uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref))));
       if (checki16(delta)) {
-	emit_tsi(as, MIPSI_ADDIU, t, r, delta);
+	emit_tsi(as, MIPSI_AADDIU, t, r, delta);
 	return 1;
       }
     }
@@ -76,8 +107,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
     emit_ti(as, MIPSI_LI, r, i);
   } else {
     if ((i & 0xffff)) {
-      int32_t jgl = i32ptr(J2G(as->J));
-      if ((uint32_t)(i-jgl) < 65536) {
+      intptr_t jgl = (intptr_t)(void *)J2G(as->J);
+      if ((uintptr_t)(i-jgl) < 65536) {
 	emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768);
 	return;
       } else if (emit_kdelta1(as, r, i)) {
@@ -92,7 +123,39 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
   }
 }
 
+#if LJ_64
+/* Load a 64-bit constant into a GPR. */
+static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
+{
+  if (checki32((int64_t)u64)) {
+    emit_loadi(as, r, (int32_t)u64);
+  } else {
+    uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J);
+    if (delta < 65536) {
+      emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768));
+    } else if (emit_kdelta1(as, r, (intptr_t)u64)) {
+      return;
+    } else {
+      if ((u64 & 0xffff)) {
+	emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff);
+      }
+      if (((u64 >> 16) & 0xffff)) {
+	emit_dta(as, MIPSI_DSLL, r, r, 16);
+	emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff);
+	emit_dta(as, MIPSI_DSLL, r, r, 16);
+      } else {
+	emit_dta(as, MIPSI_DSLL32, r, r, 0);
+      }
+      emit_loadi(as, r, (int32_t)(u64 >> 32));
+    }
+    /* TODO: There are probably more optimization opportunities. */
+  }
+}
+
+#define emit_loada(as, r, addr)		emit_loadu64(as, (r), u64ptr((addr)))
+#else
 #define emit_loada(as, r, addr)		emit_loadi(as, (r), i32ptr((addr)))
+#endif
 
 static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
 static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
@@ -100,8 +163,8 @@ static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
 /* Get/set from constant pointer. */
 static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
 {
-  int32_t jgl = i32ptr(J2G(as->J));
-  int32_t i = i32ptr(p);
+  intptr_t jgl = (intptr_t)(J2G(as->J));
+  intptr_t i = (intptr_t)(p);
   Reg base;
   if ((uint32_t)(i-jgl) < 65536) {
     i = i-jgl-32768;
@@ -112,8 +175,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
   emit_tsi(as, mi, r, base, i);
 }
 
+#if LJ_64
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+  const uint64_t *k = &ir_k64(ir)->u64;
+  Reg r64 = r;
+  if (rset_test(RSET_FPR, r)) {
+    r64 = RID_TMP;
+    emit_tg(as, MIPSI_DMTC1, r64, r);
+  }
+  if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536)
+    emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0);
+  else
+    emit_loadu64(as, r64, *k);
+}
+#else
 #define emit_loadk64(as, r, ir) \
   emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
+#endif
 
 /* Get/set global_State fields. */
 static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
@@ -122,9 +201,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
 }
 
 #define emit_getgl(as, r, field) \
-  emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field))
+  emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field))
 #define emit_setgl(as, r, field) \
-  emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field))
+  emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field))
 
 /* Trace number is determined from per-trace exit stubs. */
 #define emit_setvmstate(as, i)		UNUSED(i)
@@ -164,7 +243,7 @@ static void emit_call(ASMState *as, void *target, int needcfa)
     needcfa = 1;
   }
   as->mcp = p;
-  if (needcfa) ra_allockreg(as, i32ptr(target), RID_CFUNCADDR);
+  if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
 }
 
 /* -- Emit generic operations --------------------------------------------- */
@@ -185,7 +264,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
 static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_tsi(as, MIPSI_LW, r, base, ofs);
+    emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs);
   else
     emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
 	     (r & 31), base, ofs);
@@ -195,7 +274,7 @@ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_tsi(as, MIPSI_SW, r, base, ofs);
+    emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs);
   else
     emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
 	     (r&31), base, ofs);
@@ -206,7 +285,7 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 {
   if (ofs) {
     lua_assert(checki16(ofs));
-    emit_tsi(as, MIPSI_ADDIU, r, r, ofs);
+    emit_tsi(as, MIPSI_AADDIU, r, r, ofs);
   }
 }
 

+ 8 - 0
src/lj_jit.h

@@ -337,6 +337,10 @@ enum {
 #endif
 #if LJ_TARGET_MIPS
   LJ_K64_2P31,		/* 2^31 */
+#if LJ_64
+  LJ_K64_2P63,		/* 2^63 */
+  LJ_K64_M2P64,		/* -2^64 */
+#endif
 #endif
   LJ_K64__MAX,
 };
@@ -351,6 +355,10 @@ enum {
 #endif
 #if LJ_TARGET_PPC || LJ_TARGET_MIPS
   LJ_K32_2P31,		/* 2^31 */
+#endif
+#if LJ_TARGET_MIPS64
+  LJ_K32_2P63,		/* 2^63 */
+  LJ_K32_M2P64,		/* -2^64 */
 #endif
   LJ_K32__MAX
 };

+ 3 - 3
src/lj_mcode.c

@@ -206,7 +206,7 @@ static void mcode_protect(jit_State *J, int prot)
 
 #if LJ_TARGET_X64
 #define mcode_validptr(p)	((p) && (uintptr_t)(p) < (uintptr_t)1<<47)
-#elif LJ_TARGET_ARM64
+#elif LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
 /* We have no clue about the valid VA range. It could be 39 - 52 bits. */
 #define mcode_validptr(p)	(p)
 #else
@@ -224,8 +224,8 @@ static void *mcode_alloc(jit_State *J, size_t sz)
   */
 #if LJ_TARGET_MIPS
   /* Use the middle of the 256MB-aligned region. */
-  uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) +
-		     0x08000000u;
+  uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
+		      ~(uintptr_t)0x0fffffffu) + 0x08000000u;
 #else
   uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
 #endif

+ 2 - 1
src/lj_snap.c

@@ -723,8 +723,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
 #else
 	if (LJ_BE && sz == 4) src++;
 #endif
-      }
+      } else
 #endif
+      if (LJ_64 && LJ_BE && sz == 4) src++;
     }
   }
   lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);

+ 73 - 17
src/lj_target_mips.h

@@ -81,7 +81,7 @@ enum {
    RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP))
 #define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
 #if LJ_SOFTFP
-#define RSET_FPR	0
+#define RSET_FPR		0
 #else
 #if LJ_32
 #define RSET_FPR \
@@ -90,11 +90,11 @@ enum {
    RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
    RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
 #else
-#define RSET_FPR	RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
+#define RSET_FPR		RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
 #endif
 #endif
-#define RSET_ALL	(RSET_GPR|RSET_FPR)
-#define RSET_INIT	RSET_ALL
+#define RSET_ALL		(RSET_GPR|RSET_FPR)
+#define RSET_INIT		RSET_ALL
 
 #define RSET_SCRATCH_GPR \
   (RSET_RANGE(RID_R1, RID_R15+1)|\
@@ -192,8 +192,12 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
 #define MIPSF_F(r)	((r) << 6)
 #define MIPSF_A(n)	((n) << 6)
 #define MIPSF_M(n)	((n) << 11)
+#define MIPSF_L(n)	((n) << 6)
 
 typedef enum MIPSIns {
+  MIPSI_D = 0x38,
+  MIPSI_DV = 0x10,
+  MIPSI_D32 = 0x3c,
   /* Integer instructions. */
   MIPSI_MOVE = 0x00000025,
   MIPSI_NOP = 0x00000000,
@@ -202,22 +206,27 @@ typedef enum MIPSIns {
   MIPSI_LU = 0x34000000,
   MIPSI_LUI = 0x3c000000,
 
-  MIPSI_ADDIU = 0x24000000,
+  MIPSI_AND = 0x00000024,
   MIPSI_ANDI = 0x30000000,
+  MIPSI_OR = 0x00000025,
   MIPSI_ORI = 0x34000000,
+  MIPSI_XOR = 0x00000026,
   MIPSI_XORI = 0x38000000,
+  MIPSI_NOR = 0x00000027,
+
+  MIPSI_SLT = 0x0000002a,
+  MIPSI_SLTU = 0x0000002b,
   MIPSI_SLTI = 0x28000000,
   MIPSI_SLTIU = 0x2c000000,
 
   MIPSI_ADDU = 0x00000021,
+  MIPSI_ADDIU = 0x24000000,
+  MIPSI_SUB = 0x00000022,
   MIPSI_SUBU = 0x00000023,
   MIPSI_MUL = 0x70000002,
-  MIPSI_AND = 0x00000024,
-  MIPSI_OR = 0x00000025,
-  MIPSI_XOR = 0x00000026,
-  MIPSI_NOR = 0x00000027,
-  MIPSI_SLT = 0x0000002a,
-  MIPSI_SLTU = 0x0000002b,
+  MIPSI_DIV = 0x0000001a,
+  MIPSI_DIVU = 0x0000001b,
+
   MIPSI_MOVZ = 0x0000000a,
   MIPSI_MOVN = 0x0000000b,
   MIPSI_MFHI = 0x00000010,
@@ -228,14 +237,18 @@ typedef enum MIPSIns {
   MIPSI_SRL = 0x00000002,
   MIPSI_SRA = 0x00000003,
   MIPSI_ROTR = 0x00200002,	/* MIPSXXR2 */
+  MIPSI_DROTR = 0x0020003a,
+  MIPSI_DROTR32 = 0x0020003e,
   MIPSI_SLLV = 0x00000004,
   MIPSI_SRLV = 0x00000006,
   MIPSI_SRAV = 0x00000007,
   MIPSI_ROTRV = 0x00000046,	/* MIPSXXR2 */
+  MIPSI_DROTRV = 0x00000056,
 
   MIPSI_SEB = 0x7c000420,	/* MIPSXXR2 */
   MIPSI_SEH = 0x7c000620,	/* MIPSXXR2 */
   MIPSI_WSBH = 0x7c0000a0,	/* MIPSXXR2 */
+  MIPSI_DSBH = 0x7c0000a4,
 
   MIPSI_B = 0x10000000,
   MIPSI_J = 0x08000000,
@@ -253,7 +266,9 @@ typedef enum MIPSIns {
 
   /* Load/store instructions. */
   MIPSI_LW = 0x8c000000,
+  MIPSI_LD = 0xdc000000,
   MIPSI_SW = 0xac000000,
+  MIPSI_SD = 0xfc000000,
   MIPSI_LB = 0x80000000,
   MIPSI_SB = 0xa0000000,
   MIPSI_LH = 0x84000000,
@@ -266,13 +281,48 @@ typedef enum MIPSIns {
   MIPSI_SDC1 = 0xf4000000,
 
   /* MIPS64 instructions. */
-  MIPSI_DSLL = 0x00000038,
-  MIPSI_LD = 0xdc000000,
+  MIPSI_DADD = 0x0000002c,
+  MIPSI_DADDI = 0x60000000,
+  MIPSI_DADDU = 0x0000002d,
   MIPSI_DADDIU = 0x64000000,
-  MIPSI_SD = 0xfc000000,
-  MIPSI_DMFC1 = 0x44200000,
+  MIPSI_DSUB = 0x0000002e,
+  MIPSI_DSUBU = 0x0000002f,
+  MIPSI_DDIV = 0x0000001e,
+  MIPSI_DDIVU = 0x0000001f,
+  MIPSI_DMULT = 0x0000001c,
+  MIPSI_DMULTU = 0x0000001d,
+
+  MIPSI_DSLL = 0x00000038,
+  MIPSI_DSRL = 0x0000003a,
+  MIPSI_DSLLV = 0x00000014,
+  MIPSI_DSRLV = 0x00000016,
+  MIPSI_DSRA = 0x0000003b,
+  MIPSI_DSRAV = 0x00000017,
   MIPSI_DSRA32 = 0x0000003f,
-  MIPSI_MFHC1 = 0x44600000,
+  MIPSI_DSLL32 = 0x0000003c,
+  MIPSI_DSRL32 = 0x0000003e,
+  MIPSI_DSHD = 0x7c000164,
+
+  MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU,
+  MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU,
+  MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU,
+  MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD,
+  MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD,
+
+  /* Extract/insert instructions. */
+  MIPSI_DEXTM = 0x7c000001,
+  MIPSI_DEXTU = 0x7c000002,
+  MIPSI_DEXT = 0x7c000003,
+  MIPSI_DINSM = 0x7c000005,
+  MIPSI_DINSU = 0x7c000006,
+  MIPSI_DINS = 0x7c000007,
+
+  MIPSI_RINT_D = 0x4620001a,
+  MIPSI_RINT_S = 0x4600001a,
+  MIPSI_RINT = 0x4400001a,
+  MIPSI_FLOOR_D = 0x4620000b,
+  MIPSI_CEIL_D = 0x4620000a,
+  MIPSI_ROUND_D = 0x46200008,
 
   /* FP instructions. */
   MIPSI_MOV_S = 0x46000006,
@@ -298,24 +348,30 @@ typedef enum MIPSIns {
   MIPSI_CVT_W_D = 0x46200024,
   MIPSI_CVT_S_W = 0x46800020,
   MIPSI_CVT_D_W = 0x46800021,
+  MIPSI_CVT_S_L = 0x46a00020,
+  MIPSI_CVT_D_L = 0x46a00021,
 
   MIPSI_TRUNC_W_S = 0x4600000d,
   MIPSI_TRUNC_W_D = 0x4620000d,
+  MIPSI_TRUNC_L_S = 0x46000009,
+  MIPSI_TRUNC_L_D = 0x46200009,
   MIPSI_FLOOR_W_S = 0x4600000f,
   MIPSI_FLOOR_W_D = 0x4620000f,
 
   MIPSI_MFC1 = 0x44000000,
   MIPSI_MTC1 = 0x44800000,
+  MIPSI_DMTC1 = 0x44a00000,
+  MIPSI_DMFC1 = 0x44200000,
 
   MIPSI_BC1F = 0x45000000,
   MIPSI_BC1T = 0x45010000,
 
   MIPSI_C_EQ_D = 0x46200032,
+  MIPSI_C_OLT_S = 0x46000034,
   MIPSI_C_OLT_D = 0x46200034,
   MIPSI_C_ULT_D = 0x46200035,
   MIPSI_C_OLE_D = 0x46200036,
   MIPSI_C_ULE_D = 0x46200037,
-
 } MIPSIns;
 
 #endif

+ 9 - 2
src/lj_trace.c

@@ -319,13 +319,15 @@ void lj_trace_initstate(global_State *g)
   /* Initialize 32/64 bit constants. */
 #if LJ_TARGET_X86ORX64
   J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
-  J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
-  J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
 #if LJ_32
   J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
 #endif
+  J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
   J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
 #endif
+#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
+  J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
+#endif
 #if LJ_TARGET_PPC
   J->k32[LJ_K32_2P52_2P31] = 0x59800004;
   J->k32[LJ_K32_2P52] = 0x59800000;
@@ -335,6 +337,11 @@ void lj_trace_initstate(global_State *g)
 #endif
 #if LJ_TARGET_MIPS
   J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
+#if LJ_64
+  J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
+  J->k32[LJ_K32_2P63] = 0x5f000000;
+  J->k32[LJ_K32_M2P64] = 0xdf800000;
+#endif
 #endif
 }
 

+ 225 - 12
src/vm_mips64.dasc

@@ -327,7 +327,13 @@
 |.macro jmp_extern; jr CFUNCADDR; .endmacro
 |
 |.macro hotcheck, delta, target
-|  NYI
+|  dsrl TMP1, PC, 1
+|  andi TMP1, TMP1, 126
+|  daddu TMP1, TMP1, DISPATCH
+|  lhu TMP2, GG_DISP2HOT(TMP1)
+|  addiu TMP2, TMP2, -delta
+|  bltz TMP2, target
+|.  sh TMP2, GG_DISP2HOT(TMP1)
 |.endmacro
 |
 |.macro hotloop
@@ -2150,7 +2156,21 @@ static void build_subroutines(BuildCtx *ctx)
   |//-----------------------------------------------------------------------
   |
   |->vm_record:				// Dispatch target for recording phase.
-  |  NYI
+  |.if JIT
+  |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+  |  andi AT, TMP3, HOOK_VMEVENT	// No recording while in vmevent.
+  |  bnez AT, >5
+  |  // Decrement the hookcount for consistency, but always do the call.
+  |.  lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
+  |  andi AT, TMP3, HOOK_ACTIVE
+  |  bnez AT, >1
+  |.  addiu TMP2, TMP2, -1
+  |  andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
+  |  beqz AT, >1
+  |.  nop
+  |  b >1
+  |.  sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
+  |.endif
   |
   |->vm_rethook:			// Dispatch target for return hooks.
   |  lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
@@ -2201,7 +2221,25 @@ static void build_subroutines(BuildCtx *ctx)
   |.  lw MULTRES, -24+LO(RB)		// Restore MULTRES for *M ins.
   |
   |->vm_hotloop:			// Hot loop counter underflow.
-  |  NYI
+  |.if JIT
+  |  ld LFUNC:TMP1, FRAME_FUNC(BASE)
+  |   daddiu CARG1, DISPATCH, GG_DISP2J
+  |  cleartp LFUNC:TMP1
+  |   sd PC, SAVE_PC
+  |  ld TMP1, LFUNC:TMP1->pc
+  |   move CARG2, PC
+  |   sd L, DISPATCH_J(L)(DISPATCH)
+  |  lbu TMP1, PC2PROTO(framesize)(TMP1)
+  |  load_got lj_trace_hot
+  |   sd BASE, L->base
+  |  dsll TMP1, TMP1, 3
+  |  daddu TMP1, BASE, TMP1
+  |  call_intern lj_trace_hot		// (jit_State *J, const BCIns *pc)
+  |.  sd TMP1, L->top
+  |  b <3
+  |.  nop
+  |.endif
+  |
   |
   |->vm_callhook:			// Dispatch target for call hooks.
   |.if JIT
@@ -2235,21 +2273,69 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->cont_stitch:			// Trace stitching.
   |.if JIT
-  |  NYI
+  |  // RA = resultptr, RB = meta base
+  |  lw INS, -4(PC)
+  |    ld TRACE:TMP2, -40(RB)		// Save previous trace.
+  |  decode_RA8a RC, INS
+  |   daddiu AT, MULTRES, -8
+  |    cleartp TRACE:TMP2
+  |  decode_RA8b RC
+  |   beqz AT, >2
+  |. daddu RC, BASE, RC			// Call base.
+  |1:  // Move results down.
+  |  ld CARG1, 0(RA)
+  |   daddiu AT, AT, -8
+  |    daddiu RA, RA, 8
+  |  sd CARG1, 0(RC)
+  |   bnez AT, <1
+  |.   daddiu RC, RC, 8
+  |2:
+  |   decode_RA8a RA, INS
+  |    decode_RB8a RB, INS
+  |   decode_RA8b RA
+  |    decode_RB8b RB
+  |   daddu RA, RA, RB
+  |   daddu RA, BASE, RA
+  |3:
+  |   sltu AT, RC, RA
+  |   bnez AT, >9			// More results wanted?
+  |.   nop
+  |
+  |  lhu TMP3, TRACE:TMP2->traceno
+  |  lhu RD, TRACE:TMP2->link
+  |  beq RD, TMP3, ->cont_nop		// Blacklisted.
+  |.  load_got lj_dispatch_stitch
+  |  bnez RD, =>BC_JLOOP		// Jump to stitched trace.
+  |.  sll RD, RD, 3
+  |
+  |  // Stitch a new trace to the previous trace.
+  |  sw TMP3, DISPATCH_J(exitno)(DISPATCH)
+  |  sd L, DISPATCH_J(L)(DISPATCH)
+  |  sd BASE, L->base
+  |  daddiu CARG1, DISPATCH, GG_DISP2J
+  |  call_intern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
+  |.  move CARG2, PC
+  |  b ->cont_nop
+  |.  ld BASE, L->base
+  |
+  |9:
+  |  sd TISNIL, 0(RC)
+  |  b <3
+  |.  daddiu RC, RC, 8
   |.endif
   |
   |->vm_profhook:			// Dispatch target for profiler hook.
 #if LJ_HASPROFILE
   |  load_got lj_dispatch_profile
-  |   sw MULTRES, SAVE_MULTRES
+  |   sd MULTRES, SAVE_MULTRES
   |  move CARG2, PC
-  |   sw BASE, L->base
+  |   sd BASE, L->base
   |  call_intern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
   |.  move CARG1, L
   |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
   |  daddiu PC, PC, -4
   |  b ->cont_nop
-  |.  lw BASE, L->base
+  |.  ld BASE, L->base
 #endif
   |
   |//-----------------------------------------------------------------------
@@ -2259,6 +2345,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.macro savex_, a, b
   |.if FPU
   |  sdc1 f..a, a*8(sp)
+  |  sdc1 f..b, b*8(sp)
   |  sd r..a, 32*8+a*8(sp)
   |  sd r..b, 32*8+b*8(sp)
   |.else
@@ -2269,11 +2356,124 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->vm_exit_handler:
   |.if JIT
-  |  NYI
+  |.if FPU
+  |  daddiu sp, sp, -(32*8+32*8)
+  |.else
+  |  daddiu sp, sp, -(32*8)
+  |.endif
+  |  savex_ 0, 1
+  |  savex_ 2, 3
+  |  savex_ 4, 5
+  |  savex_ 6, 7
+  |  savex_ 8, 9
+  |  savex_ 10, 11
+  |  savex_ 12, 13
+  |  savex_ 14, 15
+  |  savex_ 16, 17
+  |  savex_ 18, 19
+  |  savex_ 20, 21
+  |  savex_ 22, 23
+  |  savex_ 24, 25
+  |  savex_ 26, 27
+  |  savex_ 28, 30
+  |.if FPU
+  |  sdc1 f29, 29*8(sp)
+  |  sdc1 f31, 31*8(sp)
+  |  sd r0, 32*8+31*8(sp)		// Clear RID_TMP.
+  |  daddiu TMP2, sp, 32*8+32*8		// Recompute original value of sp.
+  |  sd TMP2, 32*8+29*8(sp)		// Store sp in RID_SP
+  |.else
+  |  sd r0, 31*8(sp)			// Clear RID_TMP.
+  |  daddiu TMP2, sp, 32*8		// Recompute original value of sp.
+  |  sd TMP2, 29*8(sp)			// Store sp in RID_SP
+  |.endif
+  |  li_vmstate EXIT
+  |  daddiu DISPATCH, JGL, -GG_DISP2G-32768
+  |  lw TMP1, 0(TMP2)			// Load exit number.
+  |  st_vmstate
+  |  ld L, DISPATCH_GL(cur_L)(DISPATCH)
+  |   ld BASE, DISPATCH_GL(jit_base)(DISPATCH)
+  |  load_got lj_trace_exit
+  |  sd L, DISPATCH_J(L)(DISPATCH)
+  |  sw ra, DISPATCH_J(parent)(DISPATCH)  // Store trace number.
+  |   sd BASE, L->base
+  |  sw TMP1, DISPATCH_J(exitno)(DISPATCH)  // Store exit number.
+  |  daddiu CARG1, DISPATCH, GG_DISP2J
+  |   sd r0, DISPATCH_GL(jit_base)(DISPATCH)
+  |  call_intern lj_trace_exit		// (jit_State *J, ExitState *ex)
+  |.  move CARG2, sp
+  |  // Returns MULTRES (unscaled) or negated error code.
+  |  ld TMP1, L->cframe
+  |  li AT, -4
+  |   ld BASE, L->base
+  |  and sp, TMP1, AT
+  |   ld PC, SAVE_PC			// Get SAVE_PC.
+  |  b >1
+  |.  sd L, SAVE_L			// Set SAVE_L (on-trace resume/yield).
   |.endif
   |->vm_exit_interp:
   |.if JIT
-  |  NYI
+  |  // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
+  |  ld L, SAVE_L
+  |   daddiu DISPATCH, JGL, -GG_DISP2G-32768
+  |  sd BASE, L->base
+  |1:
+  |  bltz CRET1, >9			// Check for error from exit.
+  |.  ld LFUNC:RB, FRAME_FUNC(BASE)
+  |    .FPU lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
+  |  dsll MULTRES, CRET1, 3
+  |  cleartp LFUNC:RB
+  |  sd MULTRES, SAVE_MULTRES
+  |    li TISNIL, LJ_TNIL
+  |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
+  |    .FPU mtc1 TMP3, TOBIT
+  |  ld TMP1, LFUNC:RB->pc
+  |   sd r0, DISPATCH_GL(jit_base)(DISPATCH)
+  |  ld KBASE, PC2PROTO(k)(TMP1)
+  |    .FPU cvt.d.s TOBIT, TOBIT
+  |  // Modified copy of ins_next which handles function header dispatch, too.
+  |  lw INS, 0(PC)
+  |   daddiu PC, PC, 4
+  |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1
+  |    sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |  decode_OP8a TMP1, INS
+  |  decode_OP8b TMP1
+  |    sltiu TMP2, TMP1, BC_FUNCF*8
+  |  daddu TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  ld AT, 0(TMP0)
+  |   decode_RA8a RA, INS
+  |    beqz TMP2, >2
+  |.  decode_RA8b RA
+  |  jr AT
+  |.  decode_RD8b RD
+  |2:
+  |  sltiu TMP2, TMP1, (BC_FUNCC+2)*8	// Fast function?
+  |  bnez TMP2, >3
+  |.  ld TMP1, FRAME_PC(BASE)
+  |  // Check frame below fast function.
+  |  andi TMP0, TMP1, FRAME_TYPE
+  |  bnez TMP0, >3			// Trace stitching continuation?
+  |.  nop
+  |  // Otherwise set KBASE for Lua function below fast function.
+  |  lw TMP2, -4(TMP1)
+  |  decode_RA8a TMP0, TMP2
+  |  decode_RA8b TMP0
+  |  dsubu TMP1, BASE, TMP0
+  |  ld LFUNC:TMP2, -32(TMP1)
+  |  cleartp LFUNC:TMP2
+  |  ld TMP1, LFUNC:TMP2->pc
+  |  ld KBASE, PC2PROTO(k)(TMP1)
+  |3:
+  |  daddiu RC, MULTRES, -8
+  |  jr AT
+  |.  daddu RA, RA, BASE
+  |
+  |9:  // Rethrow error from the right C frame.
+  |  load_got lj_err_throw
+  |  negu CARG2, CRET1
+  |  call_intern lj_err_throw		// (lua_State *L, int errcode)
+  |.  move CARG1, L
   |.endif
   |
   |//-----------------------------------------------------------------------
@@ -4013,7 +4213,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_next2
     |
     |7:  // Possible table write barrier for the value. Skip valiswhite check.
-    |  barrierback TAB:RB, TMP3, TMP0, <2
+    |  barrierback TAB:CARG2, TMP3, TMP0, <2
     break;
 
   case BC_TSETM:
@@ -4632,7 +4832,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   case BC_JLOOP:
     |.if JIT
-    |  NYI
+    |  // RA = base*8 (ignored), RD = traceno*8
+    |  ld TMP1, DISPATCH_J(trace)(DISPATCH)
+    |   li AT, 0
+    |  daddu TMP1, TMP1, RD
+    |  // Traces on MIPS don't store the trace number, so use 0.
+    |   sd AT, DISPATCH_GL(vmstate)(DISPATCH)
+    |  ld TRACE:TMP2, 0(TMP1)
+    |   sd BASE, DISPATCH_GL(jit_base)(DISPATCH)
+    |  ld TMP2, TRACE:TMP2->mcode
+    |   sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
+    |  jr TMP2
+    |.  daddiu JGL, DISPATCH, GG_DISP2G+32768
     |.endif
     break;
 
@@ -4694,10 +4905,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   case BC_IFUNCV:
     |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+    |   li TMP0, LJ_TFUNC
     |   daddu TMP1, BASE, RC
     |  ld TMP2, L->maxstack
+    |   settp LFUNC:RB, TMP0
     |  daddu TMP0, RA, RC
-    |   sd LFUNC:RB, 0(TMP1)		// Store (untagged) copy of LFUNC.
+    |   sd LFUNC:RB, 0(TMP1)		// Store (tagged) copy of LFUNC.
     |   daddiu TMP3, RC, 16+FRAME_VARG
     |  sltu AT, TMP0, TMP2
     |    ld KBASE, -4+PC2PROTO(k)(PC)

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно