Browse Source

ARM: Add VFP and hard-float ABI variants to JIT compiler.

Mike Pall 13 years ago
parent
commit
4dc9e22def
8 changed files with 704 additions and 95 deletions
  1. 2 1
      src/jit/dis_arm.lua
  2. 3 0
      src/lj_arch.h
  3. 2 1
      src/lj_asm.c
  4. 574 87
      src/lj_asm_arm.h
  5. 53 2
      src/lj_emit_arm.h
  6. 2 2
      src/lj_ircall.h
  7. 61 2
      src/lj_target_arm.h
  8. 7 0
      src/lj_vm.h

+ 2 - 1
src/jit/dis_arm.lua

@@ -676,7 +676,8 @@ end
 
 -- Return register name for RID.
 local function regname_(r)
-  return map_gpr[r]
+  if r < 16 then return map_gpr[r] end
+  return "d"..(r-16)
 end
 
 -- Public module functions.

+ 3 - 0
src/lj_arch.h

@@ -362,6 +362,9 @@
 #ifndef LJ_ARCH_HASFPU
 #define LJ_ARCH_HASFPU		1
 #endif
+#ifndef LJ_ABI_SOFTFP
+#define LJ_ABI_SOFTFP		0
+#endif
 #define LJ_SOFTFP		(!LJ_ARCH_HASFPU)
 
 #if LJ_ARCH_ENDIAN == LUAJIT_BE

+ 2 - 1
src/lj_asm.c

@@ -1610,6 +1610,7 @@ static void asm_setup_regsp(ASMState *as)
 	break;
       /* fallthrough */
     case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+      if (!LJ_SOFTFP && irt_isnum(ir->t)) break;
       ir->prev = (uint16_t)REGSP_HINT((rload & 15));
       rload = lj_ror(rload, 4);
       continue;
@@ -1641,7 +1642,7 @@ static void asm_setup_regsp(ASMState *as)
 	}
 	break;
 #endif
-#if LJ_NEED_FP64
+#if !LJ_SOFTFP && LJ_NEED_FP64
       case IR_CONV:
 	if (irt_isfp((ir-1)->t)) {
 	  ir->prev = REGSP_HINT(RID_FPRET);

File diff suppressed because it is too large
+ 574 - 87
src/lj_asm_arm.h


+ 53 - 2
src/lj_emit_arm.h

@@ -103,6 +103,15 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
   *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs;
 }
 
+#if !LJ_SOFTFP
+static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+{
+  lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0);
+  if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
+  *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2);
+}
+#endif
+
 /* -- Emit loads/stores --------------------------------------------------- */
 
 /* Prefer spills of BASE/L. */
@@ -208,6 +217,28 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
 	   (i & 4095));
 }
 
+#if !LJ_SOFTFP
+/* Load a number constant into an FPR. */
+static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
+{
+  int32_t i;
+  if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
+    uint32_t hi = tv->u32.hi;
+    uint32_t b = ((hi >> 22) & 0x1ff);
+    if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) {
+      *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) |
+		   ((tv->u32.hi >> 12) & 0x00080000) |
+		   ((tv->u32.hi >> 4) & 0x00070000) |
+		   ((tv->u32.hi >> 16) & 0x0000000f);
+      return;
+    }
+  }
+  i = i32ptr(tv);
+  emit_vlso(as, ARMI_VLDR_D, r,
+	    ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020));
+}
+#endif
+
 /* Get/set global_State fields. */
 #define emit_getgl(as, r, field) \
   emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field)
@@ -256,7 +287,15 @@ static void emit_call(ASMState *as, void *target)
 /* Generic move between two regs. */
 static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
 {
+#if LJ_SOFTFP
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
+#else
+  if (dst >= RID_MAX_GPR) {
+    emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
+	    (dst & 15), (src & 15));
+    return;
+  }
+#endif
   if (as->mcp != as->mcloop) {  /* Swap early registers for loads/stores. */
     MCode ins = *as->mcp, swp = (src^dst);
     if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) {
@@ -272,15 +311,27 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
 /* Generic load of register from stack slot. */
 static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
 {
+#if LJ_SOFTFP
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
-  emit_lso(as, ARMI_LDR, r, RID_SP, ofs);
+#else
+  if (r >= RID_MAX_GPR)
+    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs);
+  else
+#endif
+    emit_lso(as, ARMI_LDR, r, RID_SP, ofs);
 }
 
 /* Generic store of register to stack slot. */
 static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
 {
+#if LJ_SOFTFP
   lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
-  emit_lso(as, ARMI_STR, r, RID_SP, ofs);
+#else
+  if (r >= RID_MAX_GPR)
+    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs);
+  else
+#endif
+    emit_lso(as, ARMI_STR, r, RID_SP, ofs);
 }
 
 /* Emit an arithmetic/logic operation with a constant operand. */

+ 2 - 2
src/lj_ircall.h

@@ -66,7 +66,7 @@ typedef struct CCallInfo {
 #define IRCALLCOND_SOFTFP_FFI(x)	NULL
 #endif
 
-#define LJ_NEED_FP64			(LJ_TARGET_PPC || LJ_TARGET_MIPS)
+#define LJ_NEED_FP64	(LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
 
 #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
 #define IRCALLCOND_FP64_FFI(x)		x
@@ -242,7 +242,7 @@ extern uint32_t softfp_f2ui(float a);
 #endif
 #endif
 
-#if LJ_HASFFI && LJ_NEED_FP64
+#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP)
 #ifdef __GNUC__
 #define fp64_l2d __floatdidf
 #define fp64_ul2d __floatundidf

+ 61 - 2
src/lj_target_arm.h

@@ -32,7 +32,11 @@ enum {
   RID_RET = RID_R0,
   RID_RETLO = RID_R0,
   RID_RETHI = RID_R1,
+#if LJ_SOFTFP
   RID_FPRET = RID_R0,
+#else
+  RID_FPRET = RID_D0,
+#endif
 
   /* These definitions must match with the *.dasc file(s): */
   RID_BASE = RID_R9,		/* Interpreter BASE. */
@@ -68,11 +72,10 @@ enum {
    RID2RSET(RID_R9)|RID2RSET(RID_R11))
 #if LJ_SOFTFP
 #define RSET_FPR		0
-#define RSET_ALL		RSET_GPR
 #else
 #define RSET_FPR		(RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
-#define RSET_ALL		(RSET_GPR|RSET_FPR)
 #endif
+#define RSET_ALL		(RSET_GPR|RSET_FPR)
 #define RSET_INIT		RSET_ALL
 
 /* ABI-specific register sets. lr is an implicit scratch register. */
@@ -91,6 +94,15 @@ enum {
 #define REGARG_FIRSTGPR		RID_R0
 #define REGARG_LASTGPR		RID_R3
 #define REGARG_NUMGPR		4
+#if LJ_ABI_SOFTFP
+#define REGARG_FIRSTFPR		0
+#define REGARG_LASTFPR		0
+#define REGARG_NUMFPR		0
+#else
+#define REGARG_FIRSTFPR		RID_D0
+#define REGARG_LASTFPR		RID_D7
+#define REGARG_NUMFPR		8
+#endif
 
 /* -- Spill slots --------------------------------------------------------- */
 
@@ -199,6 +211,53 @@ typedef enum ARMIns {
   /* ARMv6T2 */
   ARMI_MOVW = 0xe3000000,
   ARMI_MOVT = 0xe3400000,
+
+  /* VFP */
+  ARMI_VMOV_D = 0xeeb00b40,
+  ARMI_VMOV_S = 0xeeb00a40,
+  ARMI_VMOVI_D = 0xeeb00b00,
+
+  ARMI_VMOV_R_S = 0xee100a10,
+  ARMI_VMOV_S_R = 0xee000a10,
+  ARMI_VMOV_RR_D = 0xec500b10,
+  ARMI_VMOV_D_RR = 0xec400b10,
+
+  ARMI_VADD_D = 0xee300b00,
+  ARMI_VSUB_D = 0xee300b40,
+  ARMI_VMUL_D = 0xee200b00,
+  ARMI_VMLA_D = 0xee000b00,
+  ARMI_VMLS_D = 0xee000b40,
+  ARMI_VNMLS_D = 0xee100b00,
+  ARMI_VDIV_D = 0xee800b00,
+
+  ARMI_VABS_D = 0xeeb00bc0,
+  ARMI_VNEG_D = 0xeeb10b40,
+  ARMI_VSQRT_D = 0xeeb10bc0,
+
+  ARMI_VCMP_D = 0xeeb40b40,
+  ARMI_VCMPZ_D = 0xeeb50b40,
+
+  ARMI_VMRS = 0xeef1fa10,
+
+  ARMI_VCVT_S32_F32 = 0xeebd0ac0,
+  ARMI_VCVT_S32_F64 = 0xeebd0bc0,
+  ARMI_VCVT_U32_F32 = 0xeebc0ac0,
+  ARMI_VCVT_U32_F64 = 0xeebc0bc0,
+  ARMI_VCVTR_S32_F32 = 0xeebd0a40,
+  ARMI_VCVTR_S32_F64 = 0xeebd0b40,
+  ARMI_VCVTR_U32_F32 = 0xeebc0a40,
+  ARMI_VCVTR_U32_F64 = 0xeebc0b40,
+  ARMI_VCVT_F32_S32 = 0xeeb80ac0,
+  ARMI_VCVT_F64_S32 = 0xeeb80bc0,
+  ARMI_VCVT_F32_U32 = 0xeeb80a40,
+  ARMI_VCVT_F64_U32 = 0xeeb80b40,
+  ARMI_VCVT_F32_F64 = 0xeeb70bc0,
+  ARMI_VCVT_F64_F32 = 0xeeb70ac0,
+
+  ARMI_VLDR_S = 0xed100a00,
+  ARMI_VLDR_D = 0xed100b00,
+  ARMI_VSTR_S = 0xed000a00,
+  ARMI_VSTR_D = 0xed000b00,
 } ARMIns;
 
 typedef enum ARMShift {

+ 7 - 0
src/lj_vm.h

@@ -55,6 +55,10 @@ LJ_ASMF void lj_vm_exit_interp(void);
 #else
 LJ_ASMF double lj_vm_floor(double);
 LJ_ASMF double lj_vm_ceil(double);
+#if LJ_TARGET_ARM
+LJ_ASMF double lj_vm_floor_hf(double);
+LJ_ASMF double lj_vm_ceil_hf(double);
+#endif
 #endif
 
 #if LJ_HASJIT
@@ -71,6 +75,9 @@ LJ_ASMF void lj_vm_powi_sse(void);
 #define lj_vm_trunc	trunc
 #else
 LJ_ASMF double lj_vm_trunc(double);
+#if LJ_TARGET_ARM
+LJ_ASMF double lj_vm_trunc_hf(double);
+#endif
 #endif
 LJ_ASMF double lj_vm_powi(double, int32_t);
 #ifdef LUAJIT_NO_LOG2

Some files were not shown because too many files changed in this diff