Przeglądaj źródła

Miscellaneous cleanups for x64 interpreter.

Mike Pall 16 lat temu
rodzic
commit
c4e9dc0012
2 zmienionych plików z 54 dodań i 27 usunięć
  1. 43 16
      src/buildvm_x86.dasc
  2. 11 11
      src/buildvm_x86.h

+ 43 - 16
src/buildvm_x86.dasc

@@ -19,10 +19,14 @@
 |// Fixed register assignments for the interpreter.
 |// This is very fragile and has many dependencies. Caveat emptor.
 |.define BASE,		edx		// Not C callee-save, refetched anyway.
-|.if not X64 or X64WIN
+|.if not X64
 |.define KBASE,		edi		// Must be C callee-save.
 |.define KBASEa,	KBASE
 |.define PC,		esi		// Must be C callee-save.
+|.elif X64WIN
+|.define KBASE,		edi		// Must be C callee-save.
+|.define KBASEa,	rdi
+|.define PC,		esi		// Must be C callee-save.
 |.else
 |.define KBASE,		r15d		// Must be C callee-save.
 |.define KBASEa,	r15
@@ -136,7 +140,9 @@
 |.define FPARG1,	qword [esp]
 |// TMPQ overlaps TMP1/TMP2. ARG5/TMPa/NRESULTS overlap TMP1/TMP2 (and TMPQ). TMP3 overlaps ARG4.
 |.define TMPQ,		qword [esp+aword*4]
+|.define TMP3,		ARG4
 |.define ARG5,		TMP1
+|.define TMPa,		TMP1
 |.define NRESULTS,	TMP2
 |
 |// Arguments for vm_call and vm_pcall.
@@ -183,7 +189,9 @@
 |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). TMPa overlaps ARG5, TMP3 overlaps ARG5d.
 |.define TMPQ,		qword [rsp+aword*10]
 |.define NRESULTS,	TMP2
+|.define TMPa,		ARG5
 |.define ARG5d,		dword [rsp+aword*4]
+|.define TMP3,		ARG5d
 |
 |//-----------------------------------------------------------------------
 |.else			// x64/POSIX stack layout
@@ -205,7 +213,7 @@
 |.define SAVE_R2,	aword [rsp+aword*6]
 |.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
 |.define SAVE_CFRAME,	aword [rsp+aword*4]
-|.define UNUSED1,	aword [rsp+aword*3]
+|.define TMPa,		aword [rsp+aword*3]
 |//----- ^^^ awords above, vvv dwords below
 |.define SAVE_PC,	dword [rsp+dword*5]
 |.define SAVE_L,	dword [rsp+dword*4]
@@ -217,6 +225,7 @@
 |
 |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). TMP3 overlaps TMPa.
 |.define TMPQ,		qword [rsp]
+|.define TMP3,		dword [rsp+aword*3]
 |.define NRESULTS,	TMP2
 |
 |.endif
@@ -900,9 +909,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
     |  cvtsi2sd xmm0, RC
     |  movsd TMPQ, xmm0
   } else {
+    |.if not X64
     |  mov ARG4, RC
     |  fild ARG4
     |  fstp TMPQ
+    |.endif
   }
   |  lea RC, TMP1			// Store temp. TValue in TMP1/TMP2.
   |  jmp >1
@@ -971,9 +982,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
     |  cvtsi2sd xmm0, RC
     |  movsd TMPQ, xmm0
   } else {
+    |.if not X64
     |  mov ARG4, RC
     |  fild ARG4
     |  fstp TMPQ
+    |.endif
   }
   |  lea RC, TMP1			// Store temp. TValue in TMP1/TMP2.
   |  jmp >1
@@ -2182,8 +2195,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
     |  cvtsd2si RC, qword [RA+8]
     |  mov ARG3, RC
   } else {
+    |.if not X64
     |  fld qword [RA+8]
     |  fistp ARG3
+    |.endif
   }
   |  mov RC, TMP2
   |  cmp RB, RC				// len < end? (unsigned compare)
@@ -2324,7 +2339,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   if (sse) {
     |  cvtsi2sd xmm0, RC; jmp ->fff_resxmm0
   } else {
+    |.if not X64
     |  mov ARG1, RC; fild ARG1; jmp ->fff_resn
+    |.endif
   }
   |
   |//-- Bit library --------------------------------------------------------
@@ -2339,12 +2356,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
     |  cvtsi2sd xmm0, RB
     |  jmp ->fff_resxmm0
   } else {
+    |.if not X64
     |.ffunc_n bit_tobit
     |  mov TMP1, TOBIT_BIAS
     |  fadd TMP1
     |  fstp FPARG1			// 64 bit FP store.
     |  fild ARG1			// 32 bit integer load (s2lfwd ok).
     |  jmp ->fff_resn
+    |.endif
   }
   |
   |.macro .ffunc_bit, name
@@ -2354,11 +2373,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  addsd xmm0, xmm1
   |  movd RB, xmm0
   ||} else {
+  |.if not X64
   |  .ffunc_n name
   |  mov TMP1, TOBIT_BIAS
   |  fadd TMP1
   |  fstp FPARG1
   |  mov RB, ARG1
+  |.endif
   ||}
   |.endmacro
   |
@@ -2409,11 +2430,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
     |  mov BASE, TMP1
     |  jmp ->fff_resxmm0
   } else {
+    |.if not X64
     |->fff_resbit:
     |->fff_resbit_op:
     |  mov ARG1, RB
     |  fild ARG1
     |  jmp ->fff_resn
+    |.endif
   }
   |
   |->fff_fallback_bit_op:
@@ -2433,6 +2456,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  movd RB, xmm0
   |  movd RA, xmm1
   ||} else {
+  |.if not X64
   |  .ffunc_nn name
   |  mov TMP1, TOBIT_BIAS
   |  fadd TMP1
@@ -2442,6 +2466,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov RC, RA				// Assumes RA is ecx.
   |  mov RA, ARG3
   |  mov RB, ARG1
+  |.endif
   ||}
   |  ins RB, cl
   |  mov RA, RC
@@ -2467,7 +2492,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  sub BASE, RA
   |  mov [RA-4], PC
   |  mov SAVE_PC, PC			// Redundant (but a defined value).
-  |  mov ARG3, BASE			// Save old BASE (relative).
+  |  mov TMP1, BASE			// Save old BASE (relative).
   |  mov L:RB->base, RA
   |  lea RC, [RA+NARGS:RC*8-8]
   |  mov ARG1, L:RB
@@ -2486,7 +2511,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  shr RC, 3
   |  add NARGS:RC, 1
   |  mov LFUNC:RB, [RA-8]
-  |  mov BASE, ARG3			// Restore old BASE.
+  |  mov BASE, TMP1			// Restore old BASE.
   |  add BASE, RA
   |  cmp [RA-4], PC;  jne >2		// Callable modified by handler?
   |  jmp aword LFUNC:RB->gate		// Retry the call.
@@ -2509,11 +2534,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |
   |->fff_gcstep:			// Call GC step function.
   |  // RA = new base, RC = nargs+1
-  |  pop RB				// Must keep stack at same level.
-  |  mov ARG3, RB			// Save return address
+  |  pop RBa				// Must keep stack at same level.
+  |  mov TMPa, RBa			// Save return address
   |  mov L:RB, SAVE_L
   |  sub BASE, RA
-  |  mov ARG2, BASE			// Save old BASE (relative).
+  |  mov TMP2, BASE			// Save old BASE (relative).
   |  mov [RA-4], PC
   |  mov SAVE_PC, PC			// Redundant (but a defined value).
   |  mov L:RB->base, RA
@@ -2531,10 +2556,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  shr RC, 3
   |  add NARGS:RC, 1
   |  mov PC, [RA-4]
-  |  mov BASE, ARG2			// Restore old BASE.
+  |  mov BASE, TMP2			// Restore old BASE.
   |  add BASE, RA
-  |  mov RB, ARG3
-  |  push RB				// Restore return address.
+  |  mov RBa, TMPa
+  |  push RBa				// Restore return address.
   |  mov LFUNC:RB, [RA-8]
   |  ret
   |
@@ -3369,9 +3394,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
       |  cvtsi2sd xmm0, RC
       |  mov BASE, RB			// Restore BASE.
     } else {
+      |.if not X64
       |  mov ARG1, RC
       |  mov BASE, RB			// Restore BASE.
       |  fild ARG1
+      |.endif
     }
     |  movzx RA, PC_RA
     |  jmp <1
@@ -3994,7 +4021,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
     |  mov TMP1, STR:RC
     |  mov TMP2, LJ_TSTR
     |  lea RC, TMP1			// Store temp. TValue in TMP1/TMP2.
-    |  mov ARG4, TAB:RB			// Save TAB:RB for us.
+    |  mov TMP3, TAB:RB			// Save TAB:RB for us.
     |  mov ARG2, TAB:RB
     |  mov L:RB, SAVE_L
     |  mov ARG3, RC
@@ -4004,7 +4031,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
     |  call extern lj_tab_newkey	// (lua_State *L, GCtab *t, TValue *k)
     |  // Handles write barrier for the new key. TValue * returned in eax (RC).
     |  mov BASE, L:RB->base
-    |  mov TAB:RB, ARG4			// Need TAB:RB for barrier.
+    |  mov TAB:RB, TMP3			// Need TAB:RB for barrier.
     |  mov RA, eax
     |  jmp <2				// Must check write barrier for value.
     |
@@ -4211,7 +4238,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
     |  lea RA, [BASE+RA*8]
     |  mov PROTO:RC, LFUNC:RC->pt
     |  movzx RC, byte PROTO:RC->numparams
-    |  mov ARG3, KBASE			// Need one more free register.
+    |  mov TMP1, KBASE			// Need one more free register.
     |  lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
     |  sub KBASE, [BASE-4]
     |  // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
@@ -4237,7 +4264,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
     |  cmp RA, RB
     |  jb <2
     |3:
-    |  mov KBASE, ARG3
+    |  mov KBASE, TMP1
     |  ins_next
     |
     |5:  // Copy all varargs.
@@ -4496,11 +4523,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
     |  ins_AD	// RA = base (ignored), RD = traceno
     |  mov RA, [DISPATCH+DISPATCH_J(trace)]
     |  mov TRACE:RD, [RA+RD*4]
-    |  mov RD, TRACE:RD->mcode
+    |  mov RDa, TRACE:RD->mcode
     |  mov L:RB, SAVE_L
     |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
     |  mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
-    |  jmp RD
+    |  jmp RDa
 #endif
     break;
 

+ 11 - 11
src/buildvm_x86.h

@@ -407,17 +407,17 @@ static const unsigned char build_actionlist[14716] = {
   216,68,36,16,221,92,36,8,216,68,36,16,221,28,36,137,200,139,76,36,8,139,44,
   36,255,211,205,137,193,252,233,244,129,248,116,184,237,252,233,244,54,248,
   118,184,237,248,54,139,108,36,48,41,202,137,113,252,252,137,116,36,24,137,
-  84,36,8,137,141,233,141,68,193,252,248,137,44,36,141,144,233,137,133,233,
+  84,36,16,137,141,233,141,68,193,252,248,137,44,36,141,144,233,137,133,233,
   139,73,252,248,59,149,233,15,135,244,251,252,255,145,233,133,192,15,133,244,
   249,248,1,139,141,233,255,139,133,233,41,200,193,232,3,131,192,1,139,105,
-  252,248,139,84,36,8,1,202,57,113,252,252,15,133,244,248,252,255,165,233,248,
-  2,129,121,253,252,252,239,15,133,244,29,252,255,165,233,248,3,139,141,233,
-  139,84,36,8,1,202,252,233,244,68,248,5,199,68,36,4,237,232,251,1,0,252,233,
-  244,1,248,65,93,137,108,36,8,139,108,36,48,41,202,137,84,36,4,137,113,252,
-  252,137,116,36,24,137,141,233,141,68,193,252,248,137,44,36,137,133,233,255,
-  232,251,1,19,139,141,233,139,133,233,41,200,193,232,3,131,192,1,139,113,252,
-  252,139,84,36,4,1,202,139,108,36,8,85,139,105,252,248,195,248,136,255,15,
-  182,131,233,168,235,15,133,244,251,168,235,15,133,244,247,168,235,15,132,
+  252,248,139,84,36,16,1,202,57,113,252,252,15,133,244,248,252,255,165,233,
+  248,2,129,121,253,252,252,239,15,133,244,29,252,255,165,233,248,3,139,141,
+  233,139,84,36,8,1,202,252,233,244,68,248,5,199,68,36,4,237,232,251,1,0,252,
+  233,244,1,248,65,93,137,108,36,16,139,108,36,48,41,202,137,84,36,20,137,113,
+  252,252,137,116,36,24,137,141,233,141,68,193,252,248,137,44,36,137,133,233,
+  255,232,251,1,19,139,141,233,139,133,233,41,200,193,232,3,131,192,1,139,113,
+  252,252,139,84,36,20,1,202,139,108,36,16,85,139,105,252,248,195,248,136,255,
+  15,182,131,233,168,235,15,133,244,251,168,235,15,133,244,247,168,235,15,132,
   244,247,252,255,139,233,252,233,244,247,255,248,137,15,182,131,233,168,235,
   15,133,244,251,168,235,15,132,244,251,252,255,139,233,15,132,244,247,168,
   235,15,132,244,251,248,1,139,108,36,48,139,68,36,20,137,68,36,8,137,149,233,
@@ -685,11 +685,11 @@ static const unsigned char build_actionlist[14716] = {
   236,137,41,137,65,4,139,105,252,240,139,65,252,244,137,105,8,137,65,12,139,
   105,224,139,65,228,137,105,252,248,137,65,252,252,129,252,248,239,184,3,0,
   0,0,15,133,244,29,252,255,165,233,255,15,182,252,236,139,66,252,248,141,12,
-  202,139,128,233,15,182,128,233,137,124,36,8,141,188,253,194,233,43,122,252,
+  202,139,128,233,15,182,128,233,137,124,36,16,141,188,253,194,233,43,122,252,
   252,133,252,237,15,132,244,251,141,108,252,233,252,248,57,215,15,131,244,
   248,248,1,139,71,252,248,137,1,139,71,252,252,131,199,8,137,65,4,131,193,
   8,57,252,233,15,131,244,249,57,215,15,130,244,1,248,2,199,65,4,237,131,193,
-  8,57,252,233,15,130,244,2,248,3,139,124,36,8,139,6,15,182,204,15,182,232,
+  8,57,252,233,15,130,244,2,248,3,139,124,36,16,139,6,15,182,204,15,182,232,
   131,198,4,193,232,16,252,255,36,171,248,5,199,68,36,20,1,0,0,0,137,208,41,
   252,248,15,134,244,3,255,137,197,193,252,237,3,137,108,36,4,131,197,1,137,
   108,36,20,139,108,36,48,1,200,59,133,233,15,135,244,253,248,6,139,71,252,