Browse Source

Use fastcall for remaining 1-arg/2-arg calls from interpreter.

Simplifies conversion to x64 calling conventions.
Mike Pall 15 years ago
parent
commit
bc47063708
15 changed files with 745 additions and 801 deletions
  1. +59 -116
      src/buildvm_x86.dasc
  2. +665 -665
      src/buildvm_x86.h
  3. +3 -2
      src/lib_base.c
  4. +1 -1
      src/lj_func.c
  5. +1 -1
      src/lj_func.h
  6. +1 -1
      src/lj_gc.c
  7. +1 -1
      src/lj_gc.h
  8. +2 -2
      src/lj_meta.c
  9. +2 -2
      src/lj_meta.h
  10. +2 -2
      src/lj_state.c
  11. +2 -2
      src/lj_state.h
  12. +1 -1
      src/lj_tab.c
  13. +1 -1
      src/lj_tab.h
  14. +2 -2
      src/lj_trace.c
  15. +2 -2
      src/lj_trace.h

+ 59 - 116
src/buildvm_x86.dasc

@@ -588,14 +588,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  // - A return back from a lua_call() with (high) nresults adjustment.
   |  mov L:RB->top, BASE		// Save current top held in BASE (yes).
   |  mov NRESULTS, RD			// Need to fill only remainder with nil.
-  |.if X64
-  |  mov CARG2d, RA			// Caveat: CARG1d may be RA.
-  |  mov CARG1d, L:RB
-  |.else
-  |  mov ARG2, RA			// Grow by wanted nresults+1.
-  |  mov ARG1, L:RB
-  |.endif
-  |  call extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov FCARG2, RA
+  |  mov FCARG1, L:RB
+  |  call extern lj_state_growstack@8	// (lua_State *L, int n)
   |  mov BASE, L:RB->top		// Need the (realloced) L->top in BASE.
   |  jmp <3
   |
@@ -653,11 +648,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |//-- Grow stack on-demand -----------------------------------------------
   |
   |->gate_c_growstack:			// Grow stack for C function.
-  |.if X64
-  |  mov CARG2d, LUA_MINSTACK
-  |.else
-  |  mov ARG2, LUA_MINSTACK
-  |.endif
+  |  mov FCARG2, LUA_MINSTACK
   |  jmp >1
   |
   |->gate_lv_growstack:			// Grow stack for vararg Lua function.
@@ -677,17 +668,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov L:RB->base, BASE
   |  mov L:RB->top, RC
   |  mov SAVE_PC, PC
-  |.if X64
-  |  mov CARG2d, RA
-  |  mov CARG1d, L:RB			// Caveat: CARG1d may be RA.
-  |.else
-  |  mov ARG2, RA
-  |  mov ARG1, L:RB
-  |.endif
+  |  mov FCARG2, RA
   |1:
+  |  mov FCARG1, L:RB
   |  // L:RB = L, L->base = new base, L->top = top
   |  // SAVE_PC = initial PC+1 (undefined for C functions)
-  |  call extern lj_state_growstack	// (lua_State *L, int n)
+  |  call extern lj_state_growstack@8	// (lua_State *L, int n)
   |  mov RA, L:RB->base
   |  mov RC, L:RB->top
   |  mov LFUNC:RB, [RA-8]
@@ -1189,20 +1175,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  jmp aword LFUNC:RB->gate
   |
   |->vmeta_len:
-  |.if X64
   |  mov L:RB, SAVE_L
-  |  mov L:RB->base, BASE		// Caveat: CARG2d may be BASE.
-  |  lea CARG2d, [BASE+RD*8]
-  |  mov CARG1d, L:RB
-  |.else
-  |  lea RD, [BASE+RD*8]
-  |  mov L:RB, SAVE_L
-  |  mov ARG2, RD
-  |  mov ARG1, L:RB
   |  mov L:RB->base, BASE
-  |.endif
+  |  lea FCARG2, [BASE+RD*8]		// Caveat: FCARG2 == BASE
+  |  mov L:FCARG1, L:RB
   |  mov SAVE_PC, PC
-  |  call extern lj_meta_len		// (lua_State *L, TValue *o)
+  |  call extern lj_meta_len@8		// (lua_State *L, TValue *o)
   |  // TValue * (metamethod) returned in eax (RC).
   |  mov BASE, L:RB->base
   |  jmp ->vmeta_binop			// Binop call for compatibility.
@@ -1243,19 +1221,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |//-- Argument coercion for 'for' statement ------------------------------
   |
   |->vmeta_for:
-  |.if X64
-  |  mov L:RB, SAVE_L
-  |  mov L:RB->base, BASE		// Caveat: CARG2d may be BASE.
-  |  mov CARG2d, RA
-  |  mov CARG1d, L:RB			// Caveat: CARG1d may be RA.
-  |.else
   |  mov L:RB, SAVE_L
-  |  mov ARG2, RA
-  |  mov ARG1, L:RB
   |  mov L:RB->base, BASE
-  |.endif
+  |  mov FCARG2, RA			// Caveat: FCARG2 == BASE
+  |  mov L:FCARG1, L:RB			// Caveat: FCARG1 == RA
   |  mov SAVE_PC, PC
-  |  call extern lj_meta_for	// (lua_State *L, TValue *base)
+  |  call extern lj_meta_for@8	// (lua_State *L, TValue *base)
   |  mov BASE, L:RB->base
   |  mov RC, [PC-4]
   |  movzx RA, RCH
@@ -1572,30 +1543,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |.ffunc_1 ipairs_aux
   |  cmp dword [RA+4], LJ_TTAB;  jne ->fff_fallback
   |  cmp dword [RA+12], LJ_TISNUM;  ja ->fff_fallback
-  |  // Caveat: xmm0/xmm1/ARG2 used in getinth call, too.
   if (sse) {
     |  movsd xmm0, qword [RA+8]
     |  sseconst_1 xmm1, RBa
-    |.if X64WIN
-    |  addsd xmm1, xmm0
-    |  cvtsd2si RC, xmm1
-    |  movsd qword [RA-8], xmm1
-    |.else
     |  addsd xmm0, xmm1
     |  cvtsd2si RC, xmm0
     |  movsd qword [RA-8], xmm0
-    |  .if not X64
-    |    mov ARG2, RC
-    |  .endif
-    |.endif
   } else {
     |.if not X64
     |  fld qword [RA+8]
     |  fld1
     |  faddp st1
-    |  fist ARG2
+    |  fist ARG1
     |  fstp qword [RA-8]
-    |  mov RC, ARG2
+    |  mov RC, ARG1
     |.endif
   }
   |  mov TAB:RB, [RA]
@@ -1611,14 +1572,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  jmp ->fff_res2
   |2:  // Check for empty hash part first. Otherwise call C function.
   |  cmp dword TAB:RB->hmask, 0; je ->fff_res0
-  |.if X64
-  |  mov CARG1d, TAB:RB
-  |.else
-  |  mov ARG1, TAB:RB
-  |.endif
   |  mov TMP1, BASE			// Save BASE and RA.
+  |.if X64 and not X64WIN
+  |  mov FCARG1, TAB:RB
   |  mov RB, RA
-  |  call extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |.else
+  |  xchg FCARG1, TAB:RB		// Caveat: FCARG1 == RA
+  |.endif
+  |  mov FCARG2, RC
+  |  call extern lj_tab_getinth@8	// (GCtab *t, int32_t key)
   |  // cTValue * or NULL returned in eax (RC).
   |  mov RA, RB
   |  mov BASE, TMP1
@@ -1825,28 +1787,22 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov RD, 1+2			// nresults+1 = 1 + false + error.
   |  jmp <7
   |.else
-  |.if X64
-  |  mov CARG2d, L:PC
-  |  mov CARG1d, L:RB
-  |.else
-  |  mov ARG2, L:PC
-  |  mov ARG1, L:RB
-  |.endif
-  |  call extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
+  |  mov FCARG2, L:PC
+  |  mov FCARG1, L:RB
+  |  call extern lj_ffh_coroutine_wrap_err@8  // (lua_State *L, lua_State *co)
   |  // Error function does not return.
   |.endif
   |
   |9:  // Handle stack expansion on return from yield.
-  |  mov L:RA, ARG1			// The callee doesn't modify SAVE_L.
-  |  mov L:RA->top, KBASE		// Undo coroutine stack clearing.
   |.if X64
-  |  mov CARG2d, PC
-  |  mov CARG1d, L:RB
+  |  mov L:RA, TMP1
   |.else
-  |  mov ARG2, PC
-  |  mov ARG1, L:RB
+  |  mov L:RA, ARG1			// The callee doesn't modify SAVE_L.
   |.endif
-  |  call extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov L:RA->top, KBASE		// Undo coroutine stack clearing.
+  |  mov FCARG2, PC
+  |  mov FCARG1, L:RB
+  |  call extern lj_state_growstack@8	// (lua_State *L, int n)
   |  mov BASE, L:RB->base
   |  jmp <4				// Retry the stack move.
   |.endmacro
@@ -2493,13 +2449,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov TMP1, BASE			// Save old BASE (relative).
   |  mov L:RB->base, RA
   |  lea RC, [RA+NARGS:RC*8-8]
-  |  mov ARG1, L:RB
   |  lea BASE, [RC+8*LUA_MINSTACK]	// Ensure enough space for handler.
   |  mov L:RB->top, RC
-  |  mov CFUNC:RA, [RA-8]
+  |  mov CFUNC:RC, [RA-8]
   |  cmp BASE, L:RB->maxstack
   |  ja >5				// Need to grow stack.
-  |  call aword CFUNC:RA->f		// (lua_State *L)
+  |.if X64
+  |  mov CARG1d, L:RB
+  |.else
+  |  mov ARG1, L:RB
+  |.endif
+  |  call aword CFUNC:RC->f		// (lua_State *L)
   |  // Either throws an error or recovers and returns 0 or NRESULTS (+1).
   |  test RC, RC;  jnz >3
   |1:  // Returned 0: retry fast path.
@@ -2526,8 +2486,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  jmp ->fff_res
   |
   |5:  // Grow stack for fallback handler.
-  |  mov ARG2, LUA_MINSTACK
-  |  call extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov FCARG2, LUA_MINSTACK
+  |  mov FCARG1, L:RB
+  |  call extern lj_state_growstack@8	// (lua_State *L, int n)
   |  jmp <1				// Dumb retry (goes through ff first).
   |
   |->fff_gcstep:			// Call GC step function.
@@ -2541,13 +2502,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov SAVE_PC, PC			// Redundant (but a defined value).
   |  mov L:RB->base, RA
   |  lea RC, [RA+NARGS:RC*8-8]
-  |.if X64
-  |  mov CARG1d, L:RB
-  |.else
-  |  mov ARG1, L:RB
-  |.endif
+  |  mov FCARG1, L:RB
   |  mov L:RB->top, RC
-  |  call extern lj_gc_step		// (lua_State *L)
+  |  call extern lj_gc_step@4		// (lua_State *L)
   |  mov RA, L:RB->base
   |  mov RC, L:RB->top
   |  sub RC, RA
@@ -2619,17 +2576,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
 #if LJ_HASJIT
   |  mov L:RB, SAVE_L
   |  mov L:RB->base, BASE
-  |.if X64
-  |  mov CARG2d, PC
-  |  lea CARG1d, [DISPATCH+GG_DISP2J]
-  |.else
-  |  lea RA, [DISPATCH+GG_DISP2J]
-  |  mov ARG2, PC
-  |  mov ARG1, RA
-  |.endif
+  |  mov FCARG2, PC
+  |  lea FCARG1, [DISPATCH+GG_DISP2J]
   |  mov [DISPATCH+DISPATCH_J(L)], L:RB
   |  mov SAVE_PC, PC
-  |  call extern lj_trace_hot		// (jit_State *J, const BCIns *pc)
+  |  call extern lj_trace_hot@8		// (jit_State *J, const BCIns *pc)
   |  jmp <4
 #endif
   |
@@ -2637,17 +2588,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
 #if LJ_HASJIT
   |  mov L:RB, SAVE_L
   |  mov L:RB->base, BASE
-  |.if X64
-  |  mov CARG2d, PC
-  |  lea CARG1d, [DISPATCH+GG_DISP2J]
-  |.else
-  |  lea RA, [DISPATCH+GG_DISP2J]
-  |  mov ARG2, PC
-  |  mov ARG1, RA
-  |.endif
+  |  mov FCARG2, PC
+  |  lea FCARG1, [DISPATCH+GG_DISP2J]
   |  mov [DISPATCH+DISPATCH_J(L)], L:RB
   |  mov SAVE_PC, PC
-  |  call extern lj_trace_hot		// (jit_State *J, const BCIns *pc)
+  |  call extern lj_trace_hot@8		// (jit_State *J, const BCIns *pc)
   |  mov BASE, L:RB->base
   |  // Dispatch the first instruction and optionally record it.
   |  ins_next
@@ -2689,12 +2634,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
   |  mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
   |  mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
   |  mov [DISPATCH+DISPATCH_J(L)], L:RB
-  |  lea RC, [esp+16]
   |  mov L:RB->base, BASE
-  |  lea RA, [DISPATCH+GG_DISP2J]
-  |  mov ARG2, RC
-  |  mov ARG1, RA
-  |  call extern lj_trace_exit		// (jit_State *J, ExitState *ex)
+  |  lea FCARG2, [esp+16]
+  |  lea FCARG1, [DISPATCH+GG_DISP2J]
+  |  call extern lj_trace_exit@8	// (jit_State *J, ExitState *ex)
   |  // Interpreter C frame returned in eax.
   |  mov esp, eax			// Reposition stack to C frame.
   |  mov BASE, L:RB->base
@@ -3863,11 +3806,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
     |  mov L:RB, SAVE_L
     |  cmp dword L:RB->openupval, 0
     |  je >1
-    |  lea RA, [BASE+RA*8]
-    |  mov ARG2, RA
-    |  mov ARG1, L:RB
     |  mov L:RB->base, BASE
-    |  call extern lj_func_closeuv	// (lua_State *L, TValue *level)
+    |  lea FCARG2, [BASE+RA*8]		// Caveat: FCARG2 == BASE
+    |  mov L:FCARG1, L:RB		// Caveat: FCARG1 == RA
+    |  call extern lj_func_closeuv@8	// (lua_State *L, TValue *level)
     |  mov BASE, L:RB->base
     |1:
     |  ins_next
@@ -4456,7 +4398,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
     |  jbe <3				// No vararg slots?
     |  mov RB, RC
     |  shr RB, 3
-    |  mov ARG2, RB			// Store this for stack growth below.
     |  add RB, 1
     |  mov NRESULTS, RB			// NRESULTS = #varargs+1
     |  mov L:RB, SAVE_L
@@ -4479,8 +4420,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
     |  mov L:RB->top, RA
     |  mov SAVE_PC, PC
     |  sub KBASE, BASE			// Need delta, because BASE may change.
-    |  mov ARG1, L:RB
-    |  call extern lj_state_growstack	// (lua_State *L, int n)
+    |  mov FCARG2, NRESULTS
+    |  sub FCARG2, 1
+    |  mov FCARG1, L:RB
+    |  call extern lj_state_growstack@8	// (lua_State *L, int n)
     |  mov BASE, L:RB->base
     |  mov RA, L:RB->top
     |  add KBASE, BASE

File diff suppressed because it is too large
+ 665 - 665
src/buildvm_x86.h


+ 3 - 2
src/lib_base.c

@@ -507,10 +507,11 @@ LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux)
 
 /* Inline declarations. */
 LJ_ASMF void lj_ff_coroutine_wrap_aux(void);
-LJ_FUNCA_NORET void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co);
+LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
+							  lua_State *co);
 
 /* Error handler, called from assembler VM. */
-void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co)
+void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co)
 {
   co->top--; copyTV(L, L->top, co->top); L->top++;
   if (tvisstr(L->top-1))

+ 1 - 1
src/lj_func.c

@@ -103,7 +103,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
 }
 
 /* Close all open upvalues pointing to some stack level or above. */
-void lj_func_closeuv(lua_State *L, TValue *level)
+void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
 {
   GCupval *uv;
   global_State *g = G(L);

+ 1 - 1
src/lj_func.h

@@ -13,7 +13,7 @@ LJ_FUNC GCproto *lj_func_newproto(lua_State *L);
 LJ_FUNC void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt);
 
 /* Upvalues. */
-LJ_FUNCA void lj_func_closeuv(lua_State *L, TValue *level);
+LJ_FUNCA void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level);
 LJ_FUNC void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv);
 
 /* Functions (closures). */

+ 1 - 1
src/lj_gc.c

@@ -613,7 +613,7 @@ static size_t gc_onestep(lua_State *L)
 }
 
 /* Perform a limited amount of incremental GC steps. */
-int lj_gc_step(lua_State *L)
+int LJ_FASTCALL lj_gc_step(lua_State *L)
 {
   global_State *g = G(L);
   MSize lim;

+ 1 - 1
src/lj_gc.h

@@ -42,7 +42,7 @@ enum { GCSpause, GCSpropagate, GCSsweepstring, GCSsweep, GCSfinalize };
 LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all);
 LJ_FUNC void lj_gc_finalizeudata(lua_State *L);
 LJ_FUNC void lj_gc_freeall(global_State *g);
-LJ_FUNCA int lj_gc_step(lua_State *L);
+LJ_FUNCA int LJ_FASTCALL lj_gc_step(lua_State *L);
 LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L);
 LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps);
 LJ_FUNC void lj_gc_fullgc(lua_State *L);

+ 2 - 2
src/lj_meta.c

@@ -269,7 +269,7 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
 }
 
 /* Helper for LEN. __len metamethod. */
-TValue *lj_meta_len(lua_State *L, cTValue *o)
+TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o)
 {
   cTValue *mo = lj_meta_lookup(L, o, MM_len);
   if (tvisnil(mo)) {
@@ -349,7 +349,7 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
 }
 
 /* Helper for FORI. Coercion. */
-void lj_meta_for(lua_State *L, TValue *base)
+void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base)
 {
   if (!str2num(base, base)) lj_err_msg(L, LJ_ERR_FORINIT);
   if (!str2num(base+1, base+1)) lj_err_msg(L, LJ_ERR_FORLIM);

+ 2 - 2
src/lj_meta.h

@@ -24,10 +24,10 @@ LJ_FUNCA TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k);
 LJ_FUNCA TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb,
 			       cTValue *rc, BCReg op);
 LJ_FUNCA TValue *lj_meta_cat(lua_State *L, TValue *top, int left);
-LJ_FUNCA TValue *lj_meta_len(lua_State *L, cTValue *o);
+LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
 LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
 LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
 LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
-LJ_FUNCA void lj_meta_for(lua_State *L, TValue *base);
+LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base);
 
 #endif

+ 2 - 2
src/lj_state.c

@@ -85,7 +85,7 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
 }
 
 /* Try to grow stack. */
-void lj_state_growstack(lua_State *L, MSize need)
+void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
 {
   if (L->stacksize > LJ_STACK_MAXEX)  /* overflow while handling overflow? */
     lj_err_throw(L, LUA_ERRERR);
@@ -99,7 +99,7 @@ void lj_state_growstack(lua_State *L, MSize need)
   }
 }
 
-void lj_state_growstack1(lua_State *L)
+void LJ_FASTCALL lj_state_growstack1(lua_State *L)
 {
   lj_state_growstack(L, 1);
 }

+ 2 - 2
src/lj_state.h

@@ -16,8 +16,8 @@
 
 LJ_FUNC void lj_state_relimitstack(lua_State *L);
 LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
-LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need);
-LJ_FUNC void lj_state_growstack1(lua_State *L);
+LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need);
+LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L);
 
 static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
 {

+ 1 - 1
src/lj_tab.c

@@ -371,7 +371,7 @@ void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
 
 /* -- Table getters ------------------------------------------------------- */
 
-cTValue *lj_tab_getinth(GCtab *t, int32_t key)
+cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key)
 {
   TValue k;
   Node *n;

+ 1 - 1
src/lj_tab.h

@@ -18,7 +18,7 @@ LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
 
 /* Caveat: all getters except lj_tab_get() can return NULL! */
 
-LJ_FUNCA cTValue *lj_tab_getinth(GCtab *t, int32_t key);
+LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key);
 LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key);
 LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
 

+ 2 - 2
src/lj_trace.c

@@ -552,7 +552,7 @@ static void trace_new(jit_State *J)
 }
 
 /* A hotcount triggered. Start recording a root trace. */
-void lj_trace_hot(jit_State *J, const BCIns *pc)
+void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc)
 {
   lua_State *L = J->L;
   L->top = curr_topL(L);  /* Only called from Lua and NRESULTS is not used. */
@@ -564,7 +564,7 @@ void lj_trace_hot(jit_State *J, const BCIns *pc)
 }
 
 /* A trace exited. Restore interpreter state and check for hot exits. */
-void *lj_trace_exit(jit_State *J, void *exptr)
+void * LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
 {
   lua_State *L = J->L;
   void *cf;

+ 2 - 2
src/lj_trace.h

@@ -32,8 +32,8 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
 
 /* Event handling. */
 LJ_FUNC void lj_trace_ins(jit_State *J);
-LJ_FUNCA void lj_trace_hot(jit_State *J, const BCIns *pc);
-LJ_FUNCA void *lj_trace_exit(jit_State *J, void *exptr);
+LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
+LJ_FUNCA void * LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
 
 /* Signal asynchronous abort of trace or end of trace. */
 #define lj_trace_abort(g)	(G2J(g)->state &= ~LJ_TRACE_ACTIVE)

Some files were not shown because too many files changed in this diff