|
@@ -369,18 +369,76 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
|
|
|
|
|
/* -- Calls --------------------------------------------------------------- */
|
|
|
|
|
|
+/* Count the required number of stack slots for a call. */
|
|
|
+static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
|
|
|
+{
|
|
|
+ uint32_t i, nargs = CCI_NARGS(ci);
|
|
|
+ int nslots = 0;
|
|
|
+#if LJ_64
|
|
|
+ if (LJ_ABI_WIN) {
|
|
|
+ nslots = (int)(nargs*2); /* Only matters for more than four args. */
|
|
|
+ } else {
|
|
|
+ int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
|
|
|
+ for (i = 0; i < nargs; i++)
|
|
|
+ if (args[i] && irt_isfp(IR(args[i])->t)) {
|
|
|
+ if (nfpr > 0) nfpr--; else nslots += 2;
|
|
|
+ } else {
|
|
|
+ if (ngpr > 0) ngpr--; else nslots += 2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+#else
|
|
|
+ int ngpr = 0;
|
|
|
+ if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
|
|
|
+ ngpr = 2;
|
|
|
+ else if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
|
|
|
+ ngpr = 1;
|
|
|
+ for (i = 0; i < nargs; i++)
|
|
|
+ if (args[i] && irt_isfp(IR(args[i])->t)) {
|
|
|
+ nslots += irt_isnum(IR(args[i])->t) ? 2 : 1;
|
|
|
+ } else {
|
|
|
+ if (ngpr > 0) ngpr--; else nslots++;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ return nslots;
|
|
|
+}
|
|
|
+
|
|
|
/* Generate a call to a C function. */
|
|
|
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
|
|
{
|
|
|
uint32_t n, nargs = CCI_NARGS(ci);
|
|
|
int32_t ofs = STACKARG_OFS;
|
|
|
- uint32_t gprs = REGARG_GPRS;
|
|
|
#if LJ_64
|
|
|
+ uint32_t gprs = REGARG_GPRS;
|
|
|
Reg fpr = REGARG_FIRSTFPR;
|
|
|
+#if !LJ_ABI_WIN
|
|
|
+ MCode *patchnfpr = NULL;
|
|
|
+#endif
|
|
|
+#else
|
|
|
+ uint32_t gprs = 0;
|
|
|
+ if ((ci->flags & CCI_CC_MASK) != CCI_CC_CDECL) {
|
|
|
+ if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
|
|
|
+ gprs = (REGARG_GPRS & 31);
|
|
|
+ else if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
|
|
|
+ gprs = REGARG_GPRS;
|
|
|
+ }
|
|
|
#endif
|
|
|
- lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
|
|
|
if ((void *)ci->func)
|
|
|
emit_call(as, ci->func);
|
|
|
+#if LJ_64
|
|
|
+ if ((ci->flags & CCI_VARARG)) { /* Special handling for vararg calls. */
|
|
|
+#if LJ_ABI_WIN
|
|
|
+ for (n = 0; n < 4 && n < nargs; n++) {
|
|
|
+ IRIns *ir = IR(args[n]);
|
|
|
+ if (irt_isfp(ir->t)) /* Duplicate FPRs in GPRs. */
|
|
|
+ emit_rr(as, XO_MOVDto, (irt_isnum(ir->t) ? REX_64 : 0) | (fpr+n),
|
|
|
+ ((gprs >> (n*5)) & 31)); /* Either MOVD or MOVQ. */
|
|
|
+ }
|
|
|
+#else
|
|
|
+ patchnfpr = --as->mcp; /* Indicate number of used FPRs in register al. */
|
|
|
+ *--as->mcp = XI_MOVrib | RID_EAX;
|
|
|
+#endif
|
|
|
+ }
|
|
|
+#endif
|
|
|
for (n = 0; n < nargs; n++) { /* Setup args. */
|
|
|
IRRef ref = args[n];
|
|
|
IRIns *ir = IR(ref);
|
|
@@ -392,15 +450,16 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
|
|
#elif LJ_64
|
|
|
/* POSIX/x64 argument registers are used in order of appearance. */
|
|
|
if (irt_isfp(ir->t)) {
|
|
|
- r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++;
|
|
|
+ r = fpr <= REGARG_LASTFPR ? fpr++ : 0;
|
|
|
} else {
|
|
|
r = gprs & 31; gprs >>= 5;
|
|
|
}
|
|
|
#else
|
|
|
- if (irt_isfp(ir->t) || !(ci->flags & CCI_FASTCALL)) {
|
|
|
+ if (ref && irt_isfp(ir->t)) {
|
|
|
r = 0;
|
|
|
} else {
|
|
|
r = gprs & 31; gprs >>= 5;
|
|
|
+ if (!ref) continue;
|
|
|
}
|
|
|
#endif
|
|
|
if (r) { /* Argument is in a register. */
|
|
@@ -442,6 +501,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
|
|
ofs += sizeof(intptr_t);
|
|
|
}
|
|
|
}
|
|
|
+#if LJ_64 && !LJ_ABI_WIN
|
|
|
+ if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/* Setup result reg/sp for call. Evict scratch regs. */
|
|
@@ -503,23 +565,50 @@ static void asm_call(ASMState *as, IRIns *ir)
|
|
|
asm_gencall(as, ci, args);
|
|
|
}
|
|
|
|
|
|
+/* Return a constant function pointer or NULL for indirect calls. */
|
|
|
+static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
|
|
|
+{
|
|
|
+#if LJ_32
|
|
|
+ UNUSED(as);
|
|
|
+ if (irref_isk(func))
|
|
|
+ return (void *)irf->i;
|
|
|
+#else
|
|
|
+ if (irref_isk(func)) {
|
|
|
+ MCode *p;
|
|
|
+ if (irf->o == IR_KINT64)
|
|
|
+ p = (MCode *)(void *)ir_k64(irf)->u64;
|
|
|
+ else
|
|
|
+ p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
|
|
|
+ if (p - as->mcp == (int32_t)(p - as->mcp))
|
|
|
+ return p; /* Call target is still in +-2GB range. */
|
|
|
+ /* Avoid the indirect case of emit_call(). Try to hoist func addr. */
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ return NULL;
|
|
|
+}
|
|
|
+
|
|
|
static void asm_callx(ASMState *as, IRIns *ir)
|
|
|
{
|
|
|
IRRef args[CCI_NARGS_MAX];
|
|
|
CCallInfo ci;
|
|
|
+ IRRef func;
|
|
|
IRIns *irf;
|
|
|
ci.flags = asm_callx_flags(as, ir);
|
|
|
asm_collectargs(as, ir, &ci, args);
|
|
|
asm_setupresult(as, ir, &ci);
|
|
|
- irf = IR(ir->op2);
|
|
|
- if (LJ_32 && irref_isk(ir->op2)) { /* Call to constant address on x86. */
|
|
|
- ci.func = (ASMFunction)(void *)(uintptr_t)(uint32_t)irf->i;
|
|
|
- } else {
|
|
|
- /* Prefer a non-argument register or RID_RET for indirect calls. */
|
|
|
- RegSet allow = (RSET_GPR & ~RSET_SCRATCH)|RID2RSET(RID_RET);
|
|
|
- Reg r = ra_alloc1(as, ir->op2, allow);
|
|
|
+#if LJ_32
|
|
|
+ /* Have to readjust stack after non-cdecl calls due to callee cleanup. */
|
|
|
+ if ((ci.flags & CCI_CC_MASK) != CCI_CC_CDECL)
|
|
|
+ emit_spsub(as, 4 * asm_count_call_slots(as, &ci, args));
|
|
|
+#endif
|
|
|
+ func = ir->op2; irf = IR(func);
|
|
|
+ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
|
|
|
+ ci.func = (ASMFunction)asm_callx_func(as, irf, func);
|
|
|
+ if (!(void *)ci.func) {
|
|
|
+ /* Use a (hoistable) non-scratch register for indirect calls. */
|
|
|
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
|
|
|
+ Reg r = ra_alloc1(as, func, allow);
|
|
|
emit_rr(as, XO_GROUP5, XOg_CALL, r);
|
|
|
- ci.func = (ASMFunction)(void *)0;
|
|
|
}
|
|
|
asm_gencall(as, &ci, args);
|
|
|
}
|
|
@@ -2608,35 +2697,14 @@ static void asm_ir(ASMState *as, IRIns *ir)
|
|
|
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
|
|
{
|
|
|
IRRef args[CCI_NARGS_MAX];
|
|
|
- uint32_t nargs = (int)CCI_NARGS(ci);
|
|
|
- int nslots = 0;
|
|
|
+ int nslots;
|
|
|
asm_collectargs(as, ir, ci, args);
|
|
|
-#if LJ_64
|
|
|
- if (LJ_ABI_WIN) {
|
|
|
- nslots = (int)(nargs*2); /* Only matters for more than four args. */
|
|
|
- } else {
|
|
|
- uint32_t i;
|
|
|
- int ngpr = 6, nfpr = 8;
|
|
|
- for (i = 0; i < nargs; i++)
|
|
|
- if (args[i] && irt_isfp(IR(args[i])->t)) {
|
|
|
- if (nfpr > 0) nfpr--; else nslots += 2;
|
|
|
- } else {
|
|
|
- if (ngpr > 0) ngpr--; else nslots += 2;
|
|
|
- }
|
|
|
- }
|
|
|
+ nslots = asm_count_call_slots(as, ci, args);
|
|
|
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
|
|
|
as->evenspill = nslots;
|
|
|
+#if LJ_64
|
|
|
return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
|
|
|
#else
|
|
|
- if ((ci->flags & CCI_FASTCALL)) {
|
|
|
- lua_assert(nargs <= 2);
|
|
|
- } else {
|
|
|
- uint32_t i;
|
|
|
- for (i = 0; i < nargs; i++)
|
|
|
- nslots += (args[i] && irt_isnum(IR(args[i])->t)) ? 2 : 1;
|
|
|
- if (nslots > as->evenspill) /* Leave room for args. */
|
|
|
- as->evenspill = nslots;
|
|
|
- }
|
|
|
return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET);
|
|
|
#endif
|
|
|
}
|