| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722 |
- #include "all.h"
- typedef struct AClass AClass;
- typedef struct RAlloc RAlloc;
- struct AClass {
- Typ *type;
- int inmem;
- int align;
- uint size;
- int cls[2];
- Ref ref[2];
- };
- struct RAlloc {
- Ins i;
- RAlloc *link;
- };
- static void
- classify(AClass *a, Typ *t, uint s)
- {
- Field *f;
- int *cls;
- uint n, s1;
- for (n=0, s1=s; n<t->nunion; n++, s=s1)
- for (f=t->fields[n]; f->type!=FEnd; f++) {
- assert(s <= 16);
- cls = &a->cls[s/8];
- switch (f->type) {
- case FEnd:
- die("unreachable");
- case FPad:
- /* don't change anything */
- s += f->len;
- break;
- case Fs:
- case Fd:
- if (*cls == Kx)
- *cls = Kd;
- s += f->len;
- break;
- case Fb:
- case Fh:
- case Fw:
- case Fl:
- *cls = Kl;
- s += f->len;
- break;
- case FTyp:
- classify(a, &typ[f->len], s);
- s += typ[f->len].size;
- break;
- }
- }
- }
- static void
- typclass(AClass *a, Typ *t)
- {
- uint sz, al;
- sz = t->size;
- al = 1u << t->align;
- /* the ABI requires sizes to be rounded
- * up to the nearest multiple of 8, moreover
- * it makes it easy load and store structures
- * in registers
- */
- if (al < 8)
- al = 8;
- sz = (sz + al-1) & -al;
- a->type = t;
- a->size = sz;
- a->align = t->align;
- if (t->isdark || sz > 16 || sz == 0) {
- /* large or unaligned structures are
- * required to be passed in memory
- */
- a->inmem = 1;
- return;
- }
- a->cls[0] = Kx;
- a->cls[1] = Kx;
- a->inmem = 0;
- classify(a, t, 0);
- }
- static int
- retr(Ref reg[2], AClass *aret)
- {
- static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
- int n, k, ca, nr[2];
- nr[0] = nr[1] = 0;
- ca = 0;
- for (n=0; (uint)n*8<aret->size; n++) {
- k = KBASE(aret->cls[n]);
- reg[n] = TMP(retreg[k][nr[k]++]);
- ca += 1 << (2 * k);
- }
- return ca;
- }
- static void
- selret(Blk *b, Fn *fn)
- {
- int j, k, ca;
- Ref r, r0, reg[2];
- AClass aret;
- j = b->jmp.type;
- if (!isret(j) || j == Jret0)
- return;
- r0 = b->jmp.arg;
- b->jmp.type = Jret0;
- if (j == Jretc) {
- typclass(&aret, &typ[fn->retty]);
- if (aret.inmem) {
- assert(rtype(fn->retr) == RTmp);
- emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
- emit(Oblit1, 0, R, INT(aret.type->size), R);
- emit(Oblit0, 0, R, r0, fn->retr);
- ca = 1;
- } else {
- ca = retr(reg, &aret);
- if (aret.size > 8) {
- r = newtmp("abi", Kl, fn);
- emit(Oload, Kl, reg[1], r, R);
- emit(Oadd, Kl, r, r0, getcon(8, fn));
- }
- emit(Oload, Kl, reg[0], r0, R);
- }
- } else {
- k = j - Jretw;
- if (KBASE(k) == 0) {
- emit(Ocopy, k, TMP(RAX), r0, R);
- ca = 1;
- } else {
- emit(Ocopy, k, TMP(XMM0), r0, R);
- ca = 1 << 2;
- }
- }
- b->jmp.arg = CALL(ca);
- }
- static int
- argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
- {
- int varc, envc, nint, ni, nsse, ns, n, *pn;
- AClass *a;
- Ins *i;
- if (aret && aret->inmem)
- nint = 5; /* hidden argument */
- else
- nint = 6;
- nsse = 8;
- varc = 0;
- envc = 0;
- for (i=i0, a=ac; i<i1; i++, a++)
- switch (i->op - op + Oarg) {
- case Oarg:
- if (KBASE(i->cls) == 0)
- pn = &nint;
- else
- pn = &nsse;
- if (*pn > 0) {
- --*pn;
- a->inmem = 0;
- } else
- a->inmem = 2;
- a->align = 3;
- a->size = 8;
- a->cls[0] = i->cls;
- break;
- case Oargc:
- n = i->arg[0].val;
- typclass(a, &typ[n]);
- if (a->inmem)
- continue;
- ni = ns = 0;
- for (n=0; (uint)n*8<a->size; n++)
- if (KBASE(a->cls[n]) == 0)
- ni++;
- else
- ns++;
- if (nint >= ni && nsse >= ns) {
- nint -= ni;
- nsse -= ns;
- } else
- a->inmem = 1;
- break;
- case Oarge:
- envc = 1;
- if (op == Opar)
- *env = i->to;
- else
- *env = i->arg[0];
- break;
- case Oargv:
- varc = 1;
- break;
- default:
- die("unreachable");
- }
- if (varc && envc)
- err("sysv abi does not support variadic env calls");
- return ((varc|envc) << 12) | ((6-nint) << 4) | ((8-nsse) << 8);
- }
- int amd64_sysv_rsave[] = {
- RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
- };
- int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
- MAKESURE(sysv_arrays_ok,
- sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
- sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
- );
/* layout of call's second argument (RCall)
 *
 *  29     12    8    4  3  0
 *  |0...00|x|xxxx|xxxx|xx|xx|                  range
 *          |    |    |  |  ` gp regs returned (0..2)
 *          |    |    |  ` sse regs returned   (0..2)
 *          |    |    ` gp regs passed         (0..6)
 *          |    ` sse regs passed             (0..8)
 *          ` 1 if rax is used to pass data    (0..1)
 */
- bits
- amd64_sysv_retregs(Ref r, int p[2])
- {
- bits b;
- int ni, nf;
- assert(rtype(r) == RCall);
- b = 0;
- ni = r.val & 3;
- nf = (r.val >> 2) & 3;
- if (ni >= 1)
- b |= BIT(RAX);
- if (ni >= 2)
- b |= BIT(RDX);
- if (nf >= 1)
- b |= BIT(XMM0);
- if (nf >= 2)
- b |= BIT(XMM1);
- if (p) {
- p[0] = ni;
- p[1] = nf;
- }
- return b;
- }
- bits
- amd64_sysv_argregs(Ref r, int p[2])
- {
- bits b;
- int j, ni, nf, ra;
- assert(rtype(r) == RCall);
- b = 0;
- ni = (r.val >> 4) & 15;
- nf = (r.val >> 8) & 15;
- ra = (r.val >> 12) & 1;
- for (j=0; j<ni; j++)
- b |= BIT(amd64_sysv_rsave[j]);
- for (j=0; j<nf; j++)
- b |= BIT(XMM0+j);
- if (p) {
- p[0] = ni + ra;
- p[1] = nf;
- }
- return b | (ra ? BIT(RAX) : 0);
- }
- static Ref
- rarg(int ty, int *ni, int *ns)
- {
- if (KBASE(ty) == 0)
- return TMP(amd64_sysv_rsave[(*ni)++]);
- else
- return TMP(XMM0 + (*ns)++);
- }
- static void
- selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
- {
- Ins *i;
- AClass *ac, *a, aret;
- int ca, ni, ns, al;
- uint stk, off;
- Ref r, r1, r2, reg[2], env;
- RAlloc *ra;
- env = R;
- ac = alloc((i1-i0) * sizeof ac[0]);
- if (!req(i1->arg[1], R)) {
- assert(rtype(i1->arg[1]) == RType);
- typclass(&aret, &typ[i1->arg[1].val]);
- ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
- } else
- ca = argsclass(i0, i1, ac, Oarg, 0, &env);
- for (stk=0, a=&ac[i1-i0]; a>ac;)
- if ((--a)->inmem) {
- if (a->align > 4)
- err("sysv abi requires alignments of 16 or less");
- stk += a->size;
- if (a->align == 4)
- stk += stk & 15;
- }
- stk += stk & 15;
- if (stk) {
- r = getcon(-(int64_t)stk, fn);
- emit(Osalloc, Kl, R, r, R);
- }
- if (!req(i1->arg[1], R)) {
- if (aret.inmem) {
- /* get the return location from eax
- * it saves one callee-save reg */
- r1 = newtmp("abi", Kl, fn);
- emit(Ocopy, Kl, i1->to, TMP(RAX), R);
- ca += 1;
- } else {
- /* todo, may read out of bounds.
- * gcc did this up until 5.2, but
- * this should still be fixed.
- */
- if (aret.size > 8) {
- r = newtmp("abi", Kl, fn);
- aret.ref[1] = newtmp("abi", aret.cls[1], fn);
- emit(Ostorel, 0, R, aret.ref[1], r);
- emit(Oadd, Kl, r, i1->to, getcon(8, fn));
- }
- aret.ref[0] = newtmp("abi", aret.cls[0], fn);
- emit(Ostorel, 0, R, aret.ref[0], i1->to);
- ca += retr(reg, &aret);
- if (aret.size > 8)
- emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
- emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
- r1 = i1->to;
- }
- /* allocate return pad */
- ra = alloc(sizeof *ra);
- /* specific to NAlign == 3 */
- al = aret.align >= 2 ? aret.align - 2 : 0;
- ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}};
- ra->link = (*rap);
- *rap = ra;
- } else {
- ra = 0;
- if (KBASE(i1->cls) == 0) {
- emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
- ca += 1;
- } else {
- emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
- ca += 1 << 2;
- }
- }
- emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
- if (!req(R, env))
- emit(Ocopy, Kl, TMP(RAX), env, R);
- else if ((ca >> 12) & 1) /* vararg call */
- emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
- ni = ns = 0;
- if (ra && aret.inmem)
- emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (i->op >= Oarge || a->inmem)
- continue;
- r1 = rarg(a->cls[0], &ni, &ns);
- if (i->op == Oargc) {
- if (a->size > 8) {
- r2 = rarg(a->cls[1], &ni, &ns);
- r = newtmp("abi", Kl, fn);
- emit(Oload, a->cls[1], r2, r, R);
- emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
- }
- emit(Oload, a->cls[0], r1, i->arg[1], R);
- } else
- emit(Ocopy, i->cls, r1, i->arg[0], R);
- }
- if (!stk)
- return;
- r = newtmp("abi", Kl, fn);
- for (i=i0, a=ac, off=0; i<i1; i++, a++) {
- if (i->op >= Oarge || !a->inmem)
- continue;
- r1 = newtmp("abi", Kl, fn);
- if (i->op == Oargc) {
- if (a->align == 4)
- off += off & 15;
- emit(Oblit1, 0, R, INT(a->type->size), R);
- emit(Oblit0, 0, R, i->arg[1], r1);
- } else
- emit(Ostorel, 0, R, i->arg[0], r1);
- emit(Oadd, Kl, r1, r, getcon(off, fn));
- off += a->size;
- }
- emit(Osalloc, Kl, r, getcon(stk, fn), R);
- }
- static int
- selpar(Fn *fn, Ins *i0, Ins *i1)
- {
- AClass *ac, *a, aret;
- Ins *i;
- int ni, ns, s, al, fa;
- Ref r, env;
- env = R;
- ac = alloc((i1-i0) * sizeof ac[0]);
- curi = &insb[NIns];
- ni = ns = 0;
- if (fn->retty >= 0) {
- typclass(&aret, &typ[fn->retty]);
- fa = argsclass(i0, i1, ac, Opar, &aret, &env);
- } else
- fa = argsclass(i0, i1, ac, Opar, 0, &env);
- fn->reg = amd64_sysv_argregs(CALL(fa), 0);
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (i->op != Oparc || a->inmem)
- continue;
- if (a->size > 8) {
- r = newtmp("abi", Kl, fn);
- a->ref[1] = newtmp("abi", Kl, fn);
- emit(Ostorel, 0, R, a->ref[1], r);
- emit(Oadd, Kl, r, i->to, getcon(8, fn));
- }
- a->ref[0] = newtmp("abi", Kl, fn);
- emit(Ostorel, 0, R, a->ref[0], i->to);
- /* specific to NAlign == 3 */
- al = a->align >= 2 ? a->align - 2 : 0;
- emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
- }
- if (fn->retty >= 0 && aret.inmem) {
- r = newtmp("abi", Kl, fn);
- emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
- fn->retr = r;
- }
- for (i=i0, a=ac, s=4; i<i1; i++, a++) {
- switch (a->inmem) {
- case 1:
- if (a->align > 4)
- err("sysv abi requires alignments of 16 or less");
- if (a->align == 4)
- s = (s+3) & -4;
- fn->tmp[i->to.val].slot = -s;
- s += a->size / 4;
- continue;
- case 2:
- emit(Oload, i->cls, i->to, SLOT(-s), R);
- s += 2;
- continue;
- }
- if (i->op == Opare)
- continue;
- r = rarg(a->cls[0], &ni, &ns);
- if (i->op == Oparc) {
- emit(Ocopy, a->cls[0], a->ref[0], r, R);
- if (a->size > 8) {
- r = rarg(a->cls[1], &ni, &ns);
- emit(Ocopy, a->cls[1], a->ref[1], r, R);
- }
- } else
- emit(Ocopy, i->cls, i->to, r, R);
- }
- if (!req(R, env))
- emit(Ocopy, Kl, env, TMP(RAX), R);
- return fa | (s*4)<<12;
- }
- static Blk *
- split(Fn *fn, Blk *b)
- {
- Blk *bn;
- ++fn->nblk;
- bn = newblk();
- bn->nins = &insb[NIns] - curi;
- idup(&bn->ins, curi, bn->nins);
- curi = &insb[NIns];
- bn->visit = ++b->visit;
- strf(bn->name, "%s.%d", b->name, b->visit);
- bn->loop = b->loop;
- bn->link = b->link;
- b->link = bn;
- return bn;
- }
- static void
- chpred(Blk *b, Blk *bp, Blk *bp1)
- {
- Phi *p;
- uint a;
- for (p=b->phi; p; p=p->link) {
- for (a=0; p->blk[a]!=bp; a++)
- assert(a+1<p->narg);
- p->blk[a] = bp1;
- }
- }
- static void
- selvaarg(Fn *fn, Blk *b, Ins *i)
- {
- Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
- Blk *b0, *bstk, *breg;
- int isint;
- c4 = getcon(4, fn);
- c8 = getcon(8, fn);
- c16 = getcon(16, fn);
- ap = i->arg[0];
- isint = KBASE(i->cls) == 0;
- /* @b [...]
- r0 =l add ap, (0 or 4)
- nr =l loadsw r0
- r1 =w cultw nr, (48 or 176)
- jnz r1, @breg, @bstk
- @breg
- r0 =l add ap, 16
- r1 =l loadl r0
- lreg =l add r1, nr
- r0 =w add nr, (8 or 16)
- r1 =l add ap, (0 or 4)
- storew r0, r1
- @bstk
- r0 =l add ap, 8
- lstk =l loadl r0
- r1 =l add lstk, 8
- storel r1, r0
- @b0
- %loc =l phi @breg %lreg, @bstk %lstk
- i->to =(i->cls) load %loc
- */
- loc = newtmp("abi", Kl, fn);
- emit(Oload, i->cls, i->to, loc, R);
- b0 = split(fn, b);
- b0->jmp = b->jmp;
- b0->s1 = b->s1;
- b0->s2 = b->s2;
- if (b->s1)
- chpred(b->s1, b, b0);
- if (b->s2 && b->s2 != b->s1)
- chpred(b->s2, b, b0);
- lreg = newtmp("abi", Kl, fn);
- nr = newtmp("abi", Kl, fn);
- r0 = newtmp("abi", Kw, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorew, Kw, R, r0, r1);
- emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
- emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Oadd, Kl, lreg, r1, nr);
- emit(Oload, Kl, r1, r0, R);
- emit(Oadd, Kl, r0, ap, c16);
- breg = split(fn, b);
- breg->jmp.type = Jjmp;
- breg->s1 = b0;
- lstk = newtmp("abi", Kl, fn);
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorel, Kw, R, r1, r0);
- emit(Oadd, Kl, r1, lstk, c8);
- emit(Oload, Kl, lstk, r0, R);
- emit(Oadd, Kl, r0, ap, c8);
- bstk = split(fn, b);
- bstk->jmp.type = Jjmp;
- bstk->s1 = b0;
- b0->phi = alloc(sizeof *b0->phi);
- *b0->phi = (Phi){
- .cls = Kl, .to = loc,
- .narg = 2,
- .blk = vnew(2, sizeof b0->phi->blk[0], PFn),
- .arg = vnew(2, sizeof b0->phi->arg[0], PFn),
- };
- b0->phi->blk[0] = bstk;
- b0->phi->blk[1] = breg;
- b0->phi->arg[0] = lstk;
- b0->phi->arg[1] = lreg;
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kw, fn);
- b->jmp.type = Jjnz;
- b->jmp.arg = r1;
- b->s1 = breg;
- b->s2 = bstk;
- c = getcon(isint ? 48 : 176, fn);
- emit(Ocmpw+Ciult, Kw, r1, nr, c);
- emit(Oloadsw, Kl, nr, r0, R);
- emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
- }
- static void
- selvastart(Fn *fn, int fa, Ref ap)
- {
- Ref r0, r1;
- int gp, fp, sp;
- gp = ((fa >> 4) & 15) * 8;
- fp = 48 + ((fa >> 8) & 15) * 16;
- sp = fa >> 12;
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorel, Kw, R, r1, r0);
- emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
- emit(Oadd, Kl, r0, ap, getcon(16, fn));
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorel, Kw, R, r1, r0);
- emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
- emit(Oadd, Kl, r0, ap, getcon(8, fn));
- r0 = newtmp("abi", Kl, fn);
- emit(Ostorew, Kw, R, getcon(fp, fn), r0);
- emit(Oadd, Kl, r0, ap, getcon(4, fn));
- emit(Ostorew, Kw, R, getcon(gp, fn), ap);
- }
- void
- amd64_sysv_abi(Fn *fn)
- {
- Blk *b;
- Ins *i, *i0, *ip;
- RAlloc *ral;
- int n, fa;
- for (b=fn->start; b; b=b->link)
- b->visit = 0;
- /* lower parameters */
- for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
- if (!ispar(i->op))
- break;
- fa = selpar(fn, b->ins, i);
- n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
- i0 = alloc(n * sizeof(Ins));
- ip = icpy(ip = i0, curi, &insb[NIns] - curi);
- ip = icpy(ip, i, &b->ins[b->nins] - i);
- b->nins = n;
- b->ins = i0;
- /* lower calls, returns, and vararg instructions */
- ral = 0;
- b = fn->start;
- do {
- if (!(b = b->link))
- b = fn->start; /* do it last */
- if (b->visit)
- continue;
- curi = &insb[NIns];
- selret(b, fn);
- for (i=&b->ins[b->nins]; i!=b->ins;)
- switch ((--i)->op) {
- default:
- emiti(*i);
- break;
- case Ocall:
- for (i0=i; i0>b->ins; i0--)
- if (!isarg((i0-1)->op))
- break;
- selcall(fn, i0, i, &ral);
- i = i0;
- break;
- case Ovastart:
- selvastart(fn, fa, i->arg[0]);
- break;
- case Ovaarg:
- selvaarg(fn, b, i);
- break;
- case Oarg:
- case Oargc:
- die("unreachable");
- }
- if (b == fn->start)
- for (; ral; ral=ral->link)
- emiti(ral->i);
- b->nins = &insb[NIns] - curi;
- idup(&b->ins, curi, b->nins);
- } while (b != fn->start);
- if (debug['A']) {
- fprintf(stderr, "\n> After ABI lowering:\n");
- printfn(fn, stderr);
- }
- }
|