| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656 |
- #include "all.h"
/* X-macro listing every comparison code together with the x86
 * condition suffix emitted for it. The integer comparisons are
 * indexed directly, the floating-point ones are offset by NCmpI.
 * The list is expanded twice in this file: once to build the
 * "set<cc>" entries of omap[], and once to build the table of
 * "j<cc>" suffixes used by amd64_emitfn().
 */
#define CMP(X) \
	X(Ciule, "be") \
	X(Ciult, "b") \
	X(Cisle, "le") \
	X(Cislt, "l") \
	X(Cisgt, "g") \
	X(Cisge, "ge") \
	X(Ciugt, "a") \
	X(Ciuge, "ae") \
	X(Cieq, "z") \
	X(Cine, "nz") \
	X(NCmpI+Cfle, "be") \
	X(NCmpI+Cflt, "b") \
	X(NCmpI+Cfgt, "a") \
	X(NCmpI+Cfge, "ae") \
	X(NCmpI+Cfeq, "z") \
	X(NCmpI+Cfne, "nz") \
	X(NCmpI+Cfo, "np") \
	X(NCmpI+Cfuo, "p")
/* SLong..SByte select the operand width when printing a register;
 * they are the column index into rname[][4] (see regtoa()).
 * Ki and Ka are wildcard classes used only in the cls field of
 * omap[] entries; emitins() interprets them when searching the
 * table.
 */
enum {
	SLong = 0,
	SWord = 1,
	SShort = 2,
	SByte = 3,
	Ki = -1, /* matches Kw and Kl */
	Ka = -2, /* matches all classes */
};
/* Instruction format strings:
 *
 * if the format string starts with -, the instruction
 * is assumed to be 3-address and is put in 2-address
 * mode using an extra mov if necessary
 *
 * if the format string starts with +, the same as the
 * above applies, but commutativity is also assumed
 *
 * %k is used to set the class of the instruction,
 * it'll expand to "l", "q", "ss", "sd", depending
 * on the instruction class
 * %0 designates the first argument
 * %1 designates the second argument
 * %= designates the result
 * %% emits a literal percent sign
 *
 * when %0, %1, or %= is used without a prefix, the
 * register width is taken from the instruction class
 * (64 bits for wide classes, 32 bits otherwise)
 *
 * if %k is not used, a prefix to 0, 1, or = must be
 * added, it can be:
 * M - memory reference
 * L - long (64 bits)
 * W - word (32 bits)
 * H - short (16 bits)
 * B - byte (8 bits)
 * S - single precision float
 * D - double precision float
 *
 * The table below is scanned linearly by emitins() and
 * the first entry whose op and class match is used, so
 * the order of entries sharing an op matters. The entry
 * with op NOp terminates the table.
 */
static struct {
	short op;
	short cls;
	char *fmt;
} omap[] = {
	{ Oadd, Ka, "+add%k %1, %=" },
	{ Osub, Ka, "-sub%k %1, %=" },
	{ Oand, Ki, "+and%k %1, %=" },
	{ Oor, Ki, "+or%k %1, %=" },
	{ Oxor, Ki, "+xor%k %1, %=" },
	{ Osar, Ki, "-sar%k %B1, %=" },
	{ Oshr, Ki, "-shr%k %B1, %=" },
	{ Oshl, Ki, "-shl%k %B1, %=" },
	{ Omul, Ki, "+imul%k %1, %=" },
	{ Omul, Ks, "+mulss %1, %=" },
	{ Omul, Kd, "+mulsd %1, %=" },
	{ Odiv, Ka, "-div%k %1, %=" },
	{ Ostorel, Ka, "movq %L0, %M1" },
	{ Ostorew, Ka, "movl %W0, %M1" },
	{ Ostoreh, Ka, "movw %H0, %M1" },
	{ Ostoreb, Ka, "movb %B0, %M1" },
	{ Ostores, Ka, "movss %S0, %M1" },
	{ Ostored, Ka, "movsd %D0, %M1" },
	{ Oload, Ka, "mov%k %M0, %=" },
	{ Oloadsw, Kl, "movslq %M0, %L=" },
	{ Oloadsw, Kw, "movl %M0, %W=" },
	{ Oloaduw, Ki, "movl %M0, %W=" },
	{ Oloadsh, Ki, "movsw%k %M0, %=" },
	{ Oloaduh, Ki, "movzw%k %M0, %=" },
	{ Oloadsb, Ki, "movsb%k %M0, %=" },
	{ Oloadub, Ki, "movzb%k %M0, %=" },
	{ Oextsw, Kl, "movslq %W0, %L=" },
	{ Oextuw, Kl, "movl %W0, %W=" },
	{ Oextsh, Ki, "movsw%k %H0, %=" },
	{ Oextuh, Ki, "movzw%k %H0, %=" },
	{ Oextsb, Ki, "movsb%k %B0, %=" },
	{ Oextub, Ki, "movzb%k %B0, %=" },
	{ Oexts, Kd, "cvtss2sd %0, %=" },
	{ Otruncd, Ks, "cvtsd2ss %0, %=" },
	{ Ostosi, Ki, "cvttss2si%k %0, %=" },
	{ Odtosi, Ki, "cvttsd2si%k %0, %=" },
	{ Oswtof, Ka, "cvtsi2%k %W0, %=" },
	{ Osltof, Ka, "cvtsi2%k %L0, %=" },
	{ Ocast, Ki, "movq %D0, %L=" },
	{ Ocast, Ka, "movq %L0, %D=" },
	{ Oaddr, Ki, "lea%k %M0, %=" },
	{ Oswap, Ki, "xchg%k %0, %1" },
	{ Osign, Kl, "cqto" },
	{ Osign, Kw, "cltd" },
	{ Oxdiv, Ki, "div%k %0" },
	{ Oxidiv, Ki, "idiv%k %0" },
	{ Oxcmp, Ks, "ucomiss %S0, %S1" },
	{ Oxcmp, Kd, "ucomisd %D0, %D1" },
	{ Oxcmp, Ki, "cmp%k %0, %1" },
	{ Oxtest, Ki, "test%k %0, %1" },
	/* one flag-materializing entry per comparison in CMP:
	 * set<cc> into the byte register, then zero-extend it */
#define X(c, s) \
	{ Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
	CMP(X)
#undef X
	{ NOp, 0, 0 }
};
/* Names of the integer registers, indexed by register id and then
 * by operand size (SLong, SWord, SShort, SByte): the columns hold
 * the 64-, 32-, 16- and 8-bit name of each register.
 */
static char *rname[][4] = {
	[RAX] = {"rax", "eax", "ax", "al"},
	[RBX] = {"rbx", "ebx", "bx", "bl"},
	[RCX] = {"rcx", "ecx", "cx", "cl"},
	[RDX] = {"rdx", "edx", "dx", "dl"},
	[RSI] = {"rsi", "esi", "si", "sil"},
	[RDI] = {"rdi", "edi", "di", "dil"},
	[RBP] = {"rbp", "ebp", "bp", "bpl"},
	[RSP] = {"rsp", "esp", "sp", "spl"},
	[R8 ] = {"r8" , "r8d", "r8w", "r8b"},
	[R9 ] = {"r9" , "r9d", "r9w", "r9b"},
	[R10] = {"r10", "r10d", "r10w", "r10b"},
	[R11] = {"r11", "r11d", "r11w", "r11b"},
	[R12] = {"r12", "r12d", "r12w", "r12b"},
	[R13] = {"r13", "r13d", "r13w", "r13b"},
	[R14] = {"r14", "r14d", "r14w", "r14b"},
	[R15] = {"r15", "r15d", "r15w", "r15b"},
};
- static int
- slot(Ref r, Fn *fn)
- {
- int s;
- s = rsval(r);
- assert(s <= fn->slot);
- /* specific to NAlign == 3 */
- if (s < 0)
- return -4 * s;
- else if (fn->vararg)
- return -176 + -4 * (fn->slot - s);
- else
- return -4 * (fn->slot - s);
- }
- static void
- emitcon(Con *con, FILE *f)
- {
- char *p, *l;
- switch (con->type) {
- case CAddr:
- l = str(con->sym.id);
- p = l[0] == '"' ? "" : T.assym;
- if (con->sym.type == SThr) {
- if (T.apple)
- fprintf(f, "%s%s@TLVP", p, l);
- else
- fprintf(f, "%%fs:%s%s@tpoff", p, l);
- } else
- fprintf(f, "%s%s", p, l);
- if (con->bits.i)
- fprintf(f, "%+"PRId64, con->bits.i);
- break;
- case CBits:
- fprintf(f, "%"PRId64, con->bits.i);
- break;
- default:
- die("unreachable");
- }
- }
- static char *
- regtoa(int reg, int sz)
- {
- static char buf[6];
- assert(reg <= XMM15);
- if (reg >= XMM0) {
- sprintf(buf, "xmm%d", reg-XMM0);
- return buf;
- } else
- return rname[reg][sz];
- }
- static Ref
- getarg(char c, Ins *i)
- {
- switch (c) {
- case '0':
- return i->arg[0];
- case '1':
- return i->arg[1];
- case '=':
- return i->to;
- default:
- die("invalid arg letter %c", c);
- }
- }
- static void emitins(Ins, Fn *, FILE *);
- static void
- emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
- {
- Ins icp;
- icp.op = Ocopy;
- icp.arg[0] = r2;
- icp.to = r1;
- icp.cls = k;
- emitins(icp, fn, f);
- }
/* Expand the format string s for instruction i and write the
 * resulting assembly line(s) to f.
 *
 * The syntax of s is described above omap[]. A leading '+' or '-'
 * first turns a 3-address instruction into 2-address form by
 * emitting a copy of the first argument into the result. The rest
 * of the string is then copied verbatim, with each '%' directive
 * replaced by the class suffix or by an operand printed at the
 * requested size. A newline is written when the end of s is
 * reached.
 *
 * Note that i may be modified (arguments swapped for '+'), and
 * that printing a memory operand whose base is a stack slot
 * rewrites that entry of fn->mem in place to use %rbp.
 */
static void
emitf(char *s, Ins *i, Fn *fn, FILE *f)
{
	static char clstoa[][3] = {"l", "q", "ss", "sd"};
	char c;
	int sz;
	Ref ref;
	Mem *m;
	Con off;

	switch (*s) {
	case '+':
		/* commutative: if the second argument is the result,
		 * swap the arguments so that the first one is */
		if (req(i->arg[1], i->to)) {
			ref = i->arg[0];
			i->arg[0] = i->arg[1];
			i->arg[1] = ref;
		}
		/* fall through */
	case '-':
		/* the copy below would clobber arg[1] if it were the
		 * result register while arg[0] is not */
		assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
			"cannot convert to 2-address");
		emitcopy(i->to, i->arg[0], i->cls, fn, f);
		s++;
		break;
	}

	fputc('\t', f);
Next:
	/* copy literal text up to the next directive; the end of
	 * the string terminates the line and the function */
	while ((c = *s++) != '%')
		if (!c) {
			fputc('\n', f);
			return;
		} else
			fputc(c, f);
	switch ((c = *s++)) {
	case '%':
		fputc('%', f);
		break;
	case 'k':
		fputs(clstoa[i->cls], f);
		break;
	case '0':
	case '1':
	case '=':
		/* no size prefix: use the width of the class, and
		 * step back so Ref re-reads the operand letter */
		sz = KWIDE(i->cls) ? SLong : SWord;
		s--;
		goto Ref;
	case 'D':
	case 'S':
		sz = SLong; /* does not matter for floats */
	Ref:
		c = *s++;
		ref = getarg(c, i);
		switch (rtype(ref)) {
		case RTmp:
			assert(isreg(ref));
			fprintf(f, "%%%s", regtoa(ref.val, sz));
			break;
		case RSlot:
			fprintf(f, "%d(%%rbp)", slot(ref, fn));
			break;
		case RMem:
		Mem:
			m = &fn->mem[ref.val];
			/* a slot base becomes %rbp with the slot
			 * displacement folded into the offset; this
			 * updates the Mem stored in fn */
			if (rtype(m->base) == RSlot) {
				off.type = CBits;
				off.bits.i = slot(m->base, fn);
				addcon(&m->offset, &off, 1);
				m->base = TMP(RBP);
			}
			if (m->offset.type != CUndef)
				emitcon(&m->offset, f);
			fputc('(', f);
			if (!req(m->base, R))
				fprintf(f, "%%%s", regtoa(m->base.val, SLong));
			else if (m->offset.type == CAddr)
				/* no base and a symbolic offset: address
				 * the symbol relative to %rip */
				fprintf(f, "%%rip");
			if (!req(m->index, R))
				fprintf(f, ", %%%s, %d",
					regtoa(m->index.val, SLong),
					m->scale
				);
			fputc(')', f);
			break;
		case RCon:
			/* immediate operand */
			fputc('$', f);
			emitcon(&fn->con[ref.val], f);
			break;
		default:
			die("unreachable");
		}
		break;
	case 'L':
		sz = SLong;
		goto Ref;
	case 'W':
		sz = SWord;
		goto Ref;
	case 'H':
		sz = SShort;
		goto Ref;
	case 'B':
		sz = SByte;
		goto Ref;
	case 'M':
		/* memory reference: the operand is used as an
		 * address rather than as a value */
		c = *s++;
		ref = getarg(c, i);
		switch (rtype(ref)) {
		case RMem:
			goto Mem;
		case RSlot:
			fprintf(f, "%d(%%rbp)", slot(ref, fn));
			break;
		case RCon:
			off = fn->con[ref.val];
			emitcon(&off, f);
			/* symbols are addressed relative to %rip,
			 * except non-Apple thread-local ones, which
			 * emitcon() already printed with %fs: */
			if (off.type == CAddr)
			if (off.sym.type != SThr || T.apple)
				fprintf(f, "(%%rip)");
			break;
		case RTmp:
			assert(isreg(ref));
			fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
			break;
		default:
			die("unreachable");
		}
		break;
	default:
		die("invalid format specifier %%%c", c);
	}
	goto Next;
}
/* Constants used to negate floating-point values, indexed by
 * instruction class (only Ks and Kd are populated). Each entry
 * points to 16 bytes whose lowest element has only its top bit
 * set; emitins() passes them to stashbits() with a size of 16
 * and uses the result as the operand of xorps/xorpd for Oneg.
 */
static void *negmask[4] = {
	[Ks] = (uint32_t[4]){ 0x80000000 },
	[Kd] = (uint64_t[2]){ 0x8000000000000000 },
};
/* Emit the assembly for a single instruction i of function fn
 * to f.
 *
 * Most opcodes are handled by looking up a format string in
 * omap[] and expanding it with emitf(); the cases below deal
 * with opcodes that need a different instruction sequence for
 * some operand combinations, and fall back to the table with
 * "goto Table" otherwise. i is received by value, so the local
 * rewrites of its fields do not affect the caller.
 */
static void
emitins(Ins i, Fn *fn, FILE *f)
{
	Ref r;
	int64_t val;
	int o, t0;
	Ins ineg;
	Con *con;
	char *sym;

	switch (i.op) {
	default:
	Table:
		/* most instructions are just pulled out of
		 * the table omap[], some special cases are
		 * detailed below */
		for (o=0;; o++) {
			/* this linear search should really be a binary
			 * search */
			if (omap[o].op == NOp)
				die("no match for %s(%c)",
					optab[i.op].name, "wlsd"[i.cls]);
			/* an entry matches on exact class, on Ki for
			 * the integer classes, or on Ka for any class */
			if (omap[o].op == i.op)
			if (omap[o].cls == i.cls
			|| (omap[o].cls == Ki && KBASE(i.cls) == 0)
			|| (omap[o].cls == Ka))
				break;
		}
		emitf(omap[o].fmt, &i, fn, f);
		break;
	case Onop:
		/* just do nothing for nops, they are inserted
		 * by some passes */
		break;
	case Omul:
		/* here, we try to use the 3-address form
		 * of multiplication when possible */
		if (rtype(i.arg[1]) == RCon) {
			/* move the constant to the first argument */
			r = i.arg[0];
			i.arg[0] = i.arg[1];
			i.arg[1] = r;
		}
		if (KBASE(i.cls) == 0 /* only available for ints */
		&& rtype(i.arg[0]) == RCon
		&& rtype(i.arg[1]) == RTmp) {
			emitf("imul%k %0, %1, %=", &i, fn, f);
			break;
		}
		goto Table;
	case Osub:
		/* we have to use the negation trick to handle
		 * some 3-address subtractions */
		if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
			/* to = a - to  becomes  to = -to; to += a */
			ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
			emitins(ineg, fn, f);
			emitf("add%k %0, %=", &i, fn, f);
			break;
		}
		goto Table;
	case Oneg:
		if (!req(i.to, i.arg[0]))
			emitf("mov%k %0, %=", &i, fn, f);
		if (KBASE(i.cls) == 0)
			emitf("neg%k %=", &i, fn, f);
		else
			/* floats: xor with the stashed negmask
			 * constant for this class */
			fprintf(f,
				"\txorp%c %sfp%d(%%rip), %%%s\n",
				"xxsd"[i.cls],
				T.asloc,
				stashbits(negmask[i.cls], 16),
				regtoa(i.to.val, SLong)
			);
		break;
	case Odiv:
		/* use xmm15 to adjust the instruction when the
		 * conversion to 2-address in emitf() would fail */
		if (req(i.to, i.arg[1])) {
			/* save the divisor in xmm15, load the dividend
			 * into the result, then divide in place */
			i.arg[1] = TMP(XMM0+15);
			emitf("mov%k %=, %1", &i, fn, f);
			emitf("mov%k %0, %=", &i, fn, f);
			i.arg[0] = i.to;
		}
		goto Table;
	case Ocopy:
		/* copies are used for many things; see my note
		 * to understand how to load big constants:
		 * https://c9x.me/notes/2015-09-19.html */
		assert(rtype(i.to) != RMem);
		/* copies with no source or destination, and
		 * copies onto itself, emit nothing */
		if (req(i.to, R) || req(i.arg[0], R))
			break;
		if (req(i.to, i.arg[0]))
			break;
		t0 = rtype(i.arg[0]);
		if (i.cls == Kl
		&& t0 == RCon
		&& fn->con[i.arg[0].val].type == CBits) {
			val = fn->con[i.arg[0].val].bits.i;
			/* a constant that fits in 32 unsigned bits is
			 * loaded into a register with a 32-bit move */
			if (isreg(i.to))
			if (val >= 0 && val <= UINT32_MAX) {
				emitf("movl %W0, %W=", &i, fn, f);
				break;
			}
			/* a constant outside the signed 32-bit range
			 * is stored to a slot as two 32-bit halves */
			if (rtype(i.to) == RSlot)
			if (val < INT32_MIN || val > INT32_MAX) {
				emitf("movl %0, %=", &i, fn, f);
				emitf("movl %0>>32, 4+%=", &i, fn, f);
				break;
			}
		}
		/* symbol addresses are loaded with lea */
		if (isreg(i.to)
		&& t0 == RCon
		&& fn->con[i.arg[0].val].type == CAddr) {
			emitf("lea%k %M0, %=", &i, fn, f);
			break;
		}
		/* memory-to-slot copies go through xmm15 */
		if (rtype(i.to) == RSlot
		&& (t0 == RSlot || t0 == RMem)) {
			i.cls = KWIDE(i.cls) ? Kd : Ks;
			i.arg[1] = TMP(XMM0+15);
			emitf("mov%k %0, %1", &i, fn, f);
			emitf("mov%k %1, %=", &i, fn, f);
			break;
		}
		/* conveniently, the assembler knows if it
		 * should use movabsq when reading movq */
		emitf("mov%k %0, %=", &i, fn, f);
		break;
	case Oaddr:
		if (!T.apple
		&& rtype(i.arg[0]) == RCon
		&& fn->con[i.arg[0].val].sym.type == SThr) {
			/* derive the symbol address from the TCB
			 * address at offset 0 of %fs */
			assert(isreg(i.to));
			con = &fn->con[i.arg[0].val];
			sym = str(con->sym.id);
			emitf("movq %%fs:0, %L=", &i, fn, f);
			fprintf(f, "\tleaq %s%s@tpoff",
				sym[0] == '"' ? "" : T.assym, sym);
			if (con->bits.i)
				fprintf(f, "%+"PRId64, con->bits.i);
			fprintf(f, "(%%%s), %%%s\n",
				regtoa(i.to.val, SLong),
				regtoa(i.to.val, SLong));
			break;
		}
		goto Table;
	case Ocall:
		/* calls simply have a weird syntax in AT&T
		 * assembly... */
		switch (rtype(i.arg[0])) {
		case RCon:
			fprintf(f, "\tcallq ");
			emitcon(&fn->con[i.arg[0].val], f);
			fprintf(f, "\n");
			break;
		case RTmp:
			emitf("callq *%L0", &i, fn, f);
			break;
		default:
			die("invalid call argument");
		}
		break;
	case Osalloc:
		/* there is no good reason why this is here
		 * maybe we should split Osalloc in 2 different
		 * instructions depending on the result
		 */
		emitf("subq %L0, %%rsp", &i, fn, f);
		if (!req(i.to, R))
			emitcopy(i.to, TMP(RSP), Kl, fn, f);
		break;
	case Oswap:
		if (KBASE(i.cls) == 0)
			goto Table;
		/* for floats, there is no swap instruction
		 * so we use xmm15 as a temporary
		 */
		emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
		emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
		emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
		break;
	case Odbgloc:
		emitdbgloc(i.arg[0].val, i.arg[1].val, f);
		break;
	}
}
- static uint64_t
- framesz(Fn *fn)
- {
- uint64_t i, o, f;
- /* specific to NAlign == 3 */
- for (i=0, o=0; i<NCLR; i++)
- o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
- f = fn->slot;
- f = (f + 3) & -4;
- return 4*f + 8*o + 176*fn->vararg;
- }
- void
- amd64_emitfn(Fn *fn, FILE *f)
- {
- static char *ctoa[] = {
- #define X(c, s) [c] = s,
- CMP(X)
- #undef X
- };
- static int id0;
- Blk *b, *s;
- Ins *i, itmp;
- int *r, c, o, n, lbl;
- uint64_t fs;
- emitfnlnk(fn->name, &fn->lnk, f);
- fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
- fs = framesz(fn);
- if (fs)
- fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", fs);
- if (fn->vararg) {
- o = -176;
- for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
- fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
- for (n=0; n<8; ++n, o+=16)
- fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
- }
- for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
- if (fn->reg & BIT(*r)) {
- itmp.arg[0] = TMP(*r);
- emitf("pushq %L0", &itmp, fn, f);
- fs += 8;
- }
- for (lbl=0, b=fn->start; b; b=b->link) {
- if (lbl || b->npred > 1)
- fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
- for (i=b->ins; i!=&b->ins[b->nins]; i++)
- emitins(*i, fn, f);
- lbl = 1;
- switch (b->jmp.type) {
- case Jhlt:
- fprintf(f, "\tud2\n");
- break;
- case Jret0:
- if (fn->dynalloc)
- fprintf(f,
- "\tmovq %%rbp, %%rsp\n"
- "\tsubq $%"PRIu64", %%rsp\n",
- fs
- );
- for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
- if (fn->reg & BIT(*--r)) {
- itmp.arg[0] = TMP(*r);
- emitf("popq %L0", &itmp, fn, f);
- }
- fprintf(f,
- "\tleave\n"
- "\tret\n"
- );
- break;
- case Jjmp:
- Jmp:
- if (b->s1 != b->link)
- fprintf(f, "\tjmp %sbb%d\n",
- T.asloc, id0+b->s1->id);
- else
- lbl = 0;
- break;
- default:
- c = b->jmp.type - Jjf;
- if (0 <= c && c <= NCmp) {
- if (b->link == b->s2) {
- s = b->s1;
- b->s1 = b->s2;
- b->s2 = s;
- } else
- c = cmpneg(c);
- fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
- T.asloc, id0+b->s2->id);
- goto Jmp;
- }
- die("unhandled jump %d", b->jmp.type);
- }
- }
- id0 += fn->nblk;
- if (!T.apple)
- elf_emitfnfin(fn->name, f);
- }
|