/* emit.c */
  1. #include "all.h"
  2. typedef struct E E;
  3. struct E {
  4. FILE *f;
  5. Fn *fn;
  6. int fp;
  7. uint64_t fsz;
  8. int nclob;
  9. };
  10. #define CMP(X) \
  11. X(Ciule, "be", "a") \
  12. X(Ciult, "b", "ae") \
  13. X(Cisle, "le", "g") \
  14. X(Cislt, "l", "ge") \
  15. X(Cisgt, "g", "le") \
  16. X(Cisge, "ge", "l") \
  17. X(Ciugt, "a", "be") \
  18. X(Ciuge, "ae", "b") \
  19. X(Cieq, "z", "nz") \
  20. X(Cine, "nz", "z") \
  21. X(NCmpI+Cfle, "be", "a") \
  22. X(NCmpI+Cflt, "b", "ae") \
  23. X(NCmpI+Cfgt, "a", "be") \
  24. X(NCmpI+Cfge, "ae", "b") \
  25. X(NCmpI+Cfo, "np", "p") \
  26. X(NCmpI+Cfuo, "p", "np")
  27. enum {
  28. SLong = 0,
  29. SWord = 1,
  30. SShort = 2,
  31. SByte = 3,
  32. Ki = -1, /* matches Kw and Kl */
  33. Ka = -2, /* matches all classes */
  34. };
  35. /* Instruction format strings:
  36. *
  37. * if the format string starts with -, the instruction
  38. * is assumed to be 3-address and is put in 2-address
  39. * mode using an extra mov if necessary
  40. *
  41. * if the format string starts with +, the same as the
  42. * above applies, but commutativity is also assumed
  43. *
  44. * %k is used to set the class of the instruction,
  45. * it'll expand to "l", "q", "ss", "sd", depending
  46. * on the instruction class
  47. * %0 designates the first argument
  48. * %1 designates the second argument
  49. * %= designates the result
  50. *
  51. * if %k is not used, a prefix to 0, 1, or = must be
  52. * added, it can be:
  53. * M - memory reference
  54. * L - long (64 bits)
  55. * W - word (32 bits)
  56. * H - short (16 bits)
  57. * B - byte (8 bits)
  58. * S - single precision float
  59. * D - double precision float
  60. */
  61. static struct {
  62. short op;
  63. short cls;
  64. char *fmt;
  65. } omap[] = {
  66. { Oadd, Ka, "+add%k %1, %=" },
  67. { Osub, Ka, "-sub%k %1, %=" },
  68. { Oand, Ki, "+and%k %1, %=" },
  69. { Oor, Ki, "+or%k %1, %=" },
  70. { Oxor, Ki, "+xor%k %1, %=" },
  71. { Osar, Ki, "-sar%k %B1, %=" },
  72. { Oshr, Ki, "-shr%k %B1, %=" },
  73. { Oshl, Ki, "-shl%k %B1, %=" },
  74. { Omul, Ki, "+imul%k %1, %=" },
  75. { Omul, Ks, "+mulss %1, %=" },
  76. { Omul, Kd, "+mulsd %1, %=" },
  77. { Odiv, Ka, "-div%k %1, %=" },
  78. { Ostorel, Ka, "movq %L0, %M1" },
  79. { Ostorew, Ka, "movl %W0, %M1" },
  80. { Ostoreh, Ka, "movw %H0, %M1" },
  81. { Ostoreb, Ka, "movb %B0, %M1" },
  82. { Ostores, Ka, "movss %S0, %M1" },
  83. { Ostored, Ka, "movsd %D0, %M1" },
  84. { Oload, Ka, "mov%k %M0, %=" },
  85. { Oloadsw, Kl, "movslq %M0, %L=" },
  86. { Oloadsw, Kw, "movl %M0, %W=" },
  87. { Oloaduw, Ki, "movl %M0, %W=" },
  88. { Oloadsh, Ki, "movsw%k %M0, %=" },
  89. { Oloaduh, Ki, "movzw%k %M0, %=" },
  90. { Oloadsb, Ki, "movsb%k %M0, %=" },
  91. { Oloadub, Ki, "movzb%k %M0, %=" },
  92. { Oextsw, Kl, "movslq %W0, %L=" },
  93. { Oextuw, Kl, "movl %W0, %W=" },
  94. { Oextsh, Ki, "movsw%k %H0, %=" },
  95. { Oextuh, Ki, "movzw%k %H0, %=" },
  96. { Oextsb, Ki, "movsb%k %B0, %=" },
  97. { Oextub, Ki, "movzb%k %B0, %=" },
  98. { Oexts, Kd, "cvtss2sd %0, %=" },
  99. { Otruncd, Ks, "cvtsd2ss %0, %=" },
  100. { Ostosi, Ki, "cvttss2si%k %0, %=" },
  101. { Odtosi, Ki, "cvttsd2si%k %0, %=" },
  102. { Oswtof, Ka, "cvtsi2%k %W0, %=" },
  103. { Osltof, Ka, "cvtsi2%k %L0, %=" },
  104. { Ocast, Ki, "movq %D0, %L=" },
  105. { Ocast, Ka, "movq %L0, %D=" },
  106. { Oaddr, Ki, "lea%k %M0, %=" },
  107. { Oswap, Ki, "xchg%k %0, %1" },
  108. { Osign, Kl, "cqto" },
  109. { Osign, Kw, "cltd" },
  110. { Oxdiv, Ki, "div%k %0" },
  111. { Oxidiv, Ki, "idiv%k %0" },
  112. { Oxcmp, Ks, "ucomiss %S0, %S1" },
  113. { Oxcmp, Kd, "ucomisd %D0, %D1" },
  114. { Oxcmp, Ki, "cmp%k %0, %1" },
  115. { Oxtest, Ki, "test%k %0, %1" },
  116. #define X(c, s, _) \
  117. { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
  118. CMP(X)
  119. #undef X
  120. { Oflagfeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
  121. { Oflagfne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
  122. { NOp, 0, 0 }
  123. };
  124. static char cmov[][2][16] = {
  125. #define X(c, s0, s1) \
  126. [c] = { \
  127. "cmov" s0 " %0, %=", \
  128. "cmov" s1 " %1, %=", \
  129. },
  130. CMP(X)
  131. #undef X
  132. };
  133. static char *rname[][4] = {
  134. [RAX] = {"rax", "eax", "ax", "al"},
  135. [RBX] = {"rbx", "ebx", "bx", "bl"},
  136. [RCX] = {"rcx", "ecx", "cx", "cl"},
  137. [RDX] = {"rdx", "edx", "dx", "dl"},
  138. [RSI] = {"rsi", "esi", "si", "sil"},
  139. [RDI] = {"rdi", "edi", "di", "dil"},
  140. [RBP] = {"rbp", "ebp", "bp", "bpl"},
  141. [RSP] = {"rsp", "esp", "sp", "spl"},
  142. [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
  143. [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
  144. [R10] = {"r10", "r10d", "r10w", "r10b"},
  145. [R11] = {"r11", "r11d", "r11w", "r11b"},
  146. [R12] = {"r12", "r12d", "r12w", "r12b"},
  147. [R13] = {"r13", "r13d", "r13w", "r13b"},
  148. [R14] = {"r14", "r14d", "r14w", "r14b"},
  149. [R15] = {"r15", "r15d", "r15w", "r15b"},
  150. };
  151. static int
  152. slot(Ref r, E *e)
  153. {
  154. int s;
  155. s = rsval(r);
  156. assert(s <= e->fn->slot);
  157. /* specific to NAlign == 3 */
  158. if (s < 0) {
  159. if (e->fp == RSP)
  160. return 4*-s - 8 + e->fsz + e->nclob*8;
  161. else
  162. return 4*-s;
  163. }
  164. else if (e->fp == RSP)
  165. return 4*s + e->nclob*8;
  166. else if (e->fn->vararg)
  167. return -176 + -4 * (e->fn->slot - s);
  168. else
  169. return -4 * (e->fn->slot - s);
  170. }
  171. static void
  172. emitcon(Con *con, E *e)
  173. {
  174. char *p, *l;
  175. switch (con->type) {
  176. case CAddr:
  177. l = str(con->sym.id);
  178. p = l[0] == '"' ? "" : T.assym;
  179. if (con->sym.type == SThr) {
  180. if (T.apple)
  181. fprintf(e->f, "%s%s@TLVP", p, l);
  182. else
  183. fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
  184. } else
  185. fprintf(e->f, "%s%s", p, l);
  186. if (con->bits.i)
  187. fprintf(e->f, "%+"PRId64, con->bits.i);
  188. break;
  189. case CBits:
  190. fprintf(e->f, "%"PRId64, con->bits.i);
  191. break;
  192. default:
  193. die("unreachable");
  194. }
  195. }
  196. static char *
  197. regtoa(int reg, int sz)
  198. {
  199. static char buf[6];
  200. assert(reg <= XMM15);
  201. if (reg >= XMM0) {
  202. sprintf(buf, "xmm%d", reg-XMM0);
  203. return buf;
  204. } else
  205. return rname[reg][sz];
  206. }
  207. static Ref
  208. getarg(char c, Ins *i)
  209. {
  210. switch (c) {
  211. case '0':
  212. return i->arg[0];
  213. case '1':
  214. return i->arg[1];
  215. case '=':
  216. return i->to;
  217. default:
  218. die("invalid arg letter %c", c);
  219. }
  220. }
  221. static void emitins(Ins, E *);
  222. static void
  223. emitcopy(Ref r1, Ref r2, int k, E *e)
  224. {
  225. Ins icp;
  226. icp.op = Ocopy;
  227. icp.arg[0] = r2;
  228. icp.to = r1;
  229. icp.cls = k;
  230. emitins(icp, e);
  231. }
  232. static void
  233. emitf(char *s, Ins *i, E *e)
  234. {
  235. static char clstoa[][3] = {"l", "q", "ss", "sd"};
  236. char c;
  237. int sz;
  238. Ref ref;
  239. Mem *m;
  240. Con off;
  241. switch (*s) {
  242. case '+':
  243. if (req(i->arg[1], i->to)) {
  244. ref = i->arg[0];
  245. i->arg[0] = i->arg[1];
  246. i->arg[1] = ref;
  247. }
  248. /* fall through */
  249. case '-':
  250. assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
  251. "cannot convert to 2-address");
  252. emitcopy(i->to, i->arg[0], i->cls, e);
  253. s++;
  254. break;
  255. }
  256. fputc('\t', e->f);
  257. Next:
  258. while ((c = *s++) != '%')
  259. if (!c) {
  260. fputc('\n', e->f);
  261. return;
  262. } else
  263. fputc(c, e->f);
  264. switch ((c = *s++)) {
  265. case '%':
  266. fputc('%', e->f);
  267. break;
  268. case 'k':
  269. fputs(clstoa[i->cls], e->f);
  270. break;
  271. case '0':
  272. case '1':
  273. case '=':
  274. sz = KWIDE(i->cls) ? SLong : SWord;
  275. s--;
  276. goto Ref;
  277. case 'D':
  278. case 'S':
  279. sz = SLong; /* does not matter for floats */
  280. Ref:
  281. c = *s++;
  282. ref = getarg(c, i);
  283. switch (rtype(ref)) {
  284. case RTmp:
  285. assert(isreg(ref));
  286. fprintf(e->f, "%%%s", regtoa(ref.val, sz));
  287. break;
  288. case RSlot:
  289. fprintf(e->f, "%d(%%%s)",
  290. slot(ref, e),
  291. regtoa(e->fp, SLong)
  292. );
  293. break;
  294. case RMem:
  295. Mem:
  296. m = &e->fn->mem[ref.val];
  297. if (rtype(m->base) == RSlot) {
  298. off.type = CBits;
  299. off.bits.i = slot(m->base, e);
  300. addcon(&m->offset, &off, 1);
  301. m->base = TMP(e->fp);
  302. }
  303. if (m->offset.type != CUndef)
  304. emitcon(&m->offset, e);
  305. fputc('(', e->f);
  306. if (!req(m->base, R))
  307. fprintf(e->f, "%%%s",
  308. regtoa(m->base.val, SLong)
  309. );
  310. else if (m->offset.type == CAddr)
  311. fprintf(e->f, "%%rip");
  312. if (!req(m->index, R))
  313. fprintf(e->f, ", %%%s, %d",
  314. regtoa(m->index.val, SLong),
  315. m->scale
  316. );
  317. fputc(')', e->f);
  318. break;
  319. case RCon:
  320. fputc('$', e->f);
  321. emitcon(&e->fn->con[ref.val], e);
  322. break;
  323. default:
  324. die("unreachable");
  325. }
  326. break;
  327. case 'L':
  328. sz = SLong;
  329. goto Ref;
  330. case 'W':
  331. sz = SWord;
  332. goto Ref;
  333. case 'H':
  334. sz = SShort;
  335. goto Ref;
  336. case 'B':
  337. sz = SByte;
  338. goto Ref;
  339. case 'M':
  340. c = *s++;
  341. ref = getarg(c, i);
  342. switch (rtype(ref)) {
  343. case RMem:
  344. goto Mem;
  345. case RSlot:
  346. fprintf(e->f, "%d(%%%s)",
  347. slot(ref, e),
  348. regtoa(e->fp, SLong)
  349. );
  350. break;
  351. case RCon:
  352. off = e->fn->con[ref.val];
  353. emitcon(&off, e);
  354. if (off.type == CAddr)
  355. if (off.sym.type != SThr || T.apple)
  356. fprintf(e->f, "(%%rip)");
  357. break;
  358. case RTmp:
  359. assert(isreg(ref));
  360. fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
  361. break;
  362. default:
  363. die("unreachable");
  364. }
  365. break;
  366. default:
  367. die("invalid format specifier %%%c", c);
  368. }
  369. goto Next;
  370. }
  371. static bits negmask[4] = {
  372. [Ks] = 0x80000000,
  373. [Kd] = 0x8000000000000000,
  374. };
  375. static void
  376. emitins(Ins i, E *e)
  377. {
  378. Ref r;
  379. int64_t val;
  380. int o, t0;
  381. Ins ineg;
  382. Con *con;
  383. char *sym;
  384. switch (i.op) {
  385. default:
  386. if (isxsel(i.op))
  387. goto case_Oxsel;
  388. Table:
  389. /* most instructions are just pulled out of
  390. * the table omap[], some special cases are
  391. * detailed below */
  392. for (o=0;; o++) {
  393. /* this linear search should really be a binary
  394. * search */
  395. if (omap[o].op == NOp)
  396. die("no match for %s(%c)",
  397. optab[i.op].name, "wlsd"[i.cls]);
  398. if (omap[o].op == i.op)
  399. if (omap[o].cls == i.cls
  400. || (omap[o].cls == Ki && KBASE(i.cls) == 0)
  401. || (omap[o].cls == Ka))
  402. break;
  403. }
  404. emitf(omap[o].fmt, &i, e);
  405. break;
  406. case Onop:
  407. /* just do nothing for nops, they are inserted
  408. * by some passes */
  409. break;
  410. case Omul:
  411. /* here, we try to use the 3-addresss form
  412. * of multiplication when possible */
  413. if (rtype(i.arg[1]) == RCon) {
  414. r = i.arg[0];
  415. i.arg[0] = i.arg[1];
  416. i.arg[1] = r;
  417. }
  418. if (KBASE(i.cls) == 0 /* only available for ints */
  419. && rtype(i.arg[0]) == RCon
  420. && rtype(i.arg[1]) == RTmp) {
  421. emitf("imul%k %0, %1, %=", &i, e);
  422. break;
  423. }
  424. goto Table;
  425. case Osub:
  426. /* we have to use the negation trick to handle
  427. * some 3-address subtractions */
  428. if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
  429. ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
  430. emitins(ineg, e);
  431. emitf("add%k %0, %=", &i, e);
  432. break;
  433. }
  434. goto Table;
  435. case Oneg:
  436. if (!req(i.to, i.arg[0]))
  437. emitf("mov%k %0, %=", &i, e);
  438. if (KBASE(i.cls) == 0)
  439. emitf("neg%k %=", &i, e);
  440. else
  441. fprintf(e->f,
  442. "\txorp%c %sfp%d(%%rip), %%%s\n",
  443. "xxsd"[i.cls],
  444. T.asloc,
  445. stashbits(negmask[i.cls], 16),
  446. regtoa(i.to.val, SLong)
  447. );
  448. break;
  449. case Odiv:
  450. /* use xmm15 to adjust the instruction when the
  451. * conversion to 2-address in emitf() would fail */
  452. if (req(i.to, i.arg[1])) {
  453. i.arg[1] = TMP(XMM0+15);
  454. emitf("mov%k %=, %1", &i, e);
  455. emitf("mov%k %0, %=", &i, e);
  456. i.arg[0] = i.to;
  457. }
  458. goto Table;
  459. case Ocopy:
  460. /* copies are used for many things; see my note
  461. * to understand how to load big constants:
  462. * https://c9x.me/notes/2015-09-19.html */
  463. assert(rtype(i.to) != RMem);
  464. if (req(i.to, R) || req(i.arg[0], R))
  465. break;
  466. if (req(i.to, i.arg[0]))
  467. break;
  468. t0 = rtype(i.arg[0]);
  469. if (i.cls == Kl
  470. && t0 == RCon
  471. && e->fn->con[i.arg[0].val].type == CBits) {
  472. val = e->fn->con[i.arg[0].val].bits.i;
  473. if (isreg(i.to))
  474. if (val >= 0 && val <= UINT32_MAX) {
  475. emitf("movl %W0, %W=", &i, e);
  476. break;
  477. }
  478. if (rtype(i.to) == RSlot)
  479. if (val < INT32_MIN || val > INT32_MAX) {
  480. emitf("movl %0, %=", &i, e);
  481. emitf("movl %0>>32, 4+%=", &i, e);
  482. break;
  483. }
  484. }
  485. if (isreg(i.to)
  486. && t0 == RCon
  487. && e->fn->con[i.arg[0].val].type == CAddr) {
  488. emitf("lea%k %M0, %=", &i, e);
  489. break;
  490. }
  491. if (rtype(i.to) == RSlot
  492. && (t0 == RSlot || t0 == RMem)) {
  493. i.cls = KWIDE(i.cls) ? Kd : Ks;
  494. i.arg[1] = TMP(XMM0+15);
  495. emitf("mov%k %0, %1", &i, e);
  496. emitf("mov%k %1, %=", &i, e);
  497. break;
  498. }
  499. /* conveniently, the assembler knows if it
  500. * should use movabsq when reading movq */
  501. emitf("mov%k %0, %=", &i, e);
  502. break;
  503. case Oaddr:
  504. if (!T.apple
  505. && rtype(i.arg[0]) == RCon
  506. && e->fn->con[i.arg[0].val].sym.type == SThr) {
  507. /* derive the symbol address from the TCB
  508. * address at offset 0 of %fs */
  509. assert(isreg(i.to));
  510. con = &e->fn->con[i.arg[0].val];
  511. sym = str(con->sym.id);
  512. emitf("movq %%fs:0, %L=", &i, e);
  513. fprintf(e->f, "\tleaq %s%s@tpoff",
  514. sym[0] == '"' ? "" : T.assym, sym);
  515. if (con->bits.i)
  516. fprintf(e->f, "%+"PRId64,
  517. con->bits.i);
  518. fprintf(e->f, "(%%%s), %%%s\n",
  519. regtoa(i.to.val, SLong),
  520. regtoa(i.to.val, SLong));
  521. break;
  522. }
  523. goto Table;
  524. case Ocall:
  525. /* calls simply have a weird syntax in AT&T
  526. * assembly... */
  527. switch (rtype(i.arg[0])) {
  528. case RCon:
  529. fprintf(e->f, "\tcallq ");
  530. emitcon(&e->fn->con[i.arg[0].val], e);
  531. fprintf(e->f, "\n");
  532. break;
  533. case RTmp:
  534. emitf("callq *%L0", &i, e);
  535. break;
  536. default:
  537. die("invalid call argument");
  538. }
  539. break;
  540. case Osalloc:
  541. /* there is no good reason why this is here
  542. * maybe we should split Osalloc in 2 different
  543. * instructions depending on the result
  544. */
  545. assert(e->fp == RBP);
  546. emitf("subq %L0, %%rsp", &i, e);
  547. if (!req(i.to, R))
  548. emitcopy(i.to, TMP(RSP), Kl, e);
  549. break;
  550. case Oswap:
  551. if (KBASE(i.cls) == 0)
  552. goto Table;
  553. /* for floats, there is no swap instruction
  554. * so we use xmm15 as a temporary
  555. */
  556. emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
  557. emitcopy(i.arg[0], i.arg[1], i.cls, e);
  558. emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
  559. break;
  560. case Odbgloc:
  561. emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
  562. break;
  563. case_Oxsel:
  564. if (req(i.to, i.arg[1]))
  565. emitf(cmov[i.op-Oxsel][0], &i, e);
  566. else {
  567. if (!req(i.to, i.arg[0]))
  568. emitf("mov %0, %=", &i, e);
  569. emitf(cmov[i.op-Oxsel][1], &i, e);
  570. }
  571. break;
  572. }
  573. }
  574. static void
  575. framesz(E *e)
  576. {
  577. uint64_t i, o, f;
  578. /* specific to NAlign == 3 */
  579. o = 0;
  580. if (!e->fn->leaf) {
  581. for (i=0, o=0; i<NCLR; i++)
  582. o ^= e->fn->reg >> amd64_sysv_rclob[i];
  583. o &= 1;
  584. }
  585. f = e->fn->slot;
  586. f = (f + 3) & -4;
  587. if (f > 0
  588. && e->fp == RSP
  589. && e->fn->salign == 4)
  590. f += 2;
  591. e->fsz = 4*f + 8*o + 176*e->fn->vararg;
  592. }
  593. void
  594. amd64_emitfn(Fn *fn, FILE *f)
  595. {
  596. static char *ctoa[] = {
  597. #define X(c, s, _) [c] = s,
  598. CMP(X)
  599. #undef X
  600. };
  601. static int id0;
  602. Blk *b, *s;
  603. Ins *i, itmp;
  604. int *r, c, o, n, lbl;
  605. uint p;
  606. E *e;
  607. e = &(E){.f = f, .fn = fn};
  608. emitfnlnk(fn->name, &fn->lnk, f);
  609. fputs("\tendbr64\n", f);
  610. if (!fn->leaf || fn->vararg || fn->dynalloc) {
  611. e->fp = RBP;
  612. fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
  613. } else
  614. e->fp = RSP;
  615. framesz(e);
  616. if (e->fsz)
  617. fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
  618. if (fn->vararg) {
  619. o = -176;
  620. for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
  621. fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
  622. for (n=0; n<8; ++n, o+=16)
  623. fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
  624. }
  625. for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
  626. if (fn->reg & BIT(*r)) {
  627. itmp.arg[0] = TMP(*r);
  628. emitf("pushq %L0", &itmp, e);
  629. e->nclob++;
  630. }
  631. for (lbl=0, b=fn->start; b; b=b->link) {
  632. if (lbl || b->npred > 1) {
  633. for (p=0; p<b->npred; p++)
  634. if (b->pred[p]->id >= b->id)
  635. break;
  636. if (p != b->npred)
  637. fprintf(f, ".p2align 4\n");
  638. fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
  639. }
  640. for (i=b->ins; i!=&b->ins[b->nins]; i++)
  641. emitins(*i, e);
  642. lbl = 1;
  643. switch (b->jmp.type) {
  644. case Jhlt:
  645. fprintf(f, "\tud2\n");
  646. break;
  647. case Jret0:
  648. if (fn->dynalloc)
  649. fprintf(f,
  650. "\tmovq %%rbp, %%rsp\n"
  651. "\tsubq $%"PRIu64", %%rsp\n",
  652. e->fsz + e->nclob * 8);
  653. for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
  654. if (fn->reg & BIT(*--r)) {
  655. itmp.arg[0] = TMP(*r);
  656. emitf("popq %L0", &itmp, e);
  657. }
  658. if (e->fp == RBP)
  659. fputs("\tleave\n", f);
  660. else if (e->fsz)
  661. fprintf(f,
  662. "\taddq $%"PRIu64", %%rsp\n",
  663. e->fsz);
  664. fputs("\tret\n", f);
  665. break;
  666. case Jjmp:
  667. Jmp:
  668. if (b->s1 != b->link)
  669. fprintf(f, "\tjmp %sbb%d\n",
  670. T.asloc, id0+b->s1->id);
  671. else
  672. lbl = 0;
  673. break;
  674. default:
  675. c = b->jmp.type - Jjf;
  676. if (0 <= c && c <= NCmp) {
  677. if (b->link == b->s2) {
  678. s = b->s1;
  679. b->s1 = b->s2;
  680. b->s2 = s;
  681. } else
  682. c = cmpneg(c);
  683. fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
  684. T.asloc, id0+b->s2->id);
  685. goto Jmp;
  686. }
  687. die("unhandled jump %d", b->jmp.type);
  688. }
  689. }
  690. id0 += fn->nblk;
  691. if (!T.apple)
  692. elf_emitfnfin(fn->name, f);
  693. }