/* emit.c */
  1. #include "all.h"
  2. typedef struct E E;
  3. struct E {
  4. FILE *f;
  5. Fn *fn;
  6. int fp;
  7. uint64_t fsz;
  8. int nclob;
  9. };
  /*
   * X-macro over all comparison codes: X(code, suffix) is expanded
   * once per comparison, where suffix is the condition-code suffix
   * appended to "set" and "j" when emitting.  Integer comparisons
   * come first, floating-point ones are offset by NCmpI.
   */
  #define CMP(X)                \
      X(Ciule,       "be")      \
      X(Ciult,       "b")       \
      X(Cisle,       "le")      \
      X(Cislt,       "l")       \
      X(Cisgt,       "g")       \
      X(Cisge,       "ge")      \
      X(Ciugt,       "a")       \
      X(Ciuge,       "ae")      \
      X(Cieq,        "z")       \
      X(Cine,        "nz")      \
      X(NCmpI+Cfle,  "be")      \
      X(NCmpI+Cflt,  "b")       \
      X(NCmpI+Cfgt,  "a")       \
      X(NCmpI+Cfge,  "ae")      \
      X(NCmpI+Cfeq,  "z")       \
      X(NCmpI+Cfne,  "nz")      \
      X(NCmpI+Cfo,   "np")      \
      X(NCmpI+Cfuo,  "p")
  enum {
      /* operand sizes; also the column index into rname[] */
      SLong  = 0,
      SWord  = 1,
      SShort = 2,
      SByte  = 3,

      /* wildcard classes usable in the cls field of omap[] */
      Ki = -1,  /* matches Kw and Kl */
      Ka = -2,  /* matches all classes */
  };
  37. /* Instruction format strings:
  38. *
  39. * if the format string starts with -, the instruction
  40. * is assumed to be 3-address and is put in 2-address
  41. * mode using an extra mov if necessary
  42. *
  43. * if the format string starts with +, the same as the
  44. * above applies, but commutativity is also assumed
  45. *
  46. * %k is used to set the class of the instruction,
  47. * it'll expand to "l", "q", "ss", "sd", depending
  48. * on the instruction class
  49. * %0 designates the first argument
  50. * %1 designates the second argument
  51. * %= designates the result
  52. *
  53. * if %k is not used, a prefix to 0, 1, or = must be
  54. * added, it can be:
  55. * M - memory reference
  56. * L - long (64 bits)
  57. * W - word (32 bits)
  58. * H - short (16 bits)
  59. * B - byte (8 bits)
  60. * S - single precision float
  61. * D - double precision float
  62. */
  63. static struct {
  64. short op;
  65. short cls;
  66. char *fmt;
  67. } omap[] = {
  68. { Oadd, Ka, "+add%k %1, %=" },
  69. { Osub, Ka, "-sub%k %1, %=" },
  70. { Oand, Ki, "+and%k %1, %=" },
  71. { Oor, Ki, "+or%k %1, %=" },
  72. { Oxor, Ki, "+xor%k %1, %=" },
  73. { Osar, Ki, "-sar%k %B1, %=" },
  74. { Oshr, Ki, "-shr%k %B1, %=" },
  75. { Oshl, Ki, "-shl%k %B1, %=" },
  76. { Omul, Ki, "+imul%k %1, %=" },
  77. { Omul, Ks, "+mulss %1, %=" },
  78. { Omul, Kd, "+mulsd %1, %=" },
  79. { Odiv, Ka, "-div%k %1, %=" },
  80. { Ostorel, Ka, "movq %L0, %M1" },
  81. { Ostorew, Ka, "movl %W0, %M1" },
  82. { Ostoreh, Ka, "movw %H0, %M1" },
  83. { Ostoreb, Ka, "movb %B0, %M1" },
  84. { Ostores, Ka, "movss %S0, %M1" },
  85. { Ostored, Ka, "movsd %D0, %M1" },
  86. { Oload, Ka, "mov%k %M0, %=" },
  87. { Oloadsw, Kl, "movslq %M0, %L=" },
  88. { Oloadsw, Kw, "movl %M0, %W=" },
  89. { Oloaduw, Ki, "movl %M0, %W=" },
  90. { Oloadsh, Ki, "movsw%k %M0, %=" },
  91. { Oloaduh, Ki, "movzw%k %M0, %=" },
  92. { Oloadsb, Ki, "movsb%k %M0, %=" },
  93. { Oloadub, Ki, "movzb%k %M0, %=" },
  94. { Oextsw, Kl, "movslq %W0, %L=" },
  95. { Oextuw, Kl, "movl %W0, %W=" },
  96. { Oextsh, Ki, "movsw%k %H0, %=" },
  97. { Oextuh, Ki, "movzw%k %H0, %=" },
  98. { Oextsb, Ki, "movsb%k %B0, %=" },
  99. { Oextub, Ki, "movzb%k %B0, %=" },
  100. { Oexts, Kd, "cvtss2sd %0, %=" },
  101. { Otruncd, Ks, "cvtsd2ss %0, %=" },
  102. { Ostosi, Ki, "cvttss2si%k %0, %=" },
  103. { Odtosi, Ki, "cvttsd2si%k %0, %=" },
  104. { Oswtof, Ka, "cvtsi2%k %W0, %=" },
  105. { Osltof, Ka, "cvtsi2%k %L0, %=" },
  106. { Ocast, Ki, "movq %D0, %L=" },
  107. { Ocast, Ka, "movq %L0, %D=" },
  108. { Oaddr, Ki, "lea%k %M0, %=" },
  109. { Oswap, Ki, "xchg%k %0, %1" },
  110. { Osign, Kl, "cqto" },
  111. { Osign, Kw, "cltd" },
  112. { Oxdiv, Ki, "div%k %0" },
  113. { Oxidiv, Ki, "idiv%k %0" },
  114. { Oxcmp, Ks, "ucomiss %S0, %S1" },
  115. { Oxcmp, Kd, "ucomisd %D0, %D1" },
  116. { Oxcmp, Ki, "cmp%k %0, %1" },
  117. { Oxtest, Ki, "test%k %0, %1" },
  118. #define X(c, s) \
  119. { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
  120. CMP(X)
  121. #undef X
  122. { NOp, 0, 0 }
  123. };
  124. static char *rname[][4] = {
  125. [RAX] = {"rax", "eax", "ax", "al"},
  126. [RBX] = {"rbx", "ebx", "bx", "bl"},
  127. [RCX] = {"rcx", "ecx", "cx", "cl"},
  128. [RDX] = {"rdx", "edx", "dx", "dl"},
  129. [RSI] = {"rsi", "esi", "si", "sil"},
  130. [RDI] = {"rdi", "edi", "di", "dil"},
  131. [RBP] = {"rbp", "ebp", "bp", "bpl"},
  132. [RSP] = {"rsp", "esp", "sp", "spl"},
  133. [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
  134. [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
  135. [R10] = {"r10", "r10d", "r10w", "r10b"},
  136. [R11] = {"r11", "r11d", "r11w", "r11b"},
  137. [R12] = {"r12", "r12d", "r12w", "r12b"},
  138. [R13] = {"r13", "r13d", "r13w", "r13b"},
  139. [R14] = {"r14", "r14d", "r14w", "r14b"},
  140. [R15] = {"r15", "r15d", "r15w", "r15b"},
  141. };
  142. static int
  143. slot(Ref r, E *e)
  144. {
  145. int s;
  146. s = rsval(r);
  147. assert(s <= e->fn->slot);
  148. /* specific to NAlign == 3 */
  149. if (s < 0) {
  150. if (e->fp == RSP)
  151. return 4*-s - 8 + e->fsz + e->nclob*8;
  152. else
  153. return 4*-s;
  154. }
  155. else if (e->fp == RSP)
  156. return 4*s + e->nclob*8;
  157. else if (e->fn->vararg)
  158. return -176 + -4 * (e->fn->slot - s);
  159. else
  160. return -4 * (e->fn->slot - s);
  161. }
  162. static void
  163. emitcon(Con *con, E *e)
  164. {
  165. char *p, *l;
  166. switch (con->type) {
  167. case CAddr:
  168. l = str(con->sym.id);
  169. p = l[0] == '"' ? "" : T.assym;
  170. if (con->sym.type == SThr) {
  171. if (T.apple)
  172. fprintf(e->f, "%s%s@TLVP", p, l);
  173. else
  174. fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
  175. } else
  176. fprintf(e->f, "%s%s", p, l);
  177. if (con->bits.i)
  178. fprintf(e->f, "%+"PRId64, con->bits.i);
  179. break;
  180. case CBits:
  181. fprintf(e->f, "%"PRId64, con->bits.i);
  182. break;
  183. default:
  184. die("unreachable");
  185. }
  186. }
  187. static char *
  188. regtoa(int reg, int sz)
  189. {
  190. static char buf[6];
  191. assert(reg <= XMM15);
  192. if (reg >= XMM0) {
  193. sprintf(buf, "xmm%d", reg-XMM0);
  194. return buf;
  195. } else
  196. return rname[reg][sz];
  197. }
  198. static Ref
  199. getarg(char c, Ins *i)
  200. {
  201. switch (c) {
  202. case '0':
  203. return i->arg[0];
  204. case '1':
  205. return i->arg[1];
  206. case '=':
  207. return i->to;
  208. default:
  209. die("invalid arg letter %c", c);
  210. }
  211. }
  212. static void emitins(Ins, E *);
  213. static void
  214. emitcopy(Ref r1, Ref r2, int k, E *e)
  215. {
  216. Ins icp;
  217. icp.op = Ocopy;
  218. icp.arg[0] = r2;
  219. icp.to = r1;
  220. icp.cls = k;
  221. emitins(icp, e);
  222. }
  223. static void
  224. emitf(char *s, Ins *i, E *e)
  225. {
  226. static char clstoa[][3] = {"l", "q", "ss", "sd"};
  227. char c;
  228. int sz;
  229. Ref ref;
  230. Mem *m;
  231. Con off;
  232. switch (*s) {
  233. case '+':
  234. if (req(i->arg[1], i->to)) {
  235. ref = i->arg[0];
  236. i->arg[0] = i->arg[1];
  237. i->arg[1] = ref;
  238. }
  239. /* fall through */
  240. case '-':
  241. assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
  242. "cannot convert to 2-address");
  243. emitcopy(i->to, i->arg[0], i->cls, e);
  244. s++;
  245. break;
  246. }
  247. fputc('\t', e->f);
  248. Next:
  249. while ((c = *s++) != '%')
  250. if (!c) {
  251. fputc('\n', e->f);
  252. return;
  253. } else
  254. fputc(c, e->f);
  255. switch ((c = *s++)) {
  256. case '%':
  257. fputc('%', e->f);
  258. break;
  259. case 'k':
  260. fputs(clstoa[i->cls], e->f);
  261. break;
  262. case '0':
  263. case '1':
  264. case '=':
  265. sz = KWIDE(i->cls) ? SLong : SWord;
  266. s--;
  267. goto Ref;
  268. case 'D':
  269. case 'S':
  270. sz = SLong; /* does not matter for floats */
  271. Ref:
  272. c = *s++;
  273. ref = getarg(c, i);
  274. switch (rtype(ref)) {
  275. case RTmp:
  276. assert(isreg(ref));
  277. fprintf(e->f, "%%%s", regtoa(ref.val, sz));
  278. break;
  279. case RSlot:
  280. fprintf(e->f, "%d(%%%s)",
  281. slot(ref, e),
  282. regtoa(e->fp, SLong)
  283. );
  284. break;
  285. case RMem:
  286. Mem:
  287. m = &e->fn->mem[ref.val];
  288. if (rtype(m->base) == RSlot) {
  289. off.type = CBits;
  290. off.bits.i = slot(m->base, e);
  291. addcon(&m->offset, &off, 1);
  292. m->base = TMP(e->fp);
  293. }
  294. if (m->offset.type != CUndef)
  295. emitcon(&m->offset, e);
  296. fputc('(', e->f);
  297. if (!req(m->base, R))
  298. fprintf(e->f, "%%%s",
  299. regtoa(m->base.val, SLong)
  300. );
  301. else if (m->offset.type == CAddr)
  302. fprintf(e->f, "%%rip");
  303. if (!req(m->index, R))
  304. fprintf(e->f, ", %%%s, %d",
  305. regtoa(m->index.val, SLong),
  306. m->scale
  307. );
  308. fputc(')', e->f);
  309. break;
  310. case RCon:
  311. fputc('$', e->f);
  312. emitcon(&e->fn->con[ref.val], e);
  313. break;
  314. default:
  315. die("unreachable");
  316. }
  317. break;
  318. case 'L':
  319. sz = SLong;
  320. goto Ref;
  321. case 'W':
  322. sz = SWord;
  323. goto Ref;
  324. case 'H':
  325. sz = SShort;
  326. goto Ref;
  327. case 'B':
  328. sz = SByte;
  329. goto Ref;
  330. case 'M':
  331. c = *s++;
  332. ref = getarg(c, i);
  333. switch (rtype(ref)) {
  334. case RMem:
  335. goto Mem;
  336. case RSlot:
  337. fprintf(e->f, "%d(%%%s)",
  338. slot(ref, e),
  339. regtoa(e->fp, SLong)
  340. );
  341. break;
  342. case RCon:
  343. off = e->fn->con[ref.val];
  344. emitcon(&off, e);
  345. if (off.type == CAddr)
  346. if (off.sym.type != SThr || T.apple)
  347. fprintf(e->f, "(%%rip)");
  348. break;
  349. case RTmp:
  350. assert(isreg(ref));
  351. fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
  352. break;
  353. default:
  354. die("unreachable");
  355. }
  356. break;
  357. default:
  358. die("invalid format specifier %%%c", c);
  359. }
  360. goto Next;
  361. }
  362. static void *negmask[4] = {
  363. [Ks] = (uint32_t[4]){ 0x80000000 },
  364. [Kd] = (uint64_t[2]){ 0x8000000000000000 },
  365. };
  366. static void
  367. emitins(Ins i, E *e)
  368. {
  369. Ref r;
  370. int64_t val;
  371. int o, t0;
  372. Ins ineg;
  373. Con *con;
  374. char *sym;
  375. switch (i.op) {
  376. default:
  377. Table:
  378. /* most instructions are just pulled out of
  379. * the table omap[], some special cases are
  380. * detailed below */
  381. for (o=0;; o++) {
  382. /* this linear search should really be a binary
  383. * search */
  384. if (omap[o].op == NOp)
  385. die("no match for %s(%c)",
  386. optab[i.op].name, "wlsd"[i.cls]);
  387. if (omap[o].op == i.op)
  388. if (omap[o].cls == i.cls
  389. || (omap[o].cls == Ki && KBASE(i.cls) == 0)
  390. || (omap[o].cls == Ka))
  391. break;
  392. }
  393. emitf(omap[o].fmt, &i, e);
  394. break;
  395. case Onop:
  396. /* just do nothing for nops, they are inserted
  397. * by some passes */
  398. break;
  399. case Omul:
  400. /* here, we try to use the 3-addresss form
  401. * of multiplication when possible */
  402. if (rtype(i.arg[1]) == RCon) {
  403. r = i.arg[0];
  404. i.arg[0] = i.arg[1];
  405. i.arg[1] = r;
  406. }
  407. if (KBASE(i.cls) == 0 /* only available for ints */
  408. && rtype(i.arg[0]) == RCon
  409. && rtype(i.arg[1]) == RTmp) {
  410. emitf("imul%k %0, %1, %=", &i, e);
  411. break;
  412. }
  413. goto Table;
  414. case Osub:
  415. /* we have to use the negation trick to handle
  416. * some 3-address subtractions */
  417. if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
  418. ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
  419. emitins(ineg, e);
  420. emitf("add%k %0, %=", &i, e);
  421. break;
  422. }
  423. goto Table;
  424. case Oneg:
  425. if (!req(i.to, i.arg[0]))
  426. emitf("mov%k %0, %=", &i, e);
  427. if (KBASE(i.cls) == 0)
  428. emitf("neg%k %=", &i, e);
  429. else
  430. fprintf(e->f,
  431. "\txorp%c %sfp%d(%%rip), %%%s\n",
  432. "xxsd"[i.cls],
  433. T.asloc,
  434. stashbits(negmask[i.cls], 16),
  435. regtoa(i.to.val, SLong)
  436. );
  437. break;
  438. case Odiv:
  439. /* use xmm15 to adjust the instruction when the
  440. * conversion to 2-address in emitf() would fail */
  441. if (req(i.to, i.arg[1])) {
  442. i.arg[1] = TMP(XMM0+15);
  443. emitf("mov%k %=, %1", &i, e);
  444. emitf("mov%k %0, %=", &i, e);
  445. i.arg[0] = i.to;
  446. }
  447. goto Table;
  448. case Ocopy:
  449. /* copies are used for many things; see my note
  450. * to understand how to load big constants:
  451. * https://c9x.me/notes/2015-09-19.html */
  452. assert(rtype(i.to) != RMem);
  453. if (req(i.to, R) || req(i.arg[0], R))
  454. break;
  455. if (req(i.to, i.arg[0]))
  456. break;
  457. t0 = rtype(i.arg[0]);
  458. if (i.cls == Kl
  459. && t0 == RCon
  460. && e->fn->con[i.arg[0].val].type == CBits) {
  461. val = e->fn->con[i.arg[0].val].bits.i;
  462. if (isreg(i.to))
  463. if (val >= 0 && val <= UINT32_MAX) {
  464. emitf("movl %W0, %W=", &i, e);
  465. break;
  466. }
  467. if (rtype(i.to) == RSlot)
  468. if (val < INT32_MIN || val > INT32_MAX) {
  469. emitf("movl %0, %=", &i, e);
  470. emitf("movl %0>>32, 4+%=", &i, e);
  471. break;
  472. }
  473. }
  474. if (isreg(i.to)
  475. && t0 == RCon
  476. && e->fn->con[i.arg[0].val].type == CAddr) {
  477. emitf("lea%k %M0, %=", &i, e);
  478. break;
  479. }
  480. if (rtype(i.to) == RSlot
  481. && (t0 == RSlot || t0 == RMem)) {
  482. i.cls = KWIDE(i.cls) ? Kd : Ks;
  483. i.arg[1] = TMP(XMM0+15);
  484. emitf("mov%k %0, %1", &i, e);
  485. emitf("mov%k %1, %=", &i, e);
  486. break;
  487. }
  488. /* conveniently, the assembler knows if it
  489. * should use movabsq when reading movq */
  490. emitf("mov%k %0, %=", &i, e);
  491. break;
  492. case Oaddr:
  493. if (!T.apple
  494. && rtype(i.arg[0]) == RCon
  495. && e->fn->con[i.arg[0].val].sym.type == SThr) {
  496. /* derive the symbol address from the TCB
  497. * address at offset 0 of %fs */
  498. assert(isreg(i.to));
  499. con = &e->fn->con[i.arg[0].val];
  500. sym = str(con->sym.id);
  501. emitf("movq %%fs:0, %L=", &i, e);
  502. fprintf(e->f, "\tleaq %s%s@tpoff",
  503. sym[0] == '"' ? "" : T.assym, sym);
  504. if (con->bits.i)
  505. fprintf(e->f, "%+"PRId64,
  506. con->bits.i);
  507. fprintf(e->f, "(%%%s), %%%s\n",
  508. regtoa(i.to.val, SLong),
  509. regtoa(i.to.val, SLong));
  510. break;
  511. }
  512. goto Table;
  513. case Ocall:
  514. /* calls simply have a weird syntax in AT&T
  515. * assembly... */
  516. switch (rtype(i.arg[0])) {
  517. case RCon:
  518. fprintf(e->f, "\tcallq ");
  519. emitcon(&e->fn->con[i.arg[0].val], e);
  520. fprintf(e->f, "\n");
  521. break;
  522. case RTmp:
  523. emitf("callq *%L0", &i, e);
  524. break;
  525. default:
  526. die("invalid call argument");
  527. }
  528. break;
  529. case Osalloc:
  530. /* there is no good reason why this is here
  531. * maybe we should split Osalloc in 2 different
  532. * instructions depending on the result
  533. */
  534. assert(e->fp == RBP);
  535. emitf("subq %L0, %%rsp", &i, e);
  536. if (!req(i.to, R))
  537. emitcopy(i.to, TMP(RSP), Kl, e);
  538. break;
  539. case Oswap:
  540. if (KBASE(i.cls) == 0)
  541. goto Table;
  542. /* for floats, there is no swap instruction
  543. * so we use xmm15 as a temporary
  544. */
  545. emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
  546. emitcopy(i.arg[0], i.arg[1], i.cls, e);
  547. emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
  548. break;
  549. case Odbgloc:
  550. emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
  551. break;
  552. }
  553. }
  554. static void
  555. framesz(E *e)
  556. {
  557. uint64_t i, o, f;
  558. /* specific to NAlign == 3 */
  559. o = 0;
  560. if (!e->fn->leaf) {
  561. for (i=0, o=0; i<NCLR; i++)
  562. o ^= e->fn->reg >> amd64_sysv_rclob[i];
  563. o &= 1;
  564. }
  565. f = e->fn->slot;
  566. f = (f + 3) & -4;
  567. if (f > 0
  568. && e->fp == RSP
  569. && e->fn->salign == 4)
  570. f += 2;
  571. e->fsz = 4*f + 8*o + 176*e->fn->vararg;
  572. }
  573. void
  574. amd64_emitfn(Fn *fn, FILE *f)
  575. {
  576. static char *ctoa[] = {
  577. #define X(c, s) [c] = s,
  578. CMP(X)
  579. #undef X
  580. };
  581. static int id0;
  582. Blk *b, *s;
  583. Ins *i, itmp;
  584. int *r, c, o, n, lbl;
  585. uint p;
  586. E *e;
  587. e = &(E){.f = f, .fn = fn};
  588. emitfnlnk(fn->name, &fn->lnk, f);
  589. fputs("\tendbr64\n", f);
  590. if (!fn->leaf || fn->vararg || fn->dynalloc) {
  591. e->fp = RBP;
  592. fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
  593. } else
  594. e->fp = RSP;
  595. framesz(e);
  596. if (e->fsz)
  597. fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
  598. if (fn->vararg) {
  599. o = -176;
  600. for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
  601. fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
  602. for (n=0; n<8; ++n, o+=16)
  603. fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
  604. }
  605. for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
  606. if (fn->reg & BIT(*r)) {
  607. itmp.arg[0] = TMP(*r);
  608. emitf("pushq %L0", &itmp, e);
  609. e->nclob++;
  610. }
  611. for (lbl=0, b=fn->start; b; b=b->link) {
  612. if (lbl || b->npred > 1) {
  613. for (p=0; p<b->npred; p++)
  614. if (b->pred[p]->id >= b->id)
  615. break;
  616. if (p != b->npred)
  617. fprintf(f, ".p2align 4\n");
  618. fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
  619. }
  620. for (i=b->ins; i!=&b->ins[b->nins]; i++)
  621. emitins(*i, e);
  622. lbl = 1;
  623. switch (b->jmp.type) {
  624. case Jhlt:
  625. fprintf(f, "\tud2\n");
  626. break;
  627. case Jret0:
  628. if (fn->dynalloc)
  629. fprintf(f,
  630. "\tmovq %%rbp, %%rsp\n"
  631. "\tsubq $%"PRIu64", %%rsp\n",
  632. e->fsz + e->nclob * 8);
  633. for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
  634. if (fn->reg & BIT(*--r)) {
  635. itmp.arg[0] = TMP(*r);
  636. emitf("popq %L0", &itmp, e);
  637. }
  638. if (e->fp == RBP)
  639. fputs("\tleave\n", f);
  640. else if (e->fsz)
  641. fprintf(f,
  642. "\taddq $%"PRIu64", %%rsp\n",
  643. e->fsz);
  644. fputs("\tret\n", f);
  645. break;
  646. case Jjmp:
  647. Jmp:
  648. if (b->s1 != b->link)
  649. fprintf(f, "\tjmp %sbb%d\n",
  650. T.asloc, id0+b->s1->id);
  651. else
  652. lbl = 0;
  653. break;
  654. default:
  655. c = b->jmp.type - Jjf;
  656. if (0 <= c && c <= NCmp) {
  657. if (b->link == b->s2) {
  658. s = b->s1;
  659. b->s1 = b->s2;
  660. b->s2 = s;
  661. } else
  662. c = cmpneg(c);
  663. fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
  664. T.asloc, id0+b->s2->id);
  665. goto Jmp;
  666. }
  667. die("unhandled jump %d", b->jmp.type);
  668. }
  669. }
  670. id0 += fn->nblk;
  671. if (!T.apple)
  672. elf_emitfnfin(fn->name, f);
  673. }