/* emit.c (18 KB) */


  #include "all.h"
  #include <stdio.h>
  2. typedef struct E E;
  3. struct E {
  4. FILE *f;
  5. Fn *fn;
  6. int fp;
  7. uint64_t fsz;
  8. int nclob;
  9. };
  /* X-macro listing every comparison handled by this file.
   *
   * Each entry is X(cmp, s0, s1) where s0 is the x86 condition-code
   * suffix for the comparison and s1 the suffix of the opposite
   * condition.  s0 is used to build set<cc> and j<cc>; both suffixes
   * are used to build the two cmov<cc> variants. */
  #define CMP(X) \
      X(Ciule,       "be", "a")  \
      X(Ciult,       "b",  "ae") \
      X(Cisle,       "le", "g")  \
      X(Cislt,       "l",  "ge") \
      X(Cisgt,       "g",  "le") \
      X(Cisge,       "ge", "l")  \
      X(Ciugt,       "a",  "be") \
      X(Ciuge,       "ae", "b")  \
      X(Cieq,        "z",  "nz") \
      X(Cine,        "nz", "z")  \
      X(NCmpI+Cfle,  "be", "a")  \
      X(NCmpI+Cflt,  "b",  "ae") \
      X(NCmpI+Cfgt,  "a",  "be") \
      X(NCmpI+Cfge,  "ae", "b")  \
      X(NCmpI+Cfo,   "np", "p")  \
      X(NCmpI+Cfuo,  "p",  "np")
  enum {
      /* operand sizes; they index the columns of rname[] */
      SLong = 0,
      SWord = 1,
      SShort = 2,
      SByte = 3,

      /* wildcard classes for the cls field of omap[] entries */
      Ki = -1, /* matches Kw and Kl */
      Ka = -2, /* matches all classes */
  };
  35. /* Instruction format strings:
  36. *
  37. * if the format string starts with -, the instruction
  38. * is assumed to be 3-address and is put in 2-address
  39. * mode using an extra mov if necessary
  40. *
  41. * if the format string starts with +, the same as the
  42. * above applies, but commutativity is also assumed
  43. *
  44. * %k is used to set the class of the instruction,
  45. * it'll expand to "l", "q", "ss", "sd", depending
  46. * on the instruction class
  47. * %0 designates the first argument
  48. * %1 designates the second argument
  49. * %= designates the result
  50. *
  51. * if %k is not used, a prefix to 0, 1, or = must be
  52. * added, it can be:
  53. * M - memory reference
  54. * L - long (64 bits)
  55. * W - word (32 bits)
  56. * H - short (16 bits)
  57. * B - byte (8 bits)
  58. * S - single precision float
  59. * D - double precision float
  60. */
  61. static struct {
  62. short op;
  63. short cls;
  64. char *fmt;
  65. } omap[] = {
  66. { Oadd, Ka, "+add%k %1, %=" },
  67. { Osub, Ka, "-sub%k %1, %=" },
  68. { Oand, Ki, "+and%k %1, %=" },
  69. { Oor, Ki, "+or%k %1, %=" },
  70. { Oxor, Ki, "+xor%k %1, %=" },
  71. { Osar, Ki, "-sar%k %B1, %=" },
  72. { Oshr, Ki, "-shr%k %B1, %=" },
  73. { Oshl, Ki, "-shl%k %B1, %=" },
  74. { Omul, Ki, "+imul%k %1, %=" },
  75. { Omul, Ks, "+mulss %1, %=" },
  76. { Omul, Kd, "+mulsd %1, %=" },
  77. { Odiv, Ka, "-div%k %1, %=" },
  78. { Ostorel, Ka, "movq %L0, %M1" },
  79. { Ostorew, Ka, "movl %W0, %M1" },
  80. { Ostoreh, Ka, "movw %H0, %M1" },
  81. { Ostoreb, Ka, "movb %B0, %M1" },
  82. { Ostores, Ka, "movss %S0, %M1" },
  83. { Ostored, Ka, "movsd %D0, %M1" },
  84. { Oload, Ka, "mov%k %M0, %=" },
  85. { Oloadsw, Kl, "movslq %M0, %L=" },
  86. { Oloadsw, Kw, "movl %M0, %W=" },
  87. { Oloaduw, Ki, "movl %M0, %W=" },
  88. { Oloadsh, Ki, "movsw%k %M0, %=" },
  89. { Oloaduh, Ki, "movzw%k %M0, %=" },
  90. { Oloadsb, Ki, "movsb%k %M0, %=" },
  91. { Oloadub, Ki, "movzb%k %M0, %=" },
  92. { Oextsw, Kl, "movslq %W0, %L=" },
  93. { Oextuw, Kl, "movl %W0, %W=" },
  94. { Oextsh, Ki, "movsw%k %H0, %=" },
  95. { Oextuh, Ki, "movzw%k %H0, %=" },
  96. { Oextsb, Ki, "movsb%k %B0, %=" },
  97. { Oextub, Ki, "movzb%k %B0, %=" },
  98. { Oexts, Kd, "cvtss2sd %0, %=" },
  99. { Otruncd, Ks, "cvtsd2ss %0, %=" },
  100. { Ostosi, Ki, "cvttss2si%k %0, %=" },
  101. { Odtosi, Ki, "cvttsd2si%k %0, %=" },
  102. { Oswtof, Ka, "cvtsi2%k %W0, %=" },
  103. { Osltof, Ka, "cvtsi2%k %L0, %=" },
  104. { Ocast, Ki, "movq %D0, %L=" },
  105. { Ocast, Ka, "movq %L0, %D=" },
  106. { Oaddr, Ki, "lea%k %M0, %=" },
  107. { Oswap, Ki, "xchg%k %0, %1" },
  108. { Osign, Kl, "cqto" },
  109. { Osign, Kw, "cltd" },
  110. { Oxdiv, Ki, "div%k %0" },
  111. { Oxidiv, Ki, "idiv%k %0" },
  112. { Oxcmp, Ks, "ucomiss %S0, %S1" },
  113. { Oxcmp, Kd, "ucomisd %D0, %D1" },
  114. { Oxcmp, Ki, "cmp%k %0, %1" },
  115. { Oxtest, Ki, "test%k %0, %1" },
  116. #define X(c, s, _) \
  117. { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
  118. CMP(X)
  119. #undef X
  120. { Oflagfeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
  121. { Oflagfne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
  122. { NOp, 0, 0 }
  123. };
  124. static char cmov[][2][16] = {
  125. #define X(c, s0, s1) \
  126. [c] = { \
  127. "cmov" s0 " %0, %=", \
  128. "cmov" s1 " %1, %=", \
  129. },
  130. CMP(X)
  131. #undef X
  132. };
  133. static char *rname[][4] = {
  134. [RAX] = {"rax", "eax", "ax", "al"},
  135. [RBX] = {"rbx", "ebx", "bx", "bl"},
  136. [RCX] = {"rcx", "ecx", "cx", "cl"},
  137. [RDX] = {"rdx", "edx", "dx", "dl"},
  138. [RSI] = {"rsi", "esi", "si", "sil"},
  139. [RDI] = {"rdi", "edi", "di", "dil"},
  140. [RBP] = {"rbp", "ebp", "bp", "bpl"},
  141. [RSP] = {"rsp", "esp", "sp", "spl"},
  142. [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
  143. [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
  144. [R10] = {"r10", "r10d", "r10w", "r10b"},
  145. [R11] = {"r11", "r11d", "r11w", "r11b"},
  146. [R12] = {"r12", "r12d", "r12w", "r12b"},
  147. [R13] = {"r13", "r13d", "r13w", "r13b"},
  148. [R14] = {"r14", "r14d", "r14w", "r14b"},
  149. [R15] = {"r15", "r15d", "r15w", "r15b"},
  150. };
  151. static int
  152. slot(Ref r, E *e)
  153. {
  154. int s;
  155. s = rsval(r);
  156. assert(s <= e->fn->slot);
  157. /* specific to NAlign == 3 */
  158. if (s < 0) {
  159. if (e->fp == RSP)
  160. return 4*-s - 8 + e->fsz + e->nclob*8;
  161. else
  162. return 4*-s;
  163. }
  164. else if (e->fp == RSP)
  165. return 4*s + e->nclob*8;
  166. else if (e->fn->vararg) {
  167. if (T.windows)
  168. return -4 * (e->fn->slot - s);
  169. else
  170. return -176 + -4 * (e->fn->slot - s);
  171. } else
  172. return -4 * (e->fn->slot - s);
  173. }
  174. static void
  175. emitcon(Con *con, E *e)
  176. {
  177. char *p, *l;
  178. switch (con->type) {
  179. case CAddr:
  180. l = str(con->sym.id);
  181. p = l[0] == '"' ? "" : T.assym;
  182. if (con->sym.type == SThr) {
  183. if (T.apple)
  184. fprintf(e->f, "%s%s@TLVP", p, l);
  185. else
  186. fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
  187. } else
  188. fprintf(e->f, "%s%s", p, l);
  189. if (con->bits.i)
  190. fprintf(e->f, "%+"PRId64, con->bits.i);
  191. break;
  192. case CBits:
  193. fprintf(e->f, "%"PRId64, con->bits.i);
  194. break;
  195. default:
  196. die("unreachable");
  197. }
  198. }
  199. static char *
  200. regtoa(int reg, int sz)
  201. {
  202. static char buf[6];
  203. assert(reg <= XMM15);
  204. if (reg >= XMM0) {
  205. sprintf(buf, "xmm%d", reg-XMM0);
  206. return buf;
  207. } else
  208. return rname[reg][sz];
  209. }
  210. static Ref
  211. getarg(char c, Ins *i)
  212. {
  213. switch (c) {
  214. case '0':
  215. return i->arg[0];
  216. case '1':
  217. return i->arg[1];
  218. case '=':
  219. return i->to;
  220. default:
  221. die("invalid arg letter %c", c);
  222. }
  223. }
  224. static void emitins(Ins, E *);
  225. static void
  226. emitcopy(Ref r1, Ref r2, int k, E *e)
  227. {
  228. Ins icp;
  229. icp.op = Ocopy;
  230. icp.arg[0] = r2;
  231. icp.to = r1;
  232. icp.cls = k;
  233. emitins(icp, e);
  234. }
  235. static void
  236. emitf(char *s, Ins *i, E *e)
  237. {
  238. static char clstoa[][3] = {"l", "q", "ss", "sd"};
  239. char c;
  240. int sz;
  241. Ref ref;
  242. Mem *m;
  243. Con off;
  244. switch (*s) {
  245. case '+':
  246. if (req(i->arg[1], i->to)) {
  247. ref = i->arg[0];
  248. i->arg[0] = i->arg[1];
  249. i->arg[1] = ref;
  250. }
  251. /* fall through */
  252. case '-':
  253. assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
  254. "cannot convert to 2-address");
  255. emitcopy(i->to, i->arg[0], i->cls, e);
  256. s++;
  257. break;
  258. }
  259. fputc('\t', e->f);
  260. Next:
  261. while ((c = *s++) != '%')
  262. if (!c) {
  263. fputc('\n', e->f);
  264. return;
  265. } else
  266. fputc(c, e->f);
  267. switch ((c = *s++)) {
  268. case '%':
  269. fputc('%', e->f);
  270. break;
  271. case 'k':
  272. fputs(clstoa[i->cls], e->f);
  273. break;
  274. case '0':
  275. case '1':
  276. case '=':
  277. sz = KWIDE(i->cls) ? SLong : SWord;
  278. s--;
  279. goto Ref;
  280. case 'D':
  281. case 'S':
  282. sz = SLong; /* does not matter for floats */
  283. Ref:
  284. c = *s++;
  285. ref = getarg(c, i);
  286. switch (rtype(ref)) {
  287. case RTmp:
  288. assert(isreg(ref));
  289. fprintf(e->f, "%%%s", regtoa(ref.val, sz));
  290. break;
  291. case RSlot:
  292. fprintf(e->f, "%d(%%%s)",
  293. slot(ref, e),
  294. regtoa(e->fp, SLong)
  295. );
  296. break;
  297. case RMem:
  298. Mem:
  299. m = &e->fn->mem[ref.val];
  300. if (rtype(m->base) == RSlot) {
  301. off.type = CBits;
  302. off.bits.i = slot(m->base, e);
  303. addcon(&m->offset, &off, 1);
  304. m->base = TMP(e->fp);
  305. }
  306. if (m->offset.type != CUndef)
  307. emitcon(&m->offset, e);
  308. fputc('(', e->f);
  309. if (!req(m->base, R))
  310. fprintf(e->f, "%%%s",
  311. regtoa(m->base.val, SLong)
  312. );
  313. else if (m->offset.type == CAddr)
  314. fprintf(e->f, "%%rip");
  315. if (!req(m->index, R))
  316. fprintf(e->f, ", %%%s, %d",
  317. regtoa(m->index.val, SLong),
  318. m->scale
  319. );
  320. fputc(')', e->f);
  321. break;
  322. case RCon:
  323. fputc('$', e->f);
  324. emitcon(&e->fn->con[ref.val], e);
  325. break;
  326. default:
  327. die("unreachable");
  328. }
  329. break;
  330. case 'L':
  331. sz = SLong;
  332. goto Ref;
  333. case 'W':
  334. sz = SWord;
  335. goto Ref;
  336. case 'H':
  337. sz = SShort;
  338. goto Ref;
  339. case 'B':
  340. sz = SByte;
  341. goto Ref;
  342. case 'M':
  343. c = *s++;
  344. ref = getarg(c, i);
  345. switch (rtype(ref)) {
  346. case RMem:
  347. goto Mem;
  348. case RSlot:
  349. fprintf(e->f, "%d(%%%s)",
  350. slot(ref, e),
  351. regtoa(e->fp, SLong)
  352. );
  353. break;
  354. case RCon:
  355. off = e->fn->con[ref.val];
  356. emitcon(&off, e);
  357. if (off.type == CAddr)
  358. if (off.sym.type != SThr || T.apple)
  359. fprintf(e->f, "(%%rip)");
  360. break;
  361. case RTmp:
  362. assert(isreg(ref));
  363. fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
  364. break;
  365. default:
  366. die("unreachable");
  367. }
  368. break;
  369. default:
  370. die("invalid format specifier %%%c", c);
  371. }
  372. goto Next;
  373. }
  374. static bits negmask[4] = {
  375. [Ks] = 0x80000000,
  376. [Kd] = 0x8000000000000000,
  377. };
  378. static void
  379. emitins(Ins i, E *e)
  380. {
  381. Ref r;
  382. int64_t val;
  383. int o, t0;
  384. Ins ineg;
  385. Con *con;
  386. char *sym;
  387. switch (i.op) {
  388. default:
  389. if (isxsel(i.op))
  390. goto case_Oxsel;
  391. Table:
  392. /* most instructions are just pulled out of
  393. * the table omap[], some special cases are
  394. * detailed below */
  395. for (o=0;; o++) {
  396. /* this linear search should really be a binary
  397. * search */
  398. if (omap[o].op == NOp)
  399. die("no match for %s(%c)",
  400. optab[i.op].name, "wlsd"[i.cls]);
  401. if (omap[o].op == i.op)
  402. if (omap[o].cls == i.cls
  403. || (omap[o].cls == Ki && KBASE(i.cls) == 0)
  404. || (omap[o].cls == Ka))
  405. break;
  406. }
  407. emitf(omap[o].fmt, &i, e);
  408. break;
  409. case Onop:
  410. /* just do nothing for nops, they are inserted
  411. * by some passes */
  412. break;
  413. case Omul:
  414. /* here, we try to use the 3-addresss form
  415. * of multiplication when possible */
  416. if (rtype(i.arg[1]) == RCon) {
  417. r = i.arg[0];
  418. i.arg[0] = i.arg[1];
  419. i.arg[1] = r;
  420. }
  421. if (KBASE(i.cls) == 0 /* only available for ints */
  422. && rtype(i.arg[0]) == RCon
  423. && rtype(i.arg[1]) == RTmp) {
  424. emitf("imul%k %0, %1, %=", &i, e);
  425. break;
  426. }
  427. goto Table;
  428. case Osub:
  429. /* we have to use the negation trick to handle
  430. * some 3-address subtractions */
  431. if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
  432. ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
  433. emitins(ineg, e);
  434. emitf("add%k %0, %=", &i, e);
  435. break;
  436. }
  437. goto Table;
  438. case Oneg:
  439. if (!req(i.to, i.arg[0]))
  440. emitf("mov%k %0, %=", &i, e);
  441. if (KBASE(i.cls) == 0)
  442. emitf("neg%k %=", &i, e);
  443. else
  444. fprintf(e->f,
  445. "\txorp%c %sfp%d(%%rip), %%%s\n",
  446. "xxsd"[i.cls],
  447. T.asloc,
  448. stashbits(negmask[i.cls], 16),
  449. regtoa(i.to.val, SLong)
  450. );
  451. break;
  452. case Odiv:
  453. /* use xmm15 to adjust the instruction when the
  454. * conversion to 2-address in emitf() would fail */
  455. if (req(i.to, i.arg[1])) {
  456. i.arg[1] = TMP(XMM0+15);
  457. emitf("mov%k %=, %1", &i, e);
  458. emitf("mov%k %0, %=", &i, e);
  459. i.arg[0] = i.to;
  460. }
  461. goto Table;
  462. case Ocopy:
  463. /* copies are used for many things; see my note
  464. * to understand how to load big constants:
  465. * https://c9x.me/notes/2015-09-19.html */
  466. assert(rtype(i.to) != RMem);
  467. if (req(i.to, R) || req(i.arg[0], R))
  468. break;
  469. if (req(i.to, i.arg[0]))
  470. break;
  471. t0 = rtype(i.arg[0]);
  472. if (i.cls == Kl
  473. && t0 == RCon
  474. && e->fn->con[i.arg[0].val].type == CBits) {
  475. val = e->fn->con[i.arg[0].val].bits.i;
  476. if (isreg(i.to))
  477. if (val >= 0 && val <= UINT32_MAX) {
  478. emitf("movl %W0, %W=", &i, e);
  479. break;
  480. }
  481. if (rtype(i.to) == RSlot)
  482. if (val < INT32_MIN || val > INT32_MAX) {
  483. emitf("movl %0, %=", &i, e);
  484. emitf("movl %0>>32, 4+%=", &i, e);
  485. break;
  486. }
  487. }
  488. if (isreg(i.to)
  489. && t0 == RCon
  490. && e->fn->con[i.arg[0].val].type == CAddr) {
  491. emitf("lea%k %M0, %=", &i, e);
  492. break;
  493. }
  494. if (rtype(i.to) == RSlot
  495. && (t0 == RSlot || t0 == RMem)) {
  496. i.cls = KWIDE(i.cls) ? Kd : Ks;
  497. i.arg[1] = TMP(XMM0+15);
  498. emitf("mov%k %0, %1", &i, e);
  499. emitf("mov%k %1, %=", &i, e);
  500. break;
  501. }
  502. /* conveniently, the assembler knows if it
  503. * should use movabsq when reading movq */
  504. emitf("mov%k %0, %=", &i, e);
  505. break;
  506. case Oaddr:
  507. if (!T.apple
  508. && rtype(i.arg[0]) == RCon
  509. && e->fn->con[i.arg[0].val].sym.type == SThr) {
  510. /* derive the symbol address from the TCB
  511. * address at offset 0 of %fs */
  512. assert(isreg(i.to));
  513. con = &e->fn->con[i.arg[0].val];
  514. sym = str(con->sym.id);
  515. emitf("movq %%fs:0, %L=", &i, e);
  516. fprintf(e->f, "\tleaq %s%s@tpoff",
  517. sym[0] == '"' ? "" : T.assym, sym);
  518. if (con->bits.i)
  519. fprintf(e->f, "%+"PRId64,
  520. con->bits.i);
  521. fprintf(e->f, "(%%%s), %%%s\n",
  522. regtoa(i.to.val, SLong),
  523. regtoa(i.to.val, SLong));
  524. break;
  525. }
  526. goto Table;
  527. case Ocall:
  528. /* calls simply have a weird syntax in AT&T
  529. * assembly... */
  530. switch (rtype(i.arg[0])) {
  531. case RCon:
  532. fprintf(e->f, "\tcallq ");
  533. emitcon(&e->fn->con[i.arg[0].val], e);
  534. fprintf(e->f, "\n");
  535. break;
  536. case RTmp:
  537. emitf("callq *%L0", &i, e);
  538. break;
  539. default:
  540. die("invalid call argument");
  541. }
  542. break;
  543. case Osalloc:
  544. /* there is no good reason why this is here
  545. * maybe we should split Osalloc in 2 different
  546. * instructions depending on the result
  547. */
  548. assert(e->fp == RBP);
  549. emitf("subq %L0, %%rsp", &i, e);
  550. if (!req(i.to, R))
  551. emitcopy(i.to, TMP(RSP), Kl, e);
  552. break;
  553. case Oswap:
  554. if (KBASE(i.cls) == 0)
  555. goto Table;
  556. /* for floats, there is no swap instruction
  557. * so we use xmm15 as a temporary
  558. */
  559. emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
  560. emitcopy(i.arg[0], i.arg[1], i.cls, e);
  561. emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
  562. break;
  563. case Odbgloc:
  564. emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
  565. break;
  566. case_Oxsel:
  567. if (req(i.to, i.arg[1]))
  568. emitf(cmov[i.op-Oxsel][0], &i, e);
  569. else {
  570. if (!req(i.to, i.arg[0]))
  571. emitf("mov %0, %=", &i, e);
  572. emitf(cmov[i.op-Oxsel][1], &i, e);
  573. }
  574. break;
  575. }
  576. }
  577. static void
  578. sysv_framesz(E *e)
  579. {
  580. uint64_t i, o, f;
  581. /* specific to NAlign == 3 */
  582. o = 0;
  583. if (!e->fn->leaf) {
  584. for (i=0, o=0; i<NCLR_SYSV; i++)
  585. o ^= e->fn->reg >> amd64_sysv_rclob[i];
  586. o &= 1;
  587. }
  588. f = e->fn->slot;
  589. f = (f + 3) & -4;
  590. if (f > 0
  591. && e->fp == RSP
  592. && e->fn->salign == 4)
  593. f += 2;
  594. e->fsz = 4*f + 8*o + 176*e->fn->vararg;
  595. }
  596. void
  597. amd64_sysv_emitfn(Fn *fn, FILE *f)
  598. {
  599. static char *ctoa[] = {
  600. #define X(c, s, _) [c] = s,
  601. CMP(X)
  602. #undef X
  603. };
  604. static int id0;
  605. Blk *b, *s;
  606. Ins *i, itmp;
  607. int *r, c, o, n, lbl;
  608. uint p;
  609. E *e;
  610. e = &(E){.f = f, .fn = fn};
  611. emitfnlnk(fn->name, &fn->lnk, f);
  612. fputs("\tendbr64\n", f);
  613. if (!fn->leaf || fn->vararg || fn->dynalloc) {
  614. e->fp = RBP;
  615. fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
  616. } else
  617. e->fp = RSP;
  618. sysv_framesz(e);
  619. if (e->fsz)
  620. fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
  621. if (fn->vararg) {
  622. o = -176;
  623. for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
  624. fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
  625. for (n=0; n<8; ++n, o+=16)
  626. fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
  627. }
  628. for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR_SYSV]; r++)
  629. if (fn->reg & BIT(*r)) {
  630. itmp.arg[0] = TMP(*r);
  631. emitf("pushq %L0", &itmp, e);
  632. e->nclob++;
  633. }
  634. for (lbl=0, b=fn->start; b; b=b->link) {
  635. if (lbl || b->npred > 1) {
  636. for (p=0; p<b->npred; p++)
  637. if (b->pred[p]->id >= b->id)
  638. break;
  639. if (p != b->npred)
  640. fprintf(f, ".p2align 4\n");
  641. fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
  642. }
  643. for (i=b->ins; i!=&b->ins[b->nins]; i++)
  644. emitins(*i, e);
  645. lbl = 1;
  646. switch (b->jmp.type) {
  647. case Jhlt:
  648. fprintf(f, "\tud2\n");
  649. break;
  650. case Jret0:
  651. if (fn->dynalloc)
  652. fprintf(f,
  653. "\tmovq %%rbp, %%rsp\n"
  654. "\tsubq $%"PRIu64", %%rsp\n",
  655. e->fsz + e->nclob * 8);
  656. for (r=&amd64_sysv_rclob[NCLR_SYSV]; r>amd64_sysv_rclob;)
  657. if (fn->reg & BIT(*--r)) {
  658. itmp.arg[0] = TMP(*r);
  659. emitf("popq %L0", &itmp, e);
  660. }
  661. if (e->fp == RBP)
  662. fputs("\tleave\n", f);
  663. else if (e->fsz)
  664. fprintf(f,
  665. "\taddq $%"PRIu64", %%rsp\n",
  666. e->fsz);
  667. fputs("\tret\n", f);
  668. break;
  669. case Jjmp:
  670. Jmp:
  671. if (b->s1 != b->link)
  672. fprintf(f, "\tjmp %sbb%d\n",
  673. T.asloc, id0+b->s1->id);
  674. else
  675. lbl = 0;
  676. break;
  677. default:
  678. c = b->jmp.type - Jjf;
  679. if (0 <= c && c <= NCmp) {
  680. if (b->link == b->s2) {
  681. s = b->s1;
  682. b->s1 = b->s2;
  683. b->s2 = s;
  684. } else
  685. c = cmpneg(c);
  686. fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
  687. T.asloc, id0+b->s2->id);
  688. goto Jmp;
  689. }
  690. die("unhandled jump %d", b->jmp.type);
  691. }
  692. }
  693. id0 += fn->nblk;
  694. if (!T.apple)
  695. elf_emitfnfin(fn->name, f);
  696. }
  697. static void
  698. winabi_framesz(E *e)
  699. {
  700. uint64_t i, o, f;
  701. /* specific to NAlign == 3 */
  702. o = 0;
  703. if (!e->fn->leaf) {
  704. for (i=0, o=0; i<NCLR_WIN; i++)
  705. o ^= e->fn->reg >> amd64_winabi_rclob[i];
  706. o &= 1;
  707. }
  708. f = e->fn->slot;
  709. f = (f + 3) & -4;
  710. if (f > 0
  711. && e->fp == RSP
  712. && e->fn->salign == 4)
  713. f += 2;
  714. e->fsz = 4*f + 8*o;
  715. }
  716. void
  717. amd64_winabi_emitfn(Fn *fn, FILE *f)
  718. {
  719. static char *ctoa[] = {
  720. #define X(c, s, _) [c] = s,
  721. CMP(X)
  722. #undef X
  723. };
  724. static int id0;
  725. Blk *b, *s;
  726. Ins *i, itmp;
  727. int *r, c, lbl;
  728. E *e;
  729. e = &(E){.f = f, .fn = fn};
  730. emitfnlnk(fn->name, &fn->lnk, f);
  731. fputs("\tendbr64\n", f);
  732. if (fn->vararg) {
  733. fprintf(f, "\tmovq %%rcx, 0x8(%%rsp)\n");
  734. fprintf(f, "\tmovq %%rdx, 0x10(%%rsp)\n");
  735. fprintf(f, "\tmovq %%r8, 0x18(%%rsp)\n");
  736. fprintf(f, "\tmovq %%r9, 0x20(%%rsp)\n");
  737. }
  738. if (!fn->leaf || fn->vararg || fn->dynalloc) {
  739. e->fp = RBP;
  740. fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
  741. } else
  742. e->fp = RSP;
  743. winabi_framesz(e);
  744. if (e->fsz)
  745. fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
  746. for (r=amd64_winabi_rclob; r<&amd64_winabi_rclob[NCLR_WIN]; r++)
  747. if (fn->reg & BIT(*r)) {
  748. itmp.arg[0] = TMP(*r);
  749. emitf("pushq %L0", &itmp, e);
  750. e->nclob++;
  751. }
  752. for (lbl=0, b=fn->start; b; b=b->link) {
  753. if (lbl || b->npred > 1)
  754. fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
  755. for (i=b->ins; i!=&b->ins[b->nins]; i++)
  756. emitins(*i, e);
  757. lbl = 1;
  758. switch (b->jmp.type) {
  759. case Jhlt:
  760. fprintf(f, "\tud2\n");
  761. break;
  762. case Jret0:
  763. if (fn->dynalloc)
  764. fprintf(f,
  765. "\tmovq %%rbp, %%rsp\n"
  766. "\tsubq $%"PRIu64", %%rsp\n",
  767. e->fsz + e->nclob * 8);
  768. for (r=&amd64_winabi_rclob[NCLR_WIN]; r>amd64_winabi_rclob;)
  769. if (fn->reg & BIT(*--r)) {
  770. itmp.arg[0] = TMP(*r);
  771. emitf("popq %L0", &itmp, e);
  772. }
  773. if (e->fp == RBP)
  774. fputs("\tleave\n", f);
  775. else if (e->fsz)
  776. fprintf(f,
  777. "\taddq $%"PRIu64", %%rsp\n",
  778. e->fsz);
  779. fputs("\tret\n", f);
  780. break;
  781. case Jjmp:
  782. Jmp:
  783. if (b->s1 != b->link)
  784. fprintf(f, "\tjmp %sbb%d\n",
  785. T.asloc, id0+b->s1->id);
  786. else
  787. lbl = 0;
  788. break;
  789. default:
  790. c = b->jmp.type - Jjf;
  791. if (0 <= c && c <= NCmp) {
  792. if (b->link == b->s2) {
  793. s = b->s1;
  794. b->s1 = b->s2;
  795. b->s2 = s;
  796. } else
  797. c = cmpneg(c);
  798. fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
  799. T.asloc, id0+b->s2->id);
  800. goto Jmp;
  801. }
  802. die("unhandled jump %d", b->jmp.type);
  803. }
  804. }
  805. id0 += fn->nblk;
  806. }