emit.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. #include "all.h"
  2. typedef struct E E;
  3. struct E {
  4. FILE *f;
  5. Fn *fn;
  6. uint64_t frame;
  7. uint padding;
  8. };
  9. #define CMP(X) \
  10. X(Cieq, "eq") \
  11. X(Cine, "ne") \
  12. X(Cisge, "ge") \
  13. X(Cisgt, "gt") \
  14. X(Cisle, "le") \
  15. X(Cislt, "lt") \
  16. X(Ciuge, "cs") \
  17. X(Ciugt, "hi") \
  18. X(Ciule, "ls") \
  19. X(Ciult, "cc") \
  20. X(NCmpI+Cfeq, "eq") \
  21. X(NCmpI+Cfge, "ge") \
  22. X(NCmpI+Cfgt, "gt") \
  23. X(NCmpI+Cfle, "ls") \
  24. X(NCmpI+Cflt, "mi") \
  25. X(NCmpI+Cfne, "ne") \
  26. X(NCmpI+Cfo, "vc") \
  27. X(NCmpI+Cfuo, "vs")
  28. enum {
  29. Ki = -1, /* matches Kw and Kl */
  30. Ka = -2, /* matches all classes */
  31. };
  32. static struct {
  33. short op;
  34. short cls;
  35. char *fmt;
  36. } omap[] = {
  37. { Oadd, Ki, "add %=, %0, %1" },
  38. { Oadd, Ka, "fadd %=, %0, %1" },
  39. { Osub, Ki, "sub %=, %0, %1" },
  40. { Osub, Ka, "fsub %=, %0, %1" },
  41. { Oneg, Ki, "neg %=, %0" },
  42. { Oneg, Ka, "fneg %=, %0" },
  43. { Oand, Ki, "and %=, %0, %1" },
  44. { Oor, Ki, "orr %=, %0, %1" },
  45. { Oxor, Ki, "eor %=, %0, %1" },
  46. { Osar, Ki, "asr %=, %0, %1" },
  47. { Oshr, Ki, "lsr %=, %0, %1" },
  48. { Oshl, Ki, "lsl %=, %0, %1" },
  49. { Omul, Ki, "mul %=, %0, %1" },
  50. { Omul, Ka, "fmul %=, %0, %1" },
  51. { Odiv, Ki, "sdiv %=, %0, %1" },
  52. { Odiv, Ka, "fdiv %=, %0, %1" },
  53. { Oudiv, Ki, "udiv %=, %0, %1" },
  54. { Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
  55. { Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
  56. { Ocopy, Ki, "mov %=, %0" },
  57. { Ocopy, Ka, "fmov %=, %0" },
  58. { Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" },
  59. { Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" },
  60. { Ostoreb, Kw, "strb %W0, %M1" },
  61. { Ostoreh, Kw, "strh %W0, %M1" },
  62. { Ostorew, Kw, "str %W0, %M1" },
  63. { Ostorel, Kw, "str %L0, %M1" },
  64. { Ostores, Kw, "str %S0, %M1" },
  65. { Ostored, Kw, "str %D0, %M1" },
  66. { Oloadsb, Ki, "ldrsb %=, %M0" },
  67. { Oloadub, Ki, "ldrb %W=, %M0" },
  68. { Oloadsh, Ki, "ldrsh %=, %M0" },
  69. { Oloaduh, Ki, "ldrh %W=, %M0" },
  70. { Oloadsw, Kw, "ldr %=, %M0" },
  71. { Oloadsw, Kl, "ldrsw %=, %M0" },
  72. { Oloaduw, Ki, "ldr %W=, %M0" },
  73. { Oload, Ka, "ldr %=, %M0" },
  74. { Oextsb, Ki, "sxtb %=, %W0" },
  75. { Oextub, Ki, "uxtb %W=, %W0" },
  76. { Oextsh, Ki, "sxth %=, %W0" },
  77. { Oextuh, Ki, "uxth %W=, %W0" },
  78. { Oextsw, Ki, "sxtw %L=, %W0" },
  79. { Oextuw, Ki, "mov %W=, %W0" },
  80. { Oexts, Kd, "fcvt %=, %S0" },
  81. { Otruncd, Ks, "fcvt %=, %D0" },
  82. { Ocast, Kw, "fmov %=, %S0" },
  83. { Ocast, Kl, "fmov %=, %D0" },
  84. { Ocast, Ks, "fmov %=, %W0" },
  85. { Ocast, Kd, "fmov %=, %L0" },
  86. { Ostosi, Ka, "fcvtzs %=, %S0" },
  87. { Ostoui, Ka, "fcvtzu %=, %S0" },
  88. { Odtosi, Ka, "fcvtzs %=, %D0" },
  89. { Odtoui, Ka, "fcvtzu %=, %D0" },
  90. { Oswtof, Ka, "scvtf %=, %W0" },
  91. { Ouwtof, Ka, "ucvtf %=, %W0" },
  92. { Osltof, Ka, "scvtf %=, %L0" },
  93. { Oultof, Ka, "ucvtf %=, %L0" },
  94. { Ocall, Kw, "blr %L0" },
  95. { Oacmp, Ki, "cmp %0, %1" },
  96. { Oacmn, Ki, "cmn %0, %1" },
  97. { Oafcmp, Ka, "fcmpe %0, %1" },
  98. #define X(c, str) \
  99. { Oflag+c, Ki, "cset %=, " str },
  100. CMP(X)
  101. #undef X
  102. { NOp, 0, 0 }
  103. };
  104. static char *
  105. rname(int r, int k)
  106. {
  107. static char buf[4];
  108. if (r == SP) {
  109. assert(k == Kl);
  110. sprintf(buf, "sp");
  111. }
  112. else if (R0 <= r && r <= LR)
  113. switch (k) {
  114. default: die("invalid class");
  115. case Kw: sprintf(buf, "w%d", r-R0); break;
  116. case Kx:
  117. case Kl: sprintf(buf, "x%d", r-R0); break;
  118. }
  119. else if (V0 <= r && r <= V30)
  120. switch (k) {
  121. default: die("invalid class");
  122. case Ks: sprintf(buf, "s%d", r-V0); break;
  123. case Kx:
  124. case Kd: sprintf(buf, "d%d", r-V0); break;
  125. }
  126. else
  127. die("invalid register");
  128. return buf;
  129. }
  130. static uint64_t
  131. slot(Ref r, E *e)
  132. {
  133. int s;
  134. s = rsval(r);
  135. if (s == -1)
  136. return 16 + e->frame;
  137. if (s < 0) {
  138. if (e->fn->vararg && !T.apple)
  139. return 16 + e->frame + 192 - (s+2);
  140. else
  141. return 16 + e->frame - (s+2);
  142. } else
  143. return 16 + e->padding + 4 * s;
  144. }
  145. static void
  146. emitf(char *s, Ins *i, E *e)
  147. {
  148. Ref r;
  149. int k, c;
  150. Con *pc;
  151. uint n, sp;
  152. fputc('\t', e->f);
  153. sp = 0;
  154. for (;;) {
  155. k = i->cls;
  156. while ((c = *s++) != '%')
  157. if (c == ' ' && !sp) {
  158. fputc('\t', e->f);
  159. sp = 1;
  160. } else if ( !c) {
  161. fputc('\n', e->f);
  162. return;
  163. } else
  164. fputc(c, e->f);
  165. Switch:
  166. switch ((c = *s++)) {
  167. default:
  168. die("invalid escape");
  169. case 'W':
  170. k = Kw;
  171. goto Switch;
  172. case 'L':
  173. k = Kl;
  174. goto Switch;
  175. case 'S':
  176. k = Ks;
  177. goto Switch;
  178. case 'D':
  179. k = Kd;
  180. goto Switch;
  181. case '?':
  182. if (KBASE(k) == 0)
  183. fputs(rname(R18, k), e->f);
  184. else
  185. fputs(k==Ks ? "s31" : "d31", e->f);
  186. break;
  187. case '=':
  188. case '0':
  189. r = c == '=' ? i->to : i->arg[0];
  190. assert(isreg(r));
  191. fputs(rname(r.val, k), e->f);
  192. break;
  193. case '1':
  194. r = i->arg[1];
  195. switch (rtype(r)) {
  196. default:
  197. die("invalid second argument");
  198. case RTmp:
  199. assert(isreg(r));
  200. fputs(rname(r.val, k), e->f);
  201. break;
  202. case RCon:
  203. pc = &e->fn->con[r.val];
  204. n = pc->bits.i;
  205. assert(pc->type == CBits);
  206. if (n & 0xfff000)
  207. fprintf(e->f, "#%u, lsl #12", n>>12);
  208. else
  209. fprintf(e->f, "#%u", n);
  210. break;
  211. }
  212. break;
  213. case 'M':
  214. c = *s++;
  215. assert(c == '0' || c == '1' || c == '=');
  216. r = c == '=' ? i->to : i->arg[c - '0'];
  217. switch (rtype(r)) {
  218. default:
  219. die("todo (arm emit): unhandled ref");
  220. case RTmp:
  221. assert(isreg(r));
  222. fprintf(e->f, "[%s]", rname(r.val, Kl));
  223. break;
  224. case RSlot:
  225. fprintf(e->f, "[x29, %"PRIu64"]", slot(r, e));
  226. break;
  227. }
  228. break;
  229. }
  230. }
  231. }
  232. static void
  233. loadaddr(Con *c, char *rn, E *e)
  234. {
  235. char *p, *l, *s;
  236. switch (c->sym.type) {
  237. default:
  238. die("unreachable");
  239. case SGlo:
  240. if (T.apple)
  241. s = "\tadrp\tR, S@pageO\n"
  242. "\tadd\tR, R, S@pageoffO\n";
  243. else
  244. s = "\tadrp\tR, SO\n"
  245. "\tadd\tR, R, #:lo12:SO\n";
  246. break;
  247. case SThr:
  248. if (T.apple)
  249. s = "\tadrp\tR, S@tlvppage\n"
  250. "\tldr\tR, [R, S@tlvppageoff]\n";
  251. else
  252. s = "\tmrs\tR, tpidr_el0\n"
  253. "\tadd\tR, R, #:tprel_hi12:SO, lsl #12\n"
  254. "\tadd\tR, R, #:tprel_lo12_nc:SO\n";
  255. break;
  256. }
  257. l = str(c->sym.id);
  258. p = l[0] == '"' ? "" : T.assym;
  259. for (; *s; s++)
  260. switch (*s) {
  261. default:
  262. fputc(*s, e->f);
  263. break;
  264. case 'R':
  265. fputs(rn, e->f);
  266. break;
  267. case 'S':
  268. fputs(p, e->f);
  269. fputs(l, e->f);
  270. break;
  271. case 'O':
  272. if (c->bits.i)
  273. /* todo, handle large offsets */
  274. fprintf(e->f, "+%"PRIi64, c->bits.i);
  275. break;
  276. }
  277. }
  278. static void
  279. loadcon(Con *c, int r, int k, E *e)
  280. {
  281. char *rn;
  282. int64_t n;
  283. int w, sh;
  284. w = KWIDE(k);
  285. rn = rname(r, k);
  286. n = c->bits.i;
  287. if (c->type == CAddr) {
  288. loadaddr(c, rn, e);
  289. return;
  290. }
  291. assert(c->type == CBits);
  292. if (!w)
  293. n = (int32_t)n;
  294. if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
  295. fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
  296. } else {
  297. fprintf(e->f, "\tmov\t%s, #%d\n",
  298. rn, (int)(n & 0xffff));
  299. for (sh=16; n>>=16; sh+=16) {
  300. if ((!w && sh == 32) || sh == 64)
  301. break;
  302. fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n",
  303. rn, (uint)(n & 0xffff), sh);
  304. }
  305. }
  306. }
  307. static void emitins(Ins *, E *);
  308. static void
  309. fixarg(Ref *pr, int sz, E *e)
  310. {
  311. Ins *i;
  312. Ref r;
  313. uint64_t s;
  314. r = *pr;
  315. if (rtype(r) == RSlot) {
  316. s = slot(r, e);
  317. if (s > sz * 4095u) {
  318. i = &(Ins){Oaddr, Kl, TMP(IP0), {r}};
  319. emitins(i, e);
  320. *pr = TMP(IP0);
  321. }
  322. }
  323. }
  324. static void
  325. emitins(Ins *i, E *e)
  326. {
  327. char *l, *p, *rn;
  328. uint64_t s;
  329. int o;
  330. Ref r;
  331. Con *c;
  332. switch (i->op) {
  333. default:
  334. if (isload(i->op))
  335. fixarg(&i->arg[0], loadsz(i), e);
  336. if (isstore(i->op))
  337. fixarg(&i->arg[1], storesz(i), e);
  338. Table:
  339. /* most instructions are just pulled out of
  340. * the table omap[], some special cases are
  341. * detailed below */
  342. for (o=0;; o++) {
  343. /* this linear search should really be a binary
  344. * search */
  345. if (omap[o].op == NOp)
  346. die("no match for %s(%c)",
  347. optab[i->op].name, "wlsd"[i->cls]);
  348. if (omap[o].op == i->op)
  349. if (omap[o].cls == i->cls || omap[o].cls == Ka
  350. || (omap[o].cls == Ki && KBASE(i->cls) == 0))
  351. break;
  352. }
  353. emitf(omap[o].fmt, i, e);
  354. break;
  355. case Onop:
  356. break;
  357. case Ocopy:
  358. if (req(i->to, i->arg[0]))
  359. break;
  360. if (rtype(i->to) == RSlot) {
  361. r = i->to;
  362. if (!isreg(i->arg[0])) {
  363. i->to = TMP(R18);
  364. emitins(i, e);
  365. i->arg[0] = i->to;
  366. }
  367. i->op = Ostorew + i->cls;
  368. i->cls = Kw;
  369. i->arg[1] = r;
  370. emitins(i, e);
  371. break;
  372. }
  373. assert(isreg(i->to));
  374. switch (rtype(i->arg[0])) {
  375. case RCon:
  376. c = &e->fn->con[i->arg[0].val];
  377. loadcon(c, i->to.val, i->cls, e);
  378. break;
  379. case RSlot:
  380. i->op = Oload;
  381. emitins(i, e);
  382. break;
  383. default:
  384. assert(i->to.val != R18);
  385. goto Table;
  386. }
  387. break;
  388. case Oaddr:
  389. assert(rtype(i->arg[0]) == RSlot);
  390. rn = rname(i->to.val, Kl);
  391. s = slot(i->arg[0], e);
  392. if (s <= 4095)
  393. fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", rn, s);
  394. else if (s <= 65535)
  395. fprintf(e->f,
  396. "\tmov\t%s, #%"PRIu64"\n"
  397. "\tadd\t%s, x29, %s\n",
  398. rn, s, rn, rn
  399. );
  400. else
  401. fprintf(e->f,
  402. "\tmov\t%s, #%"PRIu64"\n"
  403. "\tmovk\t%s, #%"PRIu64", lsl #16\n"
  404. "\tadd\t%s, x29, %s\n",
  405. rn, s & 0xFFFF, rn, s >> 16, rn, rn
  406. );
  407. break;
  408. case Ocall:
  409. if (rtype(i->arg[0]) != RCon)
  410. goto Table;
  411. c = &e->fn->con[i->arg[0].val];
  412. if (c->type != CAddr
  413. || c->sym.type != SGlo
  414. || c->bits.i)
  415. die("invalid call argument");
  416. l = str(c->sym.id);
  417. p = l[0] == '"' ? "" : T.assym;
  418. fprintf(e->f, "\tbl\t%s%s\n", p, l);
  419. break;
  420. case Osalloc:
  421. emitf("sub sp, sp, %0", i, e);
  422. if (!req(i->to, R))
  423. emitf("mov %=, sp", i, e);
  424. break;
  425. case Odbgloc:
  426. emitdbgloc(i->arg[0].val, i->arg[1].val, e->f);
  427. break;
  428. }
  429. }
  430. static void
  431. framelayout(E *e)
  432. {
  433. int *r;
  434. uint o;
  435. uint64_t f;
  436. for (o=0, r=arm64_rclob; *r>=0; r++)
  437. o += 1 & (e->fn->reg >> *r);
  438. f = e->fn->slot;
  439. f = (f + 3) & -4;
  440. o += o & 1;
  441. e->padding = 4*(f-e->fn->slot);
  442. e->frame = 4*f + 8*o;
  443. }
/*

  Stack-frame layout:

  +=============+
  | varargs     |
  |  save area  |
  +-------------+
  | callee-save |  ^
  |  registers  |  |
  +-------------+  |
  |    ...      |  |
  | spill slots |  |
  |    ...      |  | e->frame
  +-------------+  |
  |    ...      |  |
  |   locals    |  |
  |    ...      |  |
  +-------------+  |
  | e->padding  |  v
  +-------------+
  |  saved x29  |
  |  saved x30  |
  +=============+ <- x29

*/
  467. void
  468. arm64_emitfn(Fn *fn, FILE *out)
  469. {
  470. static char *ctoa[] = {
  471. #define X(c, s) [c] = s,
  472. CMP(X)
  473. #undef X
  474. };
  475. static int id0;
  476. int s, n, c, lbl, *r;
  477. uint64_t o;
  478. Blk *b, *t;
  479. Ins *i;
  480. E *e;
  481. e = &(E){.f = out, .fn = fn};
  482. if (T.apple)
  483. e->fn->lnk.align = 4;
  484. emitfnlnk(e->fn->name, &e->fn->lnk, e->f);
  485. framelayout(e);
  486. if (e->fn->vararg && !T.apple) {
  487. for (n=7; n>=0; n--)
  488. fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
  489. for (n=7; n>=0; n-=2)
  490. fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n);
  491. }
  492. if (e->frame + 16 <= 512)
  493. fprintf(e->f,
  494. "\tstp\tx29, x30, [sp, -%"PRIu64"]!\n",
  495. e->frame + 16
  496. );
  497. else if (e->frame <= 4095)
  498. fprintf(e->f,
  499. "\tsub\tsp, sp, #%"PRIu64"\n"
  500. "\tstp\tx29, x30, [sp, -16]!\n",
  501. e->frame
  502. );
  503. else if (e->frame <= 65535)
  504. fprintf(e->f,
  505. "\tmov\tx16, #%"PRIu64"\n"
  506. "\tsub\tsp, sp, x16\n"
  507. "\tstp\tx29, x30, [sp, -16]!\n",
  508. e->frame
  509. );
  510. else
  511. fprintf(e->f,
  512. "\tmov\tx16, #%"PRIu64"\n"
  513. "\tmovk\tx16, #%"PRIu64", lsl #16\n"
  514. "\tsub\tsp, sp, x16\n"
  515. "\tstp\tx29, x30, [sp, -16]!\n",
  516. e->frame & 0xFFFF, e->frame >> 16
  517. );
  518. fputs("\tmov\tx29, sp\n", e->f);
  519. s = (e->frame - e->padding) / 4;
  520. for (r=arm64_rclob; *r>=0; r++)
  521. if (e->fn->reg & BIT(*r)) {
  522. s -= 2;
  523. i = &(Ins){.arg = {TMP(*r), SLOT(s)}};
  524. i->op = *r >= V0 ? Ostored : Ostorel;
  525. emitins(i, e);
  526. }
  527. for (lbl=0, b=e->fn->start; b; b=b->link) {
  528. if (lbl || b->npred > 1)
  529. fprintf(e->f, "%s%d:\n", T.asloc, id0+b->id);
  530. for (i=b->ins; i!=&b->ins[b->nins]; i++)
  531. emitins(i, e);
  532. lbl = 1;
  533. switch (b->jmp.type) {
  534. case Jhlt:
  535. fprintf(e->f, "\tbrk\t#1000\n");
  536. break;
  537. case Jret0:
  538. s = (e->frame - e->padding) / 4;
  539. for (r=arm64_rclob; *r>=0; r++)
  540. if (e->fn->reg & BIT(*r)) {
  541. s -= 2;
  542. i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}};
  543. i->cls = *r >= V0 ? Kd : Kl;
  544. emitins(i, e);
  545. }
  546. if (e->fn->dynalloc)
  547. fputs("\tmov sp, x29\n", e->f);
  548. o = e->frame + 16;
  549. if (e->fn->vararg && !T.apple)
  550. o += 192;
  551. if (o <= 504)
  552. fprintf(e->f,
  553. "\tldp\tx29, x30, [sp], %"PRIu64"\n",
  554. o
  555. );
  556. else if (o - 16 <= 4095)
  557. fprintf(e->f,
  558. "\tldp\tx29, x30, [sp], 16\n"
  559. "\tadd\tsp, sp, #%"PRIu64"\n",
  560. o - 16
  561. );
  562. else if (o - 16 <= 65535)
  563. fprintf(e->f,
  564. "\tldp\tx29, x30, [sp], 16\n"
  565. "\tmov\tx16, #%"PRIu64"\n"
  566. "\tadd\tsp, sp, x16\n",
  567. o - 16
  568. );
  569. else
  570. fprintf(e->f,
  571. "\tldp\tx29, x30, [sp], 16\n"
  572. "\tmov\tx16, #%"PRIu64"\n"
  573. "\tmovk\tx16, #%"PRIu64", lsl #16\n"
  574. "\tadd\tsp, sp, x16\n",
  575. (o - 16) & 0xFFFF, (o - 16) >> 16
  576. );
  577. fprintf(e->f, "\tret\n");
  578. break;
  579. case Jjmp:
  580. Jmp:
  581. if (b->s1 != b->link)
  582. fprintf(e->f,
  583. "\tb\t%s%d\n",
  584. T.asloc, id0+b->s1->id
  585. );
  586. else
  587. lbl = 0;
  588. break;
  589. default:
  590. c = b->jmp.type - Jjf;
  591. if (c < 0 || c > NCmp)
  592. die("unhandled jump %d", b->jmp.type);
  593. if (b->link == b->s2) {
  594. t = b->s1;
  595. b->s1 = b->s2;
  596. b->s2 = t;
  597. } else
  598. c = cmpneg(c);
  599. fprintf(e->f,
  600. "\tb%s\t%s%d\n",
  601. ctoa[c], T.asloc, id0+b->s2->id
  602. );
  603. goto Jmp;
  604. }
  605. }
  606. id0 += e->fn->nblk;
  607. if (!T.apple)
  608. elf_emitfnfin(fn->name, out);
  609. }