isel.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. #include "all.h"
  2. static int
  3. memarg(Ref *r, int op, Ins *i)
  4. {
  5. if (isload(op) || op == Ocall)
  6. return r == &i->arg[0];
  7. if (isstore(op))
  8. return r == &i->arg[1];
  9. return 0;
  10. }
  11. static int
  12. immarg(Ref *r, int op, Ins *i)
  13. {
  14. return rv64_op[op].imm && r == &i->arg[1];
  15. }
  16. static void
  17. fixarg(Ref *r, int k, Ins *i, Fn *fn)
  18. {
  19. char buf[32];
  20. Ref r0, r1;
  21. int s, n, op;
  22. Con *c;
  23. r0 = r1 = *r;
  24. op = i ? i->op : Ocopy;
  25. switch (rtype(r0)) {
  26. case RCon:
  27. c = &fn->con[r0.val];
  28. if (c->type == CAddr && memarg(r, op, i))
  29. break;
  30. if (c->type == CBits && immarg(r, op, i))
  31. if (-2048 <= c->bits.i && c->bits.i < 2048)
  32. break;
  33. r1 = newtmp("isel", k, fn);
  34. if (KBASE(k) == 1) {
  35. /* load floating points from memory
  36. * slots, they can't be used as
  37. * immediates
  38. */
  39. assert(c->type == CBits);
  40. n = stashbits(&c->bits, KWIDE(k) ? 8 : 4);
  41. vgrow(&fn->con, ++fn->ncon);
  42. c = &fn->con[fn->ncon-1];
  43. sprintf(buf, "\"%sfp%d\"", T.asloc, n);
  44. *c = (Con){.type = CAddr};
  45. c->sym.id = intern(buf);
  46. emit(Oload, k, r1, CON(c-fn->con), R);
  47. break;
  48. }
  49. emit(Ocopy, k, r1, r0, R);
  50. break;
  51. case RTmp:
  52. if (isreg(r0))
  53. break;
  54. s = fn->tmp[r0.val].slot;
  55. if (s != -1) {
  56. /* aggregate passed by value on
  57. * stack, or fast local address,
  58. * replace with slot if we can
  59. */
  60. if (memarg(r, op, i)) {
  61. r1 = SLOT(s);
  62. break;
  63. }
  64. r1 = newtmp("isel", k, fn);
  65. emit(Oaddr, k, r1, SLOT(s), R);
  66. break;
  67. }
  68. if (k == Kw && fn->tmp[r0.val].cls == Kl) {
  69. /* TODO: this sign extension isn't needed
  70. * for 32-bit arithmetic instructions
  71. */
  72. r1 = newtmp("isel", k, fn);
  73. emit(Oextsw, Kl, r1, r0, R);
  74. } else {
  75. assert(k == fn->tmp[r0.val].cls);
  76. }
  77. break;
  78. }
  79. *r = r1;
  80. }
  81. static void
  82. negate(Ref *pr, Fn *fn)
  83. {
  84. Ref r;
  85. r = newtmp("isel", Kw, fn);
  86. emit(Oxor, Kw, *pr, r, getcon(1, fn));
  87. *pr = r;
  88. }
  89. static void
  90. selcmp(Ins i, int k, int op, Fn *fn)
  91. {
  92. Ins *icmp;
  93. Ref r, r0, r1;
  94. int sign, swap, neg;
  95. switch (op) {
  96. case Cieq:
  97. r = newtmp("isel", k, fn);
  98. emit(Oreqz, i.cls, i.to, r, R);
  99. emit(Oxor, k, r, i.arg[0], i.arg[1]);
  100. icmp = curi;
  101. fixarg(&icmp->arg[0], k, icmp, fn);
  102. fixarg(&icmp->arg[1], k, icmp, fn);
  103. return;
  104. case Cine:
  105. r = newtmp("isel", k, fn);
  106. emit(Ornez, i.cls, i.to, r, R);
  107. emit(Oxor, k, r, i.arg[0], i.arg[1]);
  108. icmp = curi;
  109. fixarg(&icmp->arg[0], k, icmp, fn);
  110. fixarg(&icmp->arg[1], k, icmp, fn);
  111. return;
  112. case Cisge: sign = 1, swap = 0, neg = 1; break;
  113. case Cisgt: sign = 1, swap = 1, neg = 0; break;
  114. case Cisle: sign = 1, swap = 1, neg = 1; break;
  115. case Cislt: sign = 1, swap = 0, neg = 0; break;
  116. case Ciuge: sign = 0, swap = 0, neg = 1; break;
  117. case Ciugt: sign = 0, swap = 1, neg = 0; break;
  118. case Ciule: sign = 0, swap = 1, neg = 1; break;
  119. case Ciult: sign = 0, swap = 0, neg = 0; break;
  120. case NCmpI+Cfeq:
  121. case NCmpI+Cfge:
  122. case NCmpI+Cfgt:
  123. case NCmpI+Cfle:
  124. case NCmpI+Cflt:
  125. swap = 0, neg = 0;
  126. break;
  127. case NCmpI+Cfuo:
  128. negate(&i.to, fn);
  129. /* fall through */
  130. case NCmpI+Cfo:
  131. r0 = newtmp("isel", i.cls, fn);
  132. r1 = newtmp("isel", i.cls, fn);
  133. emit(Oand, i.cls, i.to, r0, r1);
  134. op = KWIDE(k) ? Oceqd : Oceqs;
  135. emit(op, i.cls, r0, i.arg[0], i.arg[0]);
  136. icmp = curi;
  137. fixarg(&icmp->arg[0], k, icmp, fn);
  138. fixarg(&icmp->arg[1], k, icmp, fn);
  139. emit(op, i.cls, r1, i.arg[1], i.arg[1]);
  140. icmp = curi;
  141. fixarg(&icmp->arg[0], k, icmp, fn);
  142. fixarg(&icmp->arg[1], k, icmp, fn);
  143. return;
  144. case NCmpI+Cfne:
  145. swap = 0, neg = 1;
  146. i.op = KWIDE(k) ? Oceqd : Oceqs;
  147. break;
  148. default:
  149. assert(0 && "unknown comparison");
  150. }
  151. if (op < NCmpI)
  152. i.op = sign ? Ocsltl : Ocultl;
  153. if (swap) {
  154. r = i.arg[0];
  155. i.arg[0] = i.arg[1];
  156. i.arg[1] = r;
  157. }
  158. if (neg)
  159. negate(&i.to, fn);
  160. emiti(i);
  161. icmp = curi;
  162. fixarg(&icmp->arg[0], k, icmp, fn);
  163. fixarg(&icmp->arg[1], k, icmp, fn);
  164. }
  165. static void
  166. sel(Ins i, Fn *fn)
  167. {
  168. Ins *i0;
  169. int ck, cc;
  170. if (INRANGE(i.op, Oalloc, Oalloc1)) {
  171. i0 = curi - 1;
  172. salloc(i.to, i.arg[0], fn);
  173. fixarg(&i0->arg[0], Kl, i0, fn);
  174. return;
  175. }
  176. if (iscmp(i.op, &ck, &cc)) {
  177. selcmp(i, ck, cc, fn);
  178. return;
  179. }
  180. if (i.op != Onop) {
  181. emiti(i);
  182. i0 = curi; /* fixarg() can change curi */
  183. fixarg(&i0->arg[0], argcls(&i, 0), i0, fn);
  184. fixarg(&i0->arg[1], argcls(&i, 1), i0, fn);
  185. }
  186. }
  187. static void
  188. seljmp(Blk *b, Fn *fn)
  189. {
  190. /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */
  191. if (b->jmp.type == Jjnz)
  192. fixarg(&b->jmp.arg, Kw, 0, fn);
  193. }
  194. void
  195. rv64_isel(Fn *fn)
  196. {
  197. Blk *b, **sb;
  198. Ins *i;
  199. Phi *p;
  200. uint n;
  201. int al;
  202. int64_t sz;
  203. /* assign slots to fast allocs */
  204. b = fn->start;
  205. /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
  206. for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
  207. for (i=b->ins; i<&b->ins[b->nins]; i++)
  208. if (i->op == al) {
  209. if (rtype(i->arg[0]) != RCon)
  210. break;
  211. sz = fn->con[i->arg[0].val].bits.i;
  212. if (sz < 0 || sz >= INT_MAX-15)
  213. err("invalid alloc size %"PRId64, sz);
  214. sz = (sz + n-1) & -n;
  215. sz /= 4;
  216. if (sz > INT_MAX - fn->slot)
  217. die("alloc too large");
  218. fn->tmp[i->to.val].slot = fn->slot;
  219. fn->slot += sz;
  220. *i = (Ins){.op = Onop};
  221. }
  222. for (b=fn->start; b; b=b->link) {
  223. curi = &insb[NIns];
  224. for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
  225. for (p=(*sb)->phi; p; p=p->link) {
  226. for (n=0; p->blk[n] != b; n++)
  227. assert(n+1 < p->narg);
  228. fixarg(&p->arg[n], p->cls, 0, fn);
  229. }
  230. seljmp(b, fn);
  231. for (i=&b->ins[b->nins]; i!=b->ins;)
  232. sel(*--i, fn);
  233. idup(b, curi, &insb[NIns]-curi);
  234. }
  235. if (debug['I']) {
  236. fprintf(stderr, "\n> After instruction selection:\n");
  237. printfn(fn, stderr);
  238. }
  239. }