isel.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. #include "all.h"
  2. static int
  3. memarg(Ref *r, int op, Ins *i)
  4. {
  5. if (isload(op) || op == Ocall)
  6. return r == &i->arg[0];
  7. if (isstore(op))
  8. return r == &i->arg[1];
  9. return 0;
  10. }
  11. static int
  12. immarg(Ref *r, int op, Ins *i)
  13. {
  14. return rv64_op[op].imm && r == &i->arg[1];
  15. }
  16. static void
  17. fixarg(Ref *r, int k, Ins *i, Fn *fn)
  18. {
  19. char buf[32];
  20. Ref r0, r1;
  21. int s, n, op;
  22. Con *c;
  23. r0 = r1 = *r;
  24. op = i ? i->op : Ocopy;
  25. switch (rtype(r0)) {
  26. case RCon:
  27. c = &fn->con[r0.val];
  28. if (c->type == CAddr && memarg(r, op, i))
  29. break;
  30. if (KBASE(k) == 0)
  31. if (c->type == CBits && immarg(r, op, i))
  32. if (-2048 <= c->bits.i && c->bits.i < 2048)
  33. break;
  34. r1 = newtmp("isel", k, fn);
  35. if (KBASE(k) == 1) {
  36. /* load floating points from memory
  37. * slots, they can't be used as
  38. * immediates
  39. */
  40. assert(c->type == CBits);
  41. n = stashbits(c->bits.i, KWIDE(k) ? 8 : 4);
  42. vgrow(&fn->con, ++fn->ncon);
  43. c = &fn->con[fn->ncon-1];
  44. sprintf(buf, "\"%sfp%d\"", T.asloc, n);
  45. *c = (Con){.type = CAddr};
  46. c->sym.id = intern(buf);
  47. emit(Oload, k, r1, CON(c-fn->con), R);
  48. break;
  49. }
  50. emit(Ocopy, k, r1, r0, R);
  51. break;
  52. case RTmp:
  53. if (isreg(r0))
  54. break;
  55. s = fn->tmp[r0.val].slot;
  56. if (s != -1) {
  57. /* aggregate passed by value on
  58. * stack, or fast local address,
  59. * replace with slot if we can
  60. */
  61. if (memarg(r, op, i)) {
  62. r1 = SLOT(s);
  63. break;
  64. }
  65. r1 = newtmp("isel", k, fn);
  66. emit(Oaddr, k, r1, SLOT(s), R);
  67. break;
  68. }
  69. if (k == Kw && fn->tmp[r0.val].cls == Kl) {
  70. /* TODO: this sign extension isn't needed
  71. * for 32-bit arithmetic instructions
  72. */
  73. r1 = newtmp("isel", k, fn);
  74. emit(Oextsw, Kl, r1, r0, R);
  75. } else {
  76. assert(k == fn->tmp[r0.val].cls);
  77. }
  78. break;
  79. }
  80. *r = r1;
  81. }
  82. static void
  83. negate(Ref *pr, Fn *fn)
  84. {
  85. Ref r;
  86. r = newtmp("isel", Kw, fn);
  87. emit(Oxor, Kw, *pr, r, getcon(1, fn));
  88. *pr = r;
  89. }
  90. static void
  91. selcmp(Ins i, int k, int op, Fn *fn)
  92. {
  93. Ins *icmp;
  94. Ref r, r0, r1;
  95. int sign, swap, neg;
  96. switch (op) {
  97. case Cieq:
  98. r = newtmp("isel", k, fn);
  99. emit(Oreqz, i.cls, i.to, r, R);
  100. emit(Oxor, k, r, i.arg[0], i.arg[1]);
  101. icmp = curi;
  102. fixarg(&icmp->arg[0], k, icmp, fn);
  103. fixarg(&icmp->arg[1], k, icmp, fn);
  104. return;
  105. case Cine:
  106. r = newtmp("isel", k, fn);
  107. emit(Ornez, i.cls, i.to, r, R);
  108. emit(Oxor, k, r, i.arg[0], i.arg[1]);
  109. icmp = curi;
  110. fixarg(&icmp->arg[0], k, icmp, fn);
  111. fixarg(&icmp->arg[1], k, icmp, fn);
  112. return;
  113. case Cisge: sign = 1, swap = 0, neg = 1; break;
  114. case Cisgt: sign = 1, swap = 1, neg = 0; break;
  115. case Cisle: sign = 1, swap = 1, neg = 1; break;
  116. case Cislt: sign = 1, swap = 0, neg = 0; break;
  117. case Ciuge: sign = 0, swap = 0, neg = 1; break;
  118. case Ciugt: sign = 0, swap = 1, neg = 0; break;
  119. case Ciule: sign = 0, swap = 1, neg = 1; break;
  120. case Ciult: sign = 0, swap = 0, neg = 0; break;
  121. case NCmpI+Cfeq:
  122. case NCmpI+Cfge:
  123. case NCmpI+Cfgt:
  124. case NCmpI+Cfle:
  125. case NCmpI+Cflt:
  126. swap = 0, neg = 0;
  127. break;
  128. case NCmpI+Cfuo:
  129. negate(&i.to, fn);
  130. /* fall through */
  131. case NCmpI+Cfo:
  132. r0 = newtmp("isel", i.cls, fn);
  133. r1 = newtmp("isel", i.cls, fn);
  134. emit(Oand, i.cls, i.to, r0, r1);
  135. op = KWIDE(k) ? Oceqd : Oceqs;
  136. emit(op, i.cls, r0, i.arg[0], i.arg[0]);
  137. icmp = curi;
  138. fixarg(&icmp->arg[0], k, icmp, fn);
  139. fixarg(&icmp->arg[1], k, icmp, fn);
  140. emit(op, i.cls, r1, i.arg[1], i.arg[1]);
  141. icmp = curi;
  142. fixarg(&icmp->arg[0], k, icmp, fn);
  143. fixarg(&icmp->arg[1], k, icmp, fn);
  144. return;
  145. case NCmpI+Cfne:
  146. swap = 0, neg = 1;
  147. i.op = KWIDE(k) ? Oceqd : Oceqs;
  148. break;
  149. default:
  150. assert(0 && "unknown comparison");
  151. }
  152. if (op < NCmpI)
  153. i.op = sign ? Ocsltl : Ocultl;
  154. if (swap) {
  155. r = i.arg[0];
  156. i.arg[0] = i.arg[1];
  157. i.arg[1] = r;
  158. }
  159. if (neg)
  160. negate(&i.to, fn);
  161. emiti(i);
  162. icmp = curi;
  163. fixarg(&icmp->arg[0], k, icmp, fn);
  164. fixarg(&icmp->arg[1], k, icmp, fn);
  165. }
  166. static void
  167. sel(Ins i, Fn *fn)
  168. {
  169. Ins *i0;
  170. int ck, cc;
  171. if (INRANGE(i.op, Oalloc, Oalloc1)) {
  172. i0 = curi - 1;
  173. salloc(i.to, i.arg[0], fn);
  174. fixarg(&i0->arg[0], Kl, i0, fn);
  175. return;
  176. }
  177. if (iscmp(i.op, &ck, &cc)) {
  178. selcmp(i, ck, cc, fn);
  179. return;
  180. }
  181. if (i.op != Onop) {
  182. emiti(i);
  183. i0 = curi; /* fixarg() can change curi */
  184. fixarg(&i0->arg[0], argcls(&i, 0), i0, fn);
  185. fixarg(&i0->arg[1], argcls(&i, 1), i0, fn);
  186. }
  187. }
  188. static void
  189. seljmp(Blk *b, Fn *fn)
  190. {
  191. /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */
  192. if (b->jmp.type == Jjnz)
  193. fixarg(&b->jmp.arg, Kw, 0, fn);
  194. }
  195. void
  196. rv64_isel(Fn *fn)
  197. {
  198. Blk *b, **sb;
  199. Ins *i;
  200. Phi *p;
  201. uint n;
  202. int al;
  203. int64_t sz;
  204. /* assign slots to fast allocs */
  205. b = fn->start;
  206. /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
  207. for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
  208. for (i=b->ins; i<&b->ins[b->nins]; i++)
  209. if (i->op == al) {
  210. if (rtype(i->arg[0]) != RCon)
  211. break;
  212. sz = fn->con[i->arg[0].val].bits.i;
  213. if (sz < 0 || sz >= INT_MAX-15)
  214. err("invalid alloc size %"PRId64, sz);
  215. sz = (sz + n-1) & -n;
  216. sz /= 4;
  217. if (sz > INT_MAX - fn->slot)
  218. die("alloc too large");
  219. fn->tmp[i->to.val].slot = fn->slot;
  220. fn->slot += sz;
  221. *i = (Ins){.op = Onop};
  222. }
  223. for (b=fn->start; b; b=b->link) {
  224. curi = &insb[NIns];
  225. for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
  226. for (p=(*sb)->phi; p; p=p->link) {
  227. for (n=0; p->blk[n] != b; n++)
  228. assert(n+1 < p->narg);
  229. fixarg(&p->arg[n], p->cls, 0, fn);
  230. }
  231. seljmp(b, fn);
  232. for (i=&b->ins[b->nins]; i!=b->ins;)
  233. sel(*--i, fn);
  234. idup(b, curi, &insb[NIns]-curi);
  235. }
  236. if (debug['I']) {
  237. fprintf(stderr, "\n> After instruction selection:\n");
  238. printfn(fn, stderr);
  239. }
  240. }