isel.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839
  1. #include "all.h"
  2. #include <limits.h>
  3. /* For x86_64, do the following:
  4. *
  5. * - check that constants are used only in
  6. * places allowed
  7. * - ensure immediates always fit in 32b
  8. * - expose machine register contraints
  9. * on instructions like division.
  10. * - implement fast locals (the streak of
  11. * constant allocX in the first basic block)
  12. * - recognize complex addressing modes
  13. *
  14. * Invariant: the use counts that are used
  15. * in sel() must be sound. This
  16. * is not so trivial, maybe the
  17. * dce should be moved out...
  18. */
  19. static int amatch(Addr *, Num *, Ref, Fn *);
  20. static int
  21. noimm(Ref r, Fn *fn)
  22. {
  23. int64_t val;
  24. if (rtype(r) != RCon)
  25. return 0;
  26. switch (fn->con[r.val].type) {
  27. case CAddr:
  28. /* we only support the 'small'
  29. * code model of the ABI, this
  30. * means that we can always
  31. * address data with 32bits
  32. */
  33. return 0;
  34. case CBits:
  35. val = fn->con[r.val].bits.i;
  36. return (val < INT32_MIN || val > INT32_MAX);
  37. default:
  38. die("invalid constant");
  39. }
  40. }
  41. static int
  42. rslot(Ref r, Fn *fn)
  43. {
  44. if (rtype(r) != RTmp)
  45. return -1;
  46. return fn->tmp[r.val].slot;
  47. }
  48. static int
  49. hascon(Ref r, Con **pc, Fn *fn)
  50. {
  51. switch (rtype(r)) {
  52. case RCon:
  53. *pc = &fn->con[r.val];
  54. return 1;
  55. case RMem:
  56. *pc = &fn->mem[r.val].offset;
  57. return 1;
  58. default:
  59. return 0;
  60. }
  61. }
  62. static void
  63. fixarg(Ref *r, int k, Ins *i, Fn *fn)
  64. {
  65. char buf[32];
  66. Addr a, *m;
  67. Con cc, *c;
  68. Ref r0, r1, r2, r3;
  69. int s, n, op;
  70. r1 = r0 = *r;
  71. s = rslot(r0, fn);
  72. op = i ? i->op : Ocopy;
  73. if (KBASE(k) == 1 && rtype(r0) == RCon) {
  74. /* load floating points from memory
  75. * slots, they can't be used as
  76. * immediates
  77. */
  78. r1 = MEM(fn->nmem);
  79. vgrow(&fn->mem, ++fn->nmem);
  80. memset(&a, 0, sizeof a);
  81. a.offset.type = CAddr;
  82. n = stashbits(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4);
  83. /* quote the name so that we do not
  84. * add symbol prefixes on the apple
  85. * target variant
  86. */
  87. sprintf(buf, "\"%sfp%d\"", T.asloc, n);
  88. a.offset.sym.id = intern(buf);
  89. fn->mem[fn->nmem-1] = a;
  90. }
  91. else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
  92. /* load constants that do not fit in
  93. * a 32bit signed integer into a
  94. * long temporary
  95. */
  96. r1 = newtmp("isel", Kl, fn);
  97. emit(Ocopy, Kl, r1, r0, R);
  98. }
  99. else if (s != -1) {
  100. /* load fast locals' addresses into
  101. * temporaries right before the
  102. * instruction
  103. */
  104. r1 = newtmp("isel", Kl, fn);
  105. emit(Oaddr, Kl, r1, SLOT(s), R);
  106. }
  107. else if (T.apple && hascon(r0, &c, fn)
  108. && c->type == CAddr && c->sym.type == SThr) {
  109. r1 = newtmp("isel", Kl, fn);
  110. if (c->bits.i) {
  111. r2 = newtmp("isel", Kl, fn);
  112. cc = (Con){.type = CBits};
  113. cc.bits.i = c->bits.i;
  114. r3 = newcon(&cc, fn);
  115. emit(Oadd, Kl, r1, r2, r3);
  116. } else
  117. r2 = r1;
  118. emit(Ocopy, Kl, r2, TMP(RAX), R);
  119. r2 = newtmp("isel", Kl, fn);
  120. r3 = newtmp("isel", Kl, fn);
  121. emit(Ocall, 0, R, r3, CALL(17));
  122. emit(Ocopy, Kl, TMP(RDI), r2, R);
  123. emit(Oload, Kl, r3, r2, R);
  124. cc = *c;
  125. cc.bits.i = 0;
  126. r3 = newcon(&cc, fn);
  127. emit(Oload, Kl, r2, r3, R);
  128. if (rtype(r0) == RMem) {
  129. m = &fn->mem[r0.val];
  130. m->offset.type = CUndef;
  131. m->base = r1;
  132. r1 = r0;
  133. }
  134. }
  135. else if (!(isstore(op) && r == &i->arg[1])
  136. && !isload(op) && op != Ocall && rtype(r0) == RCon
  137. && fn->con[r0.val].type == CAddr) {
  138. /* apple as does not support 32-bit
  139. * absolute addressing, use a rip-
  140. * relative leaq instead
  141. */
  142. r1 = newtmp("isel", Kl, fn);
  143. emit(Oaddr, Kl, r1, r0, R);
  144. }
  145. else if (rtype(r0) == RMem) {
  146. /* eliminate memory operands of
  147. * the form $foo(%rip, ...)
  148. */
  149. m = &fn->mem[r0.val];
  150. if (req(m->base, R))
  151. if (m->offset.type == CAddr) {
  152. r0 = newtmp("isel", Kl, fn);
  153. emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
  154. m->offset.type = CUndef;
  155. m->base = r0;
  156. }
  157. }
  158. *r = r1;
  159. }
  160. static void
  161. seladdr(Ref *r, Num *tn, Fn *fn)
  162. {
  163. Addr a;
  164. Ref r0;
  165. r0 = *r;
  166. if (rtype(r0) == RTmp) {
  167. memset(&a, 0, sizeof a);
  168. if (!amatch(&a, tn, r0, fn))
  169. return;
  170. if (!req(a.base, R))
  171. if (a.offset.type == CAddr) {
  172. /* apple as does not support
  173. * $foo(%r0, %r1, M); try to
  174. * rewrite it or bail out if
  175. * impossible
  176. */
  177. if (!req(a.index, R) || rtype(a.base) != RTmp)
  178. return;
  179. else {
  180. a.index = a.base;
  181. a.scale = 1;
  182. a.base = R;
  183. }
  184. }
  185. chuse(r0, -1, fn);
  186. vgrow(&fn->mem, ++fn->nmem);
  187. fn->mem[fn->nmem-1] = a;
  188. chuse(a.base, +1, fn);
  189. chuse(a.index, +1, fn);
  190. *r = MEM(fn->nmem-1);
  191. }
  192. }
  193. static int
  194. cmpswap(Ref arg[2], int op)
  195. {
  196. switch (op) {
  197. case NCmpI+Cflt:
  198. case NCmpI+Cfle:
  199. return 1;
  200. case NCmpI+Cfgt:
  201. case NCmpI+Cfge:
  202. return 0;
  203. }
  204. return rtype(arg[0]) == RCon;
  205. }
  206. static void
  207. selcmp(Ref arg[2], int k, int swap, Fn *fn)
  208. {
  209. Ref r;
  210. Ins *icmp;
  211. if (swap) {
  212. r = arg[1];
  213. arg[1] = arg[0];
  214. arg[0] = r;
  215. }
  216. emit(Oxcmp, k, R, arg[1], arg[0]);
  217. icmp = curi;
  218. if (rtype(arg[0]) == RCon) {
  219. assert(k != Kw);
  220. icmp->arg[1] = newtmp("isel", k, fn);
  221. emit(Ocopy, k, icmp->arg[1], arg[0], R);
  222. fixarg(&curi->arg[0], k, curi, fn);
  223. }
  224. fixarg(&icmp->arg[0], k, icmp, fn);
  225. fixarg(&icmp->arg[1], k, icmp, fn);
  226. }
  227. static void
  228. sel(Ins i, Num *tn, Fn *fn)
  229. {
  230. Ref r0, r1, tmp[7];
  231. int x, j, k, kc, sh, swap;
  232. Ins *i0, *i1;
  233. if (rtype(i.to) == RTmp)
  234. if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
  235. if (fn->tmp[i.to.val].nuse == 0) {
  236. chuse(i.arg[0], -1, fn);
  237. chuse(i.arg[1], -1, fn);
  238. return;
  239. }
  240. i0 = curi;
  241. k = i.cls;
  242. switch (i.op) {
  243. case Odiv:
  244. case Orem:
  245. case Oudiv:
  246. case Ourem:
  247. if (KBASE(k) == 1)
  248. goto Emit;
  249. if (i.op == Odiv || i.op == Oudiv)
  250. r0 = TMP(RAX), r1 = TMP(RDX);
  251. else
  252. r0 = TMP(RDX), r1 = TMP(RAX);
  253. emit(Ocopy, k, i.to, r0, R);
  254. emit(Ocopy, k, R, r1, R);
  255. if (rtype(i.arg[1]) == RCon) {
  256. /* immediates not allowed for
  257. * divisions in x86
  258. */
  259. r0 = newtmp("isel", k, fn);
  260. } else
  261. r0 = i.arg[1];
  262. if (fn->tmp[r0.val].slot != -1)
  263. err("unlikely argument %%%s in %s",
  264. fn->tmp[r0.val].name, optab[i.op].name);
  265. if (i.op == Odiv || i.op == Orem) {
  266. emit(Oxidiv, k, R, r0, R);
  267. emit(Osign, k, TMP(RDX), TMP(RAX), R);
  268. } else {
  269. emit(Oxdiv, k, R, r0, R);
  270. emit(Ocopy, k, TMP(RDX), CON_Z, R);
  271. }
  272. emit(Ocopy, k, TMP(RAX), i.arg[0], R);
  273. fixarg(&curi->arg[0], k, curi, fn);
  274. if (rtype(i.arg[1]) == RCon)
  275. emit(Ocopy, k, r0, i.arg[1], R);
  276. break;
  277. case Osar:
  278. case Oshr:
  279. case Oshl:
  280. r0 = i.arg[1];
  281. if (rtype(r0) == RCon)
  282. goto Emit;
  283. if (fn->tmp[r0.val].slot != -1)
  284. err("unlikely argument %%%s in %s",
  285. fn->tmp[r0.val].name, optab[i.op].name);
  286. i.arg[1] = TMP(RCX);
  287. emit(Ocopy, Kw, R, TMP(RCX), R);
  288. emiti(i);
  289. i1 = curi;
  290. emit(Ocopy, Kw, TMP(RCX), r0, R);
  291. fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
  292. break;
  293. case Ouwtof:
  294. r0 = newtmp("utof", Kl, fn);
  295. emit(Osltof, k, i.to, r0, R);
  296. emit(Oextuw, Kl, r0, i.arg[0], R);
  297. fixarg(&curi->arg[0], k, curi, fn);
  298. break;
  299. case Oultof:
  300. /* %mask =l and %arg.0, 1
  301. * %isbig =l shr %arg.0, 63
  302. * %divided =l shr %arg.0, %isbig
  303. * %or =l or %mask, %divided
  304. * %float =d sltof %or
  305. * %cast =l cast %float
  306. * %addend =l shl %isbig, 52
  307. * %sum =l add %cast, %addend
  308. * %result =d cast %sum
  309. */
  310. r0 = newtmp("utof", k, fn);
  311. if (k == Ks)
  312. kc = Kw, sh = 23;
  313. else
  314. kc = Kl, sh = 52;
  315. for (j=0; j<4; j++)
  316. tmp[j] = newtmp("utof", Kl, fn);
  317. for (; j<7; j++)
  318. tmp[j] = newtmp("utof", kc, fn);
  319. emit(Ocast, k, i.to, tmp[6], R);
  320. emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
  321. emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
  322. emit(Ocast, kc, tmp[4], r0, R);
  323. emit(Osltof, k, r0, tmp[3], R);
  324. emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
  325. emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
  326. sel(*curi++, 0, fn);
  327. emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
  328. fixarg(&curi->arg[0], Kl, curi, fn);
  329. emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
  330. fixarg(&curi->arg[0], Kl, curi, fn);
  331. break;
  332. case Ostoui:
  333. i.op = Ostosi;
  334. kc = Ks;
  335. tmp[4] = getcon(0xdf000000, fn);
  336. goto Oftoui;
  337. case Odtoui:
  338. i.op = Odtosi;
  339. kc = Kd;
  340. tmp[4] = getcon(0xc3e0000000000000, fn);
  341. Oftoui:
  342. if (k == Kw) {
  343. r0 = newtmp("ftou", Kl, fn);
  344. emit(Ocopy, Kw, i.to, r0, R);
  345. i.cls = Kl;
  346. i.to = r0;
  347. goto Emit;
  348. }
  349. /* %try0 =l {s,d}tosi %fp
  350. * %mask =l sar %try0, 63
  351. *
  352. * mask is all ones if the first
  353. * try was oob, all zeroes o.w.
  354. *
  355. * %fps ={s,d} sub %fp, (1<<63)
  356. * %try1 =l {s,d}tosi %fps
  357. *
  358. * %tmp =l and %mask, %try1
  359. * %res =l or %tmp, %try0
  360. */
  361. r0 = newtmp("ftou", kc, fn);
  362. for (j=0; j<4; j++)
  363. tmp[j] = newtmp("ftou", Kl, fn);
  364. emit(Oor, Kl, i.to, tmp[0], tmp[3]);
  365. emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
  366. emit(i.op, Kl, tmp[2], r0, R);
  367. emit(Oadd, kc, r0, tmp[4], i.arg[0]);
  368. i1 = curi; /* fixarg() can change curi */
  369. fixarg(&i1->arg[0], kc, i1, fn);
  370. fixarg(&i1->arg[1], kc, i1, fn);
  371. emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
  372. emit(i.op, Kl, tmp[0], i.arg[0], R);
  373. fixarg(&curi->arg[0], Kl, curi, fn);
  374. break;
  375. case Onop:
  376. break;
  377. case Ostored:
  378. case Ostores:
  379. case Ostorel:
  380. case Ostorew:
  381. case Ostoreh:
  382. case Ostoreb:
  383. if (rtype(i.arg[0]) == RCon) {
  384. if (i.op == Ostored)
  385. i.op = Ostorel;
  386. if (i.op == Ostores)
  387. i.op = Ostorew;
  388. }
  389. seladdr(&i.arg[1], tn, fn);
  390. goto Emit;
  391. case_Oload:
  392. seladdr(&i.arg[0], tn, fn);
  393. goto Emit;
  394. case Odbgloc:
  395. case Ocall:
  396. case Osalloc:
  397. case Ocopy:
  398. case Oadd:
  399. case Osub:
  400. case Oneg:
  401. case Omul:
  402. case Oand:
  403. case Oor:
  404. case Oxor:
  405. case Oxtest:
  406. case Ostosi:
  407. case Odtosi:
  408. case Oswtof:
  409. case Osltof:
  410. case Oexts:
  411. case Otruncd:
  412. case Ocast:
  413. case_OExt:
  414. Emit:
  415. emiti(i);
  416. i1 = curi; /* fixarg() can change curi */
  417. fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
  418. fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
  419. break;
  420. case Oalloc4:
  421. case Oalloc8:
  422. case Oalloc16:
  423. salloc(i.to, i.arg[0], fn);
  424. break;
  425. default:
  426. if (isext(i.op))
  427. goto case_OExt;
  428. if (isload(i.op))
  429. goto case_Oload;
  430. if (iscmp(i.op, &kc, &x)) {
  431. switch (x) {
  432. case NCmpI+Cfeq:
  433. /* zf is set when operands are
  434. * unordered, so we may have to
  435. * check pf
  436. */
  437. r0 = newtmp("isel", Kw, fn);
  438. r1 = newtmp("isel", Kw, fn);
  439. emit(Oand, Kw, i.to, r0, r1);
  440. emit(Oflagfo, k, r1, R, R);
  441. i.to = r0;
  442. break;
  443. case NCmpI+Cfne:
  444. r0 = newtmp("isel", Kw, fn);
  445. r1 = newtmp("isel", Kw, fn);
  446. emit(Oor, Kw, i.to, r0, r1);
  447. emit(Oflagfuo, k, r1, R, R);
  448. i.to = r0;
  449. break;
  450. }
  451. swap = cmpswap(i.arg, x);
  452. if (swap)
  453. x = cmpop(x);
  454. emit(Oflag+x, k, i.to, R, R);
  455. selcmp(i.arg, kc, swap, fn);
  456. break;
  457. }
  458. die("unknown instruction %s", optab[i.op].name);
  459. }
  460. while (i0>curi && --i0) {
  461. assert(rslot(i0->arg[0], fn) == -1);
  462. assert(rslot(i0->arg[1], fn) == -1);
  463. }
  464. }
  465. static Ins *
  466. flagi(Ins *i0, Ins *i)
  467. {
  468. while (i>i0) {
  469. i--;
  470. if (amd64_op[i->op].zflag)
  471. return i;
  472. if (amd64_op[i->op].lflag)
  473. continue;
  474. return 0;
  475. }
  476. return 0;
  477. }
  478. static void
  479. seljmp(Blk *b, Fn *fn)
  480. {
  481. Ref r;
  482. int c, k, swap;
  483. Ins *fi;
  484. Tmp *t;
  485. if (b->jmp.type == Jret0
  486. || b->jmp.type == Jjmp
  487. || b->jmp.type == Jhlt)
  488. return;
  489. assert(b->jmp.type == Jjnz);
  490. r = b->jmp.arg;
  491. t = &fn->tmp[r.val];
  492. b->jmp.arg = R;
  493. assert(rtype(r) == RTmp);
  494. if (b->s1 == b->s2) {
  495. chuse(r, -1, fn);
  496. b->jmp.type = Jjmp;
  497. b->s2 = 0;
  498. return;
  499. }
  500. fi = flagi(b->ins, &b->ins[b->nins]);
  501. if (!fi || !req(fi->to, r)) {
  502. selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn);
  503. b->jmp.type = Jjf + Cine;
  504. }
  505. else if (iscmp(fi->op, &k, &c)
  506. && c != NCmpI+Cfeq /* see sel() */
  507. && c != NCmpI+Cfne) {
  508. swap = cmpswap(fi->arg, c);
  509. if (swap)
  510. c = cmpop(c);
  511. if (t->nuse == 1) {
  512. selcmp(fi->arg, k, swap, fn);
  513. *fi = (Ins){.op = Onop};
  514. }
  515. b->jmp.type = Jjf + c;
  516. }
  517. else if (fi->op == Oand && t->nuse == 1
  518. && (rtype(fi->arg[0]) == RTmp ||
  519. rtype(fi->arg[1]) == RTmp)) {
  520. fi->op = Oxtest;
  521. fi->to = R;
  522. b->jmp.type = Jjf + Cine;
  523. if (rtype(fi->arg[1]) == RCon) {
  524. r = fi->arg[1];
  525. fi->arg[1] = fi->arg[0];
  526. fi->arg[0] = r;
  527. }
  528. }
  529. else {
  530. /* since flags are not tracked in liveness,
  531. * the result of the flag-setting instruction
  532. * has to be marked as live
  533. */
  534. if (t->nuse == 1)
  535. emit(Ocopy, Kw, R, r, R);
  536. b->jmp.type = Jjf + Cine;
  537. }
  538. }
  539. enum {
  540. Pob,
  541. Pbis,
  542. Pois,
  543. Pobis,
  544. Pbi1,
  545. Pobi1,
  546. };
  547. /* mgen generated code
  548. *
  549. * (with-vars (o b i s)
  550. * (patterns
  551. * (ob (add (con o) (tmp b)))
  552. * (bis (add (tmp b) (mul (tmp i) (con s 1 2 4 8))))
  553. * (ois (add (con o) (mul (tmp i) (con s 1 2 4 8))))
  554. * (obis (add (con o) (tmp b) (mul (tmp i) (con s 1 2 4 8))))
  555. * (bi1 (add (tmp b) (tmp i)))
  556. * (obi1 (add (con o) (tmp b) (tmp i)))
  557. * ))
  558. */
  559. static int
  560. opn(int op, int l, int r)
  561. {
  562. static uchar Oaddtbl[91] = {
  563. 2,
  564. 2,2,
  565. 4,4,5,
  566. 6,6,8,8,
  567. 4,4,9,10,9,
  568. 7,7,5,8,9,5,
  569. 4,4,12,10,12,12,12,
  570. 4,4,9,10,9,9,12,9,
  571. 11,11,5,8,9,5,12,9,5,
  572. 7,7,5,8,9,5,12,9,5,5,
  573. 11,11,5,8,9,5,12,9,5,5,5,
  574. 4,4,9,10,9,9,12,9,9,9,9,9,
  575. 7,7,5,8,9,5,12,9,5,5,5,9,5,
  576. };
  577. int t;
  578. if (l < r)
  579. t = l, l = r, r = t;
  580. switch (op) {
  581. case Omul:
  582. if (2 <= l)
  583. if (r == 0) {
  584. return 3;
  585. }
  586. return 2;
  587. case Oadd:
  588. return Oaddtbl[(l + l*l)/2 + r];
  589. default:
  590. return 2;
  591. }
  592. }
  593. static int
  594. refn(Ref r, Num *tn, Con *con)
  595. {
  596. int64_t n;
  597. switch (rtype(r)) {
  598. case RTmp:
  599. if (!tn[r.val].n)
  600. tn[r.val].n = 2;
  601. return tn[r.val].n;
  602. case RCon:
  603. if (con[r.val].type != CBits)
  604. return 1;
  605. n = con[r.val].bits.i;
  606. if (n == 8 || n == 4 || n == 2 || n == 1)
  607. return 0;
  608. return 1;
  609. default:
  610. return INT_MIN;
  611. }
  612. }
  613. static bits match[13] = {
  614. [4] = BIT(Pob),
  615. [5] = BIT(Pbi1),
  616. [6] = BIT(Pob) | BIT(Pois),
  617. [7] = BIT(Pob) | BIT(Pobi1),
  618. [8] = BIT(Pbi1) | BIT(Pbis),
  619. [9] = BIT(Pbi1) | BIT(Pobi1),
  620. [10] = BIT(Pbi1) | BIT(Pbis) | BIT(Pobi1) | BIT(Pobis),
  621. [11] = BIT(Pob) | BIT(Pobi1) | BIT(Pobis),
  622. [12] = BIT(Pbi1) | BIT(Pobi1) | BIT(Pobis),
  623. };
  624. static uchar *matcher[] = {
  625. [Pbi1] = (uchar[]){
  626. 1,3,1,3,2,0
  627. },
  628. [Pbis] = (uchar[]){
  629. 5,1,8,5,27,1,5,1,2,5,13,3,1,1,3,3,3,2,0,1,
  630. 3,3,3,2,3,1,0,1,29
  631. },
  632. [Pob] = (uchar[]){
  633. 1,3,0,3,1,0
  634. },
  635. [Pobi1] = (uchar[]){
  636. 5,3,9,9,10,33,12,35,45,1,5,3,11,9,7,9,4,9,
  637. 17,1,3,0,3,1,3,2,0,3,1,1,3,0,34,1,37,1,5,2,
  638. 5,7,2,7,8,37,29,1,3,0,1,32
  639. },
  640. [Pobis] = (uchar[]){
  641. 5,2,10,7,11,19,49,1,1,3,3,3,2,1,3,0,3,1,0,
  642. 1,3,0,5,1,8,5,25,1,5,1,2,5,13,3,1,1,3,3,3,
  643. 2,0,1,3,3,3,2,26,1,51,1,5,1,6,5,9,1,3,0,51,
  644. 3,1,1,3,0,45
  645. },
  646. [Pois] = (uchar[]){
  647. 1,3,0,1,3,3,3,2,0
  648. },
  649. };
  650. /* end of generated code */
  651. static void
  652. anumber(Num *tn, Blk *b, Con *con)
  653. {
  654. Ins *i;
  655. Num *n;
  656. for (i=b->ins; i<&b->ins[b->nins]; i++) {
  657. if (rtype(i->to) != RTmp)
  658. continue;
  659. n = &tn[i->to.val];
  660. n->l = i->arg[0];
  661. n->r = i->arg[1];
  662. n->nl = refn(n->l, tn, con);
  663. n->nr = refn(n->r, tn, con);
  664. n->n = opn(i->op, n->nl, n->nr);
  665. }
  666. }
  667. static Ref
  668. adisp(Con *c, Num *tn, Ref r, Fn *fn, int s)
  669. {
  670. Ref v[2];
  671. int n;
  672. while (!req(r, R)) {
  673. assert(rtype(r) == RTmp);
  674. n = refn(r, tn, fn->con);
  675. if (!(match[n] & BIT(Pob)))
  676. break;
  677. runmatch(matcher[Pob], tn, r, v);
  678. assert(rtype(v[0]) == RCon);
  679. addcon(c, &fn->con[v[0].val], s);
  680. r = v[1];
  681. }
  682. return r;
  683. }
  684. static int
  685. amatch(Addr *a, Num *tn, Ref r, Fn *fn)
  686. {
  687. static int pat[] = {Pobis, Pobi1, Pbis, Pois, Pbi1, -1};
  688. Ref ro, rb, ri, rs, v[4];
  689. Con *c, co;
  690. int s, n, *p;
  691. if (rtype(r) != RTmp)
  692. return 0;
  693. n = refn(r, tn, fn->con);
  694. memset(v, 0, sizeof v);
  695. for (p=pat; *p>=0; p++)
  696. if (match[n] & BIT(*p)) {
  697. runmatch(matcher[*p], tn, r, v);
  698. break;
  699. }
  700. if (*p < 0)
  701. v[1] = r;
  702. memset(&co, 0, sizeof co);
  703. ro = v[0];
  704. rb = adisp(&co, tn, v[1], fn, 1);
  705. ri = v[2];
  706. rs = v[3];
  707. s = 1;
  708. if (*p < 0 && co.type != CUndef)
  709. if (amatch(a, tn, rb, fn))
  710. return addcon(&a->offset, &co, 1);
  711. if (!req(ro, R)) {
  712. assert(rtype(ro) == RCon);
  713. c = &fn->con[ro.val];
  714. if (!addcon(&co, c, 1))
  715. return 0;
  716. }
  717. if (!req(rs, R)) {
  718. assert(rtype(rs) == RCon);
  719. c = &fn->con[rs.val];
  720. assert(c->type = CBits);
  721. s = c->bits.i;
  722. }
  723. ri = adisp(&co, tn, ri, fn, s);
  724. *a = (Addr){co, rb, ri, s};
  725. if (rtype(ri) == RTmp)
  726. if (fn->tmp[ri.val].slot != -1) {
  727. if (a->scale != 1
  728. || fn->tmp[rb.val].slot != -1)
  729. return 0;
  730. a->base = ri;
  731. a->index = rb;
  732. }
  733. if (!req(a->base, R)) {
  734. assert(rtype(a->base) == RTmp);
  735. s = fn->tmp[a->base.val].slot;
  736. if (s != -1)
  737. a->base = SLOT(s);
  738. }
  739. return 1;
  740. }
  741. /* instruction selection
  742. * requires use counts (as given by parsing)
  743. */
  744. void
  745. amd64_isel(Fn *fn)
  746. {
  747. Blk *b, **sb;
  748. Ins *i;
  749. Phi *p;
  750. uint a;
  751. int n, al;
  752. int64_t sz;
  753. Num *num;
  754. /* assign slots to fast allocs */
  755. b = fn->start;
  756. /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
  757. for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
  758. for (i=b->ins; i<&b->ins[b->nins]; i++)
  759. if (i->op == al) {
  760. if (rtype(i->arg[0]) != RCon)
  761. break;
  762. sz = fn->con[i->arg[0].val].bits.i;
  763. if (sz < 0 || sz >= INT_MAX-15)
  764. err("invalid alloc size %"PRId64, sz);
  765. sz = (sz + n-1) & -n;
  766. sz /= 4;
  767. if (sz > INT_MAX - fn->slot)
  768. die("alloc too large");
  769. fn->tmp[i->to.val].slot = fn->slot;
  770. fn->slot += sz;
  771. *i = (Ins){.op = Onop};
  772. }
  773. /* process basic blocks */
  774. n = fn->ntmp;
  775. num = emalloc(n * sizeof num[0]);
  776. for (b=fn->start; b; b=b->link) {
  777. curi = &insb[NIns];
  778. for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
  779. for (p=(*sb)->phi; p; p=p->link) {
  780. for (a=0; p->blk[a] != b; a++)
  781. assert(a+1 < p->narg);
  782. fixarg(&p->arg[a], p->cls, 0, fn);
  783. }
  784. memset(num, 0, n * sizeof num[0]);
  785. anumber(num, b, fn->con);
  786. seljmp(b, fn);
  787. for (i=&b->ins[b->nins]; i!=b->ins;)
  788. sel(*--i, num, fn);
  789. b->nins = &insb[NIns] - curi;
  790. idup(&b->ins, curi, b->nins);
  791. }
  792. free(num);
  793. if (debug['I']) {
  794. fprintf(stderr, "\n> After instruction selection:\n");
  795. printfn(fn, stderr);
  796. }
  797. }