isel.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942
  1. #include "all.h"
  2. #include <limits.h>
  3. /* For x86_64, do the following:
  4. *
  5. * - check that constants are used only in
  6. * places allowed
  7. * - ensure immediates always fit in 32b
  8. * - expose machine register contraints
  9. * on instructions like division.
  10. * - implement fast locals (the streak of
  11. * constant allocX in the first basic block)
  12. * - recognize complex addressing modes
  13. *
  14. * Invariant: the use counts that are used
  15. * in sel() must be sound. This
  16. * is not so trivial, maybe the
  17. * dce should be moved out...
  18. */
  19. static int amatch(Addr *, Num *, Ref, Fn *);
  20. static int
  21. noimm(Ref r, Fn *fn)
  22. {
  23. int64_t val;
  24. if (rtype(r) != RCon)
  25. return 0;
  26. switch (fn->con[r.val].type) {
  27. case CAddr:
  28. /* we only support the 'small'
  29. * code model of the ABI, this
  30. * means that we can always
  31. * address data with 32bits
  32. */
  33. return 0;
  34. case CBits:
  35. val = fn->con[r.val].bits.i;
  36. return (val < INT32_MIN || val > INT32_MAX);
  37. default:
  38. die("invalid constant");
  39. }
  40. }
  41. static int
  42. rslot(Ref r, Fn *fn)
  43. {
  44. if (rtype(r) != RTmp)
  45. return -1;
  46. return fn->tmp[r.val].slot;
  47. }
  48. static int
  49. hascon(Ref r, Con **pc, Fn *fn)
  50. {
  51. switch (rtype(r)) {
  52. case RCon:
  53. *pc = &fn->con[r.val];
  54. return 1;
  55. case RMem:
  56. *pc = &fn->mem[r.val].offset;
  57. return 1;
  58. default:
  59. return 0;
  60. }
  61. }
  62. static void
  63. fixarg(Ref *r, int k, Ins *i, Fn *fn)
  64. {
  65. char buf[32];
  66. Addr a, *m;
  67. Con cc, *c;
  68. Ref r0, r1, r2, r3;
  69. int s, n, op;
  70. r1 = r0 = *r;
  71. s = rslot(r0, fn);
  72. op = i ? i->op : Ocopy;
  73. if (KBASE(k) == 1 && rtype(r0) == RCon) {
  74. /* load floating points from memory
  75. * slots, they can't be used as
  76. * immediates
  77. */
  78. r1 = MEM(fn->nmem);
  79. vgrow(&fn->mem, ++fn->nmem);
  80. memset(&a, 0, sizeof a);
  81. a.offset.type = CAddr;
  82. n = stashbits(fn->con[r0.val].bits.i, KWIDE(k) ? 8 : 4);
  83. /* quote the name so that we do not
  84. * add symbol prefixes on the apple
  85. * target variant
  86. */
  87. sprintf(buf, "\"%sfp%d\"", T.asloc, n);
  88. a.offset.sym.id = intern(buf);
  89. fn->mem[fn->nmem-1] = a;
  90. }
  91. else if (op == Ocall && r == &i->arg[0]
  92. && rtype(r0) == RCon && fn->con[r0.val].type != CAddr) {
  93. /* use a temporary register so that we
  94. * produce an indirect call
  95. */
  96. r1 = newtmp("isel", Kl, fn);
  97. emit(Ocopy, Kl, r1, r0, R);
  98. }
  99. else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
  100. /* load constants that do not fit in
  101. * a 32bit signed integer into a
  102. * long temporary
  103. */
  104. r1 = newtmp("isel", Kl, fn);
  105. emit(Ocopy, Kl, r1, r0, R);
  106. }
  107. else if (s != -1) {
  108. /* load fast locals' addresses into
  109. * temporaries right before the
  110. * instruction
  111. */
  112. r1 = newtmp("isel", Kl, fn);
  113. emit(Oaddr, Kl, r1, SLOT(s), R);
  114. }
  115. else if (T.apple && hascon(r0, &c, fn)
  116. && c->type == CAddr && c->sym.type == SThr) {
  117. r1 = newtmp("isel", Kl, fn);
  118. if (c->bits.i) {
  119. r2 = newtmp("isel", Kl, fn);
  120. cc = (Con){.type = CBits};
  121. cc.bits.i = c->bits.i;
  122. r3 = newcon(&cc, fn);
  123. emit(Oadd, Kl, r1, r2, r3);
  124. } else
  125. r2 = r1;
  126. emit(Ocopy, Kl, r2, TMP(RAX), R);
  127. r2 = newtmp("isel", Kl, fn);
  128. r3 = newtmp("isel", Kl, fn);
  129. emit(Ocall, 0, R, r3, CALL(17));
  130. emit(Ocopy, Kl, TMP(RDI), r2, R);
  131. emit(Oload, Kl, r3, r2, R);
  132. cc = *c;
  133. cc.bits.i = 0;
  134. r3 = newcon(&cc, fn);
  135. emit(Oload, Kl, r2, r3, R);
  136. if (rtype(r0) == RMem) {
  137. m = &fn->mem[r0.val];
  138. m->offset.type = CUndef;
  139. m->base = r1;
  140. r1 = r0;
  141. }
  142. }
  143. else if (!(isstore(op) && r == &i->arg[1])
  144. && !isload(op) && op != Ocall && rtype(r0) == RCon
  145. && fn->con[r0.val].type == CAddr) {
  146. /* apple as does not support 32-bit
  147. * absolute addressing, use a rip-
  148. * relative leaq instead
  149. */
  150. r1 = newtmp("isel", Kl, fn);
  151. emit(Oaddr, Kl, r1, r0, R);
  152. }
  153. else if (rtype(r0) == RMem) {
  154. /* eliminate memory operands of
  155. * the form $foo(%rip, ...)
  156. */
  157. m = &fn->mem[r0.val];
  158. if (req(m->base, R))
  159. if (m->offset.type == CAddr) {
  160. r0 = newtmp("isel", Kl, fn);
  161. emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
  162. m->offset.type = CUndef;
  163. m->base = r0;
  164. }
  165. }
  166. else if (isxsel(op) && rtype(*r) == RCon) {
  167. r1 = newtmp("isel", i->cls, fn);
  168. emit(Ocopy, i->cls, r1, *r, R);
  169. }
  170. *r = r1;
  171. }
  172. static void
  173. seladdr(Ref *r, Num *tn, Fn *fn)
  174. {
  175. Addr a;
  176. Ref r0;
  177. r0 = *r;
  178. if (rtype(r0) == RTmp) {
  179. memset(&a, 0, sizeof a);
  180. if (!amatch(&a, tn, r0, fn))
  181. return;
  182. if (!req(a.base, R))
  183. if (a.offset.type == CAddr) {
  184. /* apple as does not support
  185. * $foo(%r0, %r1, M); try to
  186. * rewrite it or bail out if
  187. * impossible
  188. */
  189. if (!req(a.index, R) || rtype(a.base) != RTmp)
  190. return;
  191. else {
  192. a.index = a.base;
  193. a.scale = 1;
  194. a.base = R;
  195. }
  196. }
  197. chuse(r0, -1, fn);
  198. vgrow(&fn->mem, ++fn->nmem);
  199. fn->mem[fn->nmem-1] = a;
  200. chuse(a.base, +1, fn);
  201. chuse(a.index, +1, fn);
  202. *r = MEM(fn->nmem-1);
  203. }
  204. }
  205. static int
  206. cmpswap(Ref arg[2], int op)
  207. {
  208. switch (op) {
  209. case NCmpI+Cflt:
  210. case NCmpI+Cfle:
  211. return 1;
  212. case NCmpI+Cfgt:
  213. case NCmpI+Cfge:
  214. return 0;
  215. }
  216. return rtype(arg[0]) == RCon;
  217. }
  218. static void
  219. selcmp(Ref arg[2], int k, int swap, Fn *fn)
  220. {
  221. Ref r;
  222. Ins *icmp;
  223. if (swap) {
  224. r = arg[1];
  225. arg[1] = arg[0];
  226. arg[0] = r;
  227. }
  228. emit(Oxcmp, k, R, arg[1], arg[0]);
  229. icmp = curi;
  230. if (rtype(arg[0]) == RCon) {
  231. assert(k != Kw);
  232. icmp->arg[1] = newtmp("isel", k, fn);
  233. emit(Ocopy, k, icmp->arg[1], arg[0], R);
  234. fixarg(&curi->arg[0], k, curi, fn);
  235. }
  236. fixarg(&icmp->arg[0], k, icmp, fn);
  237. fixarg(&icmp->arg[1], k, icmp, fn);
  238. }
  239. static void
  240. sel(Ins i, Num *tn, Fn *fn)
  241. {
  242. Ref r0, r1, tmp[7];
  243. int x, j, k, kc, sh, swap;
  244. Ins *i0, *i1;
  245. if (rtype(i.to) == RTmp)
  246. if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
  247. if (fn->tmp[i.to.val].nuse == 0) {
  248. chuse(i.arg[0], -1, fn);
  249. chuse(i.arg[1], -1, fn);
  250. return;
  251. }
  252. i0 = curi;
  253. k = i.cls;
  254. switch (i.op) {
  255. case Odiv:
  256. case Orem:
  257. case Oudiv:
  258. case Ourem:
  259. if (KBASE(k) == 1)
  260. goto Emit;
  261. if (i.op == Odiv || i.op == Oudiv)
  262. r0 = TMP(RAX), r1 = TMP(RDX);
  263. else
  264. r0 = TMP(RDX), r1 = TMP(RAX);
  265. emit(Ocopy, k, i.to, r0, R);
  266. emit(Ocopy, k, R, r1, R);
  267. if (rtype(i.arg[1]) == RCon) {
  268. /* immediates not allowed for
  269. * divisions in x86
  270. */
  271. r0 = newtmp("isel", k, fn);
  272. } else
  273. r0 = i.arg[1];
  274. if (fn->tmp[r0.val].slot != -1)
  275. err("unlikely argument %%%s in %s",
  276. fn->tmp[r0.val].name, optab[i.op].name);
  277. if (i.op == Odiv || i.op == Orem) {
  278. emit(Oxidiv, k, R, r0, R);
  279. emit(Osign, k, TMP(RDX), TMP(RAX), R);
  280. } else {
  281. emit(Oxdiv, k, R, r0, R);
  282. emit(Ocopy, k, TMP(RDX), CON_Z, R);
  283. }
  284. emit(Ocopy, k, TMP(RAX), i.arg[0], R);
  285. fixarg(&curi->arg[0], k, curi, fn);
  286. if (rtype(i.arg[1]) == RCon)
  287. emit(Ocopy, k, r0, i.arg[1], R);
  288. break;
  289. case Osar:
  290. case Oshr:
  291. case Oshl:
  292. r0 = i.arg[1];
  293. if (rtype(r0) == RCon)
  294. goto Emit;
  295. if (fn->tmp[r0.val].slot != -1)
  296. err("unlikely argument %%%s in %s",
  297. fn->tmp[r0.val].name, optab[i.op].name);
  298. i.arg[1] = TMP(RCX);
  299. emit(Ocopy, Kw, R, TMP(RCX), R);
  300. emiti(i);
  301. i1 = curi;
  302. emit(Ocopy, Kw, TMP(RCX), r0, R);
  303. fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
  304. break;
  305. case Ouwtof:
  306. r0 = newtmp("utof", Kl, fn);
  307. emit(Osltof, k, i.to, r0, R);
  308. emit(Oextuw, Kl, r0, i.arg[0], R);
  309. fixarg(&curi->arg[0], k, curi, fn);
  310. break;
  311. case Oultof:
  312. /* %mask =l and %arg.0, 1
  313. * %isbig =l shr %arg.0, 63
  314. * %divided =l shr %arg.0, %isbig
  315. * %or =l or %mask, %divided
  316. * %float =d sltof %or
  317. * %cast =l cast %float
  318. * %addend =l shl %isbig, 52
  319. * %sum =l add %cast, %addend
  320. * %result =d cast %sum
  321. */
  322. r0 = newtmp("utof", k, fn);
  323. if (k == Ks)
  324. kc = Kw, sh = 23;
  325. else
  326. kc = Kl, sh = 52;
  327. for (j=0; j<4; j++)
  328. tmp[j] = newtmp("utof", Kl, fn);
  329. for (; j<7; j++)
  330. tmp[j] = newtmp("utof", kc, fn);
  331. emit(Ocast, k, i.to, tmp[6], R);
  332. emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
  333. emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
  334. emit(Ocast, kc, tmp[4], r0, R);
  335. emit(Osltof, k, r0, tmp[3], R);
  336. emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
  337. emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
  338. sel(*curi++, 0, fn);
  339. emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
  340. fixarg(&curi->arg[0], Kl, curi, fn);
  341. emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
  342. fixarg(&curi->arg[0], Kl, curi, fn);
  343. break;
  344. case Ostoui:
  345. i.op = Ostosi;
  346. kc = Ks;
  347. tmp[4] = getcon(0xdf000000, fn);
  348. goto Oftoui;
  349. case Odtoui:
  350. i.op = Odtosi;
  351. kc = Kd;
  352. tmp[4] = getcon(0xc3e0000000000000, fn);
  353. Oftoui:
  354. if (k == Kw) {
  355. r0 = newtmp("ftou", Kl, fn);
  356. emit(Ocopy, Kw, i.to, r0, R);
  357. i.cls = Kl;
  358. i.to = r0;
  359. goto Emit;
  360. }
  361. /* %try0 =l {s,d}tosi %fp
  362. * %mask =l sar %try0, 63
  363. *
  364. * mask is all ones if the first
  365. * try was oob, all zeroes o.w.
  366. *
  367. * %fps ={s,d} sub %fp, (1<<63)
  368. * %try1 =l {s,d}tosi %fps
  369. *
  370. * %tmp =l and %mask, %try1
  371. * %res =l or %tmp, %try0
  372. */
  373. r0 = newtmp("ftou", kc, fn);
  374. for (j=0; j<4; j++)
  375. tmp[j] = newtmp("ftou", Kl, fn);
  376. emit(Oor, Kl, i.to, tmp[0], tmp[3]);
  377. emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
  378. emit(i.op, Kl, tmp[2], r0, R);
  379. emit(Oadd, kc, r0, tmp[4], i.arg[0]);
  380. i1 = curi; /* fixarg() can change curi */
  381. fixarg(&i1->arg[0], kc, i1, fn);
  382. fixarg(&i1->arg[1], kc, i1, fn);
  383. emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
  384. emit(i.op, Kl, tmp[0], i.arg[0], R);
  385. fixarg(&curi->arg[0], Kl, curi, fn);
  386. break;
  387. case Onop:
  388. break;
  389. case Ostored:
  390. case Ostores:
  391. case Ostorel:
  392. case Ostorew:
  393. case Ostoreh:
  394. case Ostoreb:
  395. if (rtype(i.arg[0]) == RCon) {
  396. if (i.op == Ostored)
  397. i.op = Ostorel;
  398. if (i.op == Ostores)
  399. i.op = Ostorew;
  400. }
  401. seladdr(&i.arg[1], tn, fn);
  402. goto Emit;
  403. case_Oload:
  404. seladdr(&i.arg[0], tn, fn);
  405. goto Emit;
  406. case Odbgloc:
  407. case Ocall:
  408. case Osalloc:
  409. case Ocopy:
  410. case Oadd:
  411. case Osub:
  412. case Oneg:
  413. case Omul:
  414. case Oand:
  415. case Oor:
  416. case Oxor:
  417. case Oxtest:
  418. case Ostosi:
  419. case Odtosi:
  420. case Oswtof:
  421. case Osltof:
  422. case Oexts:
  423. case Otruncd:
  424. case Ocast:
  425. case_Oxsel:
  426. case_Oext:
  427. Emit:
  428. emiti(i);
  429. i1 = curi; /* fixarg() can change curi */
  430. fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
  431. fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
  432. break;
  433. case Oalloc4:
  434. case Oalloc8:
  435. case Oalloc16:
  436. salloc(i.to, i.arg[0], fn);
  437. break;
  438. default:
  439. if (isext(i.op))
  440. goto case_Oext;
  441. if (isxsel(i.op))
  442. goto case_Oxsel;
  443. if (isload(i.op))
  444. goto case_Oload;
  445. if (iscmp(i.op, &kc, &x)) {
  446. switch (x) {
  447. case NCmpI+Cfeq:
  448. /* zf is set when operands are
  449. * unordered, so we may have to
  450. * check pf
  451. */
  452. r0 = newtmp("isel", Kw, fn);
  453. r1 = newtmp("isel", Kw, fn);
  454. emit(Oand, Kw, i.to, r0, r1);
  455. emit(Oflagfo, k, r1, R, R);
  456. i.to = r0;
  457. break;
  458. case NCmpI+Cfne:
  459. r0 = newtmp("isel", Kw, fn);
  460. r1 = newtmp("isel", Kw, fn);
  461. emit(Oor, Kw, i.to, r0, r1);
  462. emit(Oflagfuo, k, r1, R, R);
  463. i.to = r0;
  464. break;
  465. }
  466. swap = cmpswap(i.arg, x);
  467. if (swap)
  468. x = cmpop(x);
  469. emit(Oflag+x, k, i.to, R, R);
  470. selcmp(i.arg, kc, swap, fn);
  471. break;
  472. }
  473. die("unknown instruction %s", optab[i.op].name);
  474. }
  475. while (i0>curi && --i0) {
  476. assert(rslot(i0->arg[0], fn) == -1);
  477. assert(rslot(i0->arg[1], fn) == -1);
  478. }
  479. }
  480. static Ins *
  481. flagi(Ins *i0, Ins *i)
  482. {
  483. while (i>i0) {
  484. i--;
  485. if (amd64_op[i->op].zflag)
  486. return i;
  487. if (amd64_op[i->op].lflag)
  488. continue;
  489. return 0;
  490. }
  491. return 0;
  492. }
  493. static Ins*
  494. selsel(Fn *fn, Blk *b, Ins *i, Num *tn)
  495. {
  496. Ref r, cr[2];
  497. int c, k, swap, gencmp, gencpy;
  498. Ins *isel0, *isel1, *fi;
  499. Tmp *t;
  500. assert(i->op == Osel1);
  501. for (isel0=i; b->ins<isel0; isel0--) {
  502. if (isel0->op == Osel0)
  503. break;
  504. assert(isel0->op == Osel1);
  505. }
  506. assert(isel0->op == Osel0);
  507. r = isel0->arg[0];
  508. assert(rtype(r) == RTmp);
  509. t = &fn->tmp[r.val];
  510. fi = flagi(b->ins, isel0);
  511. cr[0] = cr[1] = R;
  512. gencmp = gencpy = swap = 0;
  513. k = Kw;
  514. c = Cine;
  515. if (!fi || !req(fi->to, r)) {
  516. gencmp = 1;
  517. cr[0] = r;
  518. cr[1] = CON_Z;
  519. }
  520. else if (iscmp(fi->op, &k, &c)) {
  521. if (c == NCmpI+Cfeq
  522. || c == NCmpI+Cfne) {
  523. /* these are selected as 'and'
  524. * or 'or', so we check their
  525. * result with Cine
  526. */
  527. c = Cine;
  528. goto Other;
  529. }
  530. swap = cmpswap(fi->arg, c);
  531. if (swap)
  532. c = cmpop(c);
  533. if (t->nuse == 1) {
  534. gencmp = 1;
  535. cr[0] = fi->arg[0];
  536. cr[1] = fi->arg[1];
  537. *fi = (Ins){.op = Onop};
  538. }
  539. }
  540. else if (fi->op == Oand && t->nuse == 1
  541. && (rtype(fi->arg[0]) == RTmp ||
  542. rtype(fi->arg[1]) == RTmp)) {
  543. fi->op = Oxtest;
  544. fi->to = R;
  545. if (rtype(fi->arg[1]) == RCon) {
  546. r = fi->arg[1];
  547. fi->arg[1] = fi->arg[0];
  548. fi->arg[0] = r;
  549. }
  550. }
  551. else {
  552. Other:
  553. /* since flags are not tracked in liveness,
  554. * the result of the flag-setting instruction
  555. * has to be marked as live
  556. */
  557. if (t->nuse == 1)
  558. gencpy = 1;
  559. }
  560. /* generate conditional moves */
  561. for (isel1=i; isel0<isel1; --isel1) {
  562. isel1->op = Oxsel+c;
  563. sel(*isel1, tn, fn);
  564. }
  565. assert(!gencmp || !gencpy);
  566. if (gencmp)
  567. selcmp(cr, k, swap, fn);
  568. if (gencpy)
  569. emit(Ocopy, Kw, R, r, R);
  570. *isel0 = (Ins){.op = Onop};
  571. return isel0;
  572. }
  573. static void
  574. seljmp(Blk *b, Fn *fn)
  575. {
  576. Ref r;
  577. int c, k, swap;
  578. Ins *fi;
  579. Tmp *t;
  580. if (b->jmp.type == Jret0
  581. || b->jmp.type == Jjmp
  582. || b->jmp.type == Jhlt)
  583. return;
  584. assert(b->jmp.type == Jjnz);
  585. r = b->jmp.arg;
  586. t = &fn->tmp[r.val];
  587. b->jmp.arg = R;
  588. assert(rtype(r) == RTmp);
  589. if (b->s1 == b->s2) {
  590. chuse(r, -1, fn);
  591. b->jmp.type = Jjmp;
  592. b->s2 = 0;
  593. return;
  594. }
  595. fi = flagi(b->ins, &b->ins[b->nins]);
  596. if (!fi || !req(fi->to, r)) {
  597. selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn);
  598. b->jmp.type = Jjf + Cine;
  599. }
  600. else if (iscmp(fi->op, &k, &c)
  601. && c != NCmpI+Cfeq /* see sel(), selsel() */
  602. && c != NCmpI+Cfne) {
  603. swap = cmpswap(fi->arg, c);
  604. if (swap)
  605. c = cmpop(c);
  606. if (t->nuse == 1) {
  607. selcmp(fi->arg, k, swap, fn);
  608. *fi = (Ins){.op = Onop};
  609. }
  610. b->jmp.type = Jjf + c;
  611. }
  612. else if (fi->op == Oand && t->nuse == 1
  613. && (rtype(fi->arg[0]) == RTmp ||
  614. rtype(fi->arg[1]) == RTmp)) {
  615. fi->op = Oxtest;
  616. fi->to = R;
  617. b->jmp.type = Jjf + Cine;
  618. if (rtype(fi->arg[1]) == RCon) {
  619. r = fi->arg[1];
  620. fi->arg[1] = fi->arg[0];
  621. fi->arg[0] = r;
  622. }
  623. }
  624. else {
  625. /* since flags are not tracked in liveness,
  626. * the result of the flag-setting instruction
  627. * has to be marked as live
  628. */
  629. if (t->nuse == 1)
  630. emit(Ocopy, Kw, R, r, R);
  631. b->jmp.type = Jjf + Cine;
  632. }
  633. }
  634. enum {
  635. Pob,
  636. Pbis,
  637. Pois,
  638. Pobis,
  639. Pbi1,
  640. Pobi1,
  641. };
  642. /* mgen generated code
  643. *
  644. * (with-vars (o b i s)
  645. * (patterns
  646. * (ob (add (con o) (tmp b)))
  647. * (bis (add (tmp b) (mul (tmp i) (con s 1 2 4 8))))
  648. * (ois (add (con o) (mul (tmp i) (con s 1 2 4 8))))
  649. * (obis (add (con o) (tmp b) (mul (tmp i) (con s 1 2 4 8))))
  650. * (bi1 (add (tmp b) (tmp i)))
  651. * (obi1 (add (con o) (tmp b) (tmp i)))
  652. * ))
  653. */
  654. static int
  655. opn(int op, int l, int r)
  656. {
  657. static uchar Oaddtbl[91] = {
  658. 2,
  659. 2,2,
  660. 4,4,5,
  661. 6,6,8,8,
  662. 4,4,9,10,9,
  663. 7,7,5,8,9,5,
  664. 4,4,12,10,12,12,12,
  665. 4,4,9,10,9,9,12,9,
  666. 11,11,5,8,9,5,12,9,5,
  667. 7,7,5,8,9,5,12,9,5,5,
  668. 11,11,5,8,9,5,12,9,5,5,5,
  669. 4,4,9,10,9,9,12,9,9,9,9,9,
  670. 7,7,5,8,9,5,12,9,5,5,5,9,5,
  671. };
  672. int t;
  673. if (l < r)
  674. t = l, l = r, r = t;
  675. switch (op) {
  676. case Omul:
  677. if (2 <= l)
  678. if (r == 0) {
  679. return 3;
  680. }
  681. return 2;
  682. case Oadd:
  683. return Oaddtbl[(l + l*l)/2 + r];
  684. default:
  685. return 2;
  686. }
  687. }
  688. static int
  689. refn(Ref r, Num *tn, Con *con)
  690. {
  691. int64_t n;
  692. switch (rtype(r)) {
  693. case RTmp:
  694. if (!tn[r.val].n)
  695. tn[r.val].n = 2;
  696. return tn[r.val].n;
  697. case RCon:
  698. if (con[r.val].type != CBits)
  699. return 1;
  700. n = con[r.val].bits.i;
  701. if (n == 8 || n == 4 || n == 2 || n == 1)
  702. return 0;
  703. return 1;
  704. default:
  705. return INT_MIN;
  706. }
  707. }
  708. static bits match[13] = {
  709. [4] = BIT(Pob),
  710. [5] = BIT(Pbi1),
  711. [6] = BIT(Pob) | BIT(Pois),
  712. [7] = BIT(Pob) | BIT(Pobi1),
  713. [8] = BIT(Pbi1) | BIT(Pbis),
  714. [9] = BIT(Pbi1) | BIT(Pobi1),
  715. [10] = BIT(Pbi1) | BIT(Pbis) | BIT(Pobi1) | BIT(Pobis),
  716. [11] = BIT(Pob) | BIT(Pobi1) | BIT(Pobis),
  717. [12] = BIT(Pbi1) | BIT(Pobi1) | BIT(Pobis),
  718. };
  719. static uchar *matcher[] = {
  720. [Pbi1] = (uchar[]){
  721. 1,3,1,3,2,0
  722. },
  723. [Pbis] = (uchar[]){
  724. 5,1,8,5,27,1,5,1,2,5,13,3,1,1,3,3,3,2,0,1,
  725. 3,3,3,2,3,1,0,1,29
  726. },
  727. [Pob] = (uchar[]){
  728. 1,3,0,3,1,0
  729. },
  730. [Pobi1] = (uchar[]){
  731. 5,3,9,9,10,33,12,35,45,1,5,3,11,9,7,9,4,9,
  732. 17,1,3,0,3,1,3,2,0,3,1,1,3,0,34,1,37,1,5,2,
  733. 5,7,2,7,8,37,29,1,3,0,1,32
  734. },
  735. [Pobis] = (uchar[]){
  736. 5,2,10,7,11,19,49,1,1,3,3,3,2,1,3,0,3,1,0,
  737. 1,3,0,5,1,8,5,25,1,5,1,2,5,13,3,1,1,3,3,3,
  738. 2,0,1,3,3,3,2,26,1,51,1,5,1,6,5,9,1,3,0,51,
  739. 3,1,1,3,0,45
  740. },
  741. [Pois] = (uchar[]){
  742. 1,3,0,1,3,3,3,2,0
  743. },
  744. };
  745. /* end of generated code */
  746. static void
  747. anumber(Num *tn, Blk *b, Con *con)
  748. {
  749. Ins *i;
  750. Num *n;
  751. for (i=b->ins; i<&b->ins[b->nins]; i++) {
  752. if (rtype(i->to) != RTmp)
  753. continue;
  754. n = &tn[i->to.val];
  755. n->l = i->arg[0];
  756. n->r = i->arg[1];
  757. n->nl = refn(n->l, tn, con);
  758. n->nr = refn(n->r, tn, con);
  759. n->n = opn(i->op, n->nl, n->nr);
  760. }
  761. }
  762. static Ref
  763. adisp(Con *c, Num *tn, Ref r, Fn *fn, int s)
  764. {
  765. Ref v[2];
  766. int n;
  767. while (!req(r, R)) {
  768. assert(rtype(r) == RTmp);
  769. n = refn(r, tn, fn->con);
  770. if (!(match[n] & BIT(Pob)))
  771. break;
  772. runmatch(matcher[Pob], tn, r, v);
  773. assert(rtype(v[0]) == RCon);
  774. addcon(c, &fn->con[v[0].val], s);
  775. r = v[1];
  776. }
  777. return r;
  778. }
  779. static int
  780. amatch(Addr *a, Num *tn, Ref r, Fn *fn)
  781. {
  782. static int pat[] = {Pobis, Pobi1, Pbis, Pois, Pbi1, -1};
  783. Ref ro, rb, ri, rs, v[4];
  784. Con *c, co;
  785. int s, n, *p;
  786. if (rtype(r) != RTmp)
  787. return 0;
  788. n = refn(r, tn, fn->con);
  789. memset(v, 0, sizeof v);
  790. for (p=pat; *p>=0; p++)
  791. if (match[n] & BIT(*p)) {
  792. runmatch(matcher[*p], tn, r, v);
  793. break;
  794. }
  795. if (*p < 0)
  796. v[1] = r;
  797. memset(&co, 0, sizeof co);
  798. ro = v[0];
  799. rb = adisp(&co, tn, v[1], fn, 1);
  800. ri = v[2];
  801. rs = v[3];
  802. s = 1;
  803. if (*p < 0 && co.type != CUndef)
  804. if (amatch(a, tn, rb, fn))
  805. return addcon(&a->offset, &co, 1);
  806. if (!req(ro, R)) {
  807. assert(rtype(ro) == RCon);
  808. c = &fn->con[ro.val];
  809. if (!addcon(&co, c, 1))
  810. return 0;
  811. }
  812. if (!req(rs, R)) {
  813. assert(rtype(rs) == RCon);
  814. c = &fn->con[rs.val];
  815. assert(c->type == CBits);
  816. s = c->bits.i;
  817. }
  818. ri = adisp(&co, tn, ri, fn, s);
  819. *a = (Addr){co, rb, ri, s};
  820. if (rtype(ri) == RTmp)
  821. if (fn->tmp[ri.val].slot != -1) {
  822. if (a->scale != 1
  823. || fn->tmp[rb.val].slot != -1)
  824. return 0;
  825. a->base = ri;
  826. a->index = rb;
  827. }
  828. if (!req(a->base, R)) {
  829. assert(rtype(a->base) == RTmp);
  830. s = fn->tmp[a->base.val].slot;
  831. if (s != -1)
  832. a->base = SLOT(s);
  833. }
  834. return 1;
  835. }
  836. /* instruction selection
  837. * requires use counts (as given by parsing)
  838. */
  839. void
  840. amd64_isel(Fn *fn)
  841. {
  842. Blk *b, **sb;
  843. Ins *i;
  844. Phi *p;
  845. uint a;
  846. int n, al;
  847. int64_t sz;
  848. Num *num;
  849. /* assign slots to fast allocs */
  850. b = fn->start;
  851. /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
  852. for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
  853. for (i=b->ins; i<&b->ins[b->nins]; i++)
  854. if (i->op == al) {
  855. if (rtype(i->arg[0]) != RCon)
  856. break;
  857. sz = fn->con[i->arg[0].val].bits.i;
  858. if (sz < 0 || sz >= INT_MAX-15)
  859. err("invalid alloc size %"PRId64, sz);
  860. sz = (sz + n-1) & -n;
  861. sz /= 4;
  862. if (sz > INT_MAX - fn->slot)
  863. die("alloc too large");
  864. fn->tmp[i->to.val].slot = fn->slot;
  865. fn->slot += sz;
  866. fn->salign = 2 + al - Oalloc;
  867. *i = (Ins){.op = Onop};
  868. }
  869. /* process basic blocks */
  870. n = fn->ntmp;
  871. num = emalloc(n * sizeof num[0]);
  872. for (b=fn->start; b; b=b->link) {
  873. curi = &insb[NIns];
  874. for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
  875. for (p=(*sb)->phi; p; p=p->link) {
  876. for (a=0; p->blk[a] != b; a++)
  877. assert(a+1 < p->narg);
  878. fixarg(&p->arg[a], p->cls, 0, fn);
  879. }
  880. memset(num, 0, n * sizeof num[0]);
  881. anumber(num, b, fn->con);
  882. seljmp(b, fn);
  883. for (i=&b->ins[b->nins]; i!=b->ins;) {
  884. --i;
  885. assert(i->op != Osel0);
  886. if (i->op == Osel1)
  887. i = selsel(fn, b, i, num);
  888. else
  889. sel(*i, num, fn);
  890. }
  891. idup(b, curi, &insb[NIns]-curi);
  892. }
  893. free(num);
  894. if (debug['I']) {
  895. fprintf(stderr, "\n> After instruction selection:\n");
  896. printfn(fn, stderr);
  897. }
  898. }