load.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. #include "all.h"
  2. #define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */
  3. typedef struct Loc Loc;
  4. typedef struct Slice Slice;
  5. typedef struct Insert Insert;
  6. struct Loc {
  7. enum {
  8. LRoot, /* right above the original load */
  9. LLoad, /* inserting a load is allowed */
  10. LNoLoad, /* only scalar operations allowed */
  11. } type;
  12. uint off;
  13. Blk *blk;
  14. };
  15. struct Slice {
  16. Ref ref;
  17. int off;
  18. short sz;
  19. short cls; /* load class */
  20. };
  21. struct Insert {
  22. uint isphi:1;
  23. uint num:31;
  24. uint bid;
  25. uint off;
  26. union {
  27. Ins ins;
  28. struct {
  29. Slice m;
  30. Phi *p;
  31. } phi;
  32. } new;
  33. };
  34. static Fn *curf;
  35. static uint inum; /* current insertion number */
  36. static Insert *ilog; /* global insertion log */
  37. static uint nlog; /* number of entries in the log */
  38. int
  39. loadsz(Ins *l)
  40. {
  41. switch (l->op) {
  42. case Oloadsb: case Oloadub: return 1;
  43. case Oloadsh: case Oloaduh: return 2;
  44. case Oloadsw: case Oloaduw: return 4;
  45. case Oload: return KWIDE(l->cls) ? 8 : 4;
  46. }
  47. die("unreachable");
  48. }
  49. int
  50. storesz(Ins *s)
  51. {
  52. switch (s->op) {
  53. case Ostoreb: return 1;
  54. case Ostoreh: return 2;
  55. case Ostorew: case Ostores: return 4;
  56. case Ostorel: case Ostored: return 8;
  57. }
  58. die("unreachable");
  59. }
  60. static Ref
  61. iins(int cls, int op, Ref a0, Ref a1, Loc *l)
  62. {
  63. Insert *ist;
  64. vgrow(&ilog, ++nlog);
  65. ist = &ilog[nlog-1];
  66. ist->isphi = 0;
  67. ist->num = inum++;
  68. ist->bid = l->blk->id;
  69. ist->off = l->off;
  70. ist->new.ins = (Ins){op, cls, R, {a0, a1}};
  71. return ist->new.ins.to = newtmp("ld", cls, curf);
  72. }
  73. static void
  74. cast(Ref *r, int cls, Loc *l)
  75. {
  76. int cls0;
  77. if (rtype(*r) == RCon)
  78. return;
  79. assert(rtype(*r) == RTmp);
  80. cls0 = curf->tmp[r->val].cls;
  81. if (cls0 == cls || (cls == Kw && cls0 == Kl))
  82. return;
  83. if (KWIDE(cls0) < KWIDE(cls)) {
  84. if (cls0 == Ks)
  85. *r = iins(Kw, Ocast, *r, R, l);
  86. *r = iins(Kl, Oextuw, *r, R, l);
  87. if (cls == Kd)
  88. *r = iins(Kd, Ocast, *r, R, l);
  89. } else {
  90. if (cls0 == Kd && cls != Kl)
  91. *r = iins(Kl, Ocast, *r, R, l);
  92. if (cls0 != Kd || cls != Kw)
  93. *r = iins(cls, Ocast, *r, R, l);
  94. }
  95. }
  96. static inline void
  97. mask(int cls, Ref *r, bits msk, Loc *l)
  98. {
  99. cast(r, cls, l);
  100. *r = iins(cls, Oand, *r, getcon(msk, curf), l);
  101. }
  102. static Ref
  103. load(Slice sl, bits msk, Loc *l)
  104. {
  105. Alias *a;
  106. Ref r, r1;
  107. int ld, cls, all;
  108. Con c;
  109. ld = (int[]){
  110. [1] = Oloadub,
  111. [2] = Oloaduh,
  112. [4] = Oloaduw,
  113. [8] = Oload
  114. }[sl.sz];
  115. all = msk == MASK(sl.sz);
  116. if (all)
  117. cls = sl.cls;
  118. else
  119. cls = sl.sz > 4 ? Kl : Kw;
  120. r = sl.ref;
  121. /* sl.ref might not be live here,
  122. * but its alias base ref will be
  123. * (see killsl() below) */
  124. if (rtype(r) == RTmp) {
  125. a = &curf->tmp[r.val].alias;
  126. switch (a->type) {
  127. default:
  128. die("unreachable");
  129. case ALoc:
  130. case AEsc:
  131. case AUnk:
  132. r = TMP(a->base);
  133. if (!a->offset)
  134. break;
  135. r1 = getcon(a->offset, curf);
  136. r = iins(Kl, Oadd, r, r1, l);
  137. break;
  138. case ACon:
  139. case ASym:
  140. memset(&c, 0, sizeof c);
  141. c.type = CAddr;
  142. c.sym = a->u.sym;
  143. c.bits.i = a->offset;
  144. r = newcon(&c, curf);
  145. break;
  146. }
  147. }
  148. r = iins(cls, ld, r, R, l);
  149. if (!all)
  150. mask(cls, &r, msk, l);
  151. return r;
  152. }
  153. static int
  154. killsl(Ref r, Slice sl)
  155. {
  156. Alias *a;
  157. if (rtype(sl.ref) != RTmp)
  158. return 0;
  159. a = &curf->tmp[sl.ref.val].alias;
  160. switch (a->type) {
  161. default: die("unreachable");
  162. case ALoc:
  163. case AEsc:
  164. case AUnk: return req(TMP(a->base), r);
  165. case ACon:
  166. case ASym: return 0;
  167. }
  168. }
  169. /* returns a ref containing the contents of the slice
  170. * passed as argument, all the bits set to 0 in the
  171. * mask argument are zeroed in the result;
  172. * the returned ref has an integer class when the
  173. * mask does not cover all the bits of the slice,
  174. * otherwise, it has class sl.cls
  175. * the procedure returns R when it fails */
  176. static Ref
  177. def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
  178. {
  179. Slice sl1;
  180. Blk *bp;
  181. bits msk1, msks;
  182. int off, cls, cls1, op, sz, ld;
  183. uint np, oldl, oldt;
  184. Ref r, r1;
  185. Phi *p;
  186. Insert *ist;
  187. Loc l;
  188. /* invariants:
  189. * -1- b dominates il->blk; so we can use
  190. * temporaries of b in il->blk
  191. * -2- if il->type != LNoLoad, then il->blk
  192. * postdominates the original load; so it
  193. * is safe to load in il->blk
  194. * -3- if il->type != LNoLoad, then b
  195. * postdominates il->blk (and by 2, the
  196. * original load)
  197. */
  198. assert(dom(b, il->blk));
  199. oldl = nlog;
  200. oldt = curf->ntmp;
  201. if (0) {
  202. Load:
  203. curf->ntmp = oldt;
  204. nlog = oldl;
  205. if (il->type != LLoad)
  206. return R;
  207. return load(sl, msk, il);
  208. }
  209. if (!i)
  210. i = &b->ins[b->nins];
  211. cls = sl.sz > 4 ? Kl : Kw;
  212. msks = MASK(sl.sz);
  213. while (i > b->ins) {
  214. --i;
  215. if (killsl(i->to, sl)
  216. || (i->op == Ocall && escapes(sl.ref, curf)))
  217. goto Load;
  218. ld = isload(i->op);
  219. if (ld) {
  220. sz = loadsz(i);
  221. r1 = i->arg[0];
  222. r = i->to;
  223. } else if (isstore(i->op)) {
  224. sz = storesz(i);
  225. r1 = i->arg[1];
  226. r = i->arg[0];
  227. } else if (i->op == Oblit1) {
  228. assert(rtype(i->arg[0]) == RInt);
  229. sz = abs(rsval(i->arg[0]));
  230. assert(i > b->ins);
  231. --i;
  232. assert(i->op == Oblit0);
  233. r1 = i->arg[1];
  234. } else
  235. continue;
  236. switch (alias(sl.ref, sl.off, sl.sz, r1, sz, &off, curf)) {
  237. case MustAlias:
  238. if (i->op == Oblit0) {
  239. sl1 = sl;
  240. sl1.ref = i->arg[0];
  241. if (off >= 0) {
  242. assert(off < sz);
  243. sl1.off = off;
  244. sz -= off;
  245. off = 0;
  246. } else {
  247. sl1.off = 0;
  248. sl1.sz += off;
  249. }
  250. if (sz > sl1.sz)
  251. sz = sl1.sz;
  252. assert(sz <= 8);
  253. sl1.sz = sz;
  254. }
  255. if (off < 0) {
  256. off = -off;
  257. msk1 = (MASK(sz) << 8*off) & msks;
  258. op = Oshl;
  259. } else {
  260. msk1 = (MASK(sz) >> 8*off) & msks;
  261. op = Oshr;
  262. }
  263. if ((msk1 & msk) == 0)
  264. continue;
  265. if (i->op == Oblit0) {
  266. r = def(sl1, MASK(sz), b, i, il);
  267. if (req(r, R))
  268. goto Load;
  269. }
  270. if (off) {
  271. cls1 = cls;
  272. if (op == Oshr && off + sl.sz > 4)
  273. cls1 = Kl;
  274. cast(&r, cls1, il);
  275. r1 = getcon(8*off, curf);
  276. r = iins(cls1, op, r, r1, il);
  277. }
  278. if ((msk1 & msk) != msk1 || off + sz < sl.sz)
  279. mask(cls, &r, msk1 & msk, il);
  280. if ((msk & ~msk1) != 0) {
  281. r1 = def(sl, msk & ~msk1, b, i, il);
  282. if (req(r1, R))
  283. goto Load;
  284. r = iins(cls, Oor, r, r1, il);
  285. }
  286. if (msk == msks)
  287. cast(&r, sl.cls, il);
  288. return r;
  289. case MayAlias:
  290. if (ld)
  291. continue;
  292. else
  293. goto Load;
  294. case NoAlias:
  295. continue;
  296. default:
  297. die("unreachable");
  298. }
  299. }
  300. for (ist=ilog; ist<&ilog[nlog]; ++ist)
  301. if (ist->isphi && ist->bid == b->id)
  302. if (req(ist->new.phi.m.ref, sl.ref))
  303. if (ist->new.phi.m.off == sl.off)
  304. if (ist->new.phi.m.sz == sl.sz) {
  305. r = ist->new.phi.p->to;
  306. if (msk != msks)
  307. mask(cls, &r, msk, il);
  308. else
  309. cast(&r, sl.cls, il);
  310. return r;
  311. }
  312. for (p=b->phi; p; p=p->link)
  313. if (killsl(p->to, sl))
  314. /* scanning predecessors in that
  315. * case would be unsafe */
  316. goto Load;
  317. if (b->npred == 0)
  318. goto Load;
  319. if (b->npred == 1) {
  320. bp = b->pred[0];
  321. assert(bp->loop >= il->blk->loop);
  322. l = *il;
  323. if (bp->s2)
  324. l.type = LNoLoad;
  325. r1 = def(sl, msk, bp, 0, &l);
  326. if (req(r1, R))
  327. goto Load;
  328. return r1;
  329. }
  330. r = newtmp("ld", sl.cls, curf);
  331. p = alloc(sizeof *p);
  332. vgrow(&ilog, ++nlog);
  333. ist = &ilog[nlog-1];
  334. ist->isphi = 1;
  335. ist->bid = b->id;
  336. ist->new.phi.m = sl;
  337. ist->new.phi.p = p;
  338. p->to = r;
  339. p->cls = sl.cls;
  340. p->narg = b->npred;
  341. p->arg = vnew(p->narg, sizeof p->arg[0], PFn);
  342. p->blk = vnew(p->narg, sizeof p->blk[0], PFn);
  343. for (np=0; np<b->npred; ++np) {
  344. bp = b->pred[np];
  345. if (!bp->s2
  346. && il->type != LNoLoad
  347. && bp->loop < il->blk->loop)
  348. l.type = LLoad;
  349. else
  350. l.type = LNoLoad;
  351. l.blk = bp;
  352. l.off = bp->nins;
  353. r1 = def(sl, msks, bp, 0, &l);
  354. if (req(r1, R))
  355. goto Load;
  356. p->arg[np] = r1;
  357. p->blk[np] = bp;
  358. }
  359. if (msk != msks)
  360. mask(cls, &r, msk, il);
  361. return r;
  362. }
  363. static int
  364. icmp(const void *pa, const void *pb)
  365. {
  366. Insert *a, *b;
  367. int c;
  368. a = (Insert *)pa;
  369. b = (Insert *)pb;
  370. if ((c = a->bid - b->bid))
  371. return c;
  372. if (a->isphi && b->isphi)
  373. return 0;
  374. if (a->isphi)
  375. return -1;
  376. if (b->isphi)
  377. return +1;
  378. if ((c = a->off - b->off))
  379. return c;
  380. return a->num - b->num;
  381. }
  382. /* require rpo ssa alias */
  383. void
  384. loadopt(Fn *fn)
  385. {
  386. Ins *i, *ib;
  387. Blk *b;
  388. int sz;
  389. uint n, ni, ext, nt;
  390. Insert *ist;
  391. Slice sl;
  392. Loc l;
  393. curf = fn;
  394. ilog = vnew(0, sizeof ilog[0], PHeap);
  395. nlog = 0;
  396. inum = 0;
  397. for (b=fn->start; b; b=b->link)
  398. for (i=b->ins; i<&b->ins[b->nins]; ++i) {
  399. if (!isload(i->op))
  400. continue;
  401. sz = loadsz(i);
  402. sl = (Slice){i->arg[0], 0, sz, i->cls};
  403. l = (Loc){LRoot, i-b->ins, b};
  404. i->arg[1] = def(sl, MASK(sz), b, i, &l);
  405. }
  406. qsort(ilog, nlog, sizeof ilog[0], icmp);
  407. vgrow(&ilog, nlog+1);
  408. ilog[nlog].bid = fn->nblk; /* add a sentinel */
  409. ib = vnew(0, sizeof(Ins), PHeap);
  410. for (ist=ilog, n=0; n<fn->nblk; ++n) {
  411. b = fn->rpo[n];
  412. for (; ist->bid == n && ist->isphi; ++ist) {
  413. ist->new.phi.p->link = b->phi;
  414. b->phi = ist->new.phi.p;
  415. }
  416. ni = 0;
  417. nt = 0;
  418. for (;;) {
  419. if (ist->bid == n && ist->off == ni)
  420. i = &ist++->new.ins;
  421. else {
  422. if (ni == b->nins)
  423. break;
  424. i = &b->ins[ni++];
  425. if (isload(i->op)
  426. && !req(i->arg[1], R)) {
  427. ext = Oextsb + i->op - Oloadsb;
  428. switch (i->op) {
  429. default:
  430. die("unreachable");
  431. case Oloadsb:
  432. case Oloadub:
  433. case Oloadsh:
  434. case Oloaduh:
  435. i->op = ext;
  436. break;
  437. case Oloadsw:
  438. case Oloaduw:
  439. if (i->cls == Kl) {
  440. i->op = ext;
  441. break;
  442. }
  443. /* fall through */
  444. case Oload:
  445. i->op = Ocopy;
  446. break;
  447. }
  448. i->arg[0] = i->arg[1];
  449. i->arg[1] = R;
  450. }
  451. }
  452. vgrow(&ib, ++nt);
  453. ib[nt-1] = *i;
  454. }
  455. b->nins = nt;
  456. idup(&b->ins, ib, nt);
  457. }
  458. vfree(ib);
  459. vfree(ilog);
  460. if (debug['M']) {
  461. fprintf(stderr, "\n> After load elimination:\n");
  462. printfn(fn, stderr);
  463. }
  464. }