gvn.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. #include "all.h"
  2. Ref con01[2];
  3. static inline uint
  4. mix(uint x0, uint x1)
  5. {
  6. return x0 + 17*x1;
  7. }
  8. static inline uint
  9. rhash(Ref r)
  10. {
  11. return mix(r.type, r.val);
  12. }
  13. static uint
  14. ihash(Ins *i)
  15. {
  16. uint h;
  17. h = mix(i->op, i->cls);
  18. h = mix(h, rhash(i->arg[0]));
  19. h = mix(h, rhash(i->arg[1]));
  20. return h;
  21. }
  22. static int
  23. ieq(Ins *ia, Ins *ib)
  24. {
  25. if (ia->op == ib->op)
  26. if (ia->cls == ib->cls)
  27. if (req(ia->arg[0], ib->arg[0]))
  28. if (req(ia->arg[1], ib->arg[1]))
  29. return 1;
  30. return 0;
  31. }
  32. static Ins **gvntbl;
  33. static uint gvntbln;
  34. static Ins *
  35. gvndup(Ins *i, int insert)
  36. {
  37. uint idx, n;
  38. Ins *ii;
  39. idx = ihash(i) % gvntbln;
  40. for (n=1;; n++) {
  41. ii = gvntbl[idx];
  42. if (!ii)
  43. break;
  44. if (ieq(i, ii))
  45. return ii;
  46. idx++;
  47. if (gvntbln <= idx)
  48. idx = 0;
  49. }
  50. if (insert)
  51. gvntbl[idx] = i;
  52. return 0;
  53. }
  54. static void
  55. replaceuse(Fn *fn, Use *u, Ref r1, Ref r2)
  56. {
  57. Blk *b;
  58. Ins *i;
  59. Phi *p;
  60. Ref *pr;
  61. Tmp *t2;
  62. int n;
  63. t2 = 0;
  64. if (rtype(r2) == RTmp)
  65. t2 = &fn->tmp[r2.val];
  66. b = fn->rpo[u->bid];
  67. switch (u->type) {
  68. case UPhi:
  69. p = u->u.phi;
  70. for (pr=p->arg; pr<&p->arg[p->narg]; pr++)
  71. if (req(*pr, r1))
  72. *pr = r2;
  73. if (t2)
  74. adduse(t2, UPhi, b, p);
  75. break;
  76. case UIns:
  77. i = u->u.ins;
  78. for (n=0; n<2; n++)
  79. if (req(i->arg[n], r1))
  80. i->arg[n] = r2;
  81. if (t2)
  82. adduse(t2, UIns, b, i);
  83. break;
  84. case UJmp:
  85. if (req(b->jmp.arg, r1))
  86. b->jmp.arg = r2;
  87. if (t2)
  88. adduse(t2, UJmp, b);
  89. break;
  90. case UXXX:
  91. die("unreachable");
  92. }
  93. }
  94. static void
  95. replaceuses(Fn *fn, Ref r1, Ref r2)
  96. {
  97. Tmp *t1;
  98. Use *u;
  99. assert(rtype(r1) == RTmp);
  100. t1 = &fn->tmp[r1.val];
  101. for (u=t1->use; u<&t1->use[t1->nuse]; u++)
  102. replaceuse(fn, u, r1, r2);
  103. t1->nuse = 0;
  104. }
  105. static void
  106. dedupphi(Fn *fn, Blk *b)
  107. {
  108. Phi *p, **pp;
  109. Ref r;
  110. for (pp=&b->phi; (p=*pp);) {
  111. r = phicopyref(fn, b, p);
  112. if (!req(r, R)) {
  113. replaceuses(fn, p->to, r);
  114. p->to = R;
  115. *pp = p->link;
  116. } else
  117. pp = &p->link;
  118. }
  119. }
  120. static int
  121. rcmp(Ref a, Ref b)
  122. {
  123. if (rtype(a) != rtype(b))
  124. return rtype(a) - rtype(b);
  125. return a.val - b.val;
  126. }
  127. static void
  128. normins(Fn *fn, Ins *i)
  129. {
  130. uint n;
  131. int64_t v;
  132. Ref r;
  133. /* truncate constant bits to
  134. * 32 bits for s/w uses */
  135. for (n=0; n<2; n++) {
  136. if (!KWIDE(argcls(i, n)))
  137. if (isconbits(fn, i->arg[n], &v))
  138. if ((v & 0xffffffff) != v)
  139. i->arg[n] = getcon(v & 0xffffffff, fn);
  140. }
  141. /* order arg[0] <= arg[1] for
  142. * commutative ops, preferring
  143. * RTmp in arg[0] */
  144. if (optab[i->op].commutes)
  145. if (rcmp(i->arg[0], i->arg[1]) > 0) {
  146. r = i->arg[1];
  147. i->arg[1] = i->arg[0];
  148. i->arg[0] = r;
  149. }
  150. }
  151. static int
  152. negcon(int cls, Con *c)
  153. {
  154. static Con z = {.type = CBits, .bits.i = 0};
  155. return foldint(c, Osub, cls, &z, c);
  156. }
  157. static void
  158. assoccon(Fn *fn, Blk *b, Ins *i1)
  159. {
  160. Tmp *t2;
  161. Ins *i2;
  162. int op, fail;
  163. Con c, c1, c2;
  164. op = i1->op;
  165. if (op == Osub)
  166. op = Oadd;
  167. if (!optab[op].assoc
  168. || KBASE(i1->cls) != 0
  169. || rtype(i1->arg[0]) != RTmp
  170. || rtype(i1->arg[1]) != RCon)
  171. return;
  172. c1 = fn->con[i1->arg[1].val];
  173. t2 = &fn->tmp[i1->arg[0].val];
  174. if (t2->def == 0)
  175. return;
  176. i2 = t2->def;
  177. if (op != (i2->op == Osub ? Oadd : i2->op)
  178. || rtype(i2->arg[1]) != RCon)
  179. return;
  180. c2 = fn->con[i2->arg[1].val];
  181. assert(KBASE(i2->cls) == 0);
  182. assert(KWIDE(i2->cls) >= KWIDE(i1->cls));
  183. if (i1->op == Osub && negcon(i1->cls, &c1))
  184. return;
  185. if (i2->op == Osub && negcon(i2->cls, &c2))
  186. return;
  187. if (foldint(&c, op, i1->cls, &c1, &c2))
  188. return;
  189. if (op == Oadd && c.type == CBits)
  190. if ((i1->cls == Kl && c.bits.i < 0)
  191. || (i1->cls == Kw && (int32_t)c.bits.i < 0)) {
  192. fail = negcon(i1->cls, &c);
  193. assert(fail == 0);
  194. op = Osub;
  195. }
  196. i1->op = op;
  197. i1->arg[0] = i2->arg[0];
  198. i1->arg[1] = newcon(&c, fn);
  199. adduse(&fn->tmp[i1->arg[0].val], UIns, b, i1);
  200. }
  201. static void
  202. killins(Fn *fn, Ins *i, Ref r)
  203. {
  204. replaceuses(fn, i->to, r);
  205. *i = (Ins){.op = Onop};
  206. }
  207. static void
  208. dedupins(Fn *fn, Blk *b, Ins *i)
  209. {
  210. Ref r;
  211. Ins *i1;
  212. normins(fn, i);
  213. if (i->op == Onop || pinned(i))
  214. return;
  215. /* when sel instructions are inserted
  216. * before gvn, we may want to optimize
  217. * them here */
  218. assert(i->op != Osel0);
  219. assert(!req(i->to, R));
  220. assoccon(fn, b, i);
  221. r = copyref(fn, b, i);
  222. if (!req(r, R)) {
  223. killins(fn, i, r);
  224. return;
  225. }
  226. r = foldref(fn, i);
  227. if (!req(r, R)) {
  228. killins(fn, i, r);
  229. return;
  230. }
  231. i1 = gvndup(i, 1);
  232. if (i1) {
  233. killins(fn, i, i1->to);
  234. return;
  235. }
  236. }
  237. int
  238. cmpeqz(Fn *fn, Ref r, Ref *arg, int *cls, int *eqval)
  239. {
  240. Ins *i;
  241. if (rtype(r) != RTmp)
  242. return 0;
  243. i = fn->tmp[r.val].def;
  244. if (i)
  245. if (optab[i->op].cmpeqwl)
  246. if (req(i->arg[1], CON_Z)) {
  247. *arg = i->arg[0];
  248. *cls = argcls(i, 0);
  249. *eqval = optab[i->op].eqval;
  250. return 1;
  251. }
  252. return 0;
  253. }
  254. static int
  255. branchdom(Fn *fn, Blk *bif, Blk *bbr1, Blk *bbr2, Blk *b)
  256. {
  257. assert(bif->jmp.type == Jjnz);
  258. if (b != bif
  259. && dom(bbr1, b)
  260. && !reachesnotvia(fn, bbr2, b, bif))
  261. return 1;
  262. return 0;
  263. }
  264. static int
  265. domzero(Fn *fn, Blk *d, Blk *b, int *z)
  266. {
  267. if (branchdom(fn, d, d->s1, d->s2, b)) {
  268. *z = 0;
  269. return 1;
  270. }
  271. if (branchdom(fn, d, d->s2, d->s1, b)) {
  272. *z = 1;
  273. return 1;
  274. }
  275. return 0;
  276. }
  277. /* infer 0/non-0 value from dominating jnz */
  278. int
  279. zeroval(Fn *fn, Blk *b, Ref r, int cls, int *z)
  280. {
  281. Blk *d;
  282. Ref arg;
  283. int cls1, eqval;
  284. for (d=b->idom; d; d=d->idom) {
  285. if (d->jmp.type != Jjnz)
  286. continue;
  287. if (req(r, d->jmp.arg)
  288. && cls == Kw
  289. && domzero(fn, d, b, z)) {
  290. return 1;
  291. }
  292. if (cmpeqz(fn, d->jmp.arg, &arg, &cls1, &eqval)
  293. && req(r, arg)
  294. && cls == cls1
  295. && domzero(fn, d, b, z)) {
  296. *z ^= eqval;
  297. return 1;
  298. }
  299. }
  300. return 0;
  301. }
  302. static int
  303. usecls(Use *u, Ref r, int cls)
  304. {
  305. int k;
  306. switch (u->type) {
  307. case UIns:
  308. k = Kx; /* widest use */
  309. if (req(u->u.ins->arg[0], r))
  310. k = argcls(u->u.ins, 0);
  311. if (req(u->u.ins->arg[1], r))
  312. if (k == Kx || !KWIDE(k))
  313. k = argcls(u->u.ins, 1);
  314. return k == Kx ? cls : k;
  315. case UPhi:
  316. if (req(u->u.phi->to, R))
  317. return cls; /* eliminated */
  318. return u->u.phi->cls;
  319. case UJmp:
  320. return Kw;
  321. default:
  322. break;
  323. }
  324. die("unreachable");
  325. }
  326. static void
  327. propjnz0(Fn *fn, Blk *bif, Blk *s0, Blk *snon0, Ref r, int cls)
  328. {
  329. Blk *b;
  330. Tmp *t;
  331. Use *u;
  332. if (s0->npred != 1 || rtype(r) != RTmp)
  333. return;
  334. t = &fn->tmp[r.val];
  335. for (u=t->use; u<&t->use[t->nuse]; u++) {
  336. b = fn->rpo[u->bid];
  337. /* we may compare an l temp with a w
  338. * comparison; so check that the use
  339. * does not involve high bits */
  340. if (usecls(u, r, cls) == cls)
  341. if (branchdom(fn, bif, s0, snon0, b))
  342. replaceuse(fn, u, r, CON_Z);
  343. }
  344. }
  345. static void
  346. dedupjmp(Fn *fn, Blk *b)
  347. {
  348. Blk **ps;
  349. int64_t v;
  350. Ref arg;
  351. int cls, eqval, z;
  352. if (b->jmp.type != Jjnz)
  353. return;
  354. /* propagate jmp arg as 0 through s2 */
  355. propjnz0(fn, b, b->s2, b->s1, b->jmp.arg, Kw);
  356. /* propagate cmp eq/ne 0 def of jmp arg as 0 */
  357. if (cmpeqz(fn, b->jmp.arg, &arg, &cls, &eqval)) {
  358. ps = (Blk*[]){b->s1, b->s2};
  359. propjnz0(fn, b, ps[eqval^1], ps[eqval], arg, cls);
  360. }
  361. /* collapse trivial/constant jnz to jmp */
  362. v = 1;
  363. z = 0;
  364. if (b->s1 == b->s2
  365. || isconbits(fn, b->jmp.arg, &v)
  366. || zeroval(fn, b, b->jmp.arg, Kw, &z)) {
  367. if (v == 0 || z)
  368. b->s1 = b->s2;
  369. /* we later move active ins out of dead blks */
  370. b->s2 = 0;
  371. b->jmp.type = Jjmp;
  372. b->jmp.arg = R;
  373. }
  374. }
  375. static void
  376. rebuildcfg(Fn *fn)
  377. {
  378. uint n, nblk;
  379. Blk *b, *s, **rpo;
  380. Ins *i;
  381. nblk = fn->nblk;
  382. rpo = emalloc(nblk * sizeof rpo[0]);
  383. memcpy(rpo, fn->rpo, nblk * sizeof rpo[0]);
  384. fillcfg(fn);
  385. /* move instructions that were in
  386. * killed blocks and may be active
  387. * in the computation in the start
  388. * block */
  389. s = fn->start;
  390. for (n=0; n<nblk; n++) {
  391. b = rpo[n];
  392. if (b->id != -1u)
  393. continue;
  394. /* blk unreachable after GVN */
  395. assert(b != s);
  396. for (i=b->ins; i<&b->ins[b->nins]; i++)
  397. if (!optab[i->op].pinned)
  398. if (gvndup(i, 0) == i)
  399. addins(&s->ins, &s->nins, i);
  400. }
  401. free(rpo);
  402. }
  403. /* requires rpo pred ssa use
  404. * recreates rpo preds
  405. * breaks pred use dom ssa (GCM fixes ssa)
  406. */
  407. void
  408. gvn(Fn *fn)
  409. {
  410. Blk *b;
  411. Phi *p;
  412. Ins *i;
  413. uint n, nins;
  414. con01[0] = getcon(0, fn);
  415. con01[1] = getcon(1, fn);
  416. /* copy.c uses the visit bit */
  417. for (b=fn->start; b; b=b->link)
  418. for (p=b->phi; p; p=p->link)
  419. p->visit = 0;
  420. fillloop(fn);
  421. narrowpars(fn);
  422. filluse(fn);
  423. ssacheck(fn);
  424. nins = 0;
  425. for (b=fn->start; b; b=b->link) {
  426. b->visit = 0;
  427. nins += b->nins;
  428. }
  429. gvntbln = nins + nins/2;
  430. gvntbl = emalloc(gvntbln * sizeof gvntbl[0]);
  431. for (n=0; n<fn->nblk; n++) {
  432. b = fn->rpo[n];
  433. dedupphi(fn, b);
  434. for (i=b->ins; i<&b->ins[b->nins]; i++)
  435. dedupins(fn, b, i);
  436. dedupjmp(fn, b);
  437. }
  438. rebuildcfg(fn);
  439. free(gvntbl);
  440. gvntbl = 0;
  441. if (debug['G']) {
  442. fprintf(stderr, "\n> After GVN:\n");
  443. printfn(fn, stderr);
  444. }
  445. }