copy.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. #include "all.h"
  2. typedef struct Ext Ext;
  3. struct Ext {
  4. char zext;
  5. char nopw; /* is a no-op if arg width is <= nopw */
  6. char usew; /* uses only the low usew bits of arg */
  7. };
  8. static int
  9. ext(Ins *i, Ext *e)
  10. {
  11. static Ext tbl[] = {
  12. /*extsb*/ {0, 7, 8},
  13. /*extub*/ {1, 8, 8},
  14. /*extsh*/ {0, 15, 16},
  15. /*extuh*/ {1, 16, 16},
  16. /*extsw*/ {0, 31, 32},
  17. /*extuw*/ {1, 32, 32},
  18. };
  19. if (!isext(i->op))
  20. return 0;
  21. *e = tbl[i->op - Oextsb];
  22. return 1;
  23. }
  24. static int
  25. bitwidth(uint64_t v)
  26. {
  27. int n;
  28. n = 0;
  29. if (v >> 32) { n += 32; v >>= 32; }
  30. if (v >> 16) { n += 16; v >>= 16; }
  31. if (v >> 8) { n += 8; v >>= 8; }
  32. if (v >> 4) { n += 4; v >>= 4; }
  33. if (v >> 2) { n += 2; v >>= 2; }
  34. if (v >> 1) { n += 1; v >>= 1; }
  35. return n+v;
  36. }
  37. /* no more than w bits are used */
  38. static int
  39. usewidthle(Fn *fn, Ref r, int w)
  40. {
  41. Ext e;
  42. Tmp *t;
  43. Use *u;
  44. Phi *p;
  45. Ins *i;
  46. Ref rc;
  47. int64_t v;
  48. int b;
  49. assert(rtype(r) == RTmp);
  50. t = &fn->tmp[r.val];
  51. for (u=t->use; u<&t->use[t->nuse]; u++) {
  52. switch (u->type) {
  53. case UPhi:
  54. p = u->u.phi;
  55. /* during gvn, phi nodes may be
  56. * replaced by other temps; in
  57. * this case, the replaced phi
  58. * uses are added to the
  59. * replacement temp uses and
  60. * Phi.to is set to R */
  61. if (p->visit || req(p->to, R))
  62. continue;
  63. p->visit = 1;
  64. b = usewidthle(fn, p->to, w);
  65. p->visit = 0;
  66. if (b)
  67. continue;
  68. break;
  69. case UIns:
  70. i = u->u.ins;
  71. assert(i != 0);
  72. if (i->op == Ocopy)
  73. if (usewidthle(fn, i->to, w))
  74. continue;
  75. if (ext(i, &e)) {
  76. if (e.usew <= w)
  77. continue;
  78. if (usewidthle(fn, i->to, w))
  79. continue;
  80. }
  81. if (i->op == Oand) {
  82. if (req(r, i->arg[0]))
  83. rc = i->arg[1];
  84. else {
  85. assert(req(r, i->arg[1]));
  86. rc = i->arg[0];
  87. }
  88. if (isconbits(fn, rc, &v)
  89. && bitwidth(v) <= w)
  90. continue;
  91. break;
  92. }
  93. break;
  94. default:
  95. break;
  96. }
  97. return 0;
  98. }
  99. return 1;
  100. }
  101. static int
  102. min(int v1, int v2)
  103. {
  104. return v1 < v2 ? v1 : v2;
  105. }
  106. /* is the ref narrower than w bits */
  107. static int
  108. defwidthle(Fn *fn, Ref r, int w)
  109. {
  110. Ext e;
  111. Tmp *t;
  112. Phi *p;
  113. Ins *i;
  114. uint n;
  115. int64_t v;
  116. int x;
  117. if (isconbits(fn, r, &v)
  118. && bitwidth(v) <= w)
  119. return 1;
  120. if (rtype(r) != RTmp)
  121. return 0;
  122. t = &fn->tmp[r.val];
  123. if (t->cls != Kw)
  124. return 0;
  125. if (!t->def) {
  126. /* phi def */
  127. for (p=fn->rpo[t->bid]->phi; p; p=p->link)
  128. if (req(p->to, r))
  129. break;
  130. assert(p);
  131. if (p->visit)
  132. return 1;
  133. p->visit = 1;
  134. for (n=0; n<p->narg; n++)
  135. if (!defwidthle(fn, p->arg[n], w)) {
  136. p->visit = 0;
  137. return 0;
  138. }
  139. p->visit = 0;
  140. return 1;
  141. }
  142. i = t->def;
  143. if (i->op == Ocopy)
  144. return defwidthle(fn, i->arg[0], w);
  145. if (i->op == Oshr || i->op == Osar) {
  146. if (isconbits(fn, i->arg[1], &v))
  147. if (0 < v && v <= 32) {
  148. if (i->op == Oshr && w+v >= 32)
  149. return 1;
  150. if (w < 32) {
  151. if (i->op == Osar)
  152. w = min(31, w+v);
  153. else
  154. w = min(32, w+v);
  155. }
  156. }
  157. return defwidthle(fn, i->arg[0], w);
  158. }
  159. if (iscmp(i->op, &x, &x))
  160. return w >= 1;
  161. if (i->op == Oand) {
  162. if (defwidthle(fn, i->arg[0], w)
  163. || defwidthle(fn, i->arg[1], w))
  164. return 1;
  165. return 0;
  166. }
  167. if (i->op == Oor || i->op == Oxor) {
  168. if (defwidthle(fn, i->arg[0], w)
  169. && defwidthle(fn, i->arg[1], w))
  170. return 1;
  171. return 0;
  172. }
  173. if (ext(i, &e)) {
  174. if (e.zext && e.usew <= w)
  175. return 1;
  176. w = min(w, e.nopw);
  177. return defwidthle(fn, i->arg[0], w);
  178. }
  179. return 0;
  180. }
  181. static int
  182. isw1(Fn *fn, Ref r)
  183. {
  184. return defwidthle(fn, r, 1);
  185. }
  186. /* insert early extub/extuh instructions
  187. * for pars used only narrowly; this
  188. * helps factoring extensions out of
  189. * loops
  190. *
  191. * needs use; breaks use
  192. */
  193. void
  194. narrowpars(Fn *fn)
  195. {
  196. Blk *b;
  197. int loop;
  198. Ins ext, *i, *ins;
  199. uint npar, nins;
  200. Ref r;
  201. /* only useful for functions with loops */
  202. loop = 0;
  203. for (b=fn->start; b; b=b->link)
  204. if (b->loop > 1) {
  205. loop = 1;
  206. break;
  207. }
  208. if (!loop)
  209. return;
  210. b = fn->start;
  211. npar = 0;
  212. for (i=b->ins; i<&b->ins[b->nins]; i++) {
  213. if (!ispar(i->op))
  214. break;
  215. npar++;
  216. }
  217. if (npar == 0)
  218. return;
  219. nins = b->nins + npar;
  220. ins = vnew(nins, sizeof ins[0], PFn);
  221. icpy(ins, b->ins, npar);
  222. icpy(ins + 2*npar, b->ins+npar, b->nins-npar);
  223. b->ins = ins;
  224. b->nins = nins;
  225. for (i=b->ins; i<&b->ins[b->nins]; i++) {
  226. if (!ispar(i->op))
  227. break;
  228. ext = (Ins){.op = Onop};
  229. if (i->cls == Kw)
  230. if (usewidthle(fn, i->to, 16)) {
  231. ext.op = Oextuh;
  232. if (usewidthle(fn, i->to, 8))
  233. ext.op = Oextub;
  234. r = newtmp("vw", i->cls, fn);
  235. ext.cls = i->cls;
  236. ext.to = i->to;
  237. ext.arg[0] = r;
  238. i->to = r;
  239. }
  240. *(i+npar) = ext;
  241. }
  242. }
  243. Ref
  244. copyref(Fn *fn, Blk *b, Ins *i)
  245. {
  246. /* which extensions are copies for a given
  247. * argument width */
  248. static bits extcpy[] = {
  249. [WFull] = 0,
  250. [Wsb] = BIT(Wsb) | BIT(Wsh) | BIT(Wsw),
  251. [Wub] = BIT(Wub) | BIT(Wuh) | BIT(Wuw),
  252. [Wsh] = BIT(Wsh) | BIT(Wsw),
  253. [Wuh] = BIT(Wuh) | BIT(Wuw),
  254. [Wsw] = BIT(Wsw),
  255. [Wuw] = BIT(Wuw),
  256. };
  257. Ext e;
  258. Tmp *t;
  259. int64_t v;
  260. int w, z;
  261. if (i->op == Ocopy)
  262. return i->arg[0];
  263. /* op identity value */
  264. if (optab[i->op].hasid
  265. && KBASE(i->cls) == 0 /* integer only - fp NaN! */
  266. && req(i->arg[1], con01[optab[i->op].idval])
  267. && (!optab[i->op].cmpeqwl || isw1(fn, i->arg[0])))
  268. return i->arg[0];
  269. /* idempotent op with identical args */
  270. if (optab[i->op].idemp
  271. && req(i->arg[0], i->arg[1]))
  272. return i->arg[0];
  273. /* integer cmp with identical args */
  274. if ((optab[i->op].cmpeqwl || optab[i->op].cmplgtewl)
  275. && req(i->arg[0], i->arg[1]))
  276. return con01[optab[i->op].eqval];
  277. /* cmpeq/ne 0 with 0/non-0 inference */
  278. if (optab[i->op].cmpeqwl
  279. && req(i->arg[1], CON_Z)
  280. && zeroval(fn, b, i->arg[0], argcls(i, 0), &z))
  281. return con01[optab[i->op].eqval^z^1];
  282. /* redundant and mask */
  283. if (i->op == Oand
  284. && isconbits(fn, i->arg[1], &v)
  285. && (v > 0 && ((v+1) & v) == 0)
  286. && defwidthle(fn, i->arg[0], bitwidth(v)))
  287. return i->arg[0];
  288. if (i->cls == Kw
  289. && (i->op == Oextsw || i->op == Oextuw))
  290. return i->arg[0];
  291. if (ext(i, &e) && rtype(i->arg[0]) == RTmp) {
  292. t = &fn->tmp[i->arg[0].val];
  293. assert(KBASE(t->cls) == 0);
  294. /* do not break typing by returning
  295. * a narrower temp */
  296. if (KWIDE(i->cls) > KWIDE(t->cls))
  297. return R;
  298. w = Wsb + (i->op - Oextsb);
  299. if (BIT(w) & extcpy[t->width])
  300. return i->arg[0];
  301. /* avoid eliding extensions of params
  302. * inserted in the start block; their
  303. * point is to make further extensions
  304. * redundant */
  305. if ((!t->def || !ispar(t->def->op))
  306. && usewidthle(fn, i->to, e.usew))
  307. return i->arg[0];
  308. if (defwidthle(fn, i->arg[0], e.nopw))
  309. return i->arg[0];
  310. }
  311. return R;
  312. }
  313. static int
  314. phieq(Phi *pa, Phi *pb)
  315. {
  316. Ref r;
  317. uint n;
  318. assert(pa->narg == pb->narg);
  319. for (n=0; n<pa->narg; n++) {
  320. r = phiarg(pb, pa->blk[n]);
  321. if (!req(pa->arg[n], r))
  322. return 0;
  323. }
  324. return 1;
  325. }
  326. Ref
  327. phicopyref(Fn *fn, Blk *b, Phi *p)
  328. {
  329. Blk *d, **s;
  330. Phi *p1;
  331. uint n, c;
  332. /* identical args */
  333. for (n=0; n<p->narg-1; n++)
  334. if (!req(p->arg[n], p->arg[n+1]))
  335. break;
  336. if (n == p->narg-1)
  337. return p->arg[n];
  338. /* same as a previous phi */
  339. for (p1=b->phi; p1!=p; p1=p1->link) {
  340. assert(p1);
  341. if (phieq(p1, p))
  342. return p1->to;
  343. }
  344. /* can be replaced by a
  345. * dominating jnz arg */
  346. d = b->idom;
  347. if (p->narg != 2
  348. || d->jmp.type != Jjnz
  349. || !isw1(fn, d->jmp.arg))
  350. return R;
  351. s = (Blk*[]){0, 0};
  352. for (n=0; n<2; n++)
  353. for (c=0; c<2; c++)
  354. if (req(p->arg[n], con01[c]))
  355. s[c] = p->blk[n];
  356. /* if s1 ends with a jnz on either b
  357. * or s2; the inference below is wrong
  358. * without the jump type checks */
  359. if (d->s1 == s[1] && d->s2 == s[0]
  360. && d->s1->jmp.type == Jjmp
  361. && d->s2->jmp.type == Jjmp)
  362. return d->jmp.arg;
  363. return R;
  364. }