spill.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. #include "all.h"
  2. static void
  3. aggreg(Blk *hd, Blk *b)
  4. {
  5. int k;
  6. /* aggregate looping information at
  7. * loop headers */
  8. bsunion(hd->gen, b->gen);
  9. for (k=0; k<2; k++)
  10. if (b->nlive[k] > hd->nlive[k])
  11. hd->nlive[k] = b->nlive[k];
  12. }
  13. static void
  14. tmpuse(Ref r, int use, int loop, Fn *fn)
  15. {
  16. Mem *m;
  17. Tmp *t;
  18. if (rtype(r) == RMem) {
  19. m = &fn->mem[r.val];
  20. tmpuse(m->base, 1, loop, fn);
  21. tmpuse(m->index, 1, loop, fn);
  22. }
  23. else if (rtype(r) == RTmp && r.val >= Tmp0) {
  24. t = &fn->tmp[r.val];
  25. t->nuse += use;
  26. t->ndef += !use;
  27. t->cost += loop;
  28. }
  29. }
  30. /* evaluate spill costs of temporaries,
  31. * this also fills usage information
  32. * requires rpo, preds
  33. */
  34. void
  35. fillcost(Fn *fn)
  36. {
  37. int n;
  38. uint a;
  39. Blk *b;
  40. Ins *i;
  41. Tmp *t;
  42. Phi *p;
  43. loopiter(fn, aggreg);
  44. if (debug['S']) {
  45. fprintf(stderr, "\n> Loop information:\n");
  46. for (b=fn->start; b; b=b->link) {
  47. for (a=0; a<b->npred; ++a)
  48. if (b->id <= b->pred[a]->id)
  49. break;
  50. if (a != b->npred) {
  51. fprintf(stderr, "\t%-10s", b->name);
  52. fprintf(stderr, " (% 3d ", b->nlive[0]);
  53. fprintf(stderr, "% 3d) ", b->nlive[1]);
  54. dumpts(b->gen, fn->tmp, stderr);
  55. }
  56. }
  57. }
  58. for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
  59. t->cost = t-fn->tmp < Tmp0 ? UINT_MAX : 0;
  60. t->nuse = 0;
  61. t->ndef = 0;
  62. }
  63. for (b=fn->start; b; b=b->link) {
  64. for (p=b->phi; p; p=p->link) {
  65. t = &fn->tmp[p->to.val];
  66. tmpuse(p->to, 0, 0, fn);
  67. for (a=0; a<p->narg; a++) {
  68. n = p->blk[a]->loop;
  69. t->cost += n;
  70. tmpuse(p->arg[a], 1, n, fn);
  71. }
  72. }
  73. n = b->loop;
  74. for (i=b->ins; i<&b->ins[b->nins]; i++) {
  75. tmpuse(i->to, 0, n, fn);
  76. tmpuse(i->arg[0], 1, n, fn);
  77. tmpuse(i->arg[1], 1, n, fn);
  78. }
  79. tmpuse(b->jmp.arg, 1, n, fn);
  80. }
  81. if (debug['S']) {
  82. fprintf(stderr, "\n> Spill costs:\n");
  83. for (n=Tmp0; n<fn->ntmp; n++)
  84. fprintf(stderr, "\t%-10s %d\n",
  85. fn->tmp[n].name,
  86. fn->tmp[n].cost);
  87. fprintf(stderr, "\n");
  88. }
  89. }
  90. static BSet *fst; /* temps to prioritize in registers (for tcmp1) */
  91. static Tmp *tmp; /* current temporaries (for tcmpX) */
  92. static int ntmp; /* current # of temps (for limit) */
  93. static int locs; /* stack size used by locals */
  94. static int slot4; /* next slot of 4 bytes */
  95. static int slot8; /* ditto, 8 bytes */
  96. static BSet mask[2][1]; /* class masks */
  97. static int
  98. tcmp0(const void *pa, const void *pb)
  99. {
  100. uint ca, cb;
  101. ca = tmp[*(int *)pa].cost;
  102. cb = tmp[*(int *)pb].cost;
  103. return (cb < ca) ? -1 : (cb > ca);
  104. }
  105. static int
  106. tcmp1(const void *pa, const void *pb)
  107. {
  108. int c;
  109. c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa);
  110. return c ? c : tcmp0(pa, pb);
  111. }
  112. static Ref
  113. slot(int t)
  114. {
  115. int s;
  116. assert(t >= Tmp0 && "cannot spill register");
  117. s = tmp[t].slot;
  118. if (s == -1) {
  119. /* specific to NAlign == 3 */
  120. /* nice logic to pack stack slots
  121. * on demand, there can be only
  122. * one hole and slot4 points to it
  123. *
  124. * invariant: slot4 <= slot8
  125. */
  126. if (KWIDE(tmp[t].cls)) {
  127. s = slot8;
  128. if (slot4 == slot8)
  129. slot4 += 2;
  130. slot8 += 2;
  131. } else {
  132. s = slot4;
  133. if (slot4 == slot8) {
  134. slot8 += 2;
  135. slot4 += 1;
  136. } else
  137. slot4 = slot8;
  138. }
  139. s += locs;
  140. tmp[t].slot = s;
  141. }
  142. return SLOT(s);
  143. }
  144. /* restricts b to hold at most k
  145. * temporaries, preferring those
  146. * present in f (if given), then
  147. * those with the largest spill
  148. * cost
  149. */
  150. static void
  151. limit(BSet *b, int k, BSet *f)
  152. {
  153. static int *tarr, maxt;
  154. int i, t, nt;
  155. nt = bscount(b);
  156. if (nt <= k)
  157. return;
  158. if (nt > maxt) {
  159. free(tarr);
  160. tarr = emalloc(nt * sizeof tarr[0]);
  161. maxt = nt;
  162. }
  163. for (i=0, t=0; bsiter(b, &t); t++) {
  164. bsclr(b, t);
  165. tarr[i++] = t;
  166. }
  167. if (nt > 1) {
  168. if (!f)
  169. qsort(tarr, nt, sizeof tarr[0], tcmp0);
  170. else {
  171. fst = f;
  172. qsort(tarr, nt, sizeof tarr[0], tcmp1);
  173. }
  174. }
  175. for (i=0; i<k && i<nt; i++)
  176. bsset(b, tarr[i]);
  177. for (; i<nt; i++)
  178. slot(tarr[i]);
  179. }
  180. /* spills temporaries to fit the
  181. * target limits using the same
  182. * preferences as limit(); assumes
  183. * that k1 gprs and k2 fprs are
  184. * currently in use
  185. */
  186. static void
  187. limit2(BSet *b1, int k1, int k2, BSet *f)
  188. {
  189. BSet b2[1];
  190. bsinit(b2, ntmp); /* todo, free those */
  191. bscopy(b2, b1);
  192. bsinter(b1, mask[0]);
  193. bsinter(b2, mask[1]);
  194. limit(b1, T.ngpr - k1, f);
  195. limit(b2, T.nfpr - k2, f);
  196. bsunion(b1, b2);
  197. }
  198. static void
  199. sethint(BSet *u, bits r)
  200. {
  201. int t;
  202. for (t=Tmp0; bsiter(u, &t); t++)
  203. tmp[phicls(t, tmp)].hint.m |= r;
  204. }
  205. /* reloads temporaries in u that are
  206. * not in v from their slots
  207. */
  208. static void
  209. reloads(BSet *u, BSet *v)
  210. {
  211. int t;
  212. for (t=Tmp0; bsiter(u, &t); t++)
  213. if (!bshas(v, t))
  214. emit(Oload, tmp[t].cls, TMP(t), slot(t), R);
  215. }
  216. static void
  217. store(Ref r, int s)
  218. {
  219. if (s != -1)
  220. emit(Ostorew + tmp[r.val].cls, 0, R, r, SLOT(s));
  221. }
  222. static int
  223. regcpy(Ins *i)
  224. {
  225. return i->op == Ocopy && isreg(i->arg[0]);
  226. }
  227. static Ins *
  228. dopm(Blk *b, Ins *i, BSet *v)
  229. {
  230. int n, t;
  231. BSet u[1];
  232. Ins *i1;
  233. bits r;
  234. bsinit(u, ntmp); /* todo, free those */
  235. /* consecutive copies from
  236. * registers need to be handled
  237. * as one large instruction
  238. *
  239. * fixme: there is an assumption
  240. * that calls are always followed
  241. * by copy instructions here, this
  242. * might not be true if previous
  243. * passes change
  244. */
  245. i1 = ++i;
  246. do {
  247. i--;
  248. t = i->to.val;
  249. if (!req(i->to, R))
  250. if (bshas(v, t)) {
  251. bsclr(v, t);
  252. store(i->to, tmp[t].slot);
  253. }
  254. bsset(v, i->arg[0].val);
  255. } while (i != b->ins && regcpy(i-1));
  256. bscopy(u, v);
  257. if (i != b->ins && (i-1)->op == Ocall) {
  258. v->t[0] &= ~T.retregs((i-1)->arg[1], 0);
  259. limit2(v, T.nrsave[0], T.nrsave[1], 0);
  260. for (n=0, r=0; T.rsave[n]>=0; n++)
  261. r |= BIT(T.rsave[n]);
  262. v->t[0] |= T.argregs((i-1)->arg[1], 0);
  263. } else {
  264. limit2(v, 0, 0, 0);
  265. r = v->t[0];
  266. }
  267. sethint(v, r);
  268. reloads(u, v);
  269. do
  270. emiti(*--i1);
  271. while (i1 != i);
  272. return i;
  273. }
  274. static void
  275. merge(BSet *u, Blk *bu, BSet *v, Blk *bv)
  276. {
  277. int t;
  278. if (bu->loop <= bv->loop)
  279. bsunion(u, v);
  280. else
  281. for (t=0; bsiter(v, &t); t++)
  282. if (tmp[t].slot == -1)
  283. bsset(u, t);
  284. }
  285. /* spill code insertion
  286. * requires spill costs, rpo, liveness
  287. *
  288. * Note: this will replace liveness
  289. * information (in, out) with temporaries
  290. * that must be in registers at block
  291. * borders
  292. *
  293. * Be careful with:
  294. * - Ocopy instructions to ensure register
  295. * constraints
  296. */
  297. void
  298. spill(Fn *fn)
  299. {
  300. Blk *b, *s1, *s2, *hd, **bp;
  301. int j, l, t, k, lvarg[2];
  302. uint n;
  303. BSet u[1], v[1], w[1];
  304. Ins *i;
  305. Phi *p;
  306. Mem *m;
  307. bits r;
  308. tmp = fn->tmp;
  309. ntmp = fn->ntmp;
  310. bsinit(u, ntmp);
  311. bsinit(v, ntmp);
  312. bsinit(w, ntmp);
  313. bsinit(mask[0], ntmp);
  314. bsinit(mask[1], ntmp);
  315. locs = fn->slot;
  316. slot4 = 0;
  317. slot8 = 0;
  318. for (t=0; t<ntmp; t++) {
  319. k = 0;
  320. if (t >= T.fpr0 && t < T.fpr0 + T.nfpr)
  321. k = 1;
  322. if (t >= Tmp0)
  323. k = KBASE(tmp[t].cls);
  324. bsset(mask[k], t);
  325. }
  326. for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) {
  327. b = *--bp;
  328. /* invariant: all blocks with bigger rpo got
  329. * their in,out updated. */
  330. /* 1. find temporaries in registers at
  331. * the end of the block (put them in v) */
  332. curi = 0;
  333. s1 = b->s1;
  334. s2 = b->s2;
  335. hd = 0;
  336. if (s1 && s1->id <= b->id)
  337. hd = s1;
  338. if (s2 && s2->id <= b->id)
  339. if (!hd || s2->id >= hd->id)
  340. hd = s2;
  341. if (hd) {
  342. /* back-edge */
  343. bszero(v);
  344. hd->gen->t[0] |= T.rglob; /* don't spill registers */
  345. for (k=0; k<2; k++) {
  346. n = k == 0 ? T.ngpr : T.nfpr;
  347. bscopy(u, b->out);
  348. bsinter(u, mask[k]);
  349. bscopy(w, u);
  350. bsinter(u, hd->gen);
  351. bsdiff(w, hd->gen);
  352. if (bscount(u) < n) {
  353. j = bscount(w); /* live through */
  354. l = hd->nlive[k];
  355. limit(w, n - (l - j), 0);
  356. bsunion(u, w);
  357. } else
  358. limit(u, n, 0);
  359. bsunion(v, u);
  360. }
  361. } else if (s1) {
  362. /* avoid reloading temporaries
  363. * in the middle of loops */
  364. bszero(v);
  365. liveon(w, b, s1);
  366. merge(v, b, w, s1);
  367. if (s2) {
  368. liveon(u, b, s2);
  369. merge(v, b, u, s2);
  370. bsinter(w, u);
  371. }
  372. limit2(v, 0, 0, w);
  373. } else {
  374. bscopy(v, b->out);
  375. if (rtype(b->jmp.arg) == RCall)
  376. v->t[0] |= T.retregs(b->jmp.arg, 0);
  377. }
  378. for (t=Tmp0; bsiter(b->out, &t); t++)
  379. if (!bshas(v, t))
  380. slot(t);
  381. bscopy(b->out, v);
  382. /* 2. process the block instructions */
  383. if (rtype(b->jmp.arg) == RTmp) {
  384. t = b->jmp.arg.val;
  385. assert(KBASE(tmp[t].cls) == 0);
  386. lvarg[0] = bshas(v, t);
  387. bsset(v, t);
  388. bscopy(u, v);
  389. limit2(v, 0, 0, NULL);
  390. if (!bshas(v, t)) {
  391. if (!lvarg[0])
  392. bsclr(u, t);
  393. b->jmp.arg = slot(t);
  394. }
  395. reloads(u, v);
  396. }
  397. curi = &insb[NIns];
  398. for (i=&b->ins[b->nins]; i!=b->ins;) {
  399. i--;
  400. if (regcpy(i)) {
  401. i = dopm(b, i, v);
  402. continue;
  403. }
  404. bszero(w);
  405. if (!req(i->to, R)) {
  406. assert(rtype(i->to) == RTmp);
  407. t = i->to.val;
  408. if (bshas(v, t))
  409. bsclr(v, t);
  410. else {
  411. /* make sure we have a reg
  412. * for the result */
  413. assert(t >= Tmp0 && "dead reg");
  414. bsset(v, t);
  415. bsset(w, t);
  416. }
  417. }
  418. j = T.memargs(i->op);
  419. for (n=0; n<2; n++)
  420. if (rtype(i->arg[n]) == RMem)
  421. j--;
  422. for (n=0; n<2; n++)
  423. switch (rtype(i->arg[n])) {
  424. case RMem:
  425. t = i->arg[n].val;
  426. m = &fn->mem[t];
  427. if (rtype(m->base) == RTmp) {
  428. bsset(v, m->base.val);
  429. bsset(w, m->base.val);
  430. }
  431. if (rtype(m->index) == RTmp) {
  432. bsset(v, m->index.val);
  433. bsset(w, m->index.val);
  434. }
  435. break;
  436. case RTmp:
  437. t = i->arg[n].val;
  438. lvarg[n] = bshas(v, t);
  439. bsset(v, t);
  440. if (j-- <= 0)
  441. bsset(w, t);
  442. break;
  443. }
  444. bscopy(u, v);
  445. limit2(v, 0, 0, w);
  446. for (n=0; n<2; n++)
  447. if (rtype(i->arg[n]) == RTmp) {
  448. t = i->arg[n].val;
  449. if (!bshas(v, t)) {
  450. /* do not reload if the
  451. * argument is dead
  452. */
  453. if (!lvarg[n])
  454. bsclr(u, t);
  455. i->arg[n] = slot(t);
  456. }
  457. }
  458. reloads(u, v);
  459. if (!req(i->to, R)) {
  460. t = i->to.val;
  461. store(i->to, tmp[t].slot);
  462. if (t >= Tmp0)
  463. /* in case i->to was a
  464. * dead temporary */
  465. bsclr(v, t);
  466. }
  467. emiti(*i);
  468. r = v->t[0]; /* Tmp0 is NBit */
  469. if (r)
  470. sethint(v, r);
  471. }
  472. if (b == fn->start)
  473. assert(v->t[0] == (T.rglob | fn->reg));
  474. else
  475. assert(v->t[0] == T.rglob);
  476. for (p=b->phi; p; p=p->link) {
  477. assert(rtype(p->to) == RTmp);
  478. t = p->to.val;
  479. if (bshas(v, t)) {
  480. bsclr(v, t);
  481. store(p->to, tmp[t].slot);
  482. } else if (bshas(b->in, t))
  483. /* only if the phi is live */
  484. p->to = slot(p->to.val);
  485. }
  486. bscopy(b->in, v);
  487. idup(b, curi, &insb[NIns]-curi);
  488. }
  489. /* align the locals to a 16 byte boundary */
  490. /* specific to NAlign == 3 */
  491. slot8 += slot8 & 3;
  492. fn->slot += slot8;
  493. if (debug['S']) {
  494. fprintf(stderr, "\n> Block information:\n");
  495. for (b=fn->start; b; b=b->link) {
  496. fprintf(stderr, "\t%-10s (% 5d) ", b->name, b->loop);
  497. dumpts(b->out, fn->tmp, stderr);
  498. }
  499. fprintf(stderr, "\n> After spilling:\n");
  500. printfn(fn, stderr);
  501. }
  502. }