ir_opt.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. // Optimizations for the IR code
  2. void ir_opt_add_operands(Array<irValue *> *ops, irInstr *i) {
  3. switch (i->kind) {
  4. case irInstr_Comment:
  5. break;
  6. case irInstr_Local:
  7. break;
  8. case irInstr_ZeroInit:
  9. array_add(ops, i->ZeroInit.address);
  10. break;
  11. case irInstr_Store:
  12. array_add(ops, i->Store.address);
  13. array_add(ops, i->Store.value);
  14. break;
  15. case irInstr_Load:
  16. array_add(ops, i->Load.address);
  17. break;
  18. case irInstr_ArrayElementPtr:
  19. array_add(ops, i->ArrayElementPtr.address);
  20. array_add(ops, i->ArrayElementPtr.elem_index);
  21. break;
  22. case irInstr_StructElementPtr:
  23. array_add(ops, i->StructElementPtr.address);
  24. break;
  25. case irInstr_PtrOffset:
  26. array_add(ops, i->PtrOffset.address);
  27. array_add(ops, i->PtrOffset.offset);
  28. break;
  29. case irInstr_StructExtractValue:
  30. array_add(ops, i->StructExtractValue.address);
  31. break;
  32. case irInstr_Conv:
  33. array_add(ops, i->Conv.value);
  34. break;
  35. case irInstr_Jump:
  36. break;
  37. case irInstr_If:
  38. array_add(ops, i->If.cond);
  39. break;
  40. case irInstr_Return:
  41. if (i->Return.value != nullptr) {
  42. array_add(ops, i->Return.value);
  43. }
  44. break;
  45. case irInstr_Select:
  46. array_add(ops, i->Select.cond);
  47. break;
  48. case irInstr_Phi:
  49. for_array(j, i->Phi.edges) {
  50. array_add(ops, i->Phi.edges[j]);
  51. }
  52. break;
  53. case irInstr_Unreachable:
  54. break;
  55. case irInstr_UnaryOp:
  56. array_add(ops, i->UnaryOp.expr);
  57. break;
  58. case irInstr_BinaryOp:
  59. array_add(ops, i->BinaryOp.left);
  60. array_add(ops, i->BinaryOp.right);
  61. break;
  62. case irInstr_Call:
  63. array_add(ops, i->Call.value);
  64. for (isize j = 0; j < i->Call.arg_count; j++) {
  65. array_add(ops, i->Call.args[j]);
  66. }
  67. break;
  68. // case irInstr_VectorExtractElement:
  69. // array_add(ops, i->VectorExtractElement.vector);
  70. // array_add(ops, i->VectorExtractElement.index);
  71. // break;
  72. // case irInstr_VectorInsertElement:
  73. // array_add(ops, i->VectorInsertElement.vector);
  74. // array_add(ops, i->VectorInsertElement.elem);
  75. // array_add(ops, i->VectorInsertElement.index);
  76. // break;
  77. // case irInstr_VectorShuffle:
  78. // array_add(ops, i->VectorShuffle.vector);
  79. // break;
  80. case irInstr_StartupRuntime:
  81. break;
  82. #if 0
  83. case irInstr_BoundsCheck:
  84. array_add(ops, i->BoundsCheck.index);
  85. array_add(ops, i->BoundsCheck.len);
  86. break;
  87. case irInstr_SliceBoundsCheck:
  88. array_add(ops, i->SliceBoundsCheck.low);
  89. array_add(ops, i->SliceBoundsCheck.high);
  90. break;
  91. #endif
  92. }
  93. }
  94. void ir_opt_block_replace_pred(irBlock *b, irBlock *from, irBlock *to) {
  95. for_array(i, b->preds) {
  96. irBlock *pred = b->preds[i];
  97. if (pred == from) {
  98. b->preds[i] = to;
  99. }
  100. }
  101. }
  102. void ir_opt_block_replace_succ(irBlock *b, irBlock *from, irBlock *to) {
  103. for_array(i, b->succs) {
  104. irBlock *succ = b->succs[i];
  105. if (succ == from) {
  106. b->succs[i] = to;
  107. }
  108. }
  109. }
  110. bool ir_opt_block_has_phi(irBlock *b) {
  111. return b->instrs[0]->Instr.kind == irInstr_Phi;
  112. }
  113. Array<irValue *> ir_get_block_phi_nodes(irBlock *b) {
  114. Array<irValue *> phis = {0};
  115. for_array(i, b->instrs) {
  116. irInstr *instr = &b->instrs[i]->Instr;
  117. if (instr->kind != irInstr_Phi) {
  118. phis = b->instrs;
  119. phis.count = i;
  120. return phis;
  121. }
  122. }
  123. return phis;
  124. }
  125. void ir_remove_pred(irBlock *b, irBlock *p) {
  126. Array<irValue *> phis = ir_get_block_phi_nodes(b);
  127. isize i = 0;
  128. for_array(j, b->preds) {
  129. irBlock *pred = b->preds[j];
  130. if (pred != p) {
  131. b->preds[i] = b->preds[j];
  132. for_array(k, phis) {
  133. irInstrPhi *phi = &phis[k]->Instr.Phi;
  134. phi->edges[i] = phi->edges[j];
  135. }
  136. i++;
  137. }
  138. }
  139. b->preds.count = i;
  140. for_array(k, phis) {
  141. irInstrPhi *phi = &phis[k]->Instr.Phi;
  142. phi->edges.count = i;
  143. }
  144. }
  145. void ir_remove_dead_blocks(irProcedure *proc) {
  146. isize j = 0;
  147. for_array(i, proc->blocks) {
  148. irBlock *b = proc->blocks[i];
  149. if (b == nullptr) {
  150. continue;
  151. }
  152. // NOTE(bill): Swap order
  153. b->index = j;
  154. proc->blocks[j++] = b;
  155. }
  156. proc->blocks.count = j;
  157. }
  158. void ir_mark_reachable(irBlock *b) {
  159. isize const WHITE = 0;
  160. isize const BLACK = -1;
  161. b->index = BLACK;
  162. for_array(i, b->succs) {
  163. irBlock *succ = b->succs[i];
  164. if (succ->index == WHITE) {
  165. ir_mark_reachable(succ);
  166. }
  167. }
  168. }
  169. void ir_remove_unreachable_blocks(irProcedure *proc) {
  170. isize const WHITE = 0;
  171. isize const BLACK = -1;
  172. for_array(i, proc->blocks) {
  173. proc->blocks[i]->index = WHITE;
  174. }
  175. ir_mark_reachable(proc->blocks[0]);
  176. for_array(i, proc->blocks) {
  177. irBlock *b = proc->blocks[i];
  178. if (b->index == WHITE) {
  179. for_array(j, b->succs) {
  180. irBlock *c = b->succs[j];
  181. if (c->index == BLACK) {
  182. ir_remove_pred(c, b);
  183. }
  184. }
  185. // NOTE(bill): Mark as empty but don't actually free it
  186. // As it's been allocated with an arena
  187. proc->blocks[i] = nullptr;
  188. }
  189. }
  190. ir_remove_dead_blocks(proc);
  191. }
  192. bool ir_opt_block_fusion(irProcedure *proc, irBlock *a) {
  193. if (a->succs.count != 1) {
  194. return false;
  195. }
  196. irBlock *b = a->succs[0];
  197. if (b->preds.count != 1) {
  198. return false;
  199. }
  200. if (ir_opt_block_has_phi(b)) {
  201. return false;
  202. }
  203. array_pop(&a->instrs); // Remove branch at end
  204. for_array(i, b->instrs) {
  205. array_add(&a->instrs, b->instrs[i]);
  206. ir_set_instr_parent(b->instrs[i], a);
  207. }
  208. array_clear(&a->succs);
  209. for_array(i, b->succs) {
  210. array_add(&a->succs, b->succs[i]);
  211. }
  212. // Fix preds links
  213. for_array(i, b->succs) {
  214. ir_opt_block_replace_pred(b->succs[i], b, a);
  215. }
  216. proc->blocks[b->index] = nullptr;
  217. return true;
  218. }
  219. void ir_opt_blocks(irProcedure *proc) {
  220. ir_remove_unreachable_blocks(proc);
  221. #if 1
  222. bool changed = true;
  223. while (changed) {
  224. changed = false;
  225. for_array(i, proc->blocks) {
  226. irBlock *b = proc->blocks[i];
  227. if (b == nullptr) {
  228. continue;
  229. }
  230. GB_ASSERT_MSG(b->index == i, "%d, %td", b->index, i);
  231. if (ir_opt_block_fusion(proc, b)) {
  232. changed = true;
  233. }
  234. // TODO(bill): other simple block optimizations
  235. }
  236. }
  237. #endif
  238. ir_remove_dead_blocks(proc);
  239. }
  240. void ir_opt_build_referrers(irProcedure *proc) {
  241. gbTempArenaMemory tmp = gb_temp_arena_memory_begin(&proc->module->tmp_arena);
  242. Array<irValue *> ops = {0}; // NOTE(bill): Act as a buffer
  243. array_init(&ops, proc->module->tmp_allocator, 64); // HACK(bill): This _could_ overflow the temp arena
  244. for_array(i, proc->blocks) {
  245. irBlock *b = proc->blocks[i];
  246. for_array(j, b->instrs) {
  247. irValue *instr = b->instrs[j];
  248. array_clear(&ops);
  249. ir_opt_add_operands(&ops, &instr->Instr);
  250. for_array(k, ops) {
  251. irValue *op = ops[k];
  252. if (op == nullptr) {
  253. continue;
  254. }
  255. Array<irValue *> *refs = ir_value_referrers(op);
  256. if (refs != nullptr) {
  257. array_add(refs, instr);
  258. }
  259. }
  260. }
  261. }
  262. gb_temp_arena_memory_end(tmp);
  263. }
  264. // State of Lengauer-Tarjan algorithm
  265. // Based on this paper: http://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf
  266. typedef struct irLTState {
  267. isize count;
  268. // NOTE(bill): These are arrays
  269. irBlock **sdom; // Semidominator
  270. irBlock **parent; // Parent in DFS traversal of CFG
  271. irBlock **ancestor;
  272. } irLTState;
  273. // §2.2 - bottom of page
  274. void ir_lt_link(irLTState *lt, irBlock *p, irBlock *q) {
  275. lt->ancestor[q->index] = p;
  276. }
  277. i32 ir_lt_depth_first_search(irLTState *lt, irBlock *p, i32 i, irBlock **preorder) {
  278. preorder[i] = p;
  279. p->dom.pre = i++;
  280. lt->sdom[p->index] = p;
  281. ir_lt_link(lt, nullptr, p);
  282. for_array(index, p->succs) {
  283. irBlock *q = p->succs[index];
  284. if (lt->sdom[q->index] == nullptr) {
  285. lt->parent[q->index] = p;
  286. i = ir_lt_depth_first_search(lt, q, i, preorder);
  287. }
  288. }
  289. return i;
  290. }
  291. irBlock *ir_lt_eval(irLTState *lt, irBlock *v) {
  292. irBlock *u = v;
  293. for (;
  294. lt->ancestor[v->index] != nullptr;
  295. v = lt->ancestor[v->index]) {
  296. if (lt->sdom[v->index]->dom.pre < lt->sdom[u->index]->dom.pre) {
  297. u = v;
  298. }
  299. }
  300. return u;
  301. }
  302. typedef struct irDomPrePost {
  303. i32 pre, post;
  304. } irDomPrePost;
  305. irDomPrePost ir_opt_number_dom_tree(irBlock *v, i32 pre, i32 post) {
  306. irDomPrePost result = {pre, post};
  307. v->dom.pre = pre++;
  308. for_array(i, v->dom.children) {
  309. result = ir_opt_number_dom_tree(v->dom.children[i], result.pre, result.post);
  310. }
  311. v->dom.post = post++;
  312. result.pre = pre;
  313. result.post = post;
  314. return result;
  315. }
  316. // NOTE(bill): Requires `ir_opt_blocks` to be called before this
  317. void ir_opt_build_dom_tree(irProcedure *proc) {
  318. // Based on this paper: http://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf
  319. gbTempArenaMemory tmp = gb_temp_arena_memory_begin(&proc->module->tmp_arena);
  320. isize n = proc->blocks.count;
  321. irBlock **buf = gb_alloc_array(proc->module->tmp_allocator, irBlock *, 5*n);
  322. irLTState lt = {0};
  323. lt.count = n;
  324. lt.sdom = &buf[0*n];
  325. lt.parent = &buf[1*n];
  326. lt.ancestor = &buf[2*n];
  327. irBlock **preorder = &buf[3*n];
  328. irBlock **buckets = &buf[4*n];
  329. irBlock *root = proc->blocks[0];
  330. // Step 1 - number vertices
  331. i32 pre_num = ir_lt_depth_first_search(&lt, root, 0, preorder);
  332. gb_memmove(buckets, preorder, n*gb_size_of(preorder[0]));
  333. for (i32 i = n-1; i > 0; i--) {
  334. irBlock *w = preorder[i];
  335. // Step 3 - Implicitly define idom for nodes
  336. for (irBlock *v = buckets[i]; v != w; v = buckets[v->dom.pre]) {
  337. irBlock *u = ir_lt_eval(&lt, v);
  338. if (lt.sdom[u->index]->dom.pre < i) {
  339. v->dom.idom = u;
  340. } else {
  341. v->dom.idom = w;
  342. }
  343. }
  344. // Step 2 - Compute all sdoms
  345. lt.sdom[w->index] = lt.parent[w->index];
  346. for_array(pred_index, w->preds) {
  347. irBlock *v = w->preds[pred_index];
  348. irBlock *u = ir_lt_eval(&lt, v);
  349. if (lt.sdom[u->index]->dom.pre < lt.sdom[w->index]->dom.pre) {
  350. lt.sdom[w->index] = lt.sdom[u->index];
  351. }
  352. }
  353. ir_lt_link(&lt, lt.parent[w->index], w);
  354. if (lt.parent[w->index] == lt.sdom[w->index]) {
  355. w->dom.idom = lt.parent[w->index];
  356. } else {
  357. buckets[i] = buckets[lt.sdom[w->index]->dom.pre];
  358. buckets[lt.sdom[w->index]->dom.pre] = w;
  359. }
  360. }
  361. // The rest of Step 3
  362. for (irBlock *v = buckets[0]; v != root; v = buckets[v->dom.pre]) {
  363. v->dom.idom = root;
  364. }
  365. // Step 4 - Explicitly define idom for nodes (in preorder)
  366. for (isize i = 1; i < n; i++) {
  367. irBlock *w = preorder[i];
  368. if (w == root) {
  369. w->dom.idom = nullptr;
  370. } else {
  371. // Weird tree relationships here!
  372. if (w->dom.idom != lt.sdom[w->index]) {
  373. w->dom.idom = w->dom.idom->dom.idom;
  374. }
  375. // Calculate children relation as inverse of idom
  376. if (w->dom.idom->dom.children.data == nullptr) {
  377. // TODO(bill): Is this good enough for memory allocations?
  378. array_init(&w->dom.idom->dom.children, heap_allocator());
  379. }
  380. array_add(&w->dom.idom->dom.children, w);
  381. }
  382. }
  383. ir_opt_number_dom_tree(root, 0, 0);
  384. gb_temp_arena_memory_end(tmp);
  385. }
  386. void ir_opt_mem2reg(irProcedure *proc) {
  387. // TODO(bill): ir_opt_mem2reg
  388. }
  389. void ir_opt_tree(irGen *s) {
  390. s->opt_called = true;
  391. for_array(member_index, s->module.procs) {
  392. irProcedure *proc = s->module.procs[member_index];
  393. if (proc->blocks.count == 0) { // Prototype/external procedure
  394. continue;
  395. }
  396. ir_opt_blocks(proc);
  397. #if 0
  398. ir_opt_build_referrers(proc);
  399. ir_opt_build_dom_tree(proc);
  400. // TODO(bill): ir optimization
  401. // [ ] cse (common-subexpression) elim
  402. // [ ] copy elim
  403. // [ ] dead code elim
  404. // [ ] dead store/load elim
  405. // [ ] phi elim
  406. // [ ] short circuit elim
  407. // [ ] bounds check elim
  408. // [ ] lift/mem2reg
  409. // [ ] lift/mem2reg
  410. ir_opt_mem2reg(proc);
  411. #endif
  412. GB_ASSERT(proc->blocks.count > 0);
  413. ir_number_proc_registers(proc);
  414. }
  415. }