cfa.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. // Copyright (c) 2015-2016 The Khronos Group Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef SOURCE_CFA_H_
  15. #define SOURCE_CFA_H_
  16. #include <stddef.h>
  17. #include <algorithm>
  18. #include <cassert>
  19. #include <cstdint>
  20. #include <functional>
  21. #include <map>
  22. #include <unordered_map>
  23. #include <unordered_set>
  24. #include <utility>
  25. #include <vector>
  26. namespace spvtools {
  // Control-flow analysis helpers for graphs whose nodes are basic blocks of
  // type |BB|. Every operation is a static function; the traversal code
  // identifies a block by the value returned from its id() member.
  template <class BB>
  class CFA {
    using bb_ptr = BB*;
    using cbb_ptr = const BB*;
    using bb_iter = typename std::vector<BB*>::const_iterator;
    using get_blocks_func = std::function<const std::vector<BB*>*(const BB*)>;

    // Shorthand for the callback shapes accepted by the traversal functions.
    using block_visitor = std::function<void(cbb_ptr)>;
    using edge_visitor = std::function<void(cbb_ptr, cbb_ptr)>;
    using block_predicate = std::function<bool(cbb_ptr)>;
    // Adjacency storage filled in by ComputeAugmentedCFG.
    using block_list_map = std::unordered_map<const BB*, std::vector<BB*>>;

    /// One frame of the explicit depth-first-search stack.
    struct block_info {
      /// The block this frame belongs to.
      cbb_ptr block;
      /// Position of the next not-yet-examined successor of |block|.
      bb_iter iter;
    };

    /// Checks whether some frame of @p work_list holds a block whose id()
    /// equals @p id.
    ///
    /// @param[in] work_list The frames of the depth first traversal that are
    ///                      currently on the stack
    /// @param[in] id        The block ID to look for
    ///
    /// @return true if a frame with that ID exists. The traversal treats an
    ///         edge into such a block as a back-edge.
    static bool FindInWorkList(const std::vector<block_info>& work_list,
                               uint32_t id);

   public:
    /// @brief Depth first traversal starting from the \p entry BasicBlock
    ///
    /// Walks the graph depth first from \p entry, invoking \p preorder the
    /// first time a block is reached and \p postorder when the walk leaves
    /// it. No back-edge callback is used by this overload.
    ///
    /// @param[in] entry          The root BasicBlock of a CFG
    /// @param[in] successor_func Returns a pointer to the successors of a
    ///                           block
    /// @param[in] preorder       Called for every visited block, in preorder
    /// @param[in] postorder      Called for every visited block, in postorder
    /// @param[in] terminal       Returns true for a block whose successors
    ///                           must not be explored
    ///
    /// NOTE: The collection returned through @p successor_func must keep its
    /// iterators valid for the whole duration of the traversal.
    static void DepthFirstTraversal(cbb_ptr entry,
                                    get_blocks_func successor_func,
                                    block_visitor preorder,
                                    block_visitor postorder,
                                    block_predicate terminal);

    /// @brief Depth first traversal starting from the \p entry BasicBlock
    ///
    /// Same walk as the overload above, with one addition: when \p backedge
    /// is not empty it is invoked for every examined edge whose target is
    /// still on the traversal stack. Detecting such edges scans the stack,
    /// so leaving \p backedge empty makes the traversal cheaper.
    ///
    /// @param[in] entry          The root BasicBlock of a CFG
    /// @param[in] successor_func Returns a pointer to the successors of a
    ///                           block
    /// @param[in] preorder       Called for every visited block, in preorder
    /// @param[in] postorder      Called for every visited block, in postorder
    /// @param[in] backedge       Called as backedge(from, to) for each
    ///                           back-edge found; may be empty
    /// @param[in] terminal       Returns true for a block whose successors
    ///                           must not be explored
    ///
    /// NOTE: The collection returned through @p successor_func must keep its
    /// iterators valid for the whole duration of the traversal.
    static void DepthFirstTraversal(cbb_ptr entry,
                                    get_blocks_func successor_func,
                                    block_visitor preorder,
                                    block_visitor postorder,
                                    edge_visitor backedge,
                                    block_predicate terminal);

    /// @brief Calculates dominator edges for a set of blocks
    ///
    /// Implements the iterative algorithm of Cooper, Harvey, and Kennedy,
    /// "A Simple, Fast Dominance Algorithm", 2001.
    ///
    /// The algorithm assumes a unique root node (a node without
    /// predecessors), which therefore sits at the end of the postorder
    /// vector. The vector is expected to be non-empty.
    ///
    /// @param[in] postorder        The blocks of the CFG in postorder
    /// @param[in] predecessor_func Returns a pointer to the predecessors of a
    ///                             block
    ///
    /// @return the dominator tree as a vector of (block, immediate dominator)
    /// pairs, ordered by the postorder index of the block. Following Cooper
    /// et al., a block for which no dominator was found (such as the root
    /// node) is reported as its own immediate dominator.
    static std::vector<std::pair<BB*, BB*>> CalculateDominators(
        const std::vector<cbb_ptr>& postorder,
        get_blocks_func predecessor_func);

    // Computes a minimal set of root nodes required to traverse, in the
    // forward direction, the CFG represented by the given vector of blocks
    // and the successor and predecessor functions. Blocks without
    // predecessors are always roots; among the remaining blocks, which all
    // have predecessors, the one appearing earlier in |blocks| is favoured.
    static std::vector<BB*> TraversalRoots(const std::vector<BB*>& blocks,
                                           get_blocks_func succ_func,
                                           get_blocks_func pred_func);

    // Records the extra edges needed to hang the CFG off a single pseudo
    // entry block and a single pseudo exit block.
    //
    // The forward traversal roots of |ordered_blocks| become the successors
    // of |pseudo_entry_block|. The roots of the reverse graph, discovered
    // over |ordered_blocks| in reverse order, become the predecessors of
    // |pseudo_exit_block|. For each of those root blocks the matching map
    // entry receives the pseudo block followed by the block's own
    // predecessors (respectively successors). Entries for all other blocks
    // are left untouched.
    static void ComputeAugmentedCFG(
        std::vector<BB*>& ordered_blocks, BB* pseudo_entry_block,
        BB* pseudo_exit_block, block_list_map* augmented_successors_map,
        block_list_map* augmented_predecessors_map, get_blocks_func succ_func,
        get_blocks_func pred_func);
  };
  143. template <class BB>
  144. bool CFA<BB>::FindInWorkList(const std::vector<block_info>& work_list,
  145. uint32_t id) {
  146. for (const auto& b : work_list) {
  147. if (b.block->id() == id) return true;
  148. }
  149. return false;
  150. }
  151. template <class BB>
  152. void CFA<BB>::DepthFirstTraversal(const BB* entry,
  153. get_blocks_func successor_func,
  154. std::function<void(cbb_ptr)> preorder,
  155. std::function<void(cbb_ptr)> postorder,
  156. std::function<bool(cbb_ptr)> terminal) {
  157. DepthFirstTraversal(entry, successor_func, preorder, postorder,
  158. /* backedge = */ {}, terminal);
  159. }
  160. template <class BB>
  161. void CFA<BB>::DepthFirstTraversal(
  162. const BB* entry, get_blocks_func successor_func,
  163. std::function<void(cbb_ptr)> preorder,
  164. std::function<void(cbb_ptr)> postorder,
  165. std::function<void(cbb_ptr, cbb_ptr)> backedge,
  166. std::function<bool(cbb_ptr)> terminal) {
  167. assert(successor_func && "The successor function cannot be empty.");
  168. assert(preorder && "The preorder function cannot be empty.");
  169. assert(postorder && "The postorder function cannot be empty.");
  170. assert(terminal && "The terminal function cannot be empty.");
  171. std::unordered_set<uint32_t> processed;
  172. /// NOTE: work_list is the sequence of nodes from the root node to the node
  173. /// being processed in the traversal
  174. std::vector<block_info> work_list;
  175. work_list.reserve(10);
  176. work_list.push_back({entry, std::begin(*successor_func(entry))});
  177. preorder(entry);
  178. processed.insert(entry->id());
  179. while (!work_list.empty()) {
  180. block_info& top = work_list.back();
  181. if (terminal(top.block) || top.iter == end(*successor_func(top.block))) {
  182. postorder(top.block);
  183. work_list.pop_back();
  184. } else {
  185. BB* child = *top.iter;
  186. top.iter++;
  187. if (backedge && FindInWorkList(work_list, child->id())) {
  188. backedge(top.block, child);
  189. }
  190. if (processed.count(child->id()) == 0) {
  191. preorder(child);
  192. work_list.emplace_back(
  193. block_info{child, std::begin(*successor_func(child))});
  194. processed.insert(child->id());
  195. }
  196. }
  197. }
  198. }
  199. template <class BB>
  200. std::vector<std::pair<BB*, BB*>> CFA<BB>::CalculateDominators(
  201. const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func) {
  202. struct block_detail {
  203. size_t dominator; ///< The index of blocks's dominator in post order array
  204. size_t postorder_index; ///< The index of the block in the post order array
  205. };
  206. const size_t undefined_dom = postorder.size();
  207. std::unordered_map<cbb_ptr, block_detail> idoms;
  208. for (size_t i = 0; i < postorder.size(); i++) {
  209. idoms[postorder[i]] = {undefined_dom, i};
  210. }
  211. idoms[postorder.back()].dominator = idoms[postorder.back()].postorder_index;
  212. bool changed = true;
  213. while (changed) {
  214. changed = false;
  215. for (auto b = postorder.rbegin() + 1; b != postorder.rend(); ++b) {
  216. const std::vector<BB*>& predecessors = *predecessor_func(*b);
  217. // Find the first processed/reachable predecessor that is reachable
  218. // in the forward traversal.
  219. auto res = std::find_if(std::begin(predecessors), std::end(predecessors),
  220. [&idoms, undefined_dom](BB* pred) {
  221. return idoms.count(pred) &&
  222. idoms[pred].dominator != undefined_dom;
  223. });
  224. if (res == end(predecessors)) continue;
  225. const BB* idom = *res;
  226. size_t idom_idx = idoms[idom].postorder_index;
  227. // all other predecessors
  228. for (const auto* p : predecessors) {
  229. if (idom == p) continue;
  230. // Only consider nodes reachable in the forward traversal.
  231. // Otherwise the intersection doesn't make sense and will never
  232. // terminate.
  233. if (!idoms.count(p)) continue;
  234. if (idoms[p].dominator != undefined_dom) {
  235. size_t finger1 = idoms[p].postorder_index;
  236. size_t finger2 = idom_idx;
  237. while (finger1 != finger2) {
  238. while (finger1 < finger2) {
  239. finger1 = idoms[postorder[finger1]].dominator;
  240. }
  241. while (finger2 < finger1) {
  242. finger2 = idoms[postorder[finger2]].dominator;
  243. }
  244. }
  245. idom_idx = finger1;
  246. }
  247. }
  248. if (idoms[*b].dominator != idom_idx) {
  249. idoms[*b].dominator = idom_idx;
  250. changed = true;
  251. }
  252. }
  253. }
  254. std::vector<std::pair<bb_ptr, bb_ptr>> out;
  255. for (auto idom : idoms) {
  256. // At this point if there is no dominator for the node, just make it
  257. // reflexive.
  258. auto dominator = std::get<1>(idom).dominator;
  259. if (dominator == undefined_dom) {
  260. dominator = std::get<1>(idom).postorder_index;
  261. }
  262. // NOTE: performing a const cast for convenient usage with
  263. // UpdateImmediateDominators
  264. out.push_back({const_cast<BB*>(std::get<0>(idom)),
  265. const_cast<BB*>(postorder[dominator])});
  266. }
  267. // Sort by postorder index to generate a deterministic ordering of edges.
  268. std::sort(
  269. out.begin(), out.end(),
  270. [&idoms](const std::pair<bb_ptr, bb_ptr>& lhs,
  271. const std::pair<bb_ptr, bb_ptr>& rhs) {
  272. assert(lhs.first);
  273. assert(lhs.second);
  274. assert(rhs.first);
  275. assert(rhs.second);
  276. auto lhs_indices = std::make_pair(idoms[lhs.first].postorder_index,
  277. idoms[lhs.second].postorder_index);
  278. auto rhs_indices = std::make_pair(idoms[rhs.first].postorder_index,
  279. idoms[rhs.second].postorder_index);
  280. return lhs_indices < rhs_indices;
  281. });
  282. return out;
  283. }
  284. template <class BB>
  285. std::vector<BB*> CFA<BB>::TraversalRoots(const std::vector<BB*>& blocks,
  286. get_blocks_func succ_func,
  287. get_blocks_func pred_func) {
  288. // The set of nodes which have been visited from any of the roots so far.
  289. std::unordered_set<const BB*> visited;
  290. auto mark_visited = [&visited](const BB* b) { visited.insert(b); };
  291. auto ignore_block = [](const BB*) {};
  292. auto no_terminal_blocks = [](const BB*) { return false; };
  293. auto traverse_from_root = [&mark_visited, &succ_func, &ignore_block,
  294. &no_terminal_blocks](const BB* entry) {
  295. DepthFirstTraversal(entry, succ_func, mark_visited, ignore_block,
  296. no_terminal_blocks);
  297. };
  298. std::vector<BB*> result;
  299. // First collect nodes without predecessors.
  300. for (auto block : blocks) {
  301. if (pred_func(block)->empty()) {
  302. assert(visited.count(block) == 0 && "Malformed graph!");
  303. result.push_back(block);
  304. traverse_from_root(block);
  305. }
  306. }
  307. // Now collect other stranded nodes. These must be in unreachable cycles.
  308. for (auto block : blocks) {
  309. if (visited.count(block) == 0) {
  310. result.push_back(block);
  311. traverse_from_root(block);
  312. }
  313. }
  314. return result;
  315. }
  316. template <class BB>
  317. void CFA<BB>::ComputeAugmentedCFG(
  318. std::vector<BB*>& ordered_blocks, BB* pseudo_entry_block,
  319. BB* pseudo_exit_block,
  320. std::unordered_map<const BB*, std::vector<BB*>>* augmented_successors_map,
  321. std::unordered_map<const BB*, std::vector<BB*>>* augmented_predecessors_map,
  322. get_blocks_func succ_func, get_blocks_func pred_func) {
  323. // Compute the successors of the pseudo-entry block, and
  324. // the predecessors of the pseudo exit block.
  325. auto sources = TraversalRoots(ordered_blocks, succ_func, pred_func);
  326. // For the predecessor traversals, reverse the order of blocks. This
  327. // will affect the post-dominance calculation as follows:
  328. // - Suppose you have blocks A and B, with A appearing before B in
  329. // the list of blocks.
  330. // - Also, A branches only to B, and B branches only to A.
  331. // - We want to compute A as dominating B, and B as post-dominating B.
  332. // By using reversed blocks for predecessor traversal roots discovery,
  333. // we'll add an edge from B to the pseudo-exit node, rather than from A.
  334. // All this is needed to correctly process the dominance/post-dominance
  335. // constraint when A is a loop header that points to itself as its
  336. // own continue target, and B is the latch block for the loop.
  337. std::vector<BB*> reversed_blocks(ordered_blocks.rbegin(),
  338. ordered_blocks.rend());
  339. auto sinks = TraversalRoots(reversed_blocks, pred_func, succ_func);
  340. // Wire up the pseudo entry block.
  341. (*augmented_successors_map)[pseudo_entry_block] = sources;
  342. for (auto block : sources) {
  343. auto& augmented_preds = (*augmented_predecessors_map)[block];
  344. const auto preds = pred_func(block);
  345. augmented_preds.reserve(1 + preds->size());
  346. augmented_preds.push_back(pseudo_entry_block);
  347. augmented_preds.insert(augmented_preds.end(), preds->begin(), preds->end());
  348. }
  349. // Wire up the pseudo exit block.
  350. (*augmented_predecessors_map)[pseudo_exit_block] = sinks;
  351. for (auto block : sinks) {
  352. auto& augmented_succ = (*augmented_successors_map)[block];
  353. const auto succ = succ_func(block);
  354. augmented_succ.reserve(1 + succ->size());
  355. augmented_succ.push_back(pseudo_exit_block);
  356. augmented_succ.insert(augmented_succ.end(), succ->begin(), succ->end());
  357. }
  358. }
  359. } // namespace spvtools
  360. #endif // SOURCE_CFA_H_