loop_utils.cpp

// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "source/cfa.h"
#include "source/opt/cfg.h"
#include "source/opt/ir_builder.h"
#include "source/opt/ir_context.h"
#include "source/opt/loop_descriptor.h"
#include "source/opt/loop_utils.h"

namespace spvtools {
namespace opt {
namespace {

// Returns true if |bb| is dominated by at least one block in |exits|.
inline bool DominatesAnExit(BasicBlock* bb,
                            const std::unordered_set<BasicBlock*>& exits,
                            const DominatorTree& dom_tree) {
  for (BasicBlock* e_bb : exits)
    if (dom_tree.Dominates(bb, e_bb)) return true;
  return false;
}

// Utility class to rewrite out-of-loop uses of an in-loop definition in
// terms of phi instructions to achieve LCSSA form.
// For a given definition, the class user registers phi instructions using
// that definition in all loop exit blocks by which the definition escapes.
// Then, when rewriting a use of the definition, the rewriter walks the paths
// from the use to the loop exits. At each step, it inserts a phi instruction
// to merge the incoming values according to the exit blocks' definitions.
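//
// A minimal sketch of the property this enforces (illustrative SPIR-V-like
// pseudocode, not taken from this file): a value %v defined in the loop and
// used after it must flow through a phi in the exit block.
//
//   ; before                        ; after (LCSSA)
//   %exit:                          %exit:
//                                     %p = OpPhi %ty %v %loop_bb
//   %after:                         %after:
//     %u = OpIAdd %ty %v %c           %u = OpIAdd %ty %p %c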
class LCSSARewriter {
 public:
  LCSSARewriter(IRContext* context, const DominatorTree& dom_tree,
                const std::unordered_set<BasicBlock*>& exit_bb,
                BasicBlock* merge_block)
      : context_(context),
        cfg_(context_->cfg()),
        dom_tree_(dom_tree),
        exit_bb_(exit_bb),
        merge_block_id_(merge_block ? merge_block->id() : 0) {}

  struct UseRewriter {
    explicit UseRewriter(LCSSARewriter* base, const Instruction& def_insn)
        : base_(base), def_insn_(def_insn) {}

    // Rewrites the use of |def_insn_| by the instruction |user| at the index
    // |operand_index| in terms of phi instructions. This recursively builds
    // new phi instructions from |user| to the loop exit blocks' phis. The use
    // of |def_insn_| in |user| is replaced by the relevant phi instruction at
    // the end of the operation.
    // It is assumed that |user| does not dominate any of the loop exit basic
    // blocks. This operation does not update the def/use manager; instead it
    // records what needs to be updated. The actual update is performed by
    // UpdateManagers.
    bool RewriteUse(BasicBlock* bb, Instruction* user, uint32_t operand_index) {
      assert(
          (user->opcode() != spv::Op::OpPhi || bb != GetParent(user)) &&
          "The root basic block must be the incoming edge if |user| is a phi "
          "instruction");
      assert((user->opcode() == spv::Op::OpPhi || bb == GetParent(user)) &&
             "The root basic block must be the instruction parent if |user| "
             "is not a phi instruction");

      Instruction* new_def = GetOrBuildIncoming(bb->id());
      if (!new_def) {
        return false;
      }

      user->SetOperand(operand_index, {new_def->result_id()});
      rewritten_.insert(user);
      return true;
    }

    // In-place update of some managers (avoid full invalidation).
    inline void UpdateManagers() {
      analysis::DefUseManager* def_use_mgr = base_->context_->get_def_use_mgr();
      // Register all new definitions.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstDef(insn);
      }
      // Register all new uses.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstUse(insn);
      }
    }

   private:
    // Returns the basic block that |instr| belongs to.
    BasicBlock* GetParent(Instruction* instr) {
      return base_->context_->get_instr_block(instr);
    }

    // Builds a phi instruction for the basic block |bb|. The function assumes
    // that |defining_blocks| contains the list of basic blocks that define the
    // usable value for each predecessor of |bb|.
    inline Instruction* CreatePhiInstruction(
        BasicBlock* bb, const std::vector<uint32_t>& defining_blocks) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      assert(bb_preds.size() == defining_blocks.size());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(
            GetOrBuildIncoming(defining_blocks[i])->result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);
      if (!incoming_phi) {
        return nullptr;
      }
      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }
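
    // Note on the operand layout (illustrative ids, not from this file):
    // |incomings| interleaves values with predecessor labels, e.g. for
    // predecessors {%b1, %b2} and incoming defs {%v1, %v2} it holds
    // {%v1, %b1, %v2, %b2}, which AddPhi emits as:
    //   %p = OpPhi %ty %v1 %b1 %v2 %b2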

    // Builds a phi instruction for the basic block |bb|. All the incoming
    // values will be |value|.
    inline Instruction* CreatePhiInstruction(BasicBlock* bb,
                                             const Instruction& value) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(value.result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);
      if (!incoming_phi) {
        return nullptr;
      }
      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Return the new def to use for the basic block |bb_id|.
    // If |bb_id| does not have a suitable def to use then we:
    //   - return the common def used by all predecessors;
    //   - if there is no common def, then we build a new phi instr at the
    //     beginning of |bb_id| and return this new instruction.
    Instruction* GetOrBuildIncoming(uint32_t bb_id) {
      assert(base_->cfg_->block(bb_id) != nullptr && "Unknown basic block");

      Instruction*& incoming_phi = bb_to_phi_[bb_id];
      if (incoming_phi) {
        return incoming_phi;
      }

      BasicBlock* bb = &*base_->cfg_->block(bb_id);
      // If this is an exit basic block, check whether there already is an
      // eligible phi instruction. An eligible phi has |def_insn_| as all its
      // incoming values.
      if (base_->exit_bb_.count(bb)) {
        // Look for an eligible phi in this block.
        if (!bb->WhileEachPhiInst([&incoming_phi, this](Instruction* phi) {
              for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
                if (phi->GetSingleWordInOperand(i) != def_insn_.result_id())
                  return true;
              }
              incoming_phi = phi;
              rewritten_.insert(incoming_phi);
              return false;
            })) {
          return incoming_phi;
        }
        incoming_phi = CreatePhiInstruction(bb, def_insn_);
        return incoming_phi;
      }

      // Get the block that defines the value to use for each predecessor.
      // If the vector has one value, this block does not need to build a phi
      // instruction unless |bb_id| is the loop merge block.
      const std::vector<uint32_t>& defining_blocks =
          base_->GetDefiningBlocks(bb_id);

      // Special case for structured loops: the merge block might not be in
      // the exit block set. To maintain the structured properties, later
      // transformations are easier if the merge block also holds a phi
      // instruction like the exit blocks do.
      if (defining_blocks.size() > 1 || bb_id == base_->merge_block_id_) {
        if (defining_blocks.size() > 1) {
          incoming_phi = CreatePhiInstruction(bb, defining_blocks);
        } else {
          assert(bb_id == base_->merge_block_id_);
          incoming_phi =
              CreatePhiInstruction(bb, *GetOrBuildIncoming(defining_blocks[0]));
        }
      } else {
        incoming_phi = GetOrBuildIncoming(defining_blocks[0]);
      }

      return incoming_phi;
    }

    LCSSARewriter* base_;
    const Instruction& def_insn_;
    std::unordered_map<uint32_t, Instruction*> bb_to_phi_;
    std::unordered_set<Instruction*> rewritten_;
  };

 private:
  // Returns, for each predecessor of |bb_id|, the basic block that holds the
  // definition that predecessor should use:
  //   - if a loop exit block dominates |bb_id|, that exit block is the only
  //     entry;
  //   - otherwise each predecessor contributes its own single defining block
  //     if it has one, or itself if a phi has to be built there.
  const std::vector<uint32_t>& GetDefiningBlocks(uint32_t bb_id) {
    assert(cfg_->block(bb_id) != nullptr && "Unknown basic block");
    std::vector<uint32_t>& defining_blocks = bb_to_defining_blocks_[bb_id];

    if (defining_blocks.size()) return defining_blocks;

    // Check if one of the loop exit basic blocks dominates |bb_id|.
    for (const BasicBlock* e_bb : exit_bb_) {
      if (dom_tree_.Dominates(e_bb->id(), bb_id)) {
        defining_blocks.push_back(e_bb->id());
        return defining_blocks;
      }
    }

    // Process the predecessors: each returns its suitable blocks. If they
    // are all the same, this basic block is dominated by a common block and
    // we won't need to build a phi instruction.
    for (uint32_t pred_id : cfg_->preds(bb_id)) {
      const std::vector<uint32_t>& pred_blocks = GetDefiningBlocks(pred_id);
      if (pred_blocks.size() == 1)
        defining_blocks.push_back(pred_blocks[0]);
      else
        defining_blocks.push_back(pred_id);
    }
    assert(defining_blocks.size());
    if (std::all_of(defining_blocks.begin(), defining_blocks.end(),
                    [&defining_blocks](uint32_t id) {
                      return id == defining_blocks[0];
                    })) {
      // No need for a phi.
      defining_blocks.resize(1);
    }

    return defining_blocks;
  }

  IRContext* context_;
  CFG* cfg_;
  const DominatorTree& dom_tree_;
  const std::unordered_set<BasicBlock*>& exit_bb_;
  uint32_t merge_block_id_;
  // This map represents the set of known paths. For each key, the vector
  // represents the set of blocks holding the definition to be used to build
  // the phi instruction:
  //   - if the vector is empty, the path is not known yet and must be built;
  //   - if the vector has one value, the value defined by that basic block
  //     should be used;
  //   - if the vector has more than one value, a phi node must be created;
  //     the basic block ordering is the same as the predecessor ordering.
  std::unordered_map<uint32_t, std::vector<uint32_t>> bb_to_defining_blocks_;
};
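
// Small worked example of the defining-blocks computation above
// (illustrative; the block names are hypothetical). Assume two loop exits
// |e1| and |e2| that branch to |a| and |b| respectively, joining at |c|:
//
//   e1   e2
//   |    |
//   a    b
//    \  /
//     c
//
// GetDefiningBlocks(a) is {e1} and GetDefiningBlocks(b) is {e2} because an
// exit dominates each of them. No exit dominates |c|, so it collects one
// entry per predecessor, giving {e1, e2}: a phi merging both incoming
// definitions must be built at |c|. If |a| and |b| were instead reached from
// a single exit |e|, every entry would be |e| and the vector would collapse
// to {e}, meaning no phi is needed.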

// Makes the set |blocks| closed SSA. The set is closed SSA if all the uses
// outside the set are phi instructions in the exit basic block set (held by
// |lcssa_rewriter|).
inline bool MakeSetClosedSSA(IRContext* context, Function* function,
                             const std::unordered_set<uint32_t>& blocks,
                             const std::unordered_set<BasicBlock*>& exit_bb,
                             LCSSARewriter* lcssa_rewriter) {
  CFG& cfg = *context->cfg();
  DominatorTree& dom_tree =
      context->GetDominatorAnalysis(function)->GetDomTree();
  analysis::DefUseManager* def_use_manager = context->get_def_use_mgr();

  for (uint32_t bb_id : blocks) {
    BasicBlock* bb = cfg.block(bb_id);
    // If bb does not dominate an exit block, then it cannot have escaping
    // defs.
    if (!DominatesAnExit(bb, exit_bb, dom_tree)) continue;
    for (Instruction& inst : *bb) {
      LCSSARewriter::UseRewriter rewriter(lcssa_rewriter, inst);
      bool success = def_use_manager->WhileEachUse(
          &inst, [&blocks, &rewriter, &exit_bb, context](
                     Instruction* use, uint32_t operand_index) {
            BasicBlock* use_parent = context->get_instr_block(use);
            assert(use_parent);
            if (blocks.count(use_parent->id())) return true;

            if (use->opcode() == spv::Op::OpPhi) {
              // If the use is a phi instruction and the incoming block is
              // coming from the loop, then that's consistent with LCSSA
              // form.
              if (exit_bb.count(use_parent)) {
                return true;
              } else {
                // That's not an exit block, but the user is a phi
                // instruction. Consider the incoming branch only.
                use_parent = context->get_instr_block(
                    use->GetSingleWordOperand(operand_index + 1));
              }
            }

            // Rewrite the use. Note that this call does not invalidate the
            // def/use manager, so this operation is safe.
            return rewriter.RewriteUse(use_parent, use, operand_index);
          });
      if (!success) {
        return false;
      }
      rewriter.UpdateManagers();
    }
  }

  return true;
}

}  // namespace
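
// A dedicated exit block has all of its predecessors inside the loop.
// Illustrative sketch of the rewrite below (block names are hypothetical):
// if merge block |m| is reached both from the in-loop block |b1| and from
// the out-of-loop block |b2|, a fresh block |exit| is interposed on the
// in-loop edge:
//
//   before:  b1 --> m <-- b2        after:  b1 --> exit --> m <-- b2
//
// Phi instructions in |m| are split accordingly: incomings from the loop
// move to a new phi in |exit|, and |m| keeps a single incoming from |exit|.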
bool LoopUtils::CreateLoopDedicatedExits() {
  Function* function = loop_->GetHeaderBlock()->GetParent();
  LoopDescriptor& loop_desc = *context_->GetLoopDescriptor(function);
  CFG& cfg = *context_->cfg();
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  const IRContext::Analysis PreservedAnalyses =
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping;

  // Gathers the set of basic blocks that are not in this loop and have at
  // least one predecessor in the loop and one not in the loop.
  std::unordered_set<uint32_t> exit_bb_set;
  loop_->GetExitBlocks(&exit_bb_set);

  std::unordered_set<BasicBlock*> new_loop_exits;
  bool made_change = false;

  // For each exit block, we create a new block that gathers all the branches
  // from the loop and falls through into the original block.
  for (uint32_t non_dedicate_id : exit_bb_set) {
    BasicBlock* non_dedicate = cfg.block(non_dedicate_id);
    const std::vector<uint32_t>& bb_pred = cfg.preds(non_dedicate_id);
    // Ignore the block if all the predecessors are in the loop.
    if (std::all_of(bb_pred.begin(), bb_pred.end(),
                    [this](uint32_t id) { return loop_->IsInsideLoop(id); })) {
      new_loop_exits.insert(non_dedicate);
      continue;
    }

    made_change = true;
    Function::iterator insert_pt = function->begin();
    for (; insert_pt != function->end() && &*insert_pt != non_dedicate;
         ++insert_pt) {
    }
    assert(insert_pt != function->end() && "Basic Block not found");

    // Create the dedicated exit basic block.
    uint32_t exit_id = context_->TakeNextId();
    if (exit_id == 0) {
      return false;
    }
    BasicBlock& exit = *insert_pt.InsertBefore(std::unique_ptr<BasicBlock>(
        new BasicBlock(std::unique_ptr<Instruction>(new Instruction(
            context_, spv::Op::OpLabel, 0, exit_id, {})))));
    exit.SetParent(function);

    // Redirect in-loop predecessors to the |exit| block.
    for (uint32_t exit_pred_id : bb_pred) {
      if (loop_->IsInsideLoop(exit_pred_id)) {
        BasicBlock* pred_block = cfg.block(exit_pred_id);
        pred_block->ForEachSuccessorLabel([non_dedicate, &exit](uint32_t* id) {
          if (*id == non_dedicate->id()) *id = exit.id();
        });
        // Update the CFG.
        // |non_dedicate|'s predecessor list will be updated at the end of
        // the loop.
        cfg.RegisterBlock(pred_block);
      }
    }

    // Register the label with the def/use manager; required for the phi
    // patching below.
    def_use_mgr->AnalyzeInstDefUse(exit.GetLabelInst());
    context_->set_instr_block(exit.GetLabelInst(), &exit);

    InstructionBuilder builder(context_, &exit, PreservedAnalyses);
    // Now jump from our dedicated basic block to the old exit.
    // We also reset the insert point so all instructions are inserted before
    // the branch.
    builder.SetInsertPoint(builder.AddBranch(non_dedicate->id()));
    bool succeeded = non_dedicate->WhileEachPhiInst(
        [&builder, &exit, def_use_mgr, this](Instruction* phi) {
          // New phi operands for this instruction.
          std::vector<uint32_t> new_phi_op;
          // Phi operands for the dedicated exit block.
          std::vector<uint32_t> exit_phi_op;
          for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
            uint32_t def_id = phi->GetSingleWordInOperand(i);
            uint32_t incoming_id = phi->GetSingleWordInOperand(i + 1);
            if (loop_->IsInsideLoop(incoming_id)) {
              exit_phi_op.push_back(def_id);
              exit_phi_op.push_back(incoming_id);
            } else {
              new_phi_op.push_back(def_id);
              new_phi_op.push_back(incoming_id);
            }
          }

          // Build the new phi instruction for the dedicated exit block.
          Instruction* exit_phi = builder.AddPhi(phi->type_id(), exit_phi_op);
          if (!exit_phi) {
            return false;
          }

          // Build the new incoming branch.
          new_phi_op.push_back(exit_phi->result_id());
          new_phi_op.push_back(exit.id());
          // Rewrite operands.
          uint32_t idx = 0;
          for (; idx < new_phi_op.size(); idx++)
            phi->SetInOperand(idx, {new_phi_op[idx]});
          // Remove extra operands, from last to first (more efficient).
          for (uint32_t j = phi->NumInOperands() - 1; j >= idx; j--)
            phi->RemoveInOperand(j);
          // Update the def/use manager for this |phi|.
          def_use_mgr->AnalyzeInstUse(phi);
          return true;
        });
    if (!succeeded) return false;

    // Update the CFG.
    cfg.RegisterBlock(&exit);
    cfg.RemoveNonExistingEdges(non_dedicate->id());
    new_loop_exits.insert(&exit);
    // If |non_dedicate| is in a loop, add the new dedicated exit in that
    // loop.
    if (Loop* parent_loop = loop_desc[non_dedicate])
      parent_loop->AddBasicBlock(&exit);
  }

  if (new_loop_exits.size() == 1) {
    loop_->SetMergeBlock(*new_loop_exits.begin());
  }

  if (made_change) {
    context_->InvalidateAnalysesExceptFor(
        PreservedAnalyses | IRContext::kAnalysisCFG |
        IRContext::Analysis::kAnalysisLoopAnalysis);
  }

  return true;
}

bool LoopUtils::MakeLoopClosedSSA() {
  if (!CreateLoopDedicatedExits()) {
    return false;
  }

  Function* function = loop_->GetHeaderBlock()->GetParent();
  CFG& cfg = *context_->cfg();
  DominatorTree& dom_tree =
      context_->GetDominatorAnalysis(function)->GetDomTree();

  std::unordered_set<BasicBlock*> exit_bb;
  {
    std::unordered_set<uint32_t> exit_bb_id;
    loop_->GetExitBlocks(&exit_bb_id);
    for (uint32_t bb_id : exit_bb_id) {
      exit_bb.insert(cfg.block(bb_id));
    }
  }

  LCSSARewriter lcssa_rewriter(context_, dom_tree, exit_bb,
                               loop_->GetMergeBlock());
  if (!MakeSetClosedSSA(context_, function, loop_->GetBlocks(), exit_bb,
                        &lcssa_rewriter)) {
    return false;
  }

  // Make sure all defs post-dominated by the merge block have their last use
  // no further than the merge block.
  if (loop_->GetMergeBlock()) {
    std::unordered_set<uint32_t> merging_bb_id;
    loop_->GetMergingBlocks(&merging_bb_id);
    merging_bb_id.erase(loop_->GetMergeBlock()->id());
    // Reset the exit set: now only the merge block is the exit.
    exit_bb.clear();
    exit_bb.insert(loop_->GetMergeBlock());
    // LCSSARewriter is reusable here only because it forces the creation of
    // a phi instruction in the merge block.
    if (!MakeSetClosedSSA(context_, function, merging_bb_id, exit_bb,
                          &lcssa_rewriter)) {
      return false;
    }
  }

  context_->InvalidateAnalysesExceptFor(
      IRContext::Analysis::kAnalysisCFG |
      IRContext::Analysis::kAnalysisDominatorAnalysis |
      IRContext::Analysis::kAnalysisLoopAnalysis);

  return true;
}
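
// Hypothetical caller sketch (assumes the LoopUtils interface declared in
// source/opt/loop_utils.h; the |context| and |loop| variables are made up):
//
//   LoopUtils utils(context, loop);
//   if (!utils.MakeLoopClosedSSA()) {
//     // Id overflow or a failed phi creation; give up on this loop.
//     return false;
//   }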

Loop* LoopUtils::CloneLoop(LoopCloningResult* cloning_result) const {
  // Compute the structured order of the loop basic blocks and store it in
  // the vector |ordered_loop_blocks|.
  std::vector<BasicBlock*> ordered_loop_blocks;
  loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);

  // Clone the loop.
  return CloneLoop(cloning_result, ordered_loop_blocks);
}

Loop* LoopUtils::CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result) {
  // Clone the loop.
  Loop* cloned_loop = CloneLoop(cloning_result);
  if (!cloned_loop) {
    return nullptr;
  }

  // Create a new exit block/label for the new loop.
  uint32_t new_label_id = context_->TakeNextId();
  if (new_label_id == 0) {
    return nullptr;
  }
  std::unique_ptr<Instruction> new_label{
      new Instruction(context_, spv::Op::OpLabel, 0, new_label_id, {})};
  std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
  new_exit_bb->SetParent(loop_->GetMergeBlock()->GetParent());

  // Create an unconditional branch to the header block.
  InstructionBuilder builder{context_, new_exit_bb.get()};
  builder.AddBranch(loop_->GetHeaderBlock()->id());

  // Save the ids of the new and old merge blocks.
  const uint32_t old_merge_block = loop_->GetMergeBlock()->id();
  const uint32_t new_merge_block = new_exit_bb->id();

  // Replace the uses of the old merge block in the new loop with the new
  // merge block.
  for (std::unique_ptr<BasicBlock>& basic_block : cloning_result->cloned_bb_) {
    for (Instruction& inst : *basic_block) {
      // For each operand in each instruction, check if it is using the old
      // merge block and change it to be the new merge block.
      auto replace_merge_use = [old_merge_block,
                                new_merge_block](uint32_t* id) {
        if (*id == old_merge_block) *id = new_merge_block;
      };
      inst.ForEachInOperand(replace_merge_use);
    }
  }

  const uint32_t old_header = loop_->GetHeaderBlock()->id();
  const uint32_t new_header = cloned_loop->GetHeaderBlock()->id();
  analysis::DefUseManager* def_use = context_->get_def_use_mgr();

  def_use->ForEachUse(old_header,
                      [new_header, this](Instruction* inst, uint32_t operand) {
                        if (!this->loop_->IsInsideLoop(inst))
                          inst->SetOperand(operand, {new_header});
                      });

  BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();
  if (!pre_header) {
    return nullptr;
  }

  def_use->ForEachUse(
      pre_header->id(),
      [new_merge_block, this](Instruction* inst, uint32_t operand) {
        if (this->loop_->IsInsideLoop(inst))
          inst->SetOperand(operand, {new_merge_block});
      });

  cloned_loop->SetMergeBlock(new_exit_bb.get());
  cloned_loop->SetPreHeaderBlock(loop_->GetPreHeaderBlock());

  // Add the new block to the cloned instructions.
  cloning_result->cloned_bb_.push_back(std::move(new_exit_bb));
  return cloned_loop;
}
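
// Typical cloning flow (illustrative sketch; |utils| is a LoopUtils made up
// for the example):
//
//   LoopUtils::LoopCloningResult result;
//   Loop* clone = utils.CloneLoop(&result);
//   if (!clone) { /* id overflow, bail out */ }
//   // result.value_map_ maps old result ids to the cloned ids, and
//   // result.old_to_new_bb_ maps original blocks to their clones.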

Loop* LoopUtils::CloneLoop(
    LoopCloningResult* cloning_result,
    const std::vector<BasicBlock*>& ordered_loop_blocks) const {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(context_);

  CFG& cfg = *context_->cfg();

  // Clone and place blocks in a SPIR-V compliant order (dominators first).
  for (BasicBlock* old_bb : ordered_loop_blocks) {
    // For each basic block in the loop, we clone it and register the mapping
    // between old and new ids.
    BasicBlock* new_bb = old_bb->Clone(context_);
    if (!new_bb) return nullptr;
    new_bb->SetParent(&function_);
    uint32_t new_label_id = context_->TakeNextId();
    if (new_label_id == 0) {
      return nullptr;
    }
    new_bb->GetLabelInst()->SetResultId(new_label_id);
    def_use_mgr->AnalyzeInstDef(new_bb->GetLabelInst());
    context_->set_instr_block(new_bb->GetLabelInst(), new_bb);
    cloning_result->cloned_bb_.emplace_back(new_bb);

    cloning_result->old_to_new_bb_[old_bb->id()] = new_bb;
    cloning_result->new_to_old_bb_[new_bb->id()] = old_bb;
    cloning_result->value_map_[old_bb->id()] = new_bb->id();

    if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb);

    for (auto new_inst = new_bb->begin(), old_inst = old_bb->begin();
         new_inst != new_bb->end(); ++new_inst, ++old_inst) {
      cloning_result->ptr_map_[&*new_inst] = &*old_inst;
      if (new_inst->HasResultId()) {
        uint32_t new_result_id = context_->TakeNextId();
        if (new_result_id == 0) {
          return nullptr;
        }
        new_inst->SetResultId(new_result_id);
        cloning_result->value_map_[old_inst->result_id()] =
            new_inst->result_id();

        // Only look at the defs for now; uses are not updated yet.
        def_use_mgr->AnalyzeInstDef(&*new_inst);
      }
    }
  }

  // All the instructions (including all the labels) have been cloned; remap
  // the instructions' operand ids to the new ones.
  for (std::unique_ptr<BasicBlock>& bb_ref : cloning_result->cloned_bb_) {
    BasicBlock* bb = bb_ref.get();

    for (Instruction& insn : *bb) {
      insn.ForEachInId([cloning_result](uint32_t* old_id) {
        // If the operand is defined in the loop, remap the id.
        auto id_it = cloning_result->value_map_.find(*old_id);
        if (id_it != cloning_result->value_map_.end()) {
          *old_id = id_it->second;
        }
      });
      // Only look at what the instruction uses. All defs are already
      // registered, so everything should be fine now.
      def_use_mgr->AnalyzeInstUse(&insn);
      context_->set_instr_block(&insn, bb);
    }
    cfg.RegisterBlock(bb);
  }

  PopulateLoopNest(new_loop.get(), *cloning_result);

  return new_loop.release();
}

void LoopUtils::PopulateLoopNest(
    Loop* new_loop, const LoopCloningResult& cloning_result) const {
  std::unordered_map<Loop*, Loop*> loop_mapping;
  loop_mapping[loop_] = new_loop;

  if (loop_->HasParent()) loop_->GetParent()->AddNestedLoop(new_loop);
  PopulateLoopDesc(new_loop, loop_, cloning_result);

  for (Loop& sub_loop :
       make_range(++TreeDFIterator<Loop>(loop_), TreeDFIterator<Loop>())) {
    Loop* cloned = new Loop(context_);
    if (Loop* parent = loop_mapping[sub_loop.GetParent()])
      parent->AddNestedLoop(cloned);
    loop_mapping[&sub_loop] = cloned;
    PopulateLoopDesc(cloned, &sub_loop, cloning_result);
  }

  loop_desc_->AddLoopNest(std::unique_ptr<Loop>(new_loop));
}

// Populates the |new_loop| descriptor according to |old_loop|'s.
void LoopUtils::PopulateLoopDesc(
    Loop* new_loop, Loop* old_loop,
    const LoopCloningResult& cloning_result) const {
  for (uint32_t bb_id : old_loop->GetBlocks()) {
    BasicBlock* bb = cloning_result.old_to_new_bb_.at(bb_id);
    new_loop->AddBasicBlock(bb);
  }
  new_loop->SetHeaderBlock(
      cloning_result.old_to_new_bb_.at(old_loop->GetHeaderBlock()->id()));
  if (old_loop->GetLatchBlock())
    new_loop->SetLatchBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id()));
  if (old_loop->GetContinueBlock())
    new_loop->SetContinueBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetContinueBlock()->id()));
  if (old_loop->GetMergeBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetMergeBlock()->id());
    BasicBlock* bb = it != cloning_result.old_to_new_bb_.end()
                         ? it->second
                         : old_loop->GetMergeBlock();
    new_loop->SetMergeBlock(bb);
  }
  if (old_loop->GetPreHeaderBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetPreHeaderBlock()->id());
    if (it != cloning_result.old_to_new_bb_.end()) {
      new_loop->SetPreHeaderBlock(it->second);
    }
  }
}

// Gathers some metrics about the region of interest (here, the |loop|).
void CodeMetrics::Analyze(const Loop& loop) {
  CFG& cfg = *loop.GetContext()->cfg();

  roi_size_ = 0;
  block_sizes_.clear();

  for (uint32_t id : loop.GetBlocks()) {
    const BasicBlock* bb = cfg.block(id);
    size_t bb_size = 0;
    bb->ForEachInst([&bb_size](const Instruction* insn) {
      if (insn->opcode() == spv::Op::OpLabel) return;
      if (insn->IsNop()) return;
      if (insn->opcode() == spv::Op::OpPhi) return;
      bb_size++;
    });
    block_sizes_[bb->id()] = bb_size;
    roi_size_ += bb_size;
  }
}
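
// Illustrative use of the metrics (a sketch; |threshold| and the surrounding
// pass code are hypothetical):
//
//   CodeMetrics metrics;
//   metrics.Analyze(*loop);
//   // roi_size_ counts the "real" instructions (labels, nops and phis are
//   // excluded), which a pass can use as a size heuristic:
//   if (metrics.roi_size_ > threshold) return;  // loop too big to transform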

}  // namespace opt
}  // namespace spvtools