loop_unroller.cpp 42 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143
  1. // Copyright (c) 2018 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "source/opt/loop_unroller.h"
  15. #include <limits>
  16. #include <memory>
  17. #include <unordered_map>
  18. #include <utility>
  19. #include <vector>
  20. #include "source/opt/ir_builder.h"
  21. #include "source/opt/loop_utils.h"
  22. // Implements loop util unrolling functionality for fully and partially
  23. // unrolling loops. Given a factor it will duplicate the loop that many times,
  24. // appending each one to the end of the old loop and removing backedges, to
  25. // create a new unrolled loop.
  26. //
  27. // 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
  28. // loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
  29. // validate that a given loop can be unrolled. That method (along with the
  30. // constructor of loop) checks that the IR is in the expected canonicalised
  31. // format.
  32. //
  33. // 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
  34. // perform the unrolling. This implements helper methods to copy the loop basic
  35. // blocks and remap the ids of instructions used inside them.
  36. //
  37. // 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
  38. // actually performs the loop duplication. It does this by creating a
  39. // LoopUnrollState object and then copying the loop as given by the factor
  40. // parameter. The LoopUnrollState object retains the state of the unroller
  41. // between the loop body copies as each iteration needs information on the last
  42. // to adjust the phi induction variable, adjust the OpLoopMerge instruction in
  43. // the main loop header, and change the previous continue block to point to the
  44. // new header and the new continue block to the main loop header.
  45. //
  46. // 4 - If the loop is to be fully unrolled then it is simply closed after step
  47. // 3, with the OpLoopMerge being deleted, the backedge removed, and the
  48. // condition blocks folded.
  49. //
  50. // 5 - If it is being partially unrolled: if the unrolling factor leaves the
  51. // loop with an even number of bodies with respect to the number of loop
  52. // iterations then step 3 is all that is needed. If it is uneven then we need to
  53. // duplicate the loop completely and unroll the duplicated loop to cover the
  54. // residual part and adjust the first loop to cover only the "even" part. For
  55. // instance if you request an unroll factor of 3 on a loop with 10 iterations
  56. // then copying the body three times would leave you with three bodies in the
  57. // loop
  58. // where the loop still iterates over each 4 times. So we make two loops one
  59. // iterating once then a second loop of three iterating 3 times.
  60. namespace spvtools {
  61. namespace opt {
  62. namespace {
  63. // Loop control constant value for DontUnroll flag.
  64. constexpr uint32_t kLoopControlDontUnrollIndex = 2;
  65. // Operand index of the loop control parameter of the OpLoopMerge.
  66. constexpr uint32_t kLoopControlIndex = 2;
  67. // This utility class encapsulates some of the state we need to maintain between
  68. // loop unrolls. Specifically it maintains key blocks and the induction variable
  69. // in the current loop duplication step and the blocks from the previous one.
  70. // This is because each step of the unroll needs to use data from both the
  71. // preceding step and the original loop.
  72. struct LoopUnrollState {
  73. LoopUnrollState()
  74. : previous_phi_(nullptr),
  75. previous_latch_block_(nullptr),
  76. previous_condition_block_(nullptr),
  77. new_phi(nullptr),
  78. new_continue_block(nullptr),
  79. new_condition_block(nullptr),
  80. new_header_block(nullptr) {}
  81. // Initialize from the loop descriptor class.
  82. LoopUnrollState(Instruction* induction, BasicBlock* latch_block,
  83. BasicBlock* condition, std::vector<Instruction*>&& phis)
  84. : previous_phi_(induction),
  85. previous_latch_block_(latch_block),
  86. previous_condition_block_(condition),
  87. new_phi(nullptr),
  88. new_continue_block(nullptr),
  89. new_condition_block(nullptr),
  90. new_header_block(nullptr) {
  91. previous_phis_ = std::move(phis);
  92. }
  93. // Swap the state so that the new nodes are now the previous nodes.
  94. void NextIterationState() {
  95. previous_phi_ = new_phi;
  96. previous_latch_block_ = new_latch_block;
  97. previous_condition_block_ = new_condition_block;
  98. previous_phis_ = std::move(new_phis_);
  99. // Clear new nodes.
  100. new_phi = nullptr;
  101. new_continue_block = nullptr;
  102. new_condition_block = nullptr;
  103. new_header_block = nullptr;
  104. new_latch_block = nullptr;
  105. // Clear new block/instruction maps.
  106. new_blocks.clear();
  107. new_inst.clear();
  108. ids_to_new_inst.clear();
  109. }
  110. // The induction variable from the immediately preceding loop body.
  111. Instruction* previous_phi_;
  112. // All the phi nodes from the previous loop iteration.
  113. std::vector<Instruction*> previous_phis_;
  114. std::vector<Instruction*> new_phis_;
  115. // The previous latch block. The backedge will be removed from this and
  116. // added to the new latch block.
  117. BasicBlock* previous_latch_block_;
  118. // The previous condition block. This may be folded to flatten the loop.
  119. BasicBlock* previous_condition_block_;
  120. // The new induction variable.
  121. Instruction* new_phi;
  122. // The new continue block.
  123. BasicBlock* new_continue_block;
  124. // The new condition block.
  125. BasicBlock* new_condition_block;
  126. // The new header block.
  127. BasicBlock* new_header_block;
  128. // The new latch block.
  129. BasicBlock* new_latch_block;
  130. // A mapping of new block ids to the original blocks which they were copied
  131. // from.
  132. std::unordered_map<uint32_t, BasicBlock*> new_blocks;
  133. // A mapping of the original instruction ids to the instruction ids to their
  134. // copies.
  135. std::unordered_map<uint32_t, uint32_t> new_inst;
  136. std::unordered_map<uint32_t, Instruction*> ids_to_new_inst;
  137. };
  138. // This class implements the actual unrolling. It uses a LoopUnrollState to
  139. // maintain the state of the unrolling in between steps.
  140. class LoopUnrollerUtilsImpl {
  141. public:
  142. using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>;
  143. LoopUnrollerUtilsImpl(IRContext* c, Function* function)
  144. : context_(c),
  145. function_(*function),
  146. loop_condition_block_(nullptr),
  147. loop_induction_variable_(nullptr),
  148. number_of_loop_iterations_(0),
  149. loop_step_value_(0),
  150. loop_init_value_(0) {}
  151. // Unroll the |loop| by given |factor| by copying the whole body |factor|
  152. // times. The resulting basicblock structure will remain a loop.
  153. void PartiallyUnroll(Loop*, size_t factor);
  154. // If partially unrolling the |loop| would leave the loop with too many bodies
  155. // for its number of iterations then this method should be used. This method
  156. // will duplicate the |loop| completely, making the duplicated loop the
  157. // successor of the original's merge block. The original loop will have its
  158. // condition changed to loop over the residual part and the duplicate will be
  159. // partially unrolled. The resulting structure will be two loops.
  160. void PartiallyUnrollResidualFactor(Loop* loop, size_t factor);
  161. // Fully unroll the |loop| by copying the full body by the total number of
  162. // loop iterations, folding all conditions, and removing the backedge from the
  163. // continue block to the header.
  164. void FullyUnroll(Loop* loop);
  165. // Get the ID of the variable in the |phi| paired with |label|.
  166. uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const;
  167. // Close the loop by removing the OpLoopMerge from the |loop| header block and
  168. // making the backedge point to the merge block.
  169. void CloseUnrolledLoop(Loop* loop);
  170. // Remove the OpConditionalBranch instruction inside |conditional_block| used
  171. // to branch to either exit or continue the loop and replace it with an
  172. // unconditional OpBranch to block |new_target|.
  173. void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target);
  174. // Add all blocks_to_add_ to function_ at the |insert_point|.
  175. void AddBlocksToFunction(const BasicBlock* insert_point);
  176. // Duplicates the |old_loop|, cloning each body and remapping the ids without
  177. // removing instructions or changing relative structure. Result will be stored
  178. // in |new_loop|.
  179. void DuplicateLoop(Loop* old_loop, Loop* new_loop);
  180. inline size_t GetLoopIterationCount() const {
  181. return number_of_loop_iterations_;
  182. }
  183. // Extracts the initial state information from the |loop|.
  184. void Init(Loop* loop);
  185. // Replace the uses of each induction variable outside the loop with the final
  186. // value of the induction variable before the loop exit. To reflect the proper
  187. // state of a fully unrolled loop.
  188. void ReplaceInductionUseWithFinalValue(Loop* loop);
  189. // Remove all the instructions in the invalidated_instructions_ vector.
  190. void RemoveDeadInstructions();
  191. // Replace any use of induction variables outwith the loop with the final
  192. // value of the induction variable in the unrolled loop.
  193. void ReplaceOutsideLoopUseWithFinalValue(Loop* loop);
  194. // Set the LoopControl operand of the OpLoopMerge instruction to be
  195. // DontUnroll.
  196. void MarkLoopControlAsDontUnroll(Loop* loop) const;
  197. private:
  198. // Remap all the in |basic_block| to new IDs and keep the mapping of new ids
  199. // to old
  200. // ids. |loop| is used to identify special loop blocks (header, continue,
  201. // etc).
  202. void AssignNewResultIds(BasicBlock* basic_block);
  203. // Using the map built by AssignNewResultIds, replace the uses in |inst|
  204. // by the id that the use maps to.
  205. void RemapOperands(Instruction* inst);
  206. // Using the map built by AssignNewResultIds, for each instruction in
  207. // |basic_block| use
  208. // that map to substitute the IDs used by instructions (in the operands) with
  209. // the new ids.
  210. void RemapOperands(BasicBlock* basic_block);
  211. // Copy the whole body of the loop, all blocks dominated by the |loop| header
  212. // and not dominated by the |loop| merge. The copied body will be linked to by
  213. // the old |loop| continue block and the new body will link to the |loop|
  214. // header via the new continue block. |eliminate_conditions| is used to decide
  215. // whether or not to fold all the condition blocks other than the last one.
  216. void CopyBody(Loop* loop, bool eliminate_conditions);
  217. // Copy a given |block_to_copy| in the |loop| and record the mapping of the
  218. // old/new ids. |preserve_instructions| determines whether or not the method
  219. // will modify (other than result_id) instructions which are copied.
  220. void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy,
  221. bool preserve_instructions);
  222. // The actual implementation of the unroll step. Unrolls |loop| by given
  223. // |factor| by copying the body by |factor| times. Also propagates the
  224. // induction variable value throughout the copies.
  225. void Unroll(Loop* loop, size_t factor);
  226. // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
  227. // as computed by the method ComputeLoopOrderedBlocks.
  228. void ComputeLoopOrderedBlocks(Loop* loop);
  229. // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
  230. // the parent exists.
  231. void AddBlocksToLoop(Loop* loop) const;
  232. // After the partially unroll step the phi instructions in the header block
  233. // will be in an illegal format. This function makes the phis legal by making
  234. // the edge from the latch block come from the new latch block and the value
  235. // to be the actual value of the phi at that point.
  236. void LinkLastPhisToStart(Loop* loop) const;
  237. // Kill all debug declaration instructions from |bb|.
  238. void KillDebugDeclares(BasicBlock* bb);
  239. // A pointer to the IRContext. Used to add/remove instructions and for usedef
  240. // chains.
  241. IRContext* context_;
  242. // A reference the function the loop is within.
  243. Function& function_;
  244. // A list of basic blocks to be added to the loop at the end of an unroll
  245. // step.
  246. BasicBlockListTy blocks_to_add_;
  247. // List of instructions which are now dead and can be removed.
  248. std::vector<Instruction*> invalidated_instructions_;
  249. // Maintains the current state of the transform between calls to unroll.
  250. LoopUnrollState state_;
  251. // An ordered list containing the loop basic blocks.
  252. std::vector<BasicBlock*> loop_blocks_inorder_;
  253. // The block containing the condition check which contains a conditional
  254. // branch to the merge and continue block.
  255. BasicBlock* loop_condition_block_;
  256. // The induction variable of the loop.
  257. Instruction* loop_induction_variable_;
  258. // Phis used in the loop need to be remapped to use the actual result values
  259. // and then be remapped at the end.
  260. std::vector<Instruction*> loop_phi_instructions_;
  261. // The number of loop iterations that the loop would perform pre-unroll.
  262. size_t number_of_loop_iterations_;
  263. // The amount that the loop steps each iteration.
  264. int64_t loop_step_value_;
  265. // The value the loop starts stepping from.
  266. int64_t loop_init_value_;
  267. };
  268. /*
  269. * Static helper functions.
  270. */
  271. // Retrieve the index of the OpPhi instruction |phi| which corresponds to the
  272. // incoming |block| id.
  273. uint32_t GetPhiIndexFromLabel(const BasicBlock* block, const Instruction* phi) {
  274. for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
  275. if (block->id() == phi->GetSingleWordInOperand(i)) {
  276. return i;
  277. }
  278. }
  279. assert(false && "Could not find operand in instruction.");
  280. return 0;
  281. }
  282. void LoopUnrollerUtilsImpl::Init(Loop* loop) {
  283. loop_condition_block_ = loop->FindConditionBlock();
  284. // When we reinit the second loop during PartiallyUnrollResidualFactor we need
  285. // to use the cached value from the duplicate step as the dominator tree
  286. // basded solution, loop->FindConditionBlock, requires all the nodes to be
  287. // connected up with the correct branches. They won't be at this point.
  288. if (!loop_condition_block_) {
  289. loop_condition_block_ = state_.new_condition_block;
  290. }
  291. assert(loop_condition_block_);
  292. loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_);
  293. assert(loop_induction_variable_);
  294. bool found = loop->FindNumberOfIterations(
  295. loop_induction_variable_, &*loop_condition_block_->ctail(),
  296. &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
  297. (void)found; // To silence unused variable warning on release builds.
  298. assert(found);
  299. // Blocks are stored in an unordered set of ids in the loop class, we need to
  300. // create the dominator ordered list.
  301. ComputeLoopOrderedBlocks(loop);
  302. }
  303. // This function is used to partially unroll the loop when the factor provided
  304. // would normally lead to an illegal optimization. Instead of just unrolling the
  305. // loop it creates two loops and unrolls one and adjusts the condition on the
  306. // other. The end result being that the new loop pair iterates over the correct
  307. // number of bodies.
  308. void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop,
  309. size_t factor) {
  310. // TODO(1841): Handle id overflow.
  311. std::unique_ptr<Instruction> new_label{new Instruction(
  312. context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})};
  313. std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
  314. new_exit_bb->SetParent(&function_);
  315. // Save the id of the block before we move it.
  316. uint32_t new_merge_id = new_exit_bb->id();
  317. // Add the block the list of blocks to add, we want this merge block to be
  318. // right at the start of the new blocks.
  319. blocks_to_add_.push_back(std::move(new_exit_bb));
  320. BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
  321. Instruction& original_conditional_branch = *loop_condition_block_->tail();
  322. // Duplicate the loop, providing access to the blocks of both loops.
  323. // This is a naked new due to the VS2013 requirement of not having unique
  324. // pointers in vectors, as it will be inserted into a vector with
  325. // loop_descriptor.AddLoop.
  326. std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop);
  327. // Clear the basic blocks of the new loop.
  328. new_loop->ClearBlocks();
  329. DuplicateLoop(loop, new_loop.get());
  330. // Add the blocks to the function.
  331. AddBlocksToFunction(loop->GetMergeBlock());
  332. blocks_to_add_.clear();
  333. // Create a new merge block for the first loop.
  334. InstructionBuilder builder{context_, new_exit_bb_raw};
  335. // Make the first loop branch to the second.
  336. builder.AddBranch(new_loop->GetHeaderBlock()->id());
  337. loop_condition_block_ = state_.new_condition_block;
  338. loop_induction_variable_ = state_.new_phi;
  339. // Unroll the new loop by the factor with the usual -1 to account for the
  340. // existing block iteration.
  341. Unroll(new_loop.get(), factor);
  342. LinkLastPhisToStart(new_loop.get());
  343. AddBlocksToLoop(new_loop.get());
  344. // Add the new merge block to the back of the list of blocks to be added. It
  345. // needs to be the last block added to maintain dominator order in the binary.
  346. blocks_to_add_.push_back(
  347. std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock()));
  348. // Add the blocks to the function.
  349. AddBlocksToFunction(loop->GetMergeBlock());
  350. // Reset the usedef analysis.
  351. context_->InvalidateAnalysesExceptFor(
  352. IRContext::Analysis::kAnalysisLoopAnalysis);
  353. analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
  354. // The loop condition.
  355. Instruction* condition_check = def_use_manager->GetDef(
  356. original_conditional_branch.GetSingleWordOperand(0));
  357. // This should have been checked by the LoopUtils::CanPerformUnroll function
  358. // before entering this.
  359. assert(loop->IsSupportedCondition(condition_check->opcode()));
  360. // We need to account for the initial body when calculating the remainder.
  361. int64_t remainder = Loop::GetResidualConditionValue(
  362. condition_check->opcode(), loop_init_value_, loop_step_value_,
  363. number_of_loop_iterations_, factor);
  364. assert(remainder > std::numeric_limits<int32_t>::min() &&
  365. remainder < std::numeric_limits<int32_t>::max());
  366. Instruction* new_constant = nullptr;
  367. // If the remainder is negative then we add a signed constant, otherwise just
  368. // add an unsigned constant.
  369. if (remainder < 0) {
  370. new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder));
  371. } else {
  372. new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder));
  373. }
  374. uint32_t constant_id = new_constant->result_id();
  375. // Update the condition check.
  376. condition_check->SetInOperand(1, {constant_id});
  377. // Update the next phi node. The phi will have a constant value coming in from
  378. // the preheader block. For the duplicated loop we need to update the constant
  379. // to be the amount of iterations covered by the first loop and the incoming
  380. // block to be the first loops new merge block.
  381. std::vector<Instruction*> new_inductions;
  382. new_loop->GetInductionVariables(new_inductions);
  383. std::vector<Instruction*> old_inductions;
  384. loop->GetInductionVariables(old_inductions);
  385. for (size_t index = 0; index < new_inductions.size(); ++index) {
  386. Instruction* new_induction = new_inductions[index];
  387. Instruction* old_induction = old_inductions[index];
  388. // Get the index of the loop initalizer, the value coming in from the
  389. // preheader.
  390. uint32_t initalizer_index =
  391. GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction);
  392. // Replace the second loop initalizer with the phi from the first
  393. new_induction->SetInOperand(initalizer_index - 1,
  394. {old_induction->result_id()});
  395. new_induction->SetInOperand(initalizer_index, {new_merge_id});
  396. // If the use of the first loop induction variable is outside of the loop
  397. // then replace that use with the second loop induction variable.
  398. uint32_t second_loop_induction = new_induction->result_id();
  399. auto replace_use_outside_of_loop = [loop, second_loop_induction](
  400. Instruction* user,
  401. uint32_t operand_index) {
  402. if (!loop->IsInsideLoop(user)) {
  403. user->SetOperand(operand_index, {second_loop_induction});
  404. }
  405. };
  406. context_->get_def_use_mgr()->ForEachUse(old_induction,
  407. replace_use_outside_of_loop);
  408. }
  409. context_->InvalidateAnalysesExceptFor(
  410. IRContext::Analysis::kAnalysisLoopAnalysis);
  411. context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
  412. LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_);
  413. loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent());
  414. RemoveDeadInstructions();
  415. }
  416. // Mark this loop as DontUnroll as it will already be unrolled and it may not
  417. // be safe to unroll a previously partially unrolled loop.
  418. void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const {
  419. Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
  420. assert(loop_merge_inst &&
  421. "Loop merge instruction could not be found after entering unroller "
  422. "(should have exited before this)");
  423. loop_merge_inst->SetInOperand(kLoopControlIndex,
  424. {kLoopControlDontUnrollIndex});
  425. }
  426. // Duplicate the |loop| body |factor| - 1 number of times while keeping the loop
  427. // backedge intact. This will leave the loop with |factor| number of bodies
  428. // after accounting for the initial body.
  429. void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) {
  430. // If we unroll a loop partially it will not be safe to unroll it further.
  431. // This is due to the current method of calculating the number of loop
  432. // iterations.
  433. MarkLoopControlAsDontUnroll(loop);
  434. std::vector<Instruction*> inductions;
  435. loop->GetInductionVariables(inductions);
  436. state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
  437. loop_condition_block_, std::move(inductions)};
  438. for (size_t i = 0; i < factor - 1; ++i) {
  439. CopyBody(loop, true);
  440. }
  441. }
  442. void LoopUnrollerUtilsImpl::RemoveDeadInstructions() {
  443. // Remove the dead instructions.
  444. for (Instruction* inst : invalidated_instructions_) {
  445. context_->KillInst(inst);
  446. }
  447. }
  448. void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) {
  449. context_->InvalidateAnalysesExceptFor(
  450. IRContext::Analysis::kAnalysisLoopAnalysis |
  451. IRContext::Analysis::kAnalysisDefUse |
  452. IRContext::Analysis::kAnalysisInstrToBlockMapping);
  453. std::vector<Instruction*> inductions;
  454. loop->GetInductionVariables(inductions);
  455. for (size_t index = 0; index < inductions.size(); ++index) {
  456. uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index],
  457. state_.previous_latch_block_->id());
  458. context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id);
  459. invalidated_instructions_.push_back(inductions[index]);
  460. }
  461. }
  462. // Fully unroll the loop by partially unrolling it by the number of loop
  463. // iterations minus one for the body already accounted for.
  464. void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) {
  465. // We unroll the loop by number of iterations in the loop.
  466. Unroll(loop, number_of_loop_iterations_);
  467. // The first condition block is preserved until now so it can be copied.
  468. FoldConditionBlock(loop_condition_block_, 1);
  469. // Delete the OpLoopMerge and remove the backedge to the header.
  470. CloseUnrolledLoop(loop);
  471. // Mark the loop for later deletion. This allows us to preserve the loop
  472. // iterators but still disregard dead loops.
  473. loop->MarkLoopForRemoval();
  474. // If the loop has a parent add the new blocks to the parent.
  475. if (loop->GetParent()) {
  476. AddBlocksToLoop(loop->GetParent());
  477. }
  478. // Add the blocks to the function.
  479. AddBlocksToFunction(loop->GetMergeBlock());
  480. ReplaceInductionUseWithFinalValue(loop);
  481. RemoveDeadInstructions();
  482. // Invalidate all analyses.
  483. context_->InvalidateAnalysesExceptFor(
  484. IRContext::Analysis::kAnalysisLoopAnalysis |
  485. IRContext::Analysis::kAnalysisDefUse);
  486. }
  487. void LoopUnrollerUtilsImpl::KillDebugDeclares(BasicBlock* bb) {
  488. // We cannot kill an instruction inside BasicBlock::ForEachInst()
  489. // because it will generate dangling pointers. We use |to_be_killed|
  490. // to kill them after the loop.
  491. std::vector<Instruction*> to_be_killed;
  492. bb->ForEachInst([&to_be_killed, this](Instruction* inst) {
  493. if (context_->get_debug_info_mgr()->IsDebugDeclare(inst)) {
  494. to_be_killed.push_back(inst);
  495. }
  496. });
  497. for (auto* inst : to_be_killed) context_->KillInst(inst);
  498. }
  499. // Copy a given basic block, give it a new result_id, and store the new block
  500. // and the id mapping in the state. |preserve_instructions| is used to determine
  501. // whether or not this function should edit instructions other than the
  502. // |result_id|.
  503. void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr,
  504. bool preserve_instructions) {
  505. // Clone the block exactly, including the IDs.
  506. BasicBlock* basic_block = itr->Clone(context_);
  507. basic_block->SetParent(itr->GetParent());
  508. // We do not want to duplicate DebugDeclare.
  509. KillDebugDeclares(basic_block);
  510. // Assign each result a new unique ID and keep a mapping of the old ids to
  511. // the new ones.
  512. AssignNewResultIds(basic_block);
  513. // If this is the continue block we are copying.
  514. if (itr == loop->GetContinueBlock()) {
  515. // Make the OpLoopMerge point to this block for the continue.
  516. if (!preserve_instructions) {
  517. Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
  518. merge_inst->SetInOperand(1, {basic_block->id()});
  519. context_->UpdateDefUse(merge_inst);
  520. }
  521. state_.new_continue_block = basic_block;
  522. }
  523. // If this is the header block we are copying.
  524. if (itr == loop->GetHeaderBlock()) {
  525. state_.new_header_block = basic_block;
  526. if (!preserve_instructions) {
  527. // Remove the loop merge instruction if it exists.
  528. Instruction* merge_inst = basic_block->GetLoopMergeInst();
  529. if (merge_inst) invalidated_instructions_.push_back(merge_inst);
  530. }
  531. }
  532. // If this is the latch block being copied, record it in the state.
  533. if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block;
  534. // If this is the condition block we are copying.
  535. if (itr == loop_condition_block_) {
  536. state_.new_condition_block = basic_block;
  537. }
  538. // Add this block to the list of blocks to add to the function at the end of
  539. // the unrolling process.
  540. blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block));
  541. // Keep tracking the old block via a map.
  542. state_.new_blocks[itr->id()] = basic_block;
  543. }
  544. void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) {
  545. // Copy each basic block in the loop, give them new ids, and save state
  546. // information.
  547. for (const BasicBlock* itr : loop_blocks_inorder_) {
  548. CopyBasicBlock(loop, itr, false);
  549. }
  550. // Set the previous latch block to point to the new header.
  551. Instruction* latch_branch = state_.previous_latch_block_->terminator();
  552. latch_branch->SetInOperand(0, {state_.new_header_block->id()});
  553. context_->UpdateDefUse(latch_branch);
  554. // As the algorithm copies the original loop blocks exactly, the tail of the
  555. // latch block on iterations after the first one will be a branch to the new
  556. // header and not the actual loop header. The last continue block in the loop
  557. // should always be a backedge to the global header.
  558. Instruction* new_latch_branch = state_.new_latch_block->terminator();
  559. new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()});
  560. context_->AnalyzeUses(new_latch_branch);
  561. std::vector<Instruction*> inductions;
  562. loop->GetInductionVariables(inductions);
  563. for (size_t index = 0; index < inductions.size(); ++index) {
  564. Instruction* primary_copy = inductions[index];
  565. assert(primary_copy->result_id() != 0);
  566. Instruction* induction_clone =
  567. state_.ids_to_new_inst[state_.new_inst[primary_copy->result_id()]];
  568. state_.new_phis_.push_back(induction_clone);
  569. assert(induction_clone->result_id() != 0);
  570. if (!state_.previous_phis_.empty()) {
  571. state_.new_inst[primary_copy->result_id()] = GetPhiDefID(
  572. state_.previous_phis_[index], state_.previous_latch_block_->id());
  573. } else {
  574. // Do not replace the first phi block ids.
  575. state_.new_inst[primary_copy->result_id()] = primary_copy->result_id();
  576. }
  577. }
  578. if (eliminate_conditions &&
  579. state_.new_condition_block != loop_condition_block_) {
  580. FoldConditionBlock(state_.new_condition_block, 1);
  581. }
  582. // Only reference to the header block is the backedge in the latch block,
  583. // don't change this.
  584. state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
  585. for (auto& pair : state_.new_blocks) {
  586. RemapOperands(pair.second);
  587. }
  588. for (Instruction* dead_phi : state_.new_phis_)
  589. invalidated_instructions_.push_back(dead_phi);
  590. // Swap the state so the new is now the previous.
  591. state_.NextIterationState();
  592. }
  593. uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi,
  594. uint32_t label) const {
  595. for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
  596. if (phi->GetSingleWordOperand(operand) == label) {
  597. return phi->GetSingleWordOperand(operand - 1);
  598. }
  599. }
  600. assert(false && "Could not find a phi index matching the provided label");
  601. return 0;
  602. }
  603. void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block,
  604. uint32_t operand_label) {
  605. // Remove the old conditional branch to the merge and continue blocks.
  606. Instruction& old_branch = *condition_block->tail();
  607. uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
  608. DebugScope scope = old_branch.GetDebugScope();
  609. const std::vector<Instruction> lines = old_branch.dbg_line_insts();
  610. context_->KillInst(&old_branch);
  611. // Add the new unconditional branch to the merge block.
  612. InstructionBuilder builder(
  613. context_, condition_block,
  614. IRContext::Analysis::kAnalysisDefUse |
  615. IRContext::Analysis::kAnalysisInstrToBlockMapping);
  616. Instruction* new_branch = builder.AddBranch(new_target);
  617. if (!lines.empty()) new_branch->AddDebugLine(&lines.back());
  618. new_branch->SetDebugScope(scope);
  619. }
  620. void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) {
  621. // Remove the OpLoopMerge instruction from the function.
  622. Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
  623. invalidated_instructions_.push_back(merge_inst);
  624. // Remove the final backedge to the header and make it point instead to the
  625. // merge block.
  626. Instruction* latch_instruction = state_.previous_latch_block_->terminator();
  627. latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()});
  628. context_->UpdateDefUse(latch_instruction);
  629. // Remove all induction variables as the phis will now be invalid. Replace all
  630. // uses with the constant initializer value (all uses of phis will be in
  631. // the first iteration with the subsequent phis already having been removed).
  632. std::vector<Instruction*> inductions;
  633. loop->GetInductionVariables(inductions);
  634. // We can use the state instruction mechanism to replace all internal loop
  635. // values within the first loop trip (as the subsequent ones will be updated
  636. // by the copy function) with the value coming in from the preheader and then
  637. // use context ReplaceAllUsesWith for the uses outside the loop with the final
  638. // trip phi value.
  639. state_.new_inst.clear();
  640. for (Instruction* induction : inductions) {
  641. uint32_t initalizer_id =
  642. GetPhiDefID(induction, loop->GetPreHeaderBlock()->id());
  643. state_.new_inst[induction->result_id()] = initalizer_id;
  644. }
  645. for (BasicBlock* block : loop_blocks_inorder_) {
  646. RemapOperands(block);
  647. }
  648. for (auto& block_itr : blocks_to_add_) {
  649. RemapOperands(block_itr.get());
  650. }
  651. // Rewrite the last phis, since they may still reference the original phi.
  652. for (Instruction* last_phi : state_.previous_phis_) {
  653. RemapOperands(last_phi);
  654. }
  655. }
  656. // Uses the first loop to create a copy of the loop with new IDs.
  657. void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) {
  658. std::vector<BasicBlock*> new_block_order;
  659. // Copy every block in the old loop.
  660. for (const BasicBlock* itr : loop_blocks_inorder_) {
  661. CopyBasicBlock(old_loop, itr, true);
  662. new_block_order.push_back(blocks_to_add_.back().get());
  663. }
  664. // Clone the merge block, give it a new id and record it in the state.
  665. BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
  666. new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
  667. AssignNewResultIds(new_merge);
  668. state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
  669. // Remap the operands of every instruction in the loop to point to the new
  670. // copies.
  671. for (auto& pair : state_.new_blocks) {
  672. RemapOperands(pair.second);
  673. }
  674. loop_blocks_inorder_ = std::move(new_block_order);
  675. AddBlocksToLoop(new_loop);
  676. new_loop->SetHeaderBlock(state_.new_header_block);
  677. new_loop->SetContinueBlock(state_.new_continue_block);
  678. new_loop->SetLatchBlock(state_.new_latch_block);
  679. new_loop->SetMergeBlock(new_merge);
  680. }
  681. // Whenever the utility copies a block it stores it in a temporary buffer, this
  682. // function adds the buffer into the Function. The blocks will be inserted
  683. // after the block |insert_point|.
  684. void LoopUnrollerUtilsImpl::AddBlocksToFunction(
  685. const BasicBlock* insert_point) {
  686. for (auto basic_block_iterator = function_.begin();
  687. basic_block_iterator != function_.end(); ++basic_block_iterator) {
  688. if (basic_block_iterator->id() == insert_point->id()) {
  689. basic_block_iterator.InsertBefore(&blocks_to_add_);
  690. return;
  691. }
  692. }
  693. assert(
  694. false &&
  695. "Could not add basic blocks to function as insert point was not found.");
  696. }
  697. // Assign all result_ids in |basic_block| instructions to new IDs and preserve
  698. // the mapping of new ids to old ones.
  699. void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) {
  700. analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  701. // Label instructions aren't covered by normal traversal of the
  702. // instructions.
  703. // TODO(1841): Handle id overflow.
  704. uint32_t new_label_id = context_->TakeNextId();
  705. // Assign a new id to the label.
  706. state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
  707. basic_block->GetLabelInst()->SetResultId(new_label_id);
  708. def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst());
  709. for (Instruction& inst : *basic_block) {
  710. // Do def/use analysis on new lines
  711. for (auto& line : inst.dbg_line_insts())
  712. def_use_mgr->AnalyzeInstDefUse(&line);
  713. uint32_t old_id = inst.result_id();
  714. // Ignore stores etc.
  715. if (old_id == 0) {
  716. continue;
  717. }
  718. // Give the instruction a new id.
  719. // TODO(1841): Handle id overflow.
  720. inst.SetResultId(context_->TakeNextId());
  721. def_use_mgr->AnalyzeInstDef(&inst);
  722. // Save the mapping of old_id -> new_id.
  723. state_.new_inst[old_id] = inst.result_id();
  724. // Check if this instruction is the induction variable.
  725. if (loop_induction_variable_->result_id() == old_id) {
  726. // Save a pointer to the new copy of it.
  727. state_.new_phi = &inst;
  728. }
  729. state_.ids_to_new_inst[inst.result_id()] = &inst;
  730. }
  731. }
  732. void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) {
  733. auto remap_operands_to_new_ids = [this](uint32_t* id) {
  734. auto itr = state_.new_inst.find(*id);
  735. if (itr != state_.new_inst.end()) {
  736. *id = itr->second;
  737. }
  738. };
  739. inst->ForEachInId(remap_operands_to_new_ids);
  740. context_->AnalyzeUses(inst);
  741. }
  742. void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) {
  743. for (Instruction& inst : *basic_block) {
  744. RemapOperands(&inst);
  745. }
  746. }
  747. // Generate the ordered list of basic blocks in the |loop| and cache it for
  748. // later use.
  749. void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) {
  750. loop_blocks_inorder_.clear();
  751. loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_);
  752. }
  753. // Adds the blocks_to_add_ to both the loop and to the parent.
  754. void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const {
  755. // Add the blocks to this loop.
  756. for (auto& block_itr : blocks_to_add_) {
  757. loop->AddBasicBlock(block_itr.get());
  758. }
  759. // Add the blocks to the parent as well.
  760. if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
  761. }
  762. void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const {
  763. std::vector<Instruction*> inductions;
  764. loop->GetInductionVariables(inductions);
  765. for (size_t i = 0; i < inductions.size(); ++i) {
  766. Instruction* last_phi_in_block = state_.previous_phis_[i];
  767. uint32_t phi_index =
  768. GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block);
  769. uint32_t phi_variable =
  770. last_phi_in_block->GetSingleWordInOperand(phi_index - 1);
  771. uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index);
  772. Instruction* phi = inductions[i];
  773. phi->SetInOperand(phi_index - 1, {phi_variable});
  774. phi->SetInOperand(phi_index, {phi_label});
  775. }
  776. }
  777. // Duplicate the |loop| body |factor| number of times while keeping the loop
  778. // backedge intact.
  779. void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) {
  780. Unroll(loop, factor);
  781. LinkLastPhisToStart(loop);
  782. AddBlocksToLoop(loop);
  783. AddBlocksToFunction(loop->GetMergeBlock());
  784. RemoveDeadInstructions();
  785. }
  786. /*
  787. * End LoopUtilsImpl.
  788. */
  789. } // namespace
  790. /*
  791. *
  792. * Begin Utils.
  793. *
  794. * */
  795. bool LoopUtils::CanPerformUnroll() {
  796. // The loop is expected to be in structured order.
  797. if (!loop_->GetHeaderBlock()->GetMergeInst()) {
  798. return false;
  799. }
  800. // Find check the loop has a condition we can find and evaluate.
  801. const BasicBlock* condition = loop_->FindConditionBlock();
  802. if (!condition) return false;
  803. // Check that we can find and process the induction variable.
  804. const Instruction* induction = loop_->FindConditionVariable(condition);
  805. if (!induction || induction->opcode() != spv::Op::OpPhi) return false;
  806. // Check that we can find the number of loop iterations.
  807. if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
  808. return false;
  809. #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  810. // ClusterFuzz/OSS-Fuzz is likely to yield examples with very high loop
  811. // iteration counts. This can cause timeouts and memouts during fuzzing that
  812. // are not classed as bugs. To avoid this noise, loop unrolling is not applied
  813. // to loops with large iteration counts when fuzzing.
  814. constexpr size_t kFuzzerIterationLimit = 100;
  815. size_t num_iterations;
  816. loop_->FindNumberOfIterations(induction, &*condition->ctail(),
  817. &num_iterations);
  818. if (num_iterations > kFuzzerIterationLimit) {
  819. return false;
  820. }
  821. #endif
  822. // Make sure the latch block is a unconditional branch to the header
  823. // block.
  824. const Instruction& branch = *loop_->GetLatchBlock()->ctail();
  825. bool branching_assumption =
  826. branch.opcode() == spv::Op::OpBranch &&
  827. branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
  828. if (!branching_assumption) {
  829. return false;
  830. }
  831. std::vector<Instruction*> inductions;
  832. loop_->GetInductionVariables(inductions);
  833. // Ban breaks within the loop.
  834. const std::vector<uint32_t>& merge_block_preds =
  835. context_->cfg()->preds(loop_->GetMergeBlock()->id());
  836. if (merge_block_preds.size() != 1) {
  837. return false;
  838. }
  839. // Ban continues within the loop.
  840. const std::vector<uint32_t>& continue_block_preds =
  841. context_->cfg()->preds(loop_->GetContinueBlock()->id());
  842. if (continue_block_preds.size() != 1) {
  843. return false;
  844. }
  845. // Ban returns in the loop.
  846. // Iterate over all the blocks within the loop and check that none of them
  847. // exit the loop.
  848. for (uint32_t label_id : loop_->GetBlocks()) {
  849. const BasicBlock* block = context_->cfg()->block(label_id);
  850. if (block->ctail()->opcode() == spv::Op::OpKill ||
  851. block->ctail()->opcode() == spv::Op::OpReturn ||
  852. block->ctail()->opcode() == spv::Op::OpReturnValue ||
  853. block->ctail()->opcode() == spv::Op::OpTerminateInvocation) {
  854. return false;
  855. }
  856. }
  857. // Can only unroll inner loops.
  858. if (!loop_->AreAllChildrenMarkedForRemoval()) {
  859. return false;
  860. }
  861. return true;
  862. }
  863. bool LoopUtils::PartiallyUnroll(size_t factor) {
  864. if (factor == 1 || !CanPerformUnroll()) return false;
  865. // Create the unroller utility.
  866. LoopUnrollerUtilsImpl unroller{context_,
  867. loop_->GetHeaderBlock()->GetParent()};
  868. unroller.Init(loop_);
  869. // If the unrolling factor is larger than or the same size as the loop just
  870. // fully unroll the loop.
  871. if (factor >= unroller.GetLoopIterationCount()) {
  872. unroller.FullyUnroll(loop_);
  873. return true;
  874. }
  875. // If the loop unrolling factor is an residual number of iterations we need to
  876. // let run the loop for the residual part then let it branch into the unrolled
  877. // remaining part. We add one when calucating the remainder to take into
  878. // account the one iteration already in the loop.
  879. if (unroller.GetLoopIterationCount() % factor != 0) {
  880. unroller.PartiallyUnrollResidualFactor(loop_, factor);
  881. } else {
  882. unroller.PartiallyUnroll(loop_, factor);
  883. }
  884. return true;
  885. }
  886. bool LoopUtils::FullyUnroll() {
  887. if (!CanPerformUnroll()) return false;
  888. std::vector<Instruction*> inductions;
  889. loop_->GetInductionVariables(inductions);
  890. LoopUnrollerUtilsImpl unroller{context_,
  891. loop_->GetHeaderBlock()->GetParent()};
  892. unroller.Init(loop_);
  893. unroller.FullyUnroll(loop_);
  894. return true;
  895. }
  896. void LoopUtils::Finalize() {
  897. // Clean up the loop descriptor to preserve the analysis.
  898. LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
  899. LD->PostModificationCleanup();
  900. }
  901. /*
  902. *
  903. * Begin Pass.
  904. *
  905. */
  906. Pass::Status LoopUnroller::Process() {
  907. bool changed = false;
  908. for (Function& f : *context()->module()) {
  909. if (f.IsDeclaration()) {
  910. continue;
  911. }
  912. LoopDescriptor* LD = context()->GetLoopDescriptor(&f);
  913. for (Loop& loop : *LD) {
  914. LoopUtils loop_utils{context(), &loop};
  915. if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
  916. continue;
  917. }
  918. if (fully_unroll_) {
  919. loop_utils.FullyUnroll();
  920. } else {
  921. loop_utils.PartiallyUnroll(unroll_factor_);
  922. }
  923. changed = true;
  924. }
  925. LD->PostModificationCleanup();
  926. }
  927. return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
  928. }
  929. } // namespace opt
  930. } // namespace spvtools