loop_unroller.cpp 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145
  1. // Copyright (c) 2018 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "source/opt/loop_unroller.h"
  15. #include <limits>
  16. #include <map>
  17. #include <memory>
  18. #include <unordered_map>
  19. #include <utility>
  20. #include <vector>
  21. #include "source/opt/ir_builder.h"
  22. #include "source/opt/loop_utils.h"
  23. // Implements loop util unrolling functionality for fully and partially
  24. // unrolling loops. Given a factor it will duplicate the loop that many times,
  25. // appending each one to the end of the old loop and removing backedges, to
  26. // create a new unrolled loop.
  27. //
  28. // 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
  29. // loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
  30. // validate that a given loop can be unrolled. That method (along with the
  31. // constructor of loop) checks that the IR is in the expected canonicalised
  32. // format.
  33. //
  34. // 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
  35. // perform the unrolling. This implements helper methods to copy the loop basic
  36. // blocks and remap the ids of instructions used inside them.
  37. //
  38. // 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
  39. // actually performs the loop duplication. It does this by creating a
  40. // LoopUnrollState object and then copying the loop as given by the factor
  41. // parameter. The LoopUnrollState object retains the state of the unroller
  42. // between the loop body copies as each iteration needs information on the last
  43. // to adjust the phi induction variable, adjust the OpLoopMerge instruction in
  44. // the main loop header, and change the previous continue block to point to the
  45. // new header and the new continue block to the main loop header.
  46. //
  47. // 4 - If the loop is to be fully unrolled then it is simply closed after step
  48. // 3, with the OpLoopMerge being deleted, the backedge removed, and the
  49. // condition blocks folded.
  50. //
  51. // 5 - If it is being partially unrolled: if the unrolling factor leaves the
  52. // loop with an even number of bodies with respect to the number of loop
  53. // iterations then step 3 is all that is needed. If it is uneven then we need to
  54. // duplicate the loop completely and unroll the duplicated loop to cover the
  55. // residual part and adjust the first loop to cover only the "even" part. For
  56. // instance if you request an unroll factor of 3 on a loop with 10 iterations
  57. // then copying the body three times would leave you with three bodies in the
  58. // loop
  59. // where the loop still iterates over each 4 times. So we make two loops one
  60. // iterating once then a second loop of three iterating 3 times.
  61. namespace spvtools {
  62. namespace opt {
  63. namespace {
  64. // Loop control constant value for DontUnroll flag.
  65. static const uint32_t kLoopControlDontUnrollIndex = 2;
  66. // Operand index of the loop control parameter of the OpLoopMerge.
  67. static const uint32_t kLoopControlIndex = 2;
  68. // This utility class encapsulates some of the state we need to maintain between
  69. // loop unrolls. Specifically it maintains key blocks and the induction variable
  70. // in the current loop duplication step and the blocks from the previous one.
  71. // This is because each step of the unroll needs to use data from both the
  72. // preceding step and the original loop.
  73. struct LoopUnrollState {
  74. LoopUnrollState()
  75. : previous_phi_(nullptr),
  76. previous_latch_block_(nullptr),
  77. previous_condition_block_(nullptr),
  78. new_phi(nullptr),
  79. new_continue_block(nullptr),
  80. new_condition_block(nullptr),
  81. new_header_block(nullptr) {}
  82. // Initialize from the loop descriptor class.
  83. LoopUnrollState(Instruction* induction, BasicBlock* latch_block,
  84. BasicBlock* condition, std::vector<Instruction*>&& phis)
  85. : previous_phi_(induction),
  86. previous_latch_block_(latch_block),
  87. previous_condition_block_(condition),
  88. new_phi(nullptr),
  89. new_continue_block(nullptr),
  90. new_condition_block(nullptr),
  91. new_header_block(nullptr) {
  92. previous_phis_ = std::move(phis);
  93. }
  94. // Swap the state so that the new nodes are now the previous nodes.
  95. void NextIterationState() {
  96. previous_phi_ = new_phi;
  97. previous_latch_block_ = new_latch_block;
  98. previous_condition_block_ = new_condition_block;
  99. previous_phis_ = std::move(new_phis_);
  100. // Clear new nodes.
  101. new_phi = nullptr;
  102. new_continue_block = nullptr;
  103. new_condition_block = nullptr;
  104. new_header_block = nullptr;
  105. new_latch_block = nullptr;
  106. // Clear new block/instruction maps.
  107. new_blocks.clear();
  108. new_inst.clear();
  109. ids_to_new_inst.clear();
  110. }
  111. // The induction variable from the immediately preceding loop body.
  112. Instruction* previous_phi_;
  113. // All the phi nodes from the previous loop iteration.
  114. std::vector<Instruction*> previous_phis_;
  115. std::vector<Instruction*> new_phis_;
  116. // The previous latch block. The backedge will be removed from this and
  117. // added to the new latch block.
  118. BasicBlock* previous_latch_block_;
  119. // The previous condition block. This may be folded to flatten the loop.
  120. BasicBlock* previous_condition_block_;
  121. // The new induction variable.
  122. Instruction* new_phi;
  123. // The new continue block.
  124. BasicBlock* new_continue_block;
  125. // The new condition block.
  126. BasicBlock* new_condition_block;
  127. // The new header block.
  128. BasicBlock* new_header_block;
  129. // The new latch block.
  130. BasicBlock* new_latch_block;
  131. // A mapping of new block ids to the original blocks which they were copied
  132. // from.
  133. std::unordered_map<uint32_t, BasicBlock*> new_blocks;
  134. // A mapping of the original instruction ids to the instruction ids to their
  135. // copies.
  136. std::unordered_map<uint32_t, uint32_t> new_inst;
  137. std::unordered_map<uint32_t, Instruction*> ids_to_new_inst;
  138. };
  139. // This class implements the actual unrolling. It uses a LoopUnrollState to
  140. // maintain the state of the unrolling in between steps.
  141. class LoopUnrollerUtilsImpl {
  142. public:
  143. using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>;
  144. LoopUnrollerUtilsImpl(IRContext* c, Function* function)
  145. : context_(c),
  146. function_(*function),
  147. loop_condition_block_(nullptr),
  148. loop_induction_variable_(nullptr),
  149. number_of_loop_iterations_(0),
  150. loop_step_value_(0),
  151. loop_init_value_(0) {}
  152. // Unroll the |loop| by given |factor| by copying the whole body |factor|
  153. // times. The resulting basicblock structure will remain a loop.
  154. void PartiallyUnroll(Loop*, size_t factor);
  155. // If partially unrolling the |loop| would leave the loop with too many bodies
  156. // for its number of iterations then this method should be used. This method
  157. // will duplicate the |loop| completely, making the duplicated loop the
  158. // successor of the original's merge block. The original loop will have its
  159. // condition changed to loop over the residual part and the duplicate will be
  160. // partially unrolled. The resulting structure will be two loops.
  161. void PartiallyUnrollResidualFactor(Loop* loop, size_t factor);
  162. // Fully unroll the |loop| by copying the full body by the total number of
  163. // loop iterations, folding all conditions, and removing the backedge from the
  164. // continue block to the header.
  165. void FullyUnroll(Loop* loop);
  166. // Get the ID of the variable in the |phi| paired with |label|.
  167. uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const;
  168. // Close the loop by removing the OpLoopMerge from the |loop| header block and
  169. // making the backedge point to the merge block.
  170. void CloseUnrolledLoop(Loop* loop);
  171. // Remove the OpConditionalBranch instruction inside |conditional_block| used
  172. // to branch to either exit or continue the loop and replace it with an
  173. // unconditional OpBranch to block |new_target|.
  174. void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target);
  175. // Add all blocks_to_add_ to function_ at the |insert_point|.
  176. void AddBlocksToFunction(const BasicBlock* insert_point);
  177. // Duplicates the |old_loop|, cloning each body and remapping the ids without
  178. // removing instructions or changing relative structure. Result will be stored
  179. // in |new_loop|.
  180. void DuplicateLoop(Loop* old_loop, Loop* new_loop);
  181. inline size_t GetLoopIterationCount() const {
  182. return number_of_loop_iterations_;
  183. }
  184. // Extracts the initial state information from the |loop|.
  185. void Init(Loop* loop);
  186. // Replace the uses of each induction variable outside the loop with the final
  187. // value of the induction variable before the loop exit. To reflect the proper
  188. // state of a fully unrolled loop.
  189. void ReplaceInductionUseWithFinalValue(Loop* loop);
  190. // Remove all the instructions in the invalidated_instructions_ vector.
  191. void RemoveDeadInstructions();
  192. // Replace any use of induction variables outwith the loop with the final
  193. // value of the induction variable in the unrolled loop.
  194. void ReplaceOutsideLoopUseWithFinalValue(Loop* loop);
  195. // Set the LoopControl operand of the OpLoopMerge instruction to be
  196. // DontUnroll.
  197. void MarkLoopControlAsDontUnroll(Loop* loop) const;
  198. private:
  199. // Remap all the in |basic_block| to new IDs and keep the mapping of new ids
  200. // to old
  201. // ids. |loop| is used to identify special loop blocks (header, continue,
  202. // etc).
  203. void AssignNewResultIds(BasicBlock* basic_block);
  204. // Using the map built by AssignNewResultIds, replace the uses in |inst|
  205. // by the id that the use maps to.
  206. void RemapOperands(Instruction* inst);
  207. // Using the map built by AssignNewResultIds, for each instruction in
  208. // |basic_block| use
  209. // that map to substitute the IDs used by instructions (in the operands) with
  210. // the new ids.
  211. void RemapOperands(BasicBlock* basic_block);
  212. // Copy the whole body of the loop, all blocks dominated by the |loop| header
  213. // and not dominated by the |loop| merge. The copied body will be linked to by
  214. // the old |loop| continue block and the new body will link to the |loop|
  215. // header via the new continue block. |eliminate_conditions| is used to decide
  216. // whether or not to fold all the condition blocks other than the last one.
  217. void CopyBody(Loop* loop, bool eliminate_conditions);
  218. // Copy a given |block_to_copy| in the |loop| and record the mapping of the
  219. // old/new ids. |preserve_instructions| determines whether or not the method
  220. // will modify (other than result_id) instructions which are copied.
  221. void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy,
  222. bool preserve_instructions);
  223. // The actual implementation of the unroll step. Unrolls |loop| by given
  224. // |factor| by copying the body by |factor| times. Also propagates the
  225. // induction variable value throughout the copies.
  226. void Unroll(Loop* loop, size_t factor);
  227. // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
  228. // as computed by the method ComputeLoopOrderedBlocks.
  229. void ComputeLoopOrderedBlocks(Loop* loop);
  230. // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
  231. // the parent exists.
  232. void AddBlocksToLoop(Loop* loop) const;
  233. // After the partially unroll step the phi instructions in the header block
  234. // will be in an illegal format. This function makes the phis legal by making
  235. // the edge from the latch block come from the new latch block and the value
  236. // to be the actual value of the phi at that point.
  237. void LinkLastPhisToStart(Loop* loop) const;
  238. // Kill all debug declaration instructions from |bb|.
  239. void KillDebugDeclares(BasicBlock* bb);
  240. // A pointer to the IRContext. Used to add/remove instructions and for usedef
  241. // chains.
  242. IRContext* context_;
  243. // A reference the function the loop is within.
  244. Function& function_;
  245. // A list of basic blocks to be added to the loop at the end of an unroll
  246. // step.
  247. BasicBlockListTy blocks_to_add_;
  248. // List of instructions which are now dead and can be removed.
  249. std::vector<Instruction*> invalidated_instructions_;
  250. // Maintains the current state of the transform between calls to unroll.
  251. LoopUnrollState state_;
  252. // An ordered list containing the loop basic blocks.
  253. std::vector<BasicBlock*> loop_blocks_inorder_;
  254. // The block containing the condition check which contains a conditional
  255. // branch to the merge and continue block.
  256. BasicBlock* loop_condition_block_;
  257. // The induction variable of the loop.
  258. Instruction* loop_induction_variable_;
  259. // Phis used in the loop need to be remapped to use the actual result values
  260. // and then be remapped at the end.
  261. std::vector<Instruction*> loop_phi_instructions_;
  262. // The number of loop iterations that the loop would perform pre-unroll.
  263. size_t number_of_loop_iterations_;
  264. // The amount that the loop steps each iteration.
  265. int64_t loop_step_value_;
  266. // The value the loop starts stepping from.
  267. int64_t loop_init_value_;
  268. };
  269. /*
  270. * Static helper functions.
  271. */
  272. // Retrieve the index of the OpPhi instruction |phi| which corresponds to the
  273. // incoming |block| id.
  274. static uint32_t GetPhiIndexFromLabel(const BasicBlock* block,
  275. const Instruction* phi) {
  276. for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
  277. if (block->id() == phi->GetSingleWordInOperand(i)) {
  278. return i;
  279. }
  280. }
  281. assert(false && "Could not find operand in instruction.");
  282. return 0;
  283. }
  284. void LoopUnrollerUtilsImpl::Init(Loop* loop) {
  285. loop_condition_block_ = loop->FindConditionBlock();
  286. // When we reinit the second loop during PartiallyUnrollResidualFactor we need
  287. // to use the cached value from the duplicate step as the dominator tree
  288. // basded solution, loop->FindConditionBlock, requires all the nodes to be
  289. // connected up with the correct branches. They won't be at this point.
  290. if (!loop_condition_block_) {
  291. loop_condition_block_ = state_.new_condition_block;
  292. }
  293. assert(loop_condition_block_);
  294. loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_);
  295. assert(loop_induction_variable_);
  296. bool found = loop->FindNumberOfIterations(
  297. loop_induction_variable_, &*loop_condition_block_->ctail(),
  298. &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
  299. (void)found; // To silence unused variable warning on release builds.
  300. assert(found);
  301. // Blocks are stored in an unordered set of ids in the loop class, we need to
  302. // create the dominator ordered list.
  303. ComputeLoopOrderedBlocks(loop);
  304. }
  305. // This function is used to partially unroll the loop when the factor provided
  306. // would normally lead to an illegal optimization. Instead of just unrolling the
  307. // loop it creates two loops and unrolls one and adjusts the condition on the
  308. // other. The end result being that the new loop pair iterates over the correct
  309. // number of bodies.
  310. void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop,
  311. size_t factor) {
  312. // TODO(1841): Handle id overflow.
  313. std::unique_ptr<Instruction> new_label{new Instruction(
  314. context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})};
  315. std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
  316. new_exit_bb->SetParent(&function_);
  317. // Save the id of the block before we move it.
  318. uint32_t new_merge_id = new_exit_bb->id();
  319. // Add the block the list of blocks to add, we want this merge block to be
  320. // right at the start of the new blocks.
  321. blocks_to_add_.push_back(std::move(new_exit_bb));
  322. BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
  323. Instruction& original_conditional_branch = *loop_condition_block_->tail();
  324. // Duplicate the loop, providing access to the blocks of both loops.
  325. // This is a naked new due to the VS2013 requirement of not having unique
  326. // pointers in vectors, as it will be inserted into a vector with
  327. // loop_descriptor.AddLoop.
  328. std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop);
  329. // Clear the basic blocks of the new loop.
  330. new_loop->ClearBlocks();
  331. DuplicateLoop(loop, new_loop.get());
  332. // Add the blocks to the function.
  333. AddBlocksToFunction(loop->GetMergeBlock());
  334. blocks_to_add_.clear();
  335. // Create a new merge block for the first loop.
  336. InstructionBuilder builder{context_, new_exit_bb_raw};
  337. // Make the first loop branch to the second.
  338. builder.AddBranch(new_loop->GetHeaderBlock()->id());
  339. loop_condition_block_ = state_.new_condition_block;
  340. loop_induction_variable_ = state_.new_phi;
  341. // Unroll the new loop by the factor with the usual -1 to account for the
  342. // existing block iteration.
  343. Unroll(new_loop.get(), factor);
  344. LinkLastPhisToStart(new_loop.get());
  345. AddBlocksToLoop(new_loop.get());
  346. // Add the new merge block to the back of the list of blocks to be added. It
  347. // needs to be the last block added to maintain dominator order in the binary.
  348. blocks_to_add_.push_back(
  349. std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock()));
  350. // Add the blocks to the function.
  351. AddBlocksToFunction(loop->GetMergeBlock());
  352. // Reset the usedef analysis.
  353. context_->InvalidateAnalysesExceptFor(
  354. IRContext::Analysis::kAnalysisLoopAnalysis);
  355. analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
  356. // The loop condition.
  357. Instruction* condition_check = def_use_manager->GetDef(
  358. original_conditional_branch.GetSingleWordOperand(0));
  359. // This should have been checked by the LoopUtils::CanPerformUnroll function
  360. // before entering this.
  361. assert(loop->IsSupportedCondition(condition_check->opcode()));
  362. // We need to account for the initial body when calculating the remainder.
  363. int64_t remainder = Loop::GetResidualConditionValue(
  364. condition_check->opcode(), loop_init_value_, loop_step_value_,
  365. number_of_loop_iterations_, factor);
  366. assert(remainder > std::numeric_limits<int32_t>::min() &&
  367. remainder < std::numeric_limits<int32_t>::max());
  368. Instruction* new_constant = nullptr;
  369. // If the remainder is negative then we add a signed constant, otherwise just
  370. // add an unsigned constant.
  371. if (remainder < 0) {
  372. new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder));
  373. } else {
  374. new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder));
  375. }
  376. uint32_t constant_id = new_constant->result_id();
  377. // Update the condition check.
  378. condition_check->SetInOperand(1, {constant_id});
  379. // Update the next phi node. The phi will have a constant value coming in from
  380. // the preheader block. For the duplicated loop we need to update the constant
  381. // to be the amount of iterations covered by the first loop and the incoming
  382. // block to be the first loops new merge block.
  383. std::vector<Instruction*> new_inductions;
  384. new_loop->GetInductionVariables(new_inductions);
  385. std::vector<Instruction*> old_inductions;
  386. loop->GetInductionVariables(old_inductions);
  387. for (size_t index = 0; index < new_inductions.size(); ++index) {
  388. Instruction* new_induction = new_inductions[index];
  389. Instruction* old_induction = old_inductions[index];
  390. // Get the index of the loop initalizer, the value coming in from the
  391. // preheader.
  392. uint32_t initalizer_index =
  393. GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction);
  394. // Replace the second loop initalizer with the phi from the first
  395. new_induction->SetInOperand(initalizer_index - 1,
  396. {old_induction->result_id()});
  397. new_induction->SetInOperand(initalizer_index, {new_merge_id});
  398. // If the use of the first loop induction variable is outside of the loop
  399. // then replace that use with the second loop induction variable.
  400. uint32_t second_loop_induction = new_induction->result_id();
  401. auto replace_use_outside_of_loop = [loop, second_loop_induction](
  402. Instruction* user,
  403. uint32_t operand_index) {
  404. if (!loop->IsInsideLoop(user)) {
  405. user->SetOperand(operand_index, {second_loop_induction});
  406. }
  407. };
  408. context_->get_def_use_mgr()->ForEachUse(old_induction,
  409. replace_use_outside_of_loop);
  410. }
  411. context_->InvalidateAnalysesExceptFor(
  412. IRContext::Analysis::kAnalysisLoopAnalysis);
  413. context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
  414. LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_);
  415. loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent());
  416. RemoveDeadInstructions();
  417. }
  418. // Mark this loop as DontUnroll as it will already be unrolled and it may not
  419. // be safe to unroll a previously partially unrolled loop.
  420. void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const {
  421. Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
  422. assert(loop_merge_inst &&
  423. "Loop merge instruction could not be found after entering unroller "
  424. "(should have exited before this)");
  425. loop_merge_inst->SetInOperand(kLoopControlIndex,
  426. {kLoopControlDontUnrollIndex});
  427. }
  428. // Duplicate the |loop| body |factor| - 1 number of times while keeping the loop
  429. // backedge intact. This will leave the loop with |factor| number of bodies
  430. // after accounting for the initial body.
  431. void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) {
  432. // If we unroll a loop partially it will not be safe to unroll it further.
  433. // This is due to the current method of calculating the number of loop
  434. // iterations.
  435. MarkLoopControlAsDontUnroll(loop);
  436. std::vector<Instruction*> inductions;
  437. loop->GetInductionVariables(inductions);
  438. state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
  439. loop_condition_block_, std::move(inductions)};
  440. for (size_t i = 0; i < factor - 1; ++i) {
  441. CopyBody(loop, true);
  442. }
  443. }
  444. void LoopUnrollerUtilsImpl::RemoveDeadInstructions() {
  445. // Remove the dead instructions.
  446. for (Instruction* inst : invalidated_instructions_) {
  447. context_->KillInst(inst);
  448. }
  449. }
  450. void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) {
  451. context_->InvalidateAnalysesExceptFor(
  452. IRContext::Analysis::kAnalysisLoopAnalysis |
  453. IRContext::Analysis::kAnalysisDefUse |
  454. IRContext::Analysis::kAnalysisInstrToBlockMapping);
  455. std::vector<Instruction*> inductions;
  456. loop->GetInductionVariables(inductions);
  457. for (size_t index = 0; index < inductions.size(); ++index) {
  458. uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index],
  459. state_.previous_latch_block_->id());
  460. context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id);
  461. invalidated_instructions_.push_back(inductions[index]);
  462. }
  463. }
  464. // Fully unroll the loop by partially unrolling it by the number of loop
  465. // iterations minus one for the body already accounted for.
  466. void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) {
  467. // We unroll the loop by number of iterations in the loop.
  468. Unroll(loop, number_of_loop_iterations_);
  469. // The first condition block is preserved until now so it can be copied.
  470. FoldConditionBlock(loop_condition_block_, 1);
  471. // Delete the OpLoopMerge and remove the backedge to the header.
  472. CloseUnrolledLoop(loop);
  473. // Mark the loop for later deletion. This allows us to preserve the loop
  474. // iterators but still disregard dead loops.
  475. loop->MarkLoopForRemoval();
  476. // If the loop has a parent add the new blocks to the parent.
  477. if (loop->GetParent()) {
  478. AddBlocksToLoop(loop->GetParent());
  479. }
  480. // Add the blocks to the function.
  481. AddBlocksToFunction(loop->GetMergeBlock());
  482. ReplaceInductionUseWithFinalValue(loop);
  483. RemoveDeadInstructions();
  484. // Invalidate all analyses.
  485. context_->InvalidateAnalysesExceptFor(
  486. IRContext::Analysis::kAnalysisLoopAnalysis |
  487. IRContext::Analysis::kAnalysisDefUse);
  488. }
  489. void LoopUnrollerUtilsImpl::KillDebugDeclares(BasicBlock* bb) {
  490. // We cannot kill an instruction inside BasicBlock::ForEachInst()
  491. // because it will generate dangling pointers. We use |to_be_killed|
  492. // to kill them after the loop.
  493. std::vector<Instruction*> to_be_killed;
  494. bb->ForEachInst([&to_be_killed, this](Instruction* inst) {
  495. if (context_->get_debug_info_mgr()->IsDebugDeclare(inst)) {
  496. to_be_killed.push_back(inst);
  497. }
  498. });
  499. for (auto* inst : to_be_killed) context_->KillInst(inst);
  500. }
  501. // Copy a given basic block, give it a new result_id, and store the new block
  502. // and the id mapping in the state. |preserve_instructions| is used to determine
  503. // whether or not this function should edit instructions other than the
  504. // |result_id|.
  505. void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr,
  506. bool preserve_instructions) {
  507. // Clone the block exactly, including the IDs.
  508. BasicBlock* basic_block = itr->Clone(context_);
  509. basic_block->SetParent(itr->GetParent());
  510. // We do not want to duplicate DebugDeclare.
  511. KillDebugDeclares(basic_block);
  512. // Assign each result a new unique ID and keep a mapping of the old ids to
  513. // the new ones.
  514. AssignNewResultIds(basic_block);
  515. // If this is the continue block we are copying.
  516. if (itr == loop->GetContinueBlock()) {
  517. // Make the OpLoopMerge point to this block for the continue.
  518. if (!preserve_instructions) {
  519. Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
  520. merge_inst->SetInOperand(1, {basic_block->id()});
  521. context_->UpdateDefUse(merge_inst);
  522. }
  523. state_.new_continue_block = basic_block;
  524. }
  525. // If this is the header block we are copying.
  526. if (itr == loop->GetHeaderBlock()) {
  527. state_.new_header_block = basic_block;
  528. if (!preserve_instructions) {
  529. // Remove the loop merge instruction if it exists.
  530. Instruction* merge_inst = basic_block->GetLoopMergeInst();
  531. if (merge_inst) invalidated_instructions_.push_back(merge_inst);
  532. }
  533. }
  534. // If this is the latch block being copied, record it in the state.
  535. if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block;
  536. // If this is the condition block we are copying.
  537. if (itr == loop_condition_block_) {
  538. state_.new_condition_block = basic_block;
  539. }
  540. // Add this block to the list of blocks to add to the function at the end of
  541. // the unrolling process.
  542. blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block));
  543. // Keep tracking the old block via a map.
  544. state_.new_blocks[itr->id()] = basic_block;
  545. }
  546. void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) {
  547. // Copy each basic block in the loop, give them new ids, and save state
  548. // information.
  549. for (const BasicBlock* itr : loop_blocks_inorder_) {
  550. CopyBasicBlock(loop, itr, false);
  551. }
  552. // Set the previous latch block to point to the new header.
  553. Instruction* latch_branch = state_.previous_latch_block_->terminator();
  554. latch_branch->SetInOperand(0, {state_.new_header_block->id()});
  555. context_->UpdateDefUse(latch_branch);
  556. // As the algorithm copies the original loop blocks exactly, the tail of the
  557. // latch block on iterations after the first one will be a branch to the new
  558. // header and not the actual loop header. The last continue block in the loop
  559. // should always be a backedge to the global header.
  560. Instruction* new_latch_branch = state_.new_latch_block->terminator();
  561. new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()});
  562. context_->AnalyzeUses(new_latch_branch);
  563. std::vector<Instruction*> inductions;
  564. loop->GetInductionVariables(inductions);
  565. for (size_t index = 0; index < inductions.size(); ++index) {
  566. Instruction* primary_copy = inductions[index];
  567. assert(primary_copy->result_id() != 0);
  568. Instruction* induction_clone =
  569. state_.ids_to_new_inst[state_.new_inst[primary_copy->result_id()]];
  570. state_.new_phis_.push_back(induction_clone);
  571. assert(induction_clone->result_id() != 0);
  572. if (!state_.previous_phis_.empty()) {
  573. state_.new_inst[primary_copy->result_id()] = GetPhiDefID(
  574. state_.previous_phis_[index], state_.previous_latch_block_->id());
  575. } else {
  576. // Do not replace the first phi block ids.
  577. state_.new_inst[primary_copy->result_id()] = primary_copy->result_id();
  578. }
  579. }
  580. if (eliminate_conditions &&
  581. state_.new_condition_block != loop_condition_block_) {
  582. FoldConditionBlock(state_.new_condition_block, 1);
  583. }
  584. // Only reference to the header block is the backedge in the latch block,
  585. // don't change this.
  586. state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
  587. for (auto& pair : state_.new_blocks) {
  588. RemapOperands(pair.second);
  589. }
  590. for (Instruction* dead_phi : state_.new_phis_)
  591. invalidated_instructions_.push_back(dead_phi);
  592. // Swap the state so the new is now the previous.
  593. state_.NextIterationState();
  594. }
  595. uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi,
  596. uint32_t label) const {
  597. for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
  598. if (phi->GetSingleWordOperand(operand) == label) {
  599. return phi->GetSingleWordOperand(operand - 1);
  600. }
  601. }
  602. assert(false && "Could not find a phi index matching the provided label");
  603. return 0;
  604. }
  605. void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block,
  606. uint32_t operand_label) {
  607. // Remove the old conditional branch to the merge and continue blocks.
  608. Instruction& old_branch = *condition_block->tail();
  609. uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
  610. DebugScope scope = old_branch.GetDebugScope();
  611. const std::vector<Instruction> lines = old_branch.dbg_line_insts();
  612. context_->KillInst(&old_branch);
  613. // Add the new unconditional branch to the merge block.
  614. InstructionBuilder builder(
  615. context_, condition_block,
  616. IRContext::Analysis::kAnalysisDefUse |
  617. IRContext::Analysis::kAnalysisInstrToBlockMapping);
  618. Instruction* new_branch = builder.AddBranch(new_target);
  619. if (!lines.empty()) new_branch->AddDebugLine(&lines.back());
  620. new_branch->SetDebugScope(scope);
  621. }
  622. void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) {
  623. // Remove the OpLoopMerge instruction from the function.
  624. Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
  625. invalidated_instructions_.push_back(merge_inst);
  626. // Remove the final backedge to the header and make it point instead to the
  627. // merge block.
  628. Instruction* latch_instruction = state_.previous_latch_block_->terminator();
  629. latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()});
  630. context_->UpdateDefUse(latch_instruction);
  631. // Remove all induction variables as the phis will now be invalid. Replace all
  632. // uses with the constant initializer value (all uses of phis will be in
  633. // the first iteration with the subsequent phis already having been removed).
  634. std::vector<Instruction*> inductions;
  635. loop->GetInductionVariables(inductions);
  636. // We can use the state instruction mechanism to replace all internal loop
  637. // values within the first loop trip (as the subsequent ones will be updated
  638. // by the copy function) with the value coming in from the preheader and then
  639. // use context ReplaceAllUsesWith for the uses outside the loop with the final
  640. // trip phi value.
  641. state_.new_inst.clear();
  642. for (Instruction* induction : inductions) {
  643. uint32_t initalizer_id =
  644. GetPhiDefID(induction, loop->GetPreHeaderBlock()->id());
  645. state_.new_inst[induction->result_id()] = initalizer_id;
  646. }
  647. for (BasicBlock* block : loop_blocks_inorder_) {
  648. RemapOperands(block);
  649. }
  650. for (auto& block_itr : blocks_to_add_) {
  651. RemapOperands(block_itr.get());
  652. }
  653. // Rewrite the last phis, since they may still reference the original phi.
  654. for (Instruction* last_phi : state_.previous_phis_) {
  655. RemapOperands(last_phi);
  656. }
  657. }
  658. // Uses the first loop to create a copy of the loop with new IDs.
  659. void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) {
  660. std::vector<BasicBlock*> new_block_order;
  661. // Copy every block in the old loop.
  662. for (const BasicBlock* itr : loop_blocks_inorder_) {
  663. CopyBasicBlock(old_loop, itr, true);
  664. new_block_order.push_back(blocks_to_add_.back().get());
  665. }
  666. // Clone the merge block, give it a new id and record it in the state.
  667. BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
  668. new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
  669. AssignNewResultIds(new_merge);
  670. state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
  671. // Remap the operands of every instruction in the loop to point to the new
  672. // copies.
  673. for (auto& pair : state_.new_blocks) {
  674. RemapOperands(pair.second);
  675. }
  676. loop_blocks_inorder_ = std::move(new_block_order);
  677. AddBlocksToLoop(new_loop);
  678. new_loop->SetHeaderBlock(state_.new_header_block);
  679. new_loop->SetContinueBlock(state_.new_continue_block);
  680. new_loop->SetLatchBlock(state_.new_latch_block);
  681. new_loop->SetMergeBlock(new_merge);
  682. }
  683. // Whenever the utility copies a block it stores it in a temporary buffer, this
  684. // function adds the buffer into the Function. The blocks will be inserted
  685. // after the block |insert_point|.
  686. void LoopUnrollerUtilsImpl::AddBlocksToFunction(
  687. const BasicBlock* insert_point) {
  688. for (auto basic_block_iterator = function_.begin();
  689. basic_block_iterator != function_.end(); ++basic_block_iterator) {
  690. if (basic_block_iterator->id() == insert_point->id()) {
  691. basic_block_iterator.InsertBefore(&blocks_to_add_);
  692. return;
  693. }
  694. }
  695. assert(
  696. false &&
  697. "Could not add basic blocks to function as insert point was not found.");
  698. }
  699. // Assign all result_ids in |basic_block| instructions to new IDs and preserve
  700. // the mapping of new ids to old ones.
  701. void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) {
  702. analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  703. // Label instructions aren't covered by normal traversal of the
  704. // instructions.
  705. // TODO(1841): Handle id overflow.
  706. uint32_t new_label_id = context_->TakeNextId();
  707. // Assign a new id to the label.
  708. state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
  709. basic_block->GetLabelInst()->SetResultId(new_label_id);
  710. def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst());
  711. for (Instruction& inst : *basic_block) {
  712. // Do def/use analysis on new lines
  713. for (auto& line : inst.dbg_line_insts())
  714. def_use_mgr->AnalyzeInstDefUse(&line);
  715. uint32_t old_id = inst.result_id();
  716. // Ignore stores etc.
  717. if (old_id == 0) {
  718. continue;
  719. }
  720. // Give the instruction a new id.
  721. // TODO(1841): Handle id overflow.
  722. inst.SetResultId(context_->TakeNextId());
  723. def_use_mgr->AnalyzeInstDef(&inst);
  724. // Save the mapping of old_id -> new_id.
  725. state_.new_inst[old_id] = inst.result_id();
  726. // Check if this instruction is the induction variable.
  727. if (loop_induction_variable_->result_id() == old_id) {
  728. // Save a pointer to the new copy of it.
  729. state_.new_phi = &inst;
  730. }
  731. state_.ids_to_new_inst[inst.result_id()] = &inst;
  732. }
  733. }
  734. void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) {
  735. auto remap_operands_to_new_ids = [this](uint32_t* id) {
  736. auto itr = state_.new_inst.find(*id);
  737. if (itr != state_.new_inst.end()) {
  738. *id = itr->second;
  739. }
  740. };
  741. inst->ForEachInId(remap_operands_to_new_ids);
  742. context_->AnalyzeUses(inst);
  743. }
  744. void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) {
  745. for (Instruction& inst : *basic_block) {
  746. RemapOperands(&inst);
  747. }
  748. }
  749. // Generate the ordered list of basic blocks in the |loop| and cache it for
  750. // later use.
  751. void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) {
  752. loop_blocks_inorder_.clear();
  753. loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_);
  754. }
  755. // Adds the blocks_to_add_ to both the loop and to the parent.
  756. void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const {
  757. // Add the blocks to this loop.
  758. for (auto& block_itr : blocks_to_add_) {
  759. loop->AddBasicBlock(block_itr.get());
  760. }
  761. // Add the blocks to the parent as well.
  762. if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
  763. }
  764. void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const {
  765. std::vector<Instruction*> inductions;
  766. loop->GetInductionVariables(inductions);
  767. for (size_t i = 0; i < inductions.size(); ++i) {
  768. Instruction* last_phi_in_block = state_.previous_phis_[i];
  769. uint32_t phi_index =
  770. GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block);
  771. uint32_t phi_variable =
  772. last_phi_in_block->GetSingleWordInOperand(phi_index - 1);
  773. uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index);
  774. Instruction* phi = inductions[i];
  775. phi->SetInOperand(phi_index - 1, {phi_variable});
  776. phi->SetInOperand(phi_index, {phi_label});
  777. }
  778. }
  779. // Duplicate the |loop| body |factor| number of times while keeping the loop
  780. // backedge intact.
  781. void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) {
  782. Unroll(loop, factor);
  783. LinkLastPhisToStart(loop);
  784. AddBlocksToLoop(loop);
  785. AddBlocksToFunction(loop->GetMergeBlock());
  786. RemoveDeadInstructions();
  787. }
  788. /*
  789. * End LoopUtilsImpl.
  790. */
  791. } // namespace
  792. /*
  793. *
  794. * Begin Utils.
  795. *
  796. * */
  797. bool LoopUtils::CanPerformUnroll() {
  798. // The loop is expected to be in structured order.
  799. if (!loop_->GetHeaderBlock()->GetMergeInst()) {
  800. return false;
  801. }
  802. // Find check the loop has a condition we can find and evaluate.
  803. const BasicBlock* condition = loop_->FindConditionBlock();
  804. if (!condition) return false;
  805. // Check that we can find and process the induction variable.
  806. const Instruction* induction = loop_->FindConditionVariable(condition);
  807. if (!induction || induction->opcode() != SpvOpPhi) return false;
  808. // Check that we can find the number of loop iterations.
  809. if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
  810. return false;
  811. #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  812. // ClusterFuzz/OSS-Fuzz is likely to yield examples with very high loop
  813. // iteration counts. This can cause timeouts and memouts during fuzzing that
  814. // are not classed as bugs. To avoid this noise, loop unrolling is not applied
  815. // to loops with large iteration counts when fuzzing.
  816. const size_t kFuzzerIterationLimit = 100;
  817. size_t num_iterations;
  818. loop_->FindNumberOfIterations(induction, &*condition->ctail(),
  819. &num_iterations);
  820. if (num_iterations > kFuzzerIterationLimit) {
  821. return false;
  822. }
  823. #endif
  824. // Make sure the latch block is a unconditional branch to the header
  825. // block.
  826. const Instruction& branch = *loop_->GetLatchBlock()->ctail();
  827. bool branching_assumption =
  828. branch.opcode() == SpvOpBranch &&
  829. branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
  830. if (!branching_assumption) {
  831. return false;
  832. }
  833. std::vector<Instruction*> inductions;
  834. loop_->GetInductionVariables(inductions);
  835. // Ban breaks within the loop.
  836. const std::vector<uint32_t>& merge_block_preds =
  837. context_->cfg()->preds(loop_->GetMergeBlock()->id());
  838. if (merge_block_preds.size() != 1) {
  839. return false;
  840. }
  841. // Ban continues within the loop.
  842. const std::vector<uint32_t>& continue_block_preds =
  843. context_->cfg()->preds(loop_->GetContinueBlock()->id());
  844. if (continue_block_preds.size() != 1) {
  845. return false;
  846. }
  847. // Ban returns in the loop.
  848. // Iterate over all the blocks within the loop and check that none of them
  849. // exit the loop.
  850. for (uint32_t label_id : loop_->GetBlocks()) {
  851. const BasicBlock* block = context_->cfg()->block(label_id);
  852. if (block->ctail()->opcode() == SpvOp::SpvOpKill ||
  853. block->ctail()->opcode() == SpvOp::SpvOpReturn ||
  854. block->ctail()->opcode() == SpvOp::SpvOpReturnValue ||
  855. block->ctail()->opcode() == SpvOp::SpvOpTerminateInvocation) {
  856. return false;
  857. }
  858. }
  859. // Can only unroll inner loops.
  860. if (!loop_->AreAllChildrenMarkedForRemoval()) {
  861. return false;
  862. }
  863. return true;
  864. }
  865. bool LoopUtils::PartiallyUnroll(size_t factor) {
  866. if (factor == 1 || !CanPerformUnroll()) return false;
  867. // Create the unroller utility.
  868. LoopUnrollerUtilsImpl unroller{context_,
  869. loop_->GetHeaderBlock()->GetParent()};
  870. unroller.Init(loop_);
  871. // If the unrolling factor is larger than or the same size as the loop just
  872. // fully unroll the loop.
  873. if (factor >= unroller.GetLoopIterationCount()) {
  874. unroller.FullyUnroll(loop_);
  875. return true;
  876. }
  877. // If the loop unrolling factor is an residual number of iterations we need to
  878. // let run the loop for the residual part then let it branch into the unrolled
  879. // remaining part. We add one when calucating the remainder to take into
  880. // account the one iteration already in the loop.
  881. if (unroller.GetLoopIterationCount() % factor != 0) {
  882. unroller.PartiallyUnrollResidualFactor(loop_, factor);
  883. } else {
  884. unroller.PartiallyUnroll(loop_, factor);
  885. }
  886. return true;
  887. }
  888. bool LoopUtils::FullyUnroll() {
  889. if (!CanPerformUnroll()) return false;
  890. std::vector<Instruction*> inductions;
  891. loop_->GetInductionVariables(inductions);
  892. LoopUnrollerUtilsImpl unroller{context_,
  893. loop_->GetHeaderBlock()->GetParent()};
  894. unroller.Init(loop_);
  895. unroller.FullyUnroll(loop_);
  896. return true;
  897. }
  898. void LoopUtils::Finalize() {
  899. // Clean up the loop descriptor to preserve the analysis.
  900. LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
  901. LD->PostModificationCleanup();
  902. }
  903. /*
  904. *
  905. * Begin Pass.
  906. *
  907. */
  908. Pass::Status LoopUnroller::Process() {
  909. bool changed = false;
  910. for (Function& f : *context()->module()) {
  911. if (f.IsDeclaration()) {
  912. continue;
  913. }
  914. LoopDescriptor* LD = context()->GetLoopDescriptor(&f);
  915. for (Loop& loop : *LD) {
  916. LoopUtils loop_utils{context(), &loop};
  917. if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
  918. continue;
  919. }
  920. if (fully_unroll_) {
  921. loop_utils.FullyUnroll();
  922. } else {
  923. loop_utils.PartiallyUnroll(unroll_factor_);
  924. }
  925. changed = true;
  926. }
  927. LD->PostModificationCleanup();
  928. }
  929. return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
  930. }
  931. } // namespace opt
  932. } // namespace spvtools