| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143 |
- // Copyright (c) 2018 Google LLC.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- #include "source/opt/loop_unroller.h"
- #include <limits>
- #include <memory>
- #include <unordered_map>
- #include <utility>
- #include <vector>
- #include "source/opt/ir_builder.h"
- #include "source/opt/loop_utils.h"
- // Implements loop util unrolling functionality for fully and partially
- // unrolling loops. Given a factor it will duplicate the loop that many times,
- // appending each one to the end of the old loop and removing backedges, to
- // create a new unrolled loop.
- //
- // 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
- // loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
- // validate that a given loop can be unrolled. That method (along with the
- // constructor of loop) checks that the IR is in the expected canonicalised
- // format.
- //
- // 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
- // perform the unrolling. This implements helper methods to copy the loop basic
- // blocks and remap the ids of instructions used inside them.
- //
- // 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
- // actually performs the loop duplication. It does this by creating a
- // LoopUnrollState object and then copying the loop as given by the factor
- // parameter. The LoopUnrollState object retains the state of the unroller
- // between the loop body copies as each iteration needs information on the last
- // to adjust the phi induction variable, adjust the OpLoopMerge instruction in
- // the main loop header, and change the previous continue block to point to the
- // new header and the new continue block to the main loop header.
- //
- // 4 - If the loop is to be fully unrolled then it is simply closed after step
- // 3, with the OpLoopMerge being deleted, the backedge removed, and the
- // condition blocks folded.
- //
- // 5 - If it is being partially unrolled: if the unrolling factor leaves the
- // loop with an even number of bodies with respect to the number of loop
- // iterations then step 3 is all that is needed. If it is uneven then we need to
- // duplicate the loop completely and unroll the duplicated loop to cover the
- // residual part and adjust the first loop to cover only the "even" part. For
- // instance if you request an unroll factor of 3 on a loop with 10 iterations
- // then copying the body three times would leave you with three bodies in the
- // loop
- // where the loop still iterates over each 4 times. So we make two loops one
- // iterating once then a second loop of three iterating 3 times.
- namespace spvtools {
- namespace opt {
- namespace {
- // Loop control constant value for DontUnroll flag.
- constexpr uint32_t kLoopControlDontUnrollIndex = 2;
- // Operand index of the loop control parameter of the OpLoopMerge.
- constexpr uint32_t kLoopControlIndex = 2;
- // This utility class encapsulates some of the state we need to maintain between
- // loop unrolls. Specifically it maintains key blocks and the induction variable
- // in the current loop duplication step and the blocks from the previous one.
- // This is because each step of the unroll needs to use data from both the
- // preceding step and the original loop.
- struct LoopUnrollState {
- LoopUnrollState()
- : previous_phi_(nullptr),
- previous_latch_block_(nullptr),
- previous_condition_block_(nullptr),
- new_phi(nullptr),
- new_continue_block(nullptr),
- new_condition_block(nullptr),
- new_header_block(nullptr) {}
- // Initialize from the loop descriptor class.
- LoopUnrollState(Instruction* induction, BasicBlock* latch_block,
- BasicBlock* condition, std::vector<Instruction*>&& phis)
- : previous_phi_(induction),
- previous_latch_block_(latch_block),
- previous_condition_block_(condition),
- new_phi(nullptr),
- new_continue_block(nullptr),
- new_condition_block(nullptr),
- new_header_block(nullptr) {
- previous_phis_ = std::move(phis);
- }
- // Swap the state so that the new nodes are now the previous nodes.
- void NextIterationState() {
- previous_phi_ = new_phi;
- previous_latch_block_ = new_latch_block;
- previous_condition_block_ = new_condition_block;
- previous_phis_ = std::move(new_phis_);
- // Clear new nodes.
- new_phi = nullptr;
- new_continue_block = nullptr;
- new_condition_block = nullptr;
- new_header_block = nullptr;
- new_latch_block = nullptr;
- // Clear new block/instruction maps.
- new_blocks.clear();
- new_inst.clear();
- ids_to_new_inst.clear();
- }
- // The induction variable from the immediately preceding loop body.
- Instruction* previous_phi_;
- // All the phi nodes from the previous loop iteration.
- std::vector<Instruction*> previous_phis_;
- std::vector<Instruction*> new_phis_;
- // The previous latch block. The backedge will be removed from this and
- // added to the new latch block.
- BasicBlock* previous_latch_block_;
- // The previous condition block. This may be folded to flatten the loop.
- BasicBlock* previous_condition_block_;
- // The new induction variable.
- Instruction* new_phi;
- // The new continue block.
- BasicBlock* new_continue_block;
- // The new condition block.
- BasicBlock* new_condition_block;
- // The new header block.
- BasicBlock* new_header_block;
- // The new latch block.
- BasicBlock* new_latch_block;
- // A mapping of new block ids to the original blocks which they were copied
- // from.
- std::unordered_map<uint32_t, BasicBlock*> new_blocks;
- // A mapping of the original instruction ids to the instruction ids to their
- // copies.
- std::unordered_map<uint32_t, uint32_t> new_inst;
- std::unordered_map<uint32_t, Instruction*> ids_to_new_inst;
- };
- // This class implements the actual unrolling. It uses a LoopUnrollState to
- // maintain the state of the unrolling in between steps.
- class LoopUnrollerUtilsImpl {
- public:
- using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>;
- LoopUnrollerUtilsImpl(IRContext* c, Function* function)
- : context_(c),
- function_(*function),
- loop_condition_block_(nullptr),
- loop_induction_variable_(nullptr),
- number_of_loop_iterations_(0),
- loop_step_value_(0),
- loop_init_value_(0) {}
- // Unroll the |loop| by given |factor| by copying the whole body |factor|
- // times. The resulting basicblock structure will remain a loop.
- void PartiallyUnroll(Loop*, size_t factor);
- // If partially unrolling the |loop| would leave the loop with too many bodies
- // for its number of iterations then this method should be used. This method
- // will duplicate the |loop| completely, making the duplicated loop the
- // successor of the original's merge block. The original loop will have its
- // condition changed to loop over the residual part and the duplicate will be
- // partially unrolled. The resulting structure will be two loops.
- void PartiallyUnrollResidualFactor(Loop* loop, size_t factor);
- // Fully unroll the |loop| by copying the full body by the total number of
- // loop iterations, folding all conditions, and removing the backedge from the
- // continue block to the header.
- void FullyUnroll(Loop* loop);
- // Get the ID of the variable in the |phi| paired with |label|.
- uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const;
- // Close the loop by removing the OpLoopMerge from the |loop| header block and
- // making the backedge point to the merge block.
- void CloseUnrolledLoop(Loop* loop);
- // Remove the OpConditionalBranch instruction inside |conditional_block| used
- // to branch to either exit or continue the loop and replace it with an
- // unconditional OpBranch to block |new_target|.
- void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target);
- // Add all blocks_to_add_ to function_ at the |insert_point|.
- void AddBlocksToFunction(const BasicBlock* insert_point);
- // Duplicates the |old_loop|, cloning each body and remapping the ids without
- // removing instructions or changing relative structure. Result will be stored
- // in |new_loop|.
- void DuplicateLoop(Loop* old_loop, Loop* new_loop);
- inline size_t GetLoopIterationCount() const {
- return number_of_loop_iterations_;
- }
- // Extracts the initial state information from the |loop|.
- void Init(Loop* loop);
- // Replace the uses of each induction variable outside the loop with the final
- // value of the induction variable before the loop exit. To reflect the proper
- // state of a fully unrolled loop.
- void ReplaceInductionUseWithFinalValue(Loop* loop);
- // Remove all the instructions in the invalidated_instructions_ vector.
- void RemoveDeadInstructions();
- // Replace any use of induction variables outwith the loop with the final
- // value of the induction variable in the unrolled loop.
- void ReplaceOutsideLoopUseWithFinalValue(Loop* loop);
- // Set the LoopControl operand of the OpLoopMerge instruction to be
- // DontUnroll.
- void MarkLoopControlAsDontUnroll(Loop* loop) const;
- private:
- // Remap all the in |basic_block| to new IDs and keep the mapping of new ids
- // to old
- // ids. |loop| is used to identify special loop blocks (header, continue,
- // etc).
- void AssignNewResultIds(BasicBlock* basic_block);
- // Using the map built by AssignNewResultIds, replace the uses in |inst|
- // by the id that the use maps to.
- void RemapOperands(Instruction* inst);
- // Using the map built by AssignNewResultIds, for each instruction in
- // |basic_block| use
- // that map to substitute the IDs used by instructions (in the operands) with
- // the new ids.
- void RemapOperands(BasicBlock* basic_block);
- // Copy the whole body of the loop, all blocks dominated by the |loop| header
- // and not dominated by the |loop| merge. The copied body will be linked to by
- // the old |loop| continue block and the new body will link to the |loop|
- // header via the new continue block. |eliminate_conditions| is used to decide
- // whether or not to fold all the condition blocks other than the last one.
- void CopyBody(Loop* loop, bool eliminate_conditions);
- // Copy a given |block_to_copy| in the |loop| and record the mapping of the
- // old/new ids. |preserve_instructions| determines whether or not the method
- // will modify (other than result_id) instructions which are copied.
- void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy,
- bool preserve_instructions);
- // The actual implementation of the unroll step. Unrolls |loop| by given
- // |factor| by copying the body by |factor| times. Also propagates the
- // induction variable value throughout the copies.
- void Unroll(Loop* loop, size_t factor);
- // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
- // as computed by the method ComputeLoopOrderedBlocks.
- void ComputeLoopOrderedBlocks(Loop* loop);
- // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
- // the parent exists.
- void AddBlocksToLoop(Loop* loop) const;
- // After the partially unroll step the phi instructions in the header block
- // will be in an illegal format. This function makes the phis legal by making
- // the edge from the latch block come from the new latch block and the value
- // to be the actual value of the phi at that point.
- void LinkLastPhisToStart(Loop* loop) const;
- // Kill all debug declaration instructions from |bb|.
- void KillDebugDeclares(BasicBlock* bb);
- // A pointer to the IRContext. Used to add/remove instructions and for usedef
- // chains.
- IRContext* context_;
- // A reference the function the loop is within.
- Function& function_;
- // A list of basic blocks to be added to the loop at the end of an unroll
- // step.
- BasicBlockListTy blocks_to_add_;
- // List of instructions which are now dead and can be removed.
- std::vector<Instruction*> invalidated_instructions_;
- // Maintains the current state of the transform between calls to unroll.
- LoopUnrollState state_;
- // An ordered list containing the loop basic blocks.
- std::vector<BasicBlock*> loop_blocks_inorder_;
- // The block containing the condition check which contains a conditional
- // branch to the merge and continue block.
- BasicBlock* loop_condition_block_;
- // The induction variable of the loop.
- Instruction* loop_induction_variable_;
- // Phis used in the loop need to be remapped to use the actual result values
- // and then be remapped at the end.
- std::vector<Instruction*> loop_phi_instructions_;
- // The number of loop iterations that the loop would perform pre-unroll.
- size_t number_of_loop_iterations_;
- // The amount that the loop steps each iteration.
- int64_t loop_step_value_;
- // The value the loop starts stepping from.
- int64_t loop_init_value_;
- };
- /*
- * Static helper functions.
- */
- // Retrieve the index of the OpPhi instruction |phi| which corresponds to the
- // incoming |block| id.
- uint32_t GetPhiIndexFromLabel(const BasicBlock* block, const Instruction* phi) {
- for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
- if (block->id() == phi->GetSingleWordInOperand(i)) {
- return i;
- }
- }
- assert(false && "Could not find operand in instruction.");
- return 0;
- }
- void LoopUnrollerUtilsImpl::Init(Loop* loop) {
- loop_condition_block_ = loop->FindConditionBlock();
- // When we reinit the second loop during PartiallyUnrollResidualFactor we need
- // to use the cached value from the duplicate step as the dominator tree
- // basded solution, loop->FindConditionBlock, requires all the nodes to be
- // connected up with the correct branches. They won't be at this point.
- if (!loop_condition_block_) {
- loop_condition_block_ = state_.new_condition_block;
- }
- assert(loop_condition_block_);
- loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_);
- assert(loop_induction_variable_);
- bool found = loop->FindNumberOfIterations(
- loop_induction_variable_, &*loop_condition_block_->ctail(),
- &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
- (void)found; // To silence unused variable warning on release builds.
- assert(found);
- // Blocks are stored in an unordered set of ids in the loop class, we need to
- // create the dominator ordered list.
- ComputeLoopOrderedBlocks(loop);
- }
- // This function is used to partially unroll the loop when the factor provided
- // would normally lead to an illegal optimization. Instead of just unrolling the
- // loop it creates two loops and unrolls one and adjusts the condition on the
- // other. The end result being that the new loop pair iterates over the correct
- // number of bodies.
- void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop,
- size_t factor) {
- // TODO(1841): Handle id overflow.
- std::unique_ptr<Instruction> new_label{new Instruction(
- context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})};
- std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
- new_exit_bb->SetParent(&function_);
- // Save the id of the block before we move it.
- uint32_t new_merge_id = new_exit_bb->id();
- // Add the block the list of blocks to add, we want this merge block to be
- // right at the start of the new blocks.
- blocks_to_add_.push_back(std::move(new_exit_bb));
- BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
- Instruction& original_conditional_branch = *loop_condition_block_->tail();
- // Duplicate the loop, providing access to the blocks of both loops.
- // This is a naked new due to the VS2013 requirement of not having unique
- // pointers in vectors, as it will be inserted into a vector with
- // loop_descriptor.AddLoop.
- std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop);
- // Clear the basic blocks of the new loop.
- new_loop->ClearBlocks();
- DuplicateLoop(loop, new_loop.get());
- // Add the blocks to the function.
- AddBlocksToFunction(loop->GetMergeBlock());
- blocks_to_add_.clear();
- // Create a new merge block for the first loop.
- InstructionBuilder builder{context_, new_exit_bb_raw};
- // Make the first loop branch to the second.
- builder.AddBranch(new_loop->GetHeaderBlock()->id());
- loop_condition_block_ = state_.new_condition_block;
- loop_induction_variable_ = state_.new_phi;
- // Unroll the new loop by the factor with the usual -1 to account for the
- // existing block iteration.
- Unroll(new_loop.get(), factor);
- LinkLastPhisToStart(new_loop.get());
- AddBlocksToLoop(new_loop.get());
- // Add the new merge block to the back of the list of blocks to be added. It
- // needs to be the last block added to maintain dominator order in the binary.
- blocks_to_add_.push_back(
- std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock()));
- // Add the blocks to the function.
- AddBlocksToFunction(loop->GetMergeBlock());
- // Reset the usedef analysis.
- context_->InvalidateAnalysesExceptFor(
- IRContext::Analysis::kAnalysisLoopAnalysis);
- analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
- // The loop condition.
- Instruction* condition_check = def_use_manager->GetDef(
- original_conditional_branch.GetSingleWordOperand(0));
- // This should have been checked by the LoopUtils::CanPerformUnroll function
- // before entering this.
- assert(loop->IsSupportedCondition(condition_check->opcode()));
- // We need to account for the initial body when calculating the remainder.
- int64_t remainder = Loop::GetResidualConditionValue(
- condition_check->opcode(), loop_init_value_, loop_step_value_,
- number_of_loop_iterations_, factor);
- assert(remainder > std::numeric_limits<int32_t>::min() &&
- remainder < std::numeric_limits<int32_t>::max());
- Instruction* new_constant = nullptr;
- // If the remainder is negative then we add a signed constant, otherwise just
- // add an unsigned constant.
- if (remainder < 0) {
- new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder));
- } else {
- new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder));
- }
- uint32_t constant_id = new_constant->result_id();
- // Update the condition check.
- condition_check->SetInOperand(1, {constant_id});
- // Update the next phi node. The phi will have a constant value coming in from
- // the preheader block. For the duplicated loop we need to update the constant
- // to be the amount of iterations covered by the first loop and the incoming
- // block to be the first loops new merge block.
- std::vector<Instruction*> new_inductions;
- new_loop->GetInductionVariables(new_inductions);
- std::vector<Instruction*> old_inductions;
- loop->GetInductionVariables(old_inductions);
- for (size_t index = 0; index < new_inductions.size(); ++index) {
- Instruction* new_induction = new_inductions[index];
- Instruction* old_induction = old_inductions[index];
- // Get the index of the loop initalizer, the value coming in from the
- // preheader.
- uint32_t initalizer_index =
- GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction);
- // Replace the second loop initalizer with the phi from the first
- new_induction->SetInOperand(initalizer_index - 1,
- {old_induction->result_id()});
- new_induction->SetInOperand(initalizer_index, {new_merge_id});
- // If the use of the first loop induction variable is outside of the loop
- // then replace that use with the second loop induction variable.
- uint32_t second_loop_induction = new_induction->result_id();
- auto replace_use_outside_of_loop = [loop, second_loop_induction](
- Instruction* user,
- uint32_t operand_index) {
- if (!loop->IsInsideLoop(user)) {
- user->SetOperand(operand_index, {second_loop_induction});
- }
- };
- context_->get_def_use_mgr()->ForEachUse(old_induction,
- replace_use_outside_of_loop);
- }
- context_->InvalidateAnalysesExceptFor(
- IRContext::Analysis::kAnalysisLoopAnalysis);
- context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
- LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_);
- loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent());
- RemoveDeadInstructions();
- }
- // Mark this loop as DontUnroll as it will already be unrolled and it may not
- // be safe to unroll a previously partially unrolled loop.
- void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const {
- Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
- assert(loop_merge_inst &&
- "Loop merge instruction could not be found after entering unroller "
- "(should have exited before this)");
- loop_merge_inst->SetInOperand(kLoopControlIndex,
- {kLoopControlDontUnrollIndex});
- }
- // Duplicate the |loop| body |factor| - 1 number of times while keeping the loop
- // backedge intact. This will leave the loop with |factor| number of bodies
- // after accounting for the initial body.
- void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) {
- // If we unroll a loop partially it will not be safe to unroll it further.
- // This is due to the current method of calculating the number of loop
- // iterations.
- MarkLoopControlAsDontUnroll(loop);
- std::vector<Instruction*> inductions;
- loop->GetInductionVariables(inductions);
- state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
- loop_condition_block_, std::move(inductions)};
- for (size_t i = 0; i < factor - 1; ++i) {
- CopyBody(loop, true);
- }
- }
- void LoopUnrollerUtilsImpl::RemoveDeadInstructions() {
- // Remove the dead instructions.
- for (Instruction* inst : invalidated_instructions_) {
- context_->KillInst(inst);
- }
- }
- void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) {
- context_->InvalidateAnalysesExceptFor(
- IRContext::Analysis::kAnalysisLoopAnalysis |
- IRContext::Analysis::kAnalysisDefUse |
- IRContext::Analysis::kAnalysisInstrToBlockMapping);
- std::vector<Instruction*> inductions;
- loop->GetInductionVariables(inductions);
- for (size_t index = 0; index < inductions.size(); ++index) {
- uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index],
- state_.previous_latch_block_->id());
- context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id);
- invalidated_instructions_.push_back(inductions[index]);
- }
- }
- // Fully unroll the loop by partially unrolling it by the number of loop
- // iterations minus one for the body already accounted for.
- void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) {
- // We unroll the loop by number of iterations in the loop.
- Unroll(loop, number_of_loop_iterations_);
- // The first condition block is preserved until now so it can be copied.
- FoldConditionBlock(loop_condition_block_, 1);
- // Delete the OpLoopMerge and remove the backedge to the header.
- CloseUnrolledLoop(loop);
- // Mark the loop for later deletion. This allows us to preserve the loop
- // iterators but still disregard dead loops.
- loop->MarkLoopForRemoval();
- // If the loop has a parent add the new blocks to the parent.
- if (loop->GetParent()) {
- AddBlocksToLoop(loop->GetParent());
- }
- // Add the blocks to the function.
- AddBlocksToFunction(loop->GetMergeBlock());
- ReplaceInductionUseWithFinalValue(loop);
- RemoveDeadInstructions();
- // Invalidate all analyses.
- context_->InvalidateAnalysesExceptFor(
- IRContext::Analysis::kAnalysisLoopAnalysis |
- IRContext::Analysis::kAnalysisDefUse);
- }
- void LoopUnrollerUtilsImpl::KillDebugDeclares(BasicBlock* bb) {
- // We cannot kill an instruction inside BasicBlock::ForEachInst()
- // because it will generate dangling pointers. We use |to_be_killed|
- // to kill them after the loop.
- std::vector<Instruction*> to_be_killed;
- bb->ForEachInst([&to_be_killed, this](Instruction* inst) {
- if (context_->get_debug_info_mgr()->IsDebugDeclare(inst)) {
- to_be_killed.push_back(inst);
- }
- });
- for (auto* inst : to_be_killed) context_->KillInst(inst);
- }
- // Copy a given basic block, give it a new result_id, and store the new block
- // and the id mapping in the state. |preserve_instructions| is used to determine
- // whether or not this function should edit instructions other than the
- // |result_id|.
- void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr,
- bool preserve_instructions) {
- // Clone the block exactly, including the IDs.
- BasicBlock* basic_block = itr->Clone(context_);
- basic_block->SetParent(itr->GetParent());
- // We do not want to duplicate DebugDeclare.
- KillDebugDeclares(basic_block);
- // Assign each result a new unique ID and keep a mapping of the old ids to
- // the new ones.
- AssignNewResultIds(basic_block);
- // If this is the continue block we are copying.
- if (itr == loop->GetContinueBlock()) {
- // Make the OpLoopMerge point to this block for the continue.
- if (!preserve_instructions) {
- Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
- merge_inst->SetInOperand(1, {basic_block->id()});
- context_->UpdateDefUse(merge_inst);
- }
- state_.new_continue_block = basic_block;
- }
- // If this is the header block we are copying.
- if (itr == loop->GetHeaderBlock()) {
- state_.new_header_block = basic_block;
- if (!preserve_instructions) {
- // Remove the loop merge instruction if it exists.
- Instruction* merge_inst = basic_block->GetLoopMergeInst();
- if (merge_inst) invalidated_instructions_.push_back(merge_inst);
- }
- }
- // If this is the latch block being copied, record it in the state.
- if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block;
- // If this is the condition block we are copying.
- if (itr == loop_condition_block_) {
- state_.new_condition_block = basic_block;
- }
- // Add this block to the list of blocks to add to the function at the end of
- // the unrolling process.
- blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block));
- // Keep tracking the old block via a map.
- state_.new_blocks[itr->id()] = basic_block;
- }
- void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) {
- // Copy each basic block in the loop, give them new ids, and save state
- // information.
- for (const BasicBlock* itr : loop_blocks_inorder_) {
- CopyBasicBlock(loop, itr, false);
- }
- // Set the previous latch block to point to the new header.
- Instruction* latch_branch = state_.previous_latch_block_->terminator();
- latch_branch->SetInOperand(0, {state_.new_header_block->id()});
- context_->UpdateDefUse(latch_branch);
- // As the algorithm copies the original loop blocks exactly, the tail of the
- // latch block on iterations after the first one will be a branch to the new
- // header and not the actual loop header. The last continue block in the loop
- // should always be a backedge to the global header.
- Instruction* new_latch_branch = state_.new_latch_block->terminator();
- new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()});
- context_->AnalyzeUses(new_latch_branch);
- std::vector<Instruction*> inductions;
- loop->GetInductionVariables(inductions);
- for (size_t index = 0; index < inductions.size(); ++index) {
- Instruction* primary_copy = inductions[index];
- assert(primary_copy->result_id() != 0);
- Instruction* induction_clone =
- state_.ids_to_new_inst[state_.new_inst[primary_copy->result_id()]];
- state_.new_phis_.push_back(induction_clone);
- assert(induction_clone->result_id() != 0);
- if (!state_.previous_phis_.empty()) {
- state_.new_inst[primary_copy->result_id()] = GetPhiDefID(
- state_.previous_phis_[index], state_.previous_latch_block_->id());
- } else {
- // Do not replace the first phi block ids.
- state_.new_inst[primary_copy->result_id()] = primary_copy->result_id();
- }
- }
- if (eliminate_conditions &&
- state_.new_condition_block != loop_condition_block_) {
- FoldConditionBlock(state_.new_condition_block, 1);
- }
- // Only reference to the header block is the backedge in the latch block,
- // don't change this.
- state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
- for (auto& pair : state_.new_blocks) {
- RemapOperands(pair.second);
- }
- for (Instruction* dead_phi : state_.new_phis_)
- invalidated_instructions_.push_back(dead_phi);
- // Swap the state so the new is now the previous.
- state_.NextIterationState();
- }
- uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi,
- uint32_t label) const {
- for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
- if (phi->GetSingleWordOperand(operand) == label) {
- return phi->GetSingleWordOperand(operand - 1);
- }
- }
- assert(false && "Could not find a phi index matching the provided label");
- return 0;
- }
- void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block,
- uint32_t operand_label) {
- // Remove the old conditional branch to the merge and continue blocks.
- Instruction& old_branch = *condition_block->tail();
- uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
- DebugScope scope = old_branch.GetDebugScope();
- const std::vector<Instruction> lines = old_branch.dbg_line_insts();
- context_->KillInst(&old_branch);
- // Add the new unconditional branch to the merge block.
- InstructionBuilder builder(
- context_, condition_block,
- IRContext::Analysis::kAnalysisDefUse |
- IRContext::Analysis::kAnalysisInstrToBlockMapping);
- Instruction* new_branch = builder.AddBranch(new_target);
- if (!lines.empty()) new_branch->AddDebugLine(&lines.back());
- new_branch->SetDebugScope(scope);
- }
- void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) {
- // Remove the OpLoopMerge instruction from the function.
- Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
- invalidated_instructions_.push_back(merge_inst);
- // Remove the final backedge to the header and make it point instead to the
- // merge block.
- Instruction* latch_instruction = state_.previous_latch_block_->terminator();
- latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()});
- context_->UpdateDefUse(latch_instruction);
- // Remove all induction variables as the phis will now be invalid. Replace all
- // uses with the constant initializer value (all uses of phis will be in
- // the first iteration with the subsequent phis already having been removed).
- std::vector<Instruction*> inductions;
- loop->GetInductionVariables(inductions);
- // We can use the state instruction mechanism to replace all internal loop
- // values within the first loop trip (as the subsequent ones will be updated
- // by the copy function) with the value coming in from the preheader and then
- // use context ReplaceAllUsesWith for the uses outside the loop with the final
- // trip phi value.
- state_.new_inst.clear();
- for (Instruction* induction : inductions) {
- uint32_t initalizer_id =
- GetPhiDefID(induction, loop->GetPreHeaderBlock()->id());
- state_.new_inst[induction->result_id()] = initalizer_id;
- }
- for (BasicBlock* block : loop_blocks_inorder_) {
- RemapOperands(block);
- }
- for (auto& block_itr : blocks_to_add_) {
- RemapOperands(block_itr.get());
- }
- // Rewrite the last phis, since they may still reference the original phi.
- for (Instruction* last_phi : state_.previous_phis_) {
- RemapOperands(last_phi);
- }
- }
- // Uses the first loop to create a copy of the loop with new IDs.
- void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) {
- std::vector<BasicBlock*> new_block_order;
- // Copy every block in the old loop.
- for (const BasicBlock* itr : loop_blocks_inorder_) {
- CopyBasicBlock(old_loop, itr, true);
- new_block_order.push_back(blocks_to_add_.back().get());
- }
- // Clone the merge block, give it a new id and record it in the state.
- BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
- new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
- AssignNewResultIds(new_merge);
- state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
- // Remap the operands of every instruction in the loop to point to the new
- // copies.
- for (auto& pair : state_.new_blocks) {
- RemapOperands(pair.second);
- }
- loop_blocks_inorder_ = std::move(new_block_order);
- AddBlocksToLoop(new_loop);
- new_loop->SetHeaderBlock(state_.new_header_block);
- new_loop->SetContinueBlock(state_.new_continue_block);
- new_loop->SetLatchBlock(state_.new_latch_block);
- new_loop->SetMergeBlock(new_merge);
- }
- // Whenever the utility copies a block it stores it in a temporary buffer, this
- // function adds the buffer into the Function. The blocks will be inserted
- // after the block |insert_point|.
- void LoopUnrollerUtilsImpl::AddBlocksToFunction(
- const BasicBlock* insert_point) {
- for (auto basic_block_iterator = function_.begin();
- basic_block_iterator != function_.end(); ++basic_block_iterator) {
- if (basic_block_iterator->id() == insert_point->id()) {
- basic_block_iterator.InsertBefore(&blocks_to_add_);
- return;
- }
- }
- assert(
- false &&
- "Could not add basic blocks to function as insert point was not found.");
- }
- // Assign all result_ids in |basic_block| instructions to new IDs and preserve
- // the mapping of new ids to old ones.
- void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) {
- analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
- // Label instructions aren't covered by normal traversal of the
- // instructions.
- // TODO(1841): Handle id overflow.
- uint32_t new_label_id = context_->TakeNextId();
- // Assign a new id to the label.
- state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
- basic_block->GetLabelInst()->SetResultId(new_label_id);
- def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst());
- for (Instruction& inst : *basic_block) {
- // Do def/use analysis on new lines
- for (auto& line : inst.dbg_line_insts())
- def_use_mgr->AnalyzeInstDefUse(&line);
- uint32_t old_id = inst.result_id();
- // Ignore stores etc.
- if (old_id == 0) {
- continue;
- }
- // Give the instruction a new id.
- // TODO(1841): Handle id overflow.
- inst.SetResultId(context_->TakeNextId());
- def_use_mgr->AnalyzeInstDef(&inst);
- // Save the mapping of old_id -> new_id.
- state_.new_inst[old_id] = inst.result_id();
- // Check if this instruction is the induction variable.
- if (loop_induction_variable_->result_id() == old_id) {
- // Save a pointer to the new copy of it.
- state_.new_phi = &inst;
- }
- state_.ids_to_new_inst[inst.result_id()] = &inst;
- }
- }
- void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) {
- auto remap_operands_to_new_ids = [this](uint32_t* id) {
- auto itr = state_.new_inst.find(*id);
- if (itr != state_.new_inst.end()) {
- *id = itr->second;
- }
- };
- inst->ForEachInId(remap_operands_to_new_ids);
- context_->AnalyzeUses(inst);
- }
- void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) {
- for (Instruction& inst : *basic_block) {
- RemapOperands(&inst);
- }
- }
- // Generate the ordered list of basic blocks in the |loop| and cache it for
- // later use.
- void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) {
- loop_blocks_inorder_.clear();
- loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_);
- }
- // Adds the blocks_to_add_ to both the loop and to the parent.
- void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const {
- // Add the blocks to this loop.
- for (auto& block_itr : blocks_to_add_) {
- loop->AddBasicBlock(block_itr.get());
- }
- // Add the blocks to the parent as well.
- if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
- }
- void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const {
- std::vector<Instruction*> inductions;
- loop->GetInductionVariables(inductions);
- for (size_t i = 0; i < inductions.size(); ++i) {
- Instruction* last_phi_in_block = state_.previous_phis_[i];
- uint32_t phi_index =
- GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block);
- uint32_t phi_variable =
- last_phi_in_block->GetSingleWordInOperand(phi_index - 1);
- uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index);
- Instruction* phi = inductions[i];
- phi->SetInOperand(phi_index - 1, {phi_variable});
- phi->SetInOperand(phi_index, {phi_label});
- }
- }
- // Duplicate the |loop| body |factor| number of times while keeping the loop
- // backedge intact.
- void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) {
- Unroll(loop, factor);
- LinkLastPhisToStart(loop);
- AddBlocksToLoop(loop);
- AddBlocksToFunction(loop->GetMergeBlock());
- RemoveDeadInstructions();
- }
- /*
- * End LoopUtilsImpl.
- */
- } // namespace
- /*
- *
- * Begin Utils.
- *
- * */
- bool LoopUtils::CanPerformUnroll() {
- // The loop is expected to be in structured order.
- if (!loop_->GetHeaderBlock()->GetMergeInst()) {
- return false;
- }
- // Find check the loop has a condition we can find and evaluate.
- const BasicBlock* condition = loop_->FindConditionBlock();
- if (!condition) return false;
- // Check that we can find and process the induction variable.
- const Instruction* induction = loop_->FindConditionVariable(condition);
- if (!induction || induction->opcode() != spv::Op::OpPhi) return false;
- // Check that we can find the number of loop iterations.
- if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
- return false;
- #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
- // ClusterFuzz/OSS-Fuzz is likely to yield examples with very high loop
- // iteration counts. This can cause timeouts and memouts during fuzzing that
- // are not classed as bugs. To avoid this noise, loop unrolling is not applied
- // to loops with large iteration counts when fuzzing.
- constexpr size_t kFuzzerIterationLimit = 100;
- size_t num_iterations;
- loop_->FindNumberOfIterations(induction, &*condition->ctail(),
- &num_iterations);
- if (num_iterations > kFuzzerIterationLimit) {
- return false;
- }
- #endif
- // Make sure the latch block is a unconditional branch to the header
- // block.
- const Instruction& branch = *loop_->GetLatchBlock()->ctail();
- bool branching_assumption =
- branch.opcode() == spv::Op::OpBranch &&
- branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
- if (!branching_assumption) {
- return false;
- }
- std::vector<Instruction*> inductions;
- loop_->GetInductionVariables(inductions);
- // Ban breaks within the loop.
- const std::vector<uint32_t>& merge_block_preds =
- context_->cfg()->preds(loop_->GetMergeBlock()->id());
- if (merge_block_preds.size() != 1) {
- return false;
- }
- // Ban continues within the loop.
- const std::vector<uint32_t>& continue_block_preds =
- context_->cfg()->preds(loop_->GetContinueBlock()->id());
- if (continue_block_preds.size() != 1) {
- return false;
- }
- // Ban returns in the loop.
- // Iterate over all the blocks within the loop and check that none of them
- // exit the loop.
- for (uint32_t label_id : loop_->GetBlocks()) {
- const BasicBlock* block = context_->cfg()->block(label_id);
- if (block->ctail()->opcode() == spv::Op::OpKill ||
- block->ctail()->opcode() == spv::Op::OpReturn ||
- block->ctail()->opcode() == spv::Op::OpReturnValue ||
- block->ctail()->opcode() == spv::Op::OpTerminateInvocation) {
- return false;
- }
- }
- // Can only unroll inner loops.
- if (!loop_->AreAllChildrenMarkedForRemoval()) {
- return false;
- }
- return true;
- }
- bool LoopUtils::PartiallyUnroll(size_t factor) {
- if (factor == 1 || !CanPerformUnroll()) return false;
- // Create the unroller utility.
- LoopUnrollerUtilsImpl unroller{context_,
- loop_->GetHeaderBlock()->GetParent()};
- unroller.Init(loop_);
- // If the unrolling factor is larger than or the same size as the loop just
- // fully unroll the loop.
- if (factor >= unroller.GetLoopIterationCount()) {
- unroller.FullyUnroll(loop_);
- return true;
- }
- // If the loop unrolling factor is an residual number of iterations we need to
- // let run the loop for the residual part then let it branch into the unrolled
- // remaining part. We add one when calucating the remainder to take into
- // account the one iteration already in the loop.
- if (unroller.GetLoopIterationCount() % factor != 0) {
- unroller.PartiallyUnrollResidualFactor(loop_, factor);
- } else {
- unroller.PartiallyUnroll(loop_, factor);
- }
- return true;
- }
- bool LoopUtils::FullyUnroll() {
- if (!CanPerformUnroll()) return false;
- std::vector<Instruction*> inductions;
- loop_->GetInductionVariables(inductions);
- LoopUnrollerUtilsImpl unroller{context_,
- loop_->GetHeaderBlock()->GetParent()};
- unroller.Init(loop_);
- unroller.FullyUnroll(loop_);
- return true;
- }
- void LoopUtils::Finalize() {
- // Clean up the loop descriptor to preserve the analysis.
- LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
- LD->PostModificationCleanup();
- }
- /*
- *
- * Begin Pass.
- *
- */
- Pass::Status LoopUnroller::Process() {
- bool changed = false;
- for (Function& f : *context()->module()) {
- if (f.IsDeclaration()) {
- continue;
- }
- LoopDescriptor* LD = context()->GetLoopDescriptor(&f);
- for (Loop& loop : *LD) {
- LoopUtils loop_utils{context(), &loop};
- if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
- continue;
- }
- if (fully_unroll_) {
- loop_utils.FullyUnroll();
- } else {
- loop_utils.PartiallyUnroll(unroll_factor_);
- }
- changed = true;
- }
- LD->PostModificationCleanup();
- }
- return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
- }
- } // namespace opt
- } // namespace spvtools
|