| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514 |
- // Copyright (c) 2018 Google LLC.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- #include "source/opt/loop_fission.h"
- #include <set>
- #include "source/opt/register_pressure.h"
- // Implement loop fission with an optional parameter to split only
- // if the register pressure in a given loop meets a certain criteria. This is
- // controlled via the constructors of LoopFissionPass.
- //
- // 1 - Build a list of loops to be split, these are top level loops (loops
- // without child loops themselves) which meet the register pressure criteria, as
- // determined by the ShouldSplitLoop method of LoopFissionPass.
- //
- // 2 - For each loop in the list, group each instruction into a set of related
- // instructions by traversing each instructions users and operands recursively.
- // We stop if we encounter an instruction we have seen before or an instruction
- // which we don't consider relevant (i.e OpLoopMerge). We then group these
- // groups into two different sets, one for the first loop and one for the
- // second.
- //
- // 3 - We then run CanPerformSplit to check that it would be legal to split a
- // loop using those two sets. We check that we haven't altered the relative
- // order load/stores appear in the binary and that we aren't breaking any
- // dependency between load/stores by splitting them into two loops. We also
- // check that none of the OpBranch instructions are dependent on a load as we
- // leave control flow structure intact and move only instructions in the body so
- // we want to avoid any loads with side affects or aliasing.
- //
- // 4 - We then split the loop by calling SplitLoop. This function clones the
- // loop and attaches it to the preheader and connects the new loops merge block
- // to the current loop header block. We then use the two sets built in step 2 to
- // remove instructions from each loop. If an instruction appears in the first
- // set it is removed from the second loop and vice versa.
- //
- // 5 - If the multiple split passes flag is set we check if each of the loops
- // still meet the register pressure criteria. If they do then we add them to the
- // list of loops to be split (created in step one) to allow for loops to be
- // split multiple times.
- //
- namespace spvtools {
- namespace opt {
- class LoopFissionImpl {
- public:
- LoopFissionImpl(IRContext* context, Loop* loop)
- : context_(context), loop_(loop), load_used_in_condition_(false) {}
- // Group each instruction in the loop into sets of instructions related by
- // their usedef chains. An instruction which uses another will appear in the
- // same set. Then merge those sets into just two sets. Returns false if there
- // was one or less sets created.
- bool GroupInstructionsByUseDef();
- // Check if the sets built by GroupInstructionsByUseDef violate any data
- // dependence rules.
- bool CanPerformSplit();
- // Split the loop and return a pointer to the new loop.
- Loop* SplitLoop();
- // Checks if |inst| is safe to move. We can only move instructions which don't
- // have any side effects and OpLoads and OpStores.
- bool MovableInstruction(const Instruction& inst) const;
- private:
- // Traverse the def use chain of |inst| and add the users and uses of |inst|
- // which are in the same loop to the |returned_set|.
- void TraverseUseDef(Instruction* inst, std::set<Instruction*>* returned_set,
- bool ignore_phi_users = false, bool report_loads = false);
- // We group the instructions in the block into two different groups, the
- // instructions to be kept in the original loop and the ones to be cloned into
- // the new loop. As the cloned loop is attached to the preheader it will be
- // the first loop and the second loop will be the original.
- std::set<Instruction*> cloned_loop_instructions_;
- std::set<Instruction*> original_loop_instructions_;
- // We need a set of all the instructions to be seen so we can break any
- // recursion and also so we can ignore certain instructions by preemptively
- // adding them to this set.
- std::set<Instruction*> seen_instructions_;
- // A map of instructions to their relative position in the function.
- std::map<Instruction*, size_t> instruction_order_;
- IRContext* context_;
- Loop* loop_;
- // This is set to true by TraverseUseDef when traversing the instructions
- // related to the loop condition and any if conditions should any of those
- // instructions be a load.
- bool load_used_in_condition_;
- };
- bool LoopFissionImpl::MovableInstruction(const Instruction& inst) const {
- return inst.opcode() == spv::Op::OpLoad ||
- inst.opcode() == spv::Op::OpStore ||
- inst.opcode() == spv::Op::OpSelectionMerge ||
- inst.opcode() == spv::Op::OpPhi || inst.IsOpcodeCodeMotionSafe();
- }
- void LoopFissionImpl::TraverseUseDef(Instruction* inst,
- std::set<Instruction*>* returned_set,
- bool ignore_phi_users, bool report_loads) {
- assert(returned_set && "Set to be returned cannot be null.");
- analysis::DefUseManager* def_use = context_->get_def_use_mgr();
- std::set<Instruction*>& inst_set = *returned_set;
- // We create this functor to traverse the use def chain to build the
- // grouping of related instructions. The lambda captures the std::function
- // to allow it to recurse.
- std::function<void(Instruction*)> traverser_functor;
- traverser_functor = [this, def_use, &inst_set, &traverser_functor,
- ignore_phi_users, report_loads](Instruction* user) {
- // If we've seen the instruction before or it is not inside the loop end the
- // traversal.
- if (!user || seen_instructions_.count(user) != 0 ||
- !context_->get_instr_block(user) ||
- !loop_->IsInsideLoop(context_->get_instr_block(user))) {
- return;
- }
- // Don't include labels or loop merge instructions in the instruction sets.
- // Including them would mean we group instructions related only by using the
- // same labels (i.e phis). We already preempt the inclusion of
- // OpSelectionMerge by adding related instructions to the seen_instructions_
- // set.
- if (user->opcode() == spv::Op::OpLoopMerge ||
- user->opcode() == spv::Op::OpLabel)
- return;
- // If the |report_loads| flag is set, set the class field
- // load_used_in_condition_ to false. This is used to check that none of the
- // condition checks in the loop rely on loads.
- if (user->opcode() == spv::Op::OpLoad && report_loads) {
- load_used_in_condition_ = true;
- }
- // Add the instruction to the set of instructions already seen, this breaks
- // recursion and allows us to ignore certain instructions.
- seen_instructions_.insert(user);
- inst_set.insert(user);
- // Wrapper functor to traverse the operands of each instruction.
- auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) {
- traverser_functor(def_use->GetDef(*id));
- };
- user->ForEachInOperand(traverse_operand);
- // For the first traversal we want to ignore the users of the phi.
- if (ignore_phi_users && user->opcode() == spv::Op::OpPhi) return;
- // Traverse each user with this lambda.
- def_use->ForEachUser(user, traverser_functor);
- // Wrapper functor for the use traversal.
- auto traverse_use = [&traverser_functor](Instruction* use, uint32_t) {
- traverser_functor(use);
- };
- def_use->ForEachUse(user, traverse_use);
- };
- // We start the traversal of the use def graph by invoking the above
- // lambda with the |inst| parameter.
- traverser_functor(inst);
- }
- bool LoopFissionImpl::GroupInstructionsByUseDef() {
- std::vector<std::set<Instruction*>> sets{};
- // We want to ignore all the instructions stemming from the loop condition
- // instruction.
- BasicBlock* condition_block = loop_->FindConditionBlock();
- if (!condition_block) return false;
- Instruction* condition = &*condition_block->tail();
- // We iterate over the blocks via iterating over all the blocks in the
- // function, we do this so we are iterating in the same order which the blocks
- // appear in the binary.
- Function& function = *loop_->GetHeaderBlock()->GetParent();
- // Create a temporary set to ignore certain groups of instructions within the
- // loop. We don't want any instructions related to control flow to be removed
- // from either loop only instructions within the control flow bodies.
- std::set<Instruction*> instructions_to_ignore{};
- TraverseUseDef(condition, &instructions_to_ignore, true, true);
- // Traverse control flow instructions to ensure they are added to the
- // seen_instructions_ set and will be ignored when it it called with actual
- // sets.
- for (BasicBlock& block : function) {
- if (!loop_->IsInsideLoop(block.id())) continue;
- for (Instruction& inst : block) {
- // Ignore all instructions related to control flow.
- if (inst.opcode() == spv::Op::OpSelectionMerge || inst.IsBranch()) {
- TraverseUseDef(&inst, &instructions_to_ignore, true, true);
- }
- }
- }
- // Traverse the instructions and generate the sets, automatically ignoring any
- // instructions in instructions_to_ignore.
- for (BasicBlock& block : function) {
- if (!loop_->IsInsideLoop(block.id()) ||
- loop_->GetHeaderBlock()->id() == block.id())
- continue;
- for (Instruction& inst : block) {
- // Record the order that each load/store is seen.
- if (inst.opcode() == spv::Op::OpLoad ||
- inst.opcode() == spv::Op::OpStore) {
- instruction_order_[&inst] = instruction_order_.size();
- }
- // Ignore instructions already seen in a traversal.
- if (seen_instructions_.count(&inst) != 0) {
- continue;
- }
- // Build the set.
- std::set<Instruction*> inst_set{};
- TraverseUseDef(&inst, &inst_set);
- if (!inst_set.empty()) sets.push_back(std::move(inst_set));
- }
- }
- // If we have one or zero sets return false to indicate that due to
- // insufficient instructions we couldn't split the loop into two groups and
- // thus the loop can't be split any further.
- if (sets.size() < 2) {
- return false;
- }
- // Merge the loop sets into two different sets. In CanPerformSplit we will
- // validate that we don't break the relative ordering of loads/stores by doing
- // this.
- for (size_t index = 0; index < sets.size() / 2; ++index) {
- cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end());
- }
- for (size_t index = sets.size() / 2; index < sets.size(); ++index) {
- original_loop_instructions_.insert(sets[index].begin(), sets[index].end());
- }
- return true;
- }
- bool LoopFissionImpl::CanPerformSplit() {
- // Return false if any of the condition instructions in the loop depend on a
- // load.
- if (load_used_in_condition_) {
- return false;
- }
- // Build a list of all parent loops of this loop. Loop dependence analysis
- // needs this structure.
- std::vector<const Loop*> loops;
- Loop* parent_loop = loop_;
- while (parent_loop) {
- loops.push_back(parent_loop);
- parent_loop = parent_loop->GetParent();
- }
- LoopDependenceAnalysis analysis{context_, loops};
- // A list of all the stores in the cloned loop.
- std::vector<Instruction*> set_one_stores{};
- // A list of all the loads in the cloned loop.
- std::vector<Instruction*> set_one_loads{};
- // Populate the above lists.
- for (Instruction* inst : cloned_loop_instructions_) {
- if (inst->opcode() == spv::Op::OpStore) {
- set_one_stores.push_back(inst);
- } else if (inst->opcode() == spv::Op::OpLoad) {
- set_one_loads.push_back(inst);
- }
- // If we find any instruction which we can't move (such as a barrier),
- // return false.
- if (!MovableInstruction(*inst)) return false;
- }
- // We need to calculate the depth of the loop to create the loop dependency
- // distance vectors.
- const size_t loop_depth = loop_->GetDepth();
- // Check the dependencies between loads in the cloned loop and stores in the
- // original and vice versa.
- for (Instruction* inst : original_loop_instructions_) {
- // If we find any instruction which we can't move (such as a barrier),
- // return false.
- if (!MovableInstruction(*inst)) return false;
- // Look at the dependency between the loads in the original and stores in
- // the cloned loops.
- if (inst->opcode() == spv::Op::OpLoad) {
- for (Instruction* store : set_one_stores) {
- DistanceVector vec{loop_depth};
- // If the store actually should appear after the load, return false.
- // This means the store has been placed in the wrong grouping.
- if (instruction_order_[store] > instruction_order_[inst]) {
- return false;
- }
- // If not independent check the distance vector.
- if (!analysis.GetDependence(store, inst, &vec)) {
- for (DistanceEntry& entry : vec.GetEntries()) {
- // A distance greater than zero means that the store in the cloned
- // loop has a dependency on the load in the original loop.
- if (entry.distance > 0) return false;
- }
- }
- }
- } else if (inst->opcode() == spv::Op::OpStore) {
- for (Instruction* load : set_one_loads) {
- DistanceVector vec{loop_depth};
- // If the load actually should appear after the store, return false.
- if (instruction_order_[load] > instruction_order_[inst]) {
- return false;
- }
- // If not independent check the distance vector.
- if (!analysis.GetDependence(inst, load, &vec)) {
- for (DistanceEntry& entry : vec.GetEntries()) {
- // A distance less than zero means the load in the cloned loop is
- // dependent on the store instruction in the original loop.
- if (entry.distance < 0) return false;
- }
- }
- }
- }
- }
- return true;
- }
- Loop* LoopFissionImpl::SplitLoop() {
- // Clone the loop.
- LoopUtils util{context_, loop_};
- LoopUtils::LoopCloningResult clone_results;
- Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results);
- // Update the OpLoopMerge in the cloned loop.
- cloned_loop->UpdateLoopMergeInst();
- // Add the loop_ to the module.
- // TODO(1841): Handle failure to create pre-header.
- Function::iterator it =
- util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id());
- util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(),
- clone_results.cloned_bb_.end(), ++it);
- loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock());
- std::vector<Instruction*> instructions_to_kill{};
- // Kill all the instructions which should appear in the cloned loop but not in
- // the original loop.
- for (uint32_t id : loop_->GetBlocks()) {
- BasicBlock* block = context_->cfg()->block(id);
- for (Instruction& inst : *block) {
- // If the instruction appears in the cloned loop instruction group, kill
- // it.
- if (cloned_loop_instructions_.count(&inst) == 1 &&
- original_loop_instructions_.count(&inst) == 0) {
- instructions_to_kill.push_back(&inst);
- if (inst.opcode() == spv::Op::OpPhi) {
- context_->ReplaceAllUsesWith(
- inst.result_id(), clone_results.value_map_[inst.result_id()]);
- }
- }
- }
- }
- // Kill all instructions which should appear in the original loop and not in
- // the cloned loop.
- for (uint32_t id : cloned_loop->GetBlocks()) {
- BasicBlock* block = context_->cfg()->block(id);
- for (Instruction& inst : *block) {
- Instruction* old_inst = clone_results.ptr_map_[&inst];
- // If the instruction belongs to the original loop instruction group, kill
- // it.
- if (cloned_loop_instructions_.count(old_inst) == 0 &&
- original_loop_instructions_.count(old_inst) == 1) {
- instructions_to_kill.push_back(&inst);
- }
- }
- }
- for (Instruction* i : instructions_to_kill) {
- context_->KillInst(i);
- }
- return cloned_loop;
- }
- LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split,
- bool split_multiple_times)
- : split_multiple_times_(split_multiple_times) {
- // Split if the number of registers in the loop exceeds
- // |register_threshold_to_split|.
- split_criteria_ =
- [register_threshold_to_split](
- const RegisterLiveness::RegionRegisterLiveness& liveness) {
- return liveness.used_registers_ > register_threshold_to_split;
- };
- }
- LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) {
- // Split by default.
- split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) {
- return true;
- };
- }
- bool LoopFissionPass::ShouldSplitLoop(const Loop& loop, IRContext* c) {
- LivenessAnalysis* analysis = c->GetLivenessAnalysis();
- RegisterLiveness::RegionRegisterLiveness liveness{};
- Function* function = loop.GetHeaderBlock()->GetParent();
- analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness);
- return split_criteria_(liveness);
- }
- Pass::Status LoopFissionPass::Process() {
- bool changed = false;
- for (Function& f : *context()->module()) {
- // We collect all the inner most loops in the function and run the loop
- // splitting util on each. The reason we do this is to allow us to iterate
- // over each, as creating new loops will invalidate the loop iterator.
- std::vector<Loop*> inner_most_loops{};
- LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(&f);
- for (Loop& loop : loop_descriptor) {
- if (!loop.HasChildren() && ShouldSplitLoop(loop, context())) {
- inner_most_loops.push_back(&loop);
- }
- }
- // List of new loops which meet the criteria to be split again.
- std::vector<Loop*> new_loops_to_split{};
- while (!inner_most_loops.empty()) {
- for (Loop* loop : inner_most_loops) {
- LoopFissionImpl impl{context(), loop};
- // Group the instructions in the loop into two different sets of related
- // instructions. If we can't group the instructions into the two sets
- // then we can't split the loop any further.
- if (!impl.GroupInstructionsByUseDef()) {
- continue;
- }
- if (impl.CanPerformSplit()) {
- Loop* second_loop = impl.SplitLoop();
- changed = true;
- context()->InvalidateAnalysesExceptFor(
- IRContext::kAnalysisLoopAnalysis);
- // If the newly created loop meets the criteria to be split, split it
- // again.
- if (ShouldSplitLoop(*second_loop, context()))
- new_loops_to_split.push_back(second_loop);
- // If the original loop (now split) still meets the criteria to be
- // split, split it again.
- if (ShouldSplitLoop(*loop, context()))
- new_loops_to_split.push_back(loop);
- }
- }
- // If the split multiple times flag has been set add the new loops which
- // meet the splitting criteria into the list of loops to be split on the
- // next iteration.
- if (split_multiple_times_) {
- inner_most_loops = std::move(new_loops_to_split);
- new_loops_to_split = {};
- } else {
- break;
- }
- }
- }
- return changed ? Pass::Status::SuccessWithChange
- : Pass::Status::SuccessWithoutChange;
- }
- } // namespace opt
- } // namespace spvtools
|