// loop_unswitch_pass.cpp
  1. // Copyright (c) 2018 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "source/opt/loop_unswitch_pass.h"
  15. #include <functional>
  16. #include <list>
  17. #include <memory>
  18. #include <unordered_map>
  19. #include <unordered_set>
  20. #include <utility>
  21. #include <vector>
  22. #include "source/opt/basic_block.h"
  23. #include "source/opt/dominator_tree.h"
  24. #include "source/opt/fold.h"
  25. #include "source/opt/function.h"
  26. #include "source/opt/instruction.h"
  27. #include "source/opt/ir_builder.h"
  28. #include "source/opt/ir_context.h"
  29. #include "source/opt/loop_descriptor.h"
  30. #include "source/opt/loop_utils.h"
  31. namespace spvtools {
  32. namespace opt {
  33. namespace {
  34. constexpr uint32_t kTypePointerStorageClassInIdx = 0;
  // This class handles the unswitch procedure for a given loop.
  // The unswitch will not happen if:
  //  - The loop has any instruction that prevents it;
  //  - The loop invariant condition is not uniform.
  39. class LoopUnswitch {
  40. public:
  41. LoopUnswitch(IRContext* context, Function* function, Loop* loop,
  42. LoopDescriptor* loop_desc)
  43. : function_(function),
  44. loop_(loop),
  45. loop_desc_(*loop_desc),
  46. context_(context),
  47. switch_block_(nullptr) {}
  // Returns true if the loop can be unswitched.
  // The loop can be unswitched if:
  //  - The loop has no instruction that prevents it (such as a barrier);
  //  - The loop has one conditional branch or switch that does not depend on
  //    the loop;
  //  - The loop invariant condition is uniform.
  54. bool CanUnswitchLoop() {
  55. if (switch_block_) return true;
  56. if (loop_->IsSafeToClone()) return false;
  57. CFG& cfg = *context_->cfg();
  58. for (uint32_t bb_id : loop_->GetBlocks()) {
  59. BasicBlock* bb = cfg.block(bb_id);
  60. if (loop_->GetLatchBlock() == bb) {
  61. continue;
  62. }
  63. if (bb->terminator()->IsBranch() &&
  64. bb->terminator()->opcode() != spv::Op::OpBranch) {
  65. if (IsConditionNonConstantLoopInvariant(bb->terminator())) {
  66. switch_block_ = bb;
  67. break;
  68. }
  69. }
  70. }
  71. return switch_block_;
  72. }
  // Returns the iterator to the basic block |bb_to_find| in |function_|.
  74. Function::iterator FindBasicBlockPosition(BasicBlock* bb_to_find) {
  75. Function::iterator it = function_->FindBlock(bb_to_find->id());
  76. assert(it != function_->end() && "Basic Block not found");
  77. return it;
  78. }
  // Creates a new basic block and inserts it into |function_| before the
  // position |ip|. Returns nullptr if TakeNextId() fails to provide an id for
  // the label. This function preserves the def/use and instr to block
  // managers.
  82. BasicBlock* CreateBasicBlock(Function::iterator ip) {
  83. uint32_t new_label_id = TakeNextId();
  84. if (new_label_id == 0) {
  85. return nullptr;
  86. }
  87. analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  88. BasicBlock* bb = &*ip.InsertBefore(std::unique_ptr<BasicBlock>(
  89. new BasicBlock(std::unique_ptr<Instruction>(new Instruction(
  90. context_, spv::Op::OpLabel, 0, new_label_id, {})))));
  91. bb->SetParent(function_);
  92. def_use_mgr->AnalyzeInstDef(bb->GetLabelInst());
  93. context_->set_instr_block(bb->GetLabelInst(), bb);
  94. return bb;
  95. }
  96. Instruction* GetValueForDefaultPathForSwitch(Instruction* switch_inst) {
  97. assert(switch_inst->opcode() == spv::Op::OpSwitch &&
  98. "The given instructoin must be an OpSwitch.");
  99. // Find a value that can be used to select the default path.
  100. // If none are possible, then it will just use 0. The value does not matter
  101. // because this path will never be taken because the new switch outside of
  102. // the loop cannot select this path either.
  103. std::vector<uint32_t> existing_values;
  104. for (uint32_t i = 2; i < switch_inst->NumInOperands(); i += 2) {
  105. existing_values.push_back(switch_inst->GetSingleWordInOperand(i));
  106. }
  107. std::sort(existing_values.begin(), existing_values.end());
  108. uint32_t value_for_default_path = 0;
  109. if (existing_values.size() < std::numeric_limits<uint32_t>::max()) {
  110. for (value_for_default_path = 0;
  111. value_for_default_path < existing_values.size();
  112. value_for_default_path++) {
  113. if (existing_values[value_for_default_path] != value_for_default_path) {
  114. break;
  115. }
  116. }
  117. }
  118. InstructionBuilder builder(
  119. context_, static_cast<Instruction*>(nullptr),
  120. IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  121. return builder.GetUintConstant(value_for_default_path);
  122. }
  123. // Unswitches |loop_|.
  124. bool PerformUnswitch() {
  125. assert(CanUnswitchLoop() &&
  126. "Cannot unswitch if there is not constant condition");
  127. assert(loop_->GetPreHeaderBlock() && "This loop has no pre-header block");
  128. assert(loop_->IsLCSSA() && "This loop is not in LCSSA form");
  129. CFG& cfg = *context_->cfg();
  130. DominatorTree* dom_tree =
  131. &context_->GetDominatorAnalysis(function_)->GetDomTree();
  132. analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  133. LoopUtils loop_utils(context_, loop_);
  134. //////////////////////////////////////////////////////////////////////////////
  135. // Step 1: Create the if merge block for structured modules.
  136. // To do so, the |loop_| merge block will become the if's one and we
  137. // create a merge for the loop. This will limit the amount of duplicated
  138. // code the structured control flow imposes.
  139. // For non structured program, the new loop will be connected to
  140. // the old loop's exit blocks.
  141. //////////////////////////////////////////////////////////////////////////////
  142. // Get the merge block if it exists.
  143. BasicBlock* if_merge_block = loop_->GetMergeBlock();
  144. // The merge block is only created if the loop has a unique exit block. We
  145. // have this guarantee for structured loops, for compute loop it will
  146. // trivially help maintain both a structured-like form and LCSAA.
  147. BasicBlock* loop_merge_block =
  148. if_merge_block
  149. ? CreateBasicBlock(FindBasicBlockPosition(if_merge_block))
  150. : nullptr;
  151. if (if_merge_block && !loop_merge_block) {
  152. return false;
  153. }
  154. if (loop_merge_block) {
  155. // Add the instruction and update managers.
  156. InstructionBuilder builder(
  157. context_, loop_merge_block,
  158. IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  159. builder.AddBranch(if_merge_block->id());
  160. builder.SetInsertPoint(&*loop_merge_block->begin());
  161. cfg.RegisterBlock(loop_merge_block);
  162. def_use_mgr->AnalyzeInstDef(loop_merge_block->GetLabelInst());
  163. bool ok = true;
  164. if_merge_block->ForEachPhiInst(
  165. [loop_merge_block, &ok, &builder, this](Instruction* phi) -> bool {
  166. Instruction* cloned = phi->Clone(context_);
  167. uint32_t new_id = TakeNextId();
  168. if (new_id == 0) {
  169. ok = false;
  170. return false;
  171. }
  172. cloned->SetResultId(new_id);
  173. builder.AddInstruction(std::unique_ptr<Instruction>(cloned));
  174. phi->SetInOperand(0, {cloned->result_id()});
  175. phi->SetInOperand(1, {loop_merge_block->id()});
  176. for (uint32_t j = phi->NumInOperands() - 1; j > 1; j--)
  177. phi->RemoveInOperand(j);
  178. return true;
  179. });
  180. if (!ok) return false;
  181. // Copy the predecessor list (will get invalidated otherwise).
  182. std::vector<uint32_t> preds = cfg.preds(if_merge_block->id());
  183. for (uint32_t pid : preds) {
  184. if (pid == loop_merge_block->id()) continue;
  185. BasicBlock* p_bb = cfg.block(pid);
  186. p_bb->ForEachSuccessorLabel(
  187. [if_merge_block, loop_merge_block](uint32_t* id) {
  188. if (*id == if_merge_block->id()) *id = loop_merge_block->id();
  189. });
  190. cfg.AddEdge(pid, loop_merge_block->id());
  191. }
  192. cfg.RemoveNonExistingEdges(if_merge_block->id());
  193. // Update loop descriptor.
  194. if (Loop* ploop = loop_->GetParent()) {
  195. ploop->AddBasicBlock(loop_merge_block);
  196. loop_desc_.SetBasicBlockToLoop(loop_merge_block->id(), ploop);
  197. }
  198. // Update the dominator tree.
  199. DominatorTreeNode* loop_merge_dtn =
  200. dom_tree->GetOrInsertNode(loop_merge_block);
  201. DominatorTreeNode* if_merge_block_dtn =
  202. dom_tree->GetOrInsertNode(if_merge_block);
  203. loop_merge_dtn->parent_ = if_merge_block_dtn->parent_;
  204. loop_merge_dtn->children_.push_back(if_merge_block_dtn);
  205. loop_merge_dtn->parent_->children_.push_back(loop_merge_dtn);
  206. if_merge_block_dtn->parent_->children_.erase(std::find(
  207. if_merge_block_dtn->parent_->children_.begin(),
  208. if_merge_block_dtn->parent_->children_.end(), if_merge_block_dtn));
  209. loop_->SetMergeBlock(loop_merge_block);
  210. }
  211. ////////////////////////////////////////////////////////////////////////////
  212. // Step 2: Build a new preheader for |loop_|, use the old one
  213. // for the invariant branch.
  214. ////////////////////////////////////////////////////////////////////////////
  215. BasicBlock* if_block = loop_->GetPreHeaderBlock();
  216. // If this preheader is the parent loop header,
  217. // we need to create a dedicated block for the if.
  218. BasicBlock* loop_pre_header =
  219. CreateBasicBlock(++FindBasicBlockPosition(if_block));
  220. if (!loop_pre_header) {
  221. return false;
  222. }
  223. InstructionBuilder(
  224. context_, loop_pre_header,
  225. IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
  226. .AddBranch(loop_->GetHeaderBlock()->id());
  227. if_block->tail()->SetInOperand(0, {loop_pre_header->id()});
  228. // Update loop descriptor.
  229. if (Loop* ploop = loop_desc_[if_block]) {
  230. ploop->AddBasicBlock(loop_pre_header);
  231. loop_desc_.SetBasicBlockToLoop(loop_pre_header->id(), ploop);
  232. }
  233. // Update the CFG.
  234. cfg.RegisterBlock(loop_pre_header);
  235. def_use_mgr->AnalyzeInstDef(loop_pre_header->GetLabelInst());
  236. cfg.AddEdge(if_block->id(), loop_pre_header->id());
  237. cfg.RemoveNonExistingEdges(loop_->GetHeaderBlock()->id());
  238. loop_->GetHeaderBlock()->ForEachPhiInst(
  239. [loop_pre_header, if_block](Instruction* phi) {
  240. phi->ForEachInId([loop_pre_header, if_block](uint32_t* id) {
  241. if (*id == if_block->id()) {
  242. *id = loop_pre_header->id();
  243. }
  244. });
  245. });
  246. loop_->SetPreHeaderBlock(loop_pre_header);
  247. // Update the dominator tree.
  248. DominatorTreeNode* loop_pre_header_dtn =
  249. dom_tree->GetOrInsertNode(loop_pre_header);
  250. DominatorTreeNode* if_block_dtn = dom_tree->GetTreeNode(if_block);
  251. loop_pre_header_dtn->parent_ = if_block_dtn;
  252. assert(
  253. if_block_dtn->children_.size() == 1 &&
  254. "A loop preheader should only have the header block as a child in the "
  255. "dominator tree");
  256. loop_pre_header_dtn->children_.push_back(if_block_dtn->children_[0]);
  257. if_block_dtn->children_.clear();
  258. if_block_dtn->children_.push_back(loop_pre_header_dtn);
  259. // Make domination queries valid.
  260. dom_tree->ResetDFNumbering();
  261. // Compute an ordered list of basic block to clone: loop blocks + pre-header
  262. // + merge block.
  263. loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks_, true, true);
  264. /////////////////////////////
  265. // Do the actual unswitch: //
  266. // - Clone the loop //
  267. // - Connect exits //
  268. // - Specialize the loop //
  269. /////////////////////////////
  270. Instruction* iv_condition = &*switch_block_->tail();
  271. spv::Op iv_opcode = iv_condition->opcode();
  272. Instruction* condition =
  273. def_use_mgr->GetDef(iv_condition->GetOperand(0).words[0]);
  274. analysis::ConstantManager* cst_mgr = context_->get_constant_mgr();
  275. const analysis::Type* cond_type =
  276. context_->get_type_mgr()->GetType(condition->type_id());
  277. // Build the list of value for which we need to clone and specialize the
  278. // loop.
  279. std::vector<std::pair<Instruction*, BasicBlock*>> constant_branch;
  280. // Special case for the original loop
  281. Instruction* original_loop_constant_value;
  282. if (iv_opcode == spv::Op::OpBranchConditional) {
  283. constant_branch.emplace_back(
  284. cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {0})),
  285. nullptr);
  286. original_loop_constant_value =
  287. cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {1}));
  288. } else {
  289. // We are looking to take the default branch, so we can't provide a
  290. // specific value.
  291. original_loop_constant_value =
  292. GetValueForDefaultPathForSwitch(iv_condition);
  293. if (!original_loop_constant_value) {
  294. return false;
  295. }
  296. if (!original_loop_constant_value) {
  297. return false;
  298. }
  299. for (uint32_t i = 2; i < iv_condition->NumInOperands(); i += 2) {
  300. constant_branch.emplace_back(
  301. cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(
  302. cond_type, iv_condition->GetInOperand(i).words)),
  303. nullptr);
  304. }
  305. }
  306. // Get the loop landing pads.
  307. std::unordered_set<uint32_t> if_merging_blocks;
  308. std::function<bool(uint32_t)> is_from_original_loop;
  309. if (loop_->GetHeaderBlock()->GetLoopMergeInst()) {
  310. if_merging_blocks.insert(if_merge_block->id());
  311. is_from_original_loop = [this](uint32_t id) {
  312. return loop_->IsInsideLoop(id) || loop_->GetMergeBlock()->id() == id;
  313. };
  314. } else {
  315. loop_->GetExitBlocks(&if_merging_blocks);
  316. is_from_original_loop = [this](uint32_t id) {
  317. return loop_->IsInsideLoop(id);
  318. };
  319. }
  320. for (auto& specialisation_pair : constant_branch) {
  321. Instruction* specialisation_value = specialisation_pair.first;
  322. //////////////////////////////////////////////////////////
  323. // Step 3: Duplicate |loop_|.
  324. //////////////////////////////////////////////////////////
  325. LoopUtils::LoopCloningResult clone_result;
  326. Loop* cloned_loop =
  327. loop_utils.CloneLoop(&clone_result, ordered_loop_blocks_);
  328. if (!cloned_loop) {
  329. return false;
  330. }
  331. specialisation_pair.second = cloned_loop->GetPreHeaderBlock();
  332. ////////////////////////////////////
  333. // Step 4: Specialize the loop. //
  334. ////////////////////////////////////
  335. {
  336. SpecializeLoop(cloned_loop, condition, specialisation_value);
  337. ///////////////////////////////////////////////////////////
  338. // Step 5: Connect convergent edges to the landing pads. //
  339. ///////////////////////////////////////////////////////////
  340. for (uint32_t merge_bb_id : if_merging_blocks) {
  341. BasicBlock* merge = context_->cfg()->block(merge_bb_id);
  342. // We are in LCSSA so we only care about phi instructions.
  343. merge->ForEachPhiInst(
  344. [is_from_original_loop, &clone_result](Instruction* phi) {
  345. uint32_t num_in_operands = phi->NumInOperands();
  346. for (uint32_t i = 0; i < num_in_operands; i += 2) {
  347. uint32_t pred = phi->GetSingleWordInOperand(i + 1);
  348. if (is_from_original_loop(pred)) {
  349. pred = clone_result.value_map_.at(pred);
  350. uint32_t incoming_value_id = phi->GetSingleWordInOperand(i);
  351. // Not all the incoming values are coming from the loop.
  352. ValueMapTy::iterator new_value =
  353. clone_result.value_map_.find(incoming_value_id);
  354. if (new_value != clone_result.value_map_.end()) {
  355. incoming_value_id = new_value->second;
  356. }
  357. phi->AddOperand({SPV_OPERAND_TYPE_ID, {incoming_value_id}});
  358. phi->AddOperand({SPV_OPERAND_TYPE_ID, {pred}});
  359. }
  360. }
  361. });
  362. }
  363. }
  364. function_->AddBasicBlocks(clone_result.cloned_bb_.begin(),
  365. clone_result.cloned_bb_.end(),
  366. ++FindBasicBlockPosition(if_block));
  367. }
  368. // Specialize the existing loop.
  369. SpecializeLoop(loop_, condition, original_loop_constant_value);
  370. BasicBlock* original_loop_target = loop_->GetPreHeaderBlock();
  371. /////////////////////////////////////
  372. // Finally: connect the new loops. //
  373. /////////////////////////////////////
  374. // Delete the old jump
  375. context_->KillInst(&*if_block->tail());
  376. InstructionBuilder builder(context_, if_block);
  377. if (iv_opcode == spv::Op::OpBranchConditional) {
  378. assert(constant_branch.size() == 1);
  379. builder.AddConditionalBranch(
  380. condition->result_id(), original_loop_target->id(),
  381. constant_branch[0].second->id(),
  382. if_merge_block ? if_merge_block->id() : kInvalidId);
  383. } else {
  384. std::vector<std::pair<Operand::OperandData, uint32_t>> targets;
  385. for (auto& t : constant_branch) {
  386. targets.emplace_back(t.first->GetInOperand(0).words, t.second->id());
  387. }
  388. builder.AddSwitch(condition->result_id(), original_loop_target->id(),
  389. targets,
  390. if_merge_block ? if_merge_block->id() : kInvalidId);
  391. }
  392. switch_block_ = nullptr;
  393. ordered_loop_blocks_.clear();
  394. context_->InvalidateAnalysesExceptFor(
  395. IRContext::Analysis::kAnalysisLoopAnalysis);
  396. return true;
  397. }
  398. private:
  399. using ValueMapTy = std::unordered_map<uint32_t, uint32_t>;
  400. using BlockMapTy = std::unordered_map<uint32_t, BasicBlock*>;
  401. Function* function_;
  402. Loop* loop_;
  403. LoopDescriptor& loop_desc_;
  404. IRContext* context_;
  405. BasicBlock* switch_block_;
  406. // Map between instructions and if they are dynamically uniform.
  407. std::unordered_map<uint32_t, bool> dynamically_uniform_;
  408. // The loop basic blocks in structured order.
  409. std::vector<BasicBlock*> ordered_loop_blocks_;
  410. // Returns the next usable id for the context.
  411. uint32_t TakeNextId() { return context_->TakeNextId(); }
  // Specializes |loop| assuming the instruction |to_version_insn| takes the
  // value |cst_value|: every use of |to_version_insn| located in a basic
  // block of |loop| is rewritten to use |cst_value| instead. Uses that are
  // outside of |loop| are left untouched.
  //
  // Requirements:
  //  - |loop| must be in the LCSSA form;
  //  - |cst_value| must be constant.
  422. void SpecializeLoop(Loop* loop, Instruction* to_version_insn,
  423. Instruction* cst_value) {
  424. analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  425. std::function<bool(uint32_t)> ignore_node;
  426. ignore_node = [loop](uint32_t bb_id) { return !loop->IsInsideLoop(bb_id); };
  427. std::vector<std::pair<Instruction*, uint32_t>> use_list;
  428. def_use_mgr->ForEachUse(to_version_insn,
  429. [&use_list, &ignore_node, this](
  430. Instruction* inst, uint32_t operand_index) {
  431. BasicBlock* bb = context_->get_instr_block(inst);
  432. if (!bb || ignore_node(bb->id())) {
  433. // Out of the loop, the specialization does not
  434. // apply any more.
  435. return;
  436. }
  437. use_list.emplace_back(inst, operand_index);
  438. });
  439. // First pass: inject the specialized value into the loop (and only the
  440. // loop).
  441. for (auto use : use_list) {
  442. Instruction* inst = use.first;
  443. uint32_t operand_index = use.second;
  444. // To also handle switch, cst_value can be nullptr: this case
  445. // means that we are looking to branch to the default target of
  446. // the switch. We don't actually know its value so we don't touch
  447. // it if it not a switch.
  448. assert(cst_value && "We do not have a value to use.");
  449. inst->SetOperand(operand_index, {cst_value->result_id()});
  450. def_use_mgr->AnalyzeInstUse(inst);
  451. }
  452. }
  453. // Returns true if |var| is dynamically uniform.
  454. // Note: this is currently approximated as uniform.
  455. bool IsDynamicallyUniform(Instruction* var, const BasicBlock* entry,
  456. const DominatorTree& post_dom_tree) {
  457. assert(post_dom_tree.IsPostDominator());
  458. analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  459. auto it = dynamically_uniform_.find(var->result_id());
  460. if (it != dynamically_uniform_.end()) return it->second;
  461. analysis::DecorationManager* dec_mgr = context_->get_decoration_mgr();
  462. bool& is_uniform = dynamically_uniform_[var->result_id()];
  463. is_uniform = false;
  464. dec_mgr->WhileEachDecoration(var->result_id(),
  465. uint32_t(spv::Decoration::Uniform),
  466. [&is_uniform](const Instruction&) {
  467. is_uniform = true;
  468. return false;
  469. });
  470. if (is_uniform) {
  471. return is_uniform;
  472. }
  473. BasicBlock* parent = context_->get_instr_block(var);
  474. if (!parent) {
  475. return is_uniform = true;
  476. }
  477. if (!post_dom_tree.Dominates(parent->id(), entry->id())) {
  478. return is_uniform = false;
  479. }
  480. if (var->opcode() == spv::Op::OpLoad) {
  481. const uint32_t PtrTypeId =
  482. def_use_mgr->GetDef(var->GetSingleWordInOperand(0))->type_id();
  483. const Instruction* PtrTypeInst = def_use_mgr->GetDef(PtrTypeId);
  484. auto storage_class = spv::StorageClass(
  485. PtrTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx));
  486. if (storage_class != spv::StorageClass::Uniform &&
  487. storage_class != spv::StorageClass::UniformConstant) {
  488. return is_uniform = false;
  489. }
  490. } else {
  491. if (!context_->IsCombinatorInstruction(var)) {
  492. return is_uniform = false;
  493. }
  494. }
  495. return is_uniform = var->WhileEachInId([entry, &post_dom_tree,
  496. this](const uint32_t* id) {
  497. return IsDynamicallyUniform(context_->get_def_use_mgr()->GetDef(*id),
  498. entry, post_dom_tree);
  499. });
  500. }
  501. // Returns true if |insn| is not a constant, but is loop invariant and
  502. // dynamically uniform.
  503. bool IsConditionNonConstantLoopInvariant(Instruction* insn) {
  504. assert(insn->IsBranch());
  505. assert(insn->opcode() != spv::Op::OpBranch);
  506. analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  507. Instruction* condition = def_use_mgr->GetDef(insn->GetOperand(0).words[0]);
  508. if (condition->IsConstant()) {
  509. return false;
  510. }
  511. if (loop_->IsInsideLoop(condition)) {
  512. return false;
  513. }
  514. return IsDynamicallyUniform(
  515. condition, function_->entry().get(),
  516. context_->GetPostDominatorAnalysis(function_)->GetDomTree());
  517. }
  518. };
  519. } // namespace
  520. Pass::Status LoopUnswitchPass::Process() {
  521. bool modified = false;
  522. Module* module = context()->module();
  523. // Process each function in the module
  524. for (Function& f : *module) {
  525. Pass::Status status = ProcessFunction(&f);
  526. if (status == Status::Failure) return Status::Failure;
  527. if (status == Status::SuccessWithChange) modified = true;
  528. }
  529. return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
  530. }
  531. Pass::Status LoopUnswitchPass::ProcessFunction(Function* f) {
  532. bool modified = false;
  533. std::unordered_set<Loop*> processed_loop;
  534. LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f);
  535. bool loop_changed = true;
  536. while (loop_changed) {
  537. loop_changed = false;
  538. for (Loop& loop : make_range(
  539. ++TreeDFIterator<Loop>(loop_descriptor.GetPlaceholderRootLoop()),
  540. TreeDFIterator<Loop>())) {
  541. if (processed_loop.count(&loop)) continue;
  542. processed_loop.insert(&loop);
  543. LoopUnswitch unswitcher(context(), f, &loop, &loop_descriptor);
  544. while (unswitcher.CanUnswitchLoop()) {
  545. if (!loop.IsLCSSA()) {
  546. if (!LoopUtils(context(), &loop).MakeLoopClosedSSA()) {
  547. return Status::Failure;
  548. }
  549. }
  550. if (!unswitcher.PerformUnswitch()) {
  551. return Status::Failure;
  552. }
  553. modified = true;
  554. loop_changed = true;
  555. }
  556. if (loop_changed) break;
  557. }
  558. }
  559. return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
  560. }
  561. } // namespace opt
  562. } // namespace spvtools