// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "source/opt/loop_peeling.h"

#include <functional>
#include <memory>
#include <unordered_set>
#include <vector>

#include "source/opt/ir_builder.h"
#include "source/opt/ir_context.h"
#include "source/opt/loop_descriptor.h"
#include "source/opt/loop_utils.h"
#include "source/opt/scalar_analysis.h"
#include "source/opt/scalar_analysis_nodes.h"

namespace spvtools {
namespace opt {
namespace {

// Gathers into |blocks_in_path| the set of blocks that appear on any path
// from |block| back to |entry| (walking predecessor edges in |cfg|).
void GetBlocksInPath(uint32_t block, uint32_t entry,
                     std::unordered_set<uint32_t>* blocks_in_path,
                     const CFG& cfg) {
  for (uint32_t pid : cfg.preds(block)) {
    if (blocks_in_path->insert(pid).second) {
      if (pid != entry) {
        GetBlocksInPath(pid, entry, blocks_in_path, cfg);
      }
    }
  }
}

}  // namespace

size_t LoopPeelingPass::code_grow_threshold_ = 1000;
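
// Clones |loop_| and inserts the clone right before |loop_|, rewiring the
// CFG so that the cloned loop's single exit falls through into the original
// loop's header. The original header phis are patched so that the second
// loop resumes from the values the first loop exited with.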
void LoopPeeling::DuplicateAndConnectLoop(
    LoopUtils::LoopCloningResult* clone_results) {
  CFG& cfg = *context_->cfg();
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  assert(CanPeelLoop() && "Cannot peel loop!");

  std::vector<BasicBlock*> ordered_loop_blocks;
  // TODO(1841): Handle failure to create pre-header.
  BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();

  loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);

  cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks);

  // Add the cloned basic blocks to the function.
  Function::iterator it =
      loop_utils_.GetFunction()->FindBlock(pre_header->id());
  assert(it != loop_utils_.GetFunction()->end() &&
         "Pre-header not found in the function.");
  loop_utils_.GetFunction()->AddBasicBlocks(
      clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it);

  // Make |loop_|'s preheader become |cloned_loop_|'s preheader.
  BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock();
  pre_header->ForEachSuccessorLabel(
      [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); });

  // Update the CFG.
  cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id());
  cloned_loop_->SetPreHeaderBlock(pre_header);
  loop_->SetPreHeaderBlock(nullptr);

  // When cloning the loop, we did not clone the merge block, so currently
  // |cloned_loop_| shares its merge block with |loop_|.
  // We mutate all branches from |cloned_loop_| blocks targeting |loop_|'s
  // merge block into branches to |loop_|'s header (so the header also acts
  // as the merge block of |cloned_loop_|).
  uint32_t cloned_loop_exit = 0;
  for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) {
    if (loop_->IsInsideLoop(pred_id)) continue;
    BasicBlock* bb = cfg.block(pred_id);
    assert(cloned_loop_exit == 0 && "The loop has multiple exits.");
    cloned_loop_exit = bb->id();
    bb->ForEachSuccessorLabel([this](uint32_t* succ) {
      if (*succ == loop_->GetMergeBlock()->id())
        *succ = loop_->GetHeaderBlock()->id();
    });
  }

  // Update the CFG.
  cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id());
  cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id());

  // Patch the phis of the original loop header:
  //   - Set the loop entry branch to come from the cloned loop exit block;
  //   - Set the initial value of each phi to the corresponding cloned loop
  //     exit value.
  //
  // In other words, we patch the iterating-value initializers of the original
  // loop with the corresponding cloned loop exit values. This connects the
  // cloned loop's iterating values to the original loop, making sure that the
  // second loop starts with the last value of the first loop.
  //
  // For example, a loop like:
  //
  //   int z = 0;
  //   for (int i = 0; i++ < M; i += cst1) {
  //     if (cond)
  //       z += cst2;
  //   }
  //
  // will become:
  //
  //   int z = 0;
  //   int i = 0;
  //   for (; i++ < M; i += cst1) {
  //     if (cond)
  //       z += cst2;
  //   }
  //   for (; i++ < M; i += cst1) {
  //     if (cond)
  //       z += cst2;
  //   }
  loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr,
                                           clone_results,
                                           this](Instruction* phi) {
    for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
      if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) {
        phi->SetInOperand(i,
                          {clone_results->value_map_.at(
                              exit_value_.at(phi->result_id())->result_id())});
        phi->SetInOperand(i + 1, {cloned_loop_exit});
        def_use_mgr->AnalyzeInstUse(phi);
        return;
      }
    }
  });

  // Force the creation of a new preheader for the original loop and set it as
  // the merge block for the cloned loop.
  // TODO(1841): Handle failure to create pre-header.
  cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock());
}
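
// Makes sure the cloned loop carries a canonical induction variable (starting
// at 0 and incremented by 1 per iteration). If the original loop already had
// one, its clone is reused; otherwise a fresh phi/add pair is built into the
// cloned loop.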
void LoopPeeling::InsertCanonicalInductionVariable(
    LoopUtils::LoopCloningResult* clone_results) {
  if (original_loop_canonical_induction_variable_) {
    canonical_induction_variable_ =
        context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at(
            original_loop_canonical_induction_variable_->result_id()));
    return;
  }

  BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail();
  if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) {
    --insert_point;
  }
  InstructionBuilder builder(
      context_, &*insert_point,
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* uint_1_cst =
      builder.GetIntConstant<uint32_t>(1, int_type_->IsSigned());
  // Create the increment.
  // Note that we build "1 + 1" here: one of the operands should be the phi
  // value, but we don't have it yet. The operand will be set later.
  Instruction* iv_inc = builder.AddIAdd(
      uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id());

  builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin());

  canonical_induction_variable_ = builder.AddPhi(
      uint_1_cst->type_id(),
      {builder.GetIntConstant<uint32_t>(0, int_type_->IsSigned())->result_id(),
       GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(),
       GetClonedLoop()->GetLatchBlock()->id()});
  // Connect everything.
  iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()});

  // Update the def/use manager.
  context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc);

  // If the loop is in do-while form, use the incremented value.
  if (do_while_form_) {
    canonical_induction_variable_ = iv_inc;
  }
}
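
// Collects into |operations| the instruction |iterator| together with the
// transitive closure of the in-loop instructions it depends on.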
void LoopPeeling::GetIteratorUpdateOperations(
    const Loop* loop, Instruction* iterator,
    std::unordered_set<Instruction*>* operations) {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  operations->insert(iterator);
  iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) {
    Instruction* insn = def_use_mgr->GetDef(*id);
    if (insn->opcode() == spv::Op::OpLabel) {
      return;
    }
    if (operations->count(insn)) {
      return;
    }
    if (!loop->IsInsideLoop(insn)) {
      return;
    }
    GetIteratorUpdateOperations(loop, insn, operations);
  });
}
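
// Returns true if evaluating the loop exit condition is free of observable
// side effects, i.e. every instruction on the paths from the loop header to
// the condition block is a branch, a merge declaration, or a combinator
// instruction.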
bool LoopPeeling::IsConditionCheckSideEffectFree() const {
  CFG& cfg = *context_->cfg();

  // The do-while form does not cause issues; the algorithm already takes the
  // first iteration into account.
  if (!do_while_form_) {
    uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
    std::unordered_set<uint32_t> blocks_in_path;

    blocks_in_path.insert(condition_block_id);
    GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(),
                    &blocks_in_path, cfg);

    for (uint32_t bb_id : blocks_in_path) {
      BasicBlock* bb = cfg.block(bb_id);
      if (!bb->WhileEachInst([this](Instruction* insn) {
            if (insn->IsBranch()) return true;
            switch (insn->opcode()) {
              case spv::Op::OpLabel:
              case spv::Op::OpSelectionMerge:
              case spv::Op::OpLoopMerge:
                return true;
              default:
                break;
            }
            return context_->IsCombinatorInstruction(insn);
          })) {
        return false;
      }
    }
  }

  return true;
}
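
// For each phi of the loop header, records in |exit_value_| the value the
// phi takes when the loop exits (nullptr when it cannot be determined), and
// detects whether the loop is in do-while form, i.e. the condition block
// also branches back to the header.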
void LoopPeeling::GetIteratingExitValues() {
  CFG& cfg = *context_->cfg();

  loop_->GetHeaderBlock()->ForEachPhiInst(
      [this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; });

  if (!loop_->GetMergeBlock()) {
    return;
  }
  if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
    return;
  }
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];

  auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id());
  do_while_form_ = std::find(header_pred.begin(), header_pred.end(),
                             condition_block_id) != header_pred.end();
  if (do_while_form_) {
    loop_->GetHeaderBlock()->ForEachPhiInst(
        [condition_block_id, def_use_mgr, this](Instruction* phi) {
          std::unordered_set<Instruction*> operations;

          for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
            if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) {
              exit_value_[phi->result_id()] =
                  def_use_mgr->GetDef(phi->GetSingleWordInOperand(i));
            }
          }
        });
  } else {
    DominatorTree* dom_tree =
        &context_->GetDominatorAnalysis(loop_utils_.GetFunction())
             ->GetDomTree();
    BasicBlock* condition_block = cfg.block(condition_block_id);

    loop_->GetHeaderBlock()->ForEachPhiInst(
        [dom_tree, condition_block, this](Instruction* phi) {
          std::unordered_set<Instruction*> operations;

          // This is not the back-edge value; check if the phi instruction is
          // the only possible candidate: the phi is the exit value only when
          // none of its update operations dominate the condition block.
          GetIteratorUpdateOperations(loop_, phi, &operations);

          for (Instruction* insn : operations) {
            if (insn == phi) {
              continue;
            }
            if (dom_tree->Dominates(context_->get_instr_block(insn),
                                    condition_block)) {
              return;
            }
          }
          exit_value_[phi->result_id()] = phi;
        });
  }
}
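
// Replaces the exit condition of the cloned loop with the boolean built by
// |condition_builder|, and reorders the branch targets so that the "true"
// target stays inside the loop and the "false" target exits to the cloned
// loop's merge block.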
void LoopPeeling::FixExitCondition(
    const std::function<uint32_t(Instruction*)>& condition_builder) {
  CFG& cfg = *context_->cfg();

  uint32_t condition_block_id = 0;
  for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) {
    if (GetClonedLoop()->IsInsideLoop(id)) {
      condition_block_id = id;
      break;
    }
  }
  assert(condition_block_id != 0 && "2nd loop is improperly connected");

  BasicBlock* condition_block = cfg.block(condition_block_id);
  Instruction* exit_condition = condition_block->terminator();
  assert(exit_condition->opcode() == spv::Op::OpBranchConditional);
  BasicBlock::iterator insert_point = condition_block->tail();
  if (condition_block->GetMergeInst()) {
    --insert_point;
  }

  exit_condition->SetInOperand(0, {condition_builder(&*insert_point)});

  uint32_t to_continue_block_idx =
      GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1))
          ? 1
          : 2;
  exit_condition->SetInOperand(
      1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)});
  exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()});

  // Update the def/use manager.
  context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition);
}
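
// Creates a new basic block between |bb| and its single predecessor, and
// updates the CFG, loop descriptor, phi instructions, and def/use manager
// accordingly. Returns the new block.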
BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  CFG& cfg = *context_->cfg();
  assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor");

  // TODO(1841): Handle id overflow.
  std::unique_ptr<BasicBlock> new_bb =
      MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction(
          context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})));
  // Update the loop descriptor.
  Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb];
  if (in_loop) {
    in_loop->AddBasicBlock(new_bb.get());
    loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop);
  }

  context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get());
  def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst());

  BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]);
  bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) {
    if (*id == bb->id()) {
      *id = new_bb->id();
    }
  });
  cfg.RemoveEdge(bb_pred->id(), bb->id());
  cfg.AddEdge(bb_pred->id(), new_bb->id());
  def_use_mgr->AnalyzeInstUse(&*bb_pred->tail());

  // Update the incoming block recorded in |bb|'s phis.
  bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) {
    phi->SetInOperand(1, {new_bb->id()});
    def_use_mgr->AnalyzeInstUse(phi);
  });
  InstructionBuilder(
      context_, new_bb.get(),
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
      .AddBranch(bb->id());
  cfg.RegisterBlock(new_bb.get());

  // Add the basic block to the function.
  Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id());
  assert(it != loop_utils_.GetFunction()->end() &&
         "Basic block not found in the function.");
  BasicBlock* ret = new_bb.get();
  loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it);
  return ret;
}
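
// Guards |loop| with an if: the header of |loop| is only reached when
// |condition| holds, otherwise control branches to |if_merge| (which also
// serves as the selection merge). Returns the block holding the new
// conditional branch.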
BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition,
                                     BasicBlock* if_merge) {
  // TODO(1841): Handle failure to create pre-header.
  BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock();
  // The block will no longer be a pre-header because of the if.
  loop->SetPreHeaderBlock(nullptr);
  // Kill the branch to the header.
  context_->KillInst(&*if_block->tail());

  InstructionBuilder builder(
      context_, if_block,
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  builder.AddConditionalBranch(condition->result_id(),
                               loop->GetHeaderBlock()->id(), if_merge->id(),
                               if_merge->id());

  return if_block;
}
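
// Peels the first |peel_factor| iterations: the cloned loop executes
// min(|peel_factor|, |loop_iteration_count_|) iterations before the original
// loop, and the original loop is skipped entirely when no iterations remain
// for it.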
void LoopPeeling::PeelBefore(uint32_t peel_factor) {
  assert(CanPeelLoop() && "Cannot peel loop");

  LoopUtils::LoopCloningResult clone_results;

  // Clone the loop and insert the cloned one before the loop.
  DuplicateAndConnectLoop(&clone_results);

  // Add a canonical induction variable "canonical_induction_variable_".
  InsertCanonicalInductionVariable(&clone_results);

  InstructionBuilder builder(
      context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* factor =
      builder.GetIntConstant(peel_factor, int_type_->IsSigned());

  Instruction* has_remaining_iteration = builder.AddLessThan(
      factor->result_id(), loop_iteration_count_->result_id());
  Instruction* max_iteration = builder.AddSelect(
      factor->type_id(), has_remaining_iteration->result_id(),
      factor->result_id(), loop_iteration_count_->result_id());

  // Change the exit condition of the cloned loop to (the loop exits when it
  // becomes false):
  //   "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
  FixExitCondition([max_iteration, this](Instruction* insert_before_point) {
    return InstructionBuilder(context_, insert_before_point,
                              IRContext::kAnalysisDefUse |
                                  IRContext::kAnalysisInstrToBlockMapping)
        .AddLessThan(canonical_induction_variable_->result_id(),
                     max_iteration->result_id())
        ->result_id();
  });

  // "Protect" the second loop: it can only be executed if
  // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_).
  BasicBlock* if_merge_block = loop_->GetMergeBlock();
  loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock()));
  // Prevent the second loop from being executed if we already executed all the
  // required iterations.
  BasicBlock* if_block =
      ProtectLoop(loop_, has_remaining_iteration, if_merge_block);
  // Patch the phis of the merge block.
  if_merge_block->ForEachPhiInst(
      [&clone_results, if_block, this](Instruction* phi) {
        // |if_merge_block| previously had only one predecessor.
        uint32_t incoming_value = phi->GetSingleWordInOperand(0);
        auto def_in_loop = clone_results.value_map_.find(incoming_value);
        if (def_in_loop != clone_results.value_map_.end())
          incoming_value = def_in_loop->second;
        phi->AddOperand(
            {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}});
        phi->AddOperand(
            {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}});
        context_->get_def_use_mgr()->AnalyzeInstUse(phi);
      });

  context_->InvalidateAnalysesExceptFor(
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
      IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
}
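
// Peels the last |peel_factor| iterations: the cloned loop executes all but
// the last |peel_factor| iterations, then the original loop executes the
// remaining ones. The first loop is skipped entirely when the peeled tail
// covers the whole iteration space.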
void LoopPeeling::PeelAfter(uint32_t peel_factor) {
  assert(CanPeelLoop() && "Cannot peel loop");

  LoopUtils::LoopCloningResult clone_results;

  // Clone the loop and insert the cloned one before the loop.
  DuplicateAndConnectLoop(&clone_results);

  // Add a canonical induction variable "canonical_induction_variable_".
  InsertCanonicalInductionVariable(&clone_results);

  InstructionBuilder builder(
      context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* factor =
      builder.GetIntConstant(peel_factor, int_type_->IsSigned());

  Instruction* has_remaining_iteration = builder.AddLessThan(
      factor->result_id(), loop_iteration_count_->result_id());

  // Change the exit condition of the cloned loop to (the loop exits when it
  // becomes false):
  //   "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
  FixExitCondition([factor, this](Instruction* insert_before_point) {
    InstructionBuilder cond_builder(
        context_, insert_before_point,
        IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
    // Build the check: canonical_induction_variable_ + factor <
    // iteration_count.
    return cond_builder
        .AddLessThan(cond_builder
                         .AddIAdd(canonical_induction_variable_->type_id(),
                                  canonical_induction_variable_->result_id(),
                                  factor->result_id())
                         ->result_id(),
                     loop_iteration_count_->result_id())
        ->result_id();
  });

  // "Protect" the first loop: it can only be executed if
  // factor < loop_iteration_count_.
  // The original loop's pre-header was the cloned loop's merge block, so give
  // the cloned loop a fresh merge block.
  GetClonedLoop()->SetMergeBlock(
      CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock()));
  // Use the second loop's preheader as the if merge block, and skip the first
  // loop when only the peeled iterations are needed.
  BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration,
                                     GetOriginalLoop()->GetPreHeaderBlock());

  // Patch the phis of the header block.
  // We added an if to enclose the first loop, and because the header phis are
  // connected to the exit values of the first loop, those definitions no
  // longer dominate the preheader.
  // We have to add the required phi instructions to the preheader (our if
  // merge block) and patch the header phis.
  GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst(
      [&clone_results, if_block, this](Instruction* phi) {
        analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

        auto find_value_idx = [](Instruction* phi_inst, Loop* loop) {
          uint32_t preheader_value_idx =
              !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2;
          return preheader_value_idx;
        };

        Instruction* cloned_phi =
            def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id()));
        uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand(
            find_value_idx(cloned_phi, GetClonedLoop()));

        Instruction* new_phi =
            InstructionBuilder(context_,
                               &*GetOriginalLoop()->GetPreHeaderBlock()->tail(),
                               IRContext::kAnalysisDefUse |
                                   IRContext::kAnalysisInstrToBlockMapping)
                .AddPhi(phi->type_id(),
                        {phi->GetSingleWordInOperand(
                             find_value_idx(phi, GetOriginalLoop())),
                         GetClonedLoop()->GetMergeBlock()->id(),
                         cloned_preheader_value, if_block->id()});

        phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()),
                          {new_phi->result_id()});
        def_use_mgr->AnalyzeInstUse(phi);
      });

  context_->InvalidateAnalysesExceptFor(
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
      IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
}

Pass::Status LoopPeelingPass::Process() {
  bool modified = false;
  Module* module = context()->module();

  // Process each function in the module.
  for (Function& f : *module) {
    modified |= ProcessFunction(&f);
  }

  return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}

bool LoopPeelingPass::ProcessFunction(Function* f) {
  bool modified = false;
  LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f);

  std::vector<Loop*> to_process_loop;
  to_process_loop.reserve(loop_descriptor.NumLoops());
  for (Loop& l : loop_descriptor) {
    to_process_loop.push_back(&l);
  }

  ScalarEvolutionAnalysis scev_analysis(context());

  for (Loop* loop : to_process_loop) {
    CodeMetrics loop_size;
    loop_size.Analyze(*loop);
    auto try_peel = [&loop_size, &modified,
                     this](Loop* loop_to_peel) -> Loop* {
      if (!loop_to_peel->IsLCSSA()) {
        LoopUtils(context(), loop_to_peel).MakeLoopClosedSSA();
      }

      bool peeled_loop;
      Loop* still_peelable_loop;
      std::tie(peeled_loop, still_peelable_loop) =
          ProcessLoop(loop_to_peel, &loop_size);

      if (peeled_loop) {
        modified = true;
      }

      return still_peelable_loop;
    };
    Loop* still_peelable_loop = try_peel(loop);
    // The pass works out the maximum factor by which a loop can be peeled.
    // If the loop can potentially be peeled again, there is only one possible
    // direction left, so only one more call is needed.
    if (still_peelable_loop) {
      try_peel(loop);
    }
  }

  return modified;
}
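
// Processes |loop|: if a conditional branch in the loop body would become
// constant after peeling and the code-size budget allows it, peels the loop.
// Returns whether the loop was peeled and, if another peeling opportunity
// remains, the loop on which peeling should be retried.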
std::pair<bool, Loop*> LoopPeelingPass::ProcessLoop(Loop* loop,
                                                    CodeMetrics* loop_size) {
  ScalarEvolutionAnalysis* scev_analysis =
      context()->GetScalarEvolutionAnalysis();
  // Default values for bailing out.
  std::pair<bool, Loop*> bail_out{false, nullptr};

  BasicBlock* exit_block = loop->FindConditionBlock();
  if (!exit_block) {
    return bail_out;
  }

  Instruction* exiting_iv = loop->FindConditionVariable(exit_block);
  if (!exiting_iv) {
    return bail_out;
  }
  size_t iterations = 0;
  if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(),
                                    &iterations)) {
    return bail_out;
  }
  if (!iterations) {
    return bail_out;
  }

  Instruction* canonical_induction_variable = nullptr;

  loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable,
                                            scev_analysis,
                                            this](Instruction* insn) {
    if (const SERecurrentNode* iv =
            scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) {
      const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode();
      const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode();
      if (offset && coeff && offset->FoldToSingleValue() == 0 &&
          coeff->FoldToSingleValue() == 1) {
        if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) {
          canonical_induction_variable = insn;
          return false;
        }
      }
    }
    return true;
  });

  bool is_signed = canonical_induction_variable
                       ? context()
                             ->get_type_mgr()
                             ->GetType(canonical_induction_variable->type_id())
                             ->AsInteger()
                             ->IsSigned()
                       : false;

  LoopPeeling peeler(
      loop,
      InstructionBuilder(
          context(), loop->GetHeaderBlock(),
          IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
          .GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations),
                                    is_signed),
      canonical_induction_variable);

  if (!peeler.CanPeelLoop()) {
    return bail_out;
  }

  // For each basic block in the loop, check if it can be peeled. If it
  // can, get the direction (before/after) and by which factor.
  LoopPeelingInfo peel_info(loop, iterations, scev_analysis);

  uint32_t peel_before_factor = 0;
  uint32_t peel_after_factor = 0;

  for (uint32_t block : loop->GetBlocks()) {
    if (block == exit_block->id()) {
      continue;
    }
    BasicBlock* bb = cfg()->block(block);
    PeelDirection direction;
    uint32_t factor;
    std::tie(direction, factor) = peel_info.GetPeelingInfo(bb);

    if (direction == PeelDirection::kNone) {
      continue;
    }
    if (direction == PeelDirection::kBefore) {
      peel_before_factor = std::max(peel_before_factor, factor);
    } else {
      assert(direction == PeelDirection::kAfter);
      peel_after_factor = std::max(peel_after_factor, factor);
    }
  }
  PeelDirection direction = PeelDirection::kNone;
  uint32_t factor = 0;

  // Find in which direction we should peel.
  if (peel_before_factor) {
    factor = peel_before_factor;
    direction = PeelDirection::kBefore;
  }

  if (peel_after_factor) {
    if (peel_before_factor < peel_after_factor) {
      // Favor a peel-after here and give the peel-before another shot later.
      factor = peel_after_factor;
      direction = PeelDirection::kAfter;
    }
  }

  // Do the peel if we can.
  if (direction == PeelDirection::kNone) return bail_out;

  // This does not take into account branch elimination opportunities or
  // unrolling; it assumes the peeled loop will be unrolled as well.
  if (factor * loop_size->roi_size_ > code_grow_threshold_) {
    return bail_out;
  }
  loop_size->roi_size_ *= factor;

  // Find out if the loop should be peeled again.
  Loop* extra_opportunity = nullptr;

  if (direction == PeelDirection::kBefore) {
    peeler.PeelBefore(factor);
    if (stats_) {
      stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor);
    }
    if (peel_after_factor) {
      // We could have peeled after; give it another try.
      extra_opportunity = peeler.GetOriginalLoop();
    }
  } else {
    peeler.PeelAfter(factor);
    if (stats_) {
      stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor);
    }
    if (peel_before_factor) {
      // We could have peeled before; give it another try.
      extra_opportunity = peeler.GetClonedLoop();
    }
  }

  return {true, extra_opportunity};
}

uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand(
    Instruction* condition) const {
  for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
    BasicBlock* bb =
        context_->get_instr_block(condition->GetSingleWordInOperand(i));
    if (bb && loop_->IsInsideLoop(bb)) {
      return condition->GetSingleWordInOperand(i);
    }
  }

  return 0;
}

uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand(
    Instruction* condition) const {
  for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
    BasicBlock* bb =
        context_->get_instr_block(condition->GetSingleWordInOperand(i));
    if (!bb || !loop_->IsInsideLoop(bb)) {
      return condition->GetSingleWordInOperand(i);
    }
  }

  return 0;
}

static bool IsHandledCondition(spv::Op opcode) {
  switch (opcode) {
    case spv::Op::OpIEqual:
    case spv::Op::OpINotEqual:
    case spv::Op::OpUGreaterThan:
    case spv::Op::OpSGreaterThan:
    case spv::Op::OpUGreaterThanEqual:
    case spv::Op::OpSGreaterThanEqual:
    case spv::Op::OpULessThan:
    case spv::Op::OpSLessThan:
    case spv::Op::OpULessThanEqual:
    case spv::Op::OpSLessThanEqual:
      return true;
    default:
      return false;
  }
}
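
// Inspects the conditional branch terminating |bb|. When its condition
// compares a loop-invariant expression with a recurrence of this loop,
// returns the peeling direction and factor that would make the branch
// constant; otherwise returns the none direction.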
LoopPeelingPass::LoopPeelingInfo::Direction
LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const {
  if (bb->terminator()->opcode() != spv::Op::OpBranchConditional) {
    return GetNoneDirection();
  }

  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  Instruction* condition =
      def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(0));

  if (!IsHandledCondition(condition->opcode())) {
    return GetNoneDirection();
  }

  if (!GetFirstLoopInvariantOperand(condition)) {
    // No loop-invariant operand: the condition cannot be peeled by this pass.
    return GetNoneDirection();
  }
  if (!GetFirstNonLoopInvariantOperand(condition)) {
    // Seems to be a job for the unswitch pass.
    return GetNoneDirection();
  }

  // Left-hand side.
  SExpression lhs = scev_analysis_->AnalyzeInstruction(
      def_use_mgr->GetDef(condition->GetSingleWordInOperand(0)));
  if (lhs->GetType() == SENode::CanNotCompute) {
    // We can't draw any conclusion.
    return GetNoneDirection();
  }

  // Right-hand side.
  SExpression rhs = scev_analysis_->AnalyzeInstruction(
      def_use_mgr->GetDef(condition->GetSingleWordInOperand(1)));
  if (rhs->GetType() == SENode::CanNotCompute) {
    // We can't draw any conclusion.
    return GetNoneDirection();
  }

  // Only take into account recurrent expressions over the current loop.
  bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs);
  bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs);

  if ((is_lhs_rec && is_rhs_rec) || (!is_lhs_rec && !is_rhs_rec)) {
    return GetNoneDirection();
  }

  if (is_lhs_rec) {
    if (!lhs->AsSERecurrentNode() ||
        lhs->AsSERecurrentNode()->GetLoop() != loop_) {
      return GetNoneDirection();
    }
  }
  if (is_rhs_rec) {
    if (!rhs->AsSERecurrentNode() ||
        rhs->AsSERecurrentNode()->GetLoop() != loop_) {
      return GetNoneDirection();
    }
  }

  // If the opcode is == or !=, we try a peel before or after.
  // If the opcode is not <, >, <= or >=, we bail out.
  //
  // For the remaining cases, we canonicalize the expression so that the
  // constant expression is on the left-hand side and the recurring expression
  // is on the right-hand side. If we swap the sides, then < becomes >, <=
  // becomes >=, etc.
  // If the opcode is <=, then we add 1 to the right-hand side and do the peel
  // check on <.
  // If the opcode is >=, then we add 1 to the left-hand side and do the peel
  // check on >.
  CmpOperator cmp_operator;
  switch (condition->opcode()) {
    default:
      return GetNoneDirection();
    case spv::Op::OpIEqual:
    case spv::Op::OpINotEqual:
      return HandleEquality(lhs, rhs);
    case spv::Op::OpUGreaterThan:
    case spv::Op::OpSGreaterThan: {
      cmp_operator = CmpOperator::kGT;
      break;
    }
    case spv::Op::OpULessThan:
    case spv::Op::OpSLessThan: {
      cmp_operator = CmpOperator::kLT;
      break;
    }
    // We add one to transform >= into > and <= into <.
    case spv::Op::OpUGreaterThanEqual:
    case spv::Op::OpSGreaterThanEqual: {
      cmp_operator = CmpOperator::kGE;
      break;
    }
    case spv::Op::OpULessThanEqual:
    case spv::Op::OpSLessThanEqual: {
      cmp_operator = CmpOperator::kLE;
      break;
    }
  }

  // Force the left-hand side to be the non-recurring expression.
  if (is_lhs_rec) {
    std::swap(lhs, rhs);
    switch (cmp_operator) {
      case CmpOperator::kLT: {
        cmp_operator = CmpOperator::kGT;
        break;
      }
      case CmpOperator::kGT: {
        cmp_operator = CmpOperator::kLT;
        break;
      }
      case CmpOperator::kLE: {
        cmp_operator = CmpOperator::kGE;
        break;
      }
      case CmpOperator::kGE: {
        cmp_operator = CmpOperator::kLE;
        break;
      }
    }
  }
  return HandleInequality(cmp_operator, lhs, rhs->AsSERecurrentNode());
}

SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration(
    SERecurrentNode* rec) const {
  return rec->GetOffset();
}

SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration(
    SERecurrentNode* rec, int64_t iteration) const {
  SExpression coeff = rec->GetCoefficient();
  SExpression offset = rec->GetOffset();

  return (coeff * iteration) + offset;
}

SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration(
    SERecurrentNode* rec) const {
  return GetValueAtIteration(rec, loop_max_iterations_ - 1);
}

bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op,
                                                    SExpression lhs,
                                                    SExpression rhs,
                                                    bool* result) const {
  assert(scev_analysis_->IsLoopInvariant(loop_, lhs));
  assert(scev_analysis_->IsLoopInvariant(loop_, rhs));
  // We perform the test as: 0 cmp_op (rhs - lhs).
  // What is left is then to determine the sign of the expression.
  switch (cmp_op) {
    case CmpOperator::kLT: {
      return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result);
    }
    case CmpOperator::kGT: {
      return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result);
    }
    case CmpOperator::kLE: {
      return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result);
    }
    case CmpOperator::kGE: {
      return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result);
    }
  }
  return false;
}

LoopPeelingPass::LoopPeelingInfo::Direction
LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs,
                                                 SExpression rhs) const {
  {
    // Try a peel-before opportunity.
    SExpression lhs_cst = lhs;
    if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
      lhs_cst = rec_node->GetOffset();
    }
    SExpression rhs_cst = rhs;
    if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
      rhs_cst = rec_node->GetOffset();
    }

    if (lhs_cst == rhs_cst) {
      return Direction{LoopPeelingPass::PeelDirection::kBefore, 1};
    }
  }

  {
    // Try a peel-after opportunity.
    SExpression lhs_cst = lhs;
    if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
      // rec_node(x) = a * x + b
      // assign to lhs: a * (loop_max_iterations_ - 1) + b
      lhs_cst = GetValueAtLastIteration(rec_node);
    }
    SExpression rhs_cst = rhs;
    if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
      // rec_node(x) = a * x + b
      // assign to rhs: a * (loop_max_iterations_ - 1) + b
      rhs_cst = GetValueAtLastIteration(rec_node);
    }

    if (lhs_cst == rhs_cst) {
      return Direction{LoopPeelingPass::PeelDirection::kAfter, 1};
    }
  }

  return GetNoneDirection();
}
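
// Determines around which iteration the condition "lhs cmp_op rhs" flips,
// where |rhs| is a recurrence a * x + b over this loop: the flip happens
// near x = (lhs - b) / a. If that iteration lies strictly inside the loop
// bounds, peel before when it is closer to the start, after when it is
// closer to the end.
// Illustrative example: for a branch on "10 > i" with i = {0, +1} in a loop
// of 100 iterations, the condition flips at (10 - 0) / 1 = 10; 10 is in the
// first half, so peeling 10 iterations before makes the branch constant in
// both resulting loops.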
LoopPeelingPass::LoopPeelingInfo::Direction
LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op,
                                                   SExpression lhs,
                                                   SERecurrentNode* rhs) const {
  SExpression offset = rhs->GetOffset();
  SExpression coefficient = rhs->GetCoefficient();
  // Compute (cst - B) / A.
  std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient;
  if (!flip_iteration.first->AsSEConstantNode()) {
    return GetNoneDirection();
  }

  // Note: !!flip_iteration.second normalizes the remainder to 0/1 (via bool
  // cast).
  int64_t iteration =
      flip_iteration.first->AsSEConstantNode()->FoldToSingleValue() +
      !!flip_iteration.second;
  if (iteration <= 0 ||
      loop_max_iterations_ <= static_cast<uint64_t>(iteration)) {
    // The condition is always true or always false within the loop bounds.
    return GetNoneDirection();
  }

  // If this is a <= or >= operator, make sure |iteration| is the one flipping
  // the condition.
  // If (cst - B) and A are not divisible, this is equivalent to a < or >
  // check, so we skip this test.
  if (!flip_iteration.second &&
      (cmp_op == CmpOperator::kLE || cmp_op == CmpOperator::kGE)) {
    bool first_iteration;
    bool current_iteration;
    if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) ||
        !EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration),
                      &current_iteration)) {
      return GetNoneDirection();
    }

    // If the condition did not flip yet, the next iteration will.
    if (first_iteration == current_iteration) {
      iteration++;
    }
  }

  uint32_t cast_iteration = 0;
  // Integrity check: can we fit |iteration| in a uint32_t?
  if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) {
    cast_iteration = static_cast<uint32_t>(iteration);
  }

  if (cast_iteration) {
    // Peel before if we are closer to the start, after if closer to the end.
    if (loop_max_iterations_ / 2 > cast_iteration) {
      return Direction{LoopPeelingPass::PeelDirection::kBefore, cast_iteration};
    } else {
      return Direction{
          LoopPeelingPass::PeelDirection::kAfter,
          static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)};
    }
  }

  return GetNoneDirection();
}

}  // namespace opt
}  // namespace spvtools