// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <functional>
#include <limits>
#include <memory>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "source/opt/ir_builder.h"
#include "source/opt/ir_context.h"
#include "source/opt/loop_descriptor.h"
#include "source/opt/loop_peeling.h"
#include "source/opt/loop_utils.h"
#include "source/opt/scalar_analysis.h"
#include "source/opt/scalar_analysis_nodes.h"

namespace spvtools {
namespace opt {
namespace {

// Gathers the set of blocks that appear on any path from |entry| to |block|.
void GetBlocksInPath(uint32_t block, uint32_t entry,
                     std::unordered_set<uint32_t>* blocks_in_path,
                     const CFG& cfg) {
  for (uint32_t pid : cfg.preds(block)) {
    if (blocks_in_path->insert(pid).second) {
      if (pid != entry) {
        GetBlocksInPath(pid, entry, blocks_in_path, cfg);
      }
    }
  }
}

}  // namespace

size_t LoopPeelingPass::code_grow_threshold_ = 1000;

void LoopPeeling::DuplicateAndConnectLoop(
    LoopUtils::LoopCloningResult* clone_results) {
  CFG& cfg = *context_->cfg();
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  assert(CanPeelLoop() && "Cannot peel loop!");

  std::vector<BasicBlock*> ordered_loop_blocks;
  // TODO(1841): Handle failure to create pre-header.
  BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();

  loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);

  cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks);

  // Add the cloned basic blocks to the function.
  Function::iterator it =
      loop_utils_.GetFunction()->FindBlock(pre_header->id());
  assert(it != loop_utils_.GetFunction()->end() &&
         "Pre-header not found in the function.");
  loop_utils_.GetFunction()->AddBasicBlocks(
      clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it);

  // Make |loop_|'s preheader the |cloned_loop_| one.
  BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock();
  pre_header->ForEachSuccessorLabel(
      [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); });

  // Update the CFG.
  cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id());
  cloned_loop_->SetPreHeaderBlock(pre_header);
  loop_->SetPreHeaderBlock(nullptr);

  // When cloning the loop, we did not clone the merge block, so currently
  // |cloned_loop_| shares the same merge block as |loop_|.
  // We mutate all branches from |cloned_loop_| blocks to |loop_|'s merge block
  // into a branch to |loop_|'s header (so the header will also act as the
  // merge block of |cloned_loop_|).
  uint32_t cloned_loop_exit = 0;
  for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) {
    if (loop_->IsInsideLoop(pred_id)) continue;
    BasicBlock* bb = cfg.block(pred_id);
    assert(cloned_loop_exit == 0 && "The loop has multiple exits.");
    cloned_loop_exit = bb->id();
    bb->ForEachSuccessorLabel([this](uint32_t* succ) {
      if (*succ == loop_->GetMergeBlock()->id())
        *succ = loop_->GetHeaderBlock()->id();
    });
  }

  // Update the CFG.
  cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id());
  cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id());

  // Patch the phi of the original loop header:
  //  - Set the loop entry branch to come from the cloned loop exit block;
  //  - Set the initial value of the phi using the corresponding cloned loop
  //    exit values.
  //
  // We patch the iterating value initializers of the original loop using the
  // corresponding cloned loop exit values. This connects the cloned loop's
  // iterating values to the original loop and makes sure that the second loop
  // starts iterating from the last value of the first loop.
  //
  // For example, a loop like:
  //
  //   int z = 0;
  //   for (int i = 0; i++ < M; i += cst1) {
  //     if (cond)
  //       z += cst2;
  //   }
  //
  // will become:
  //
  //   int z = 0;
  //   int i = 0;
  //   for (; i++ < M; i += cst1) {
  //     if (cond)
  //       z += cst2;
  //   }
  //   for (; i++ < M; i += cst1) {
  //     if (cond)
  //       z += cst2;
  //   }
  loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr,
                                           clone_results,
                                           this](Instruction* phi) {
    for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
      if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) {
        phi->SetInOperand(i,
                          {clone_results->value_map_.at(
                              exit_value_.at(phi->result_id())->result_id())});
        phi->SetInOperand(i + 1, {cloned_loop_exit});
        def_use_mgr->AnalyzeInstUse(phi);
        return;
      }
    }
  });

  // Force the creation of a new preheader for the original loop and set it as
  // the merge block for the cloned loop.
  // TODO(1841): Handle failure to create pre-header.
  cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock());
}

void LoopPeeling::InsertCanonicalInductionVariable(
    LoopUtils::LoopCloningResult* clone_results) {
  if (original_loop_canonical_induction_variable_) {
    canonical_induction_variable_ =
        context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at(
            original_loop_canonical_induction_variable_->result_id()));
    return;
  }

  BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail();
  if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) {
    --insert_point;
  }
  InstructionBuilder builder(
      context_, &*insert_point,
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* uint_1_cst =
      builder.GetIntConstant<uint32_t>(1, int_type_->IsSigned());
  // Create the increment.
  // Note that we build "1 + 1" here: one of the operands should be the phi
  // value, but we do not have it yet. The operand will be set later.
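  // A minimal sketch of the resulting IR (illustrative IDs): the builder
  // first emits
  //   %iv_inc = OpIAdd %int %one %one
  // and, once the phi below exists, operand 0 is rewritten so that it reads
  //   %iv_inc = OpIAdd %int %iv %one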
  Instruction* iv_inc = builder.AddIAdd(
      uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id());

  builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin());

  canonical_induction_variable_ = builder.AddPhi(
      uint_1_cst->type_id(),
      {builder.GetIntConstant<uint32_t>(0, int_type_->IsSigned())->result_id(),
       GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(),
       GetClonedLoop()->GetLatchBlock()->id()});
  // Connect everything.
  iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()});

  // Update the def/use manager.
  context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc);

  // If the loop is in do-while form, use the incremented value.
  if (do_while_form_) {
    canonical_induction_variable_ = iv_inc;
  }
}

void LoopPeeling::GetIteratorUpdateOperations(
    const Loop* loop, Instruction* iterator,
    std::unordered_set<Instruction*>* operations) {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  operations->insert(iterator);
  iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) {
    Instruction* insn = def_use_mgr->GetDef(*id);
    if (insn->opcode() == spv::Op::OpLabel) {
      return;
    }
    if (operations->count(insn)) {
      return;
    }
    if (!loop->IsInsideLoop(insn)) {
      return;
    }
    GetIteratorUpdateOperations(loop, insn, operations);
  });
}

bool LoopPeeling::IsConditionCheckSideEffectFree() const {
  CFG& cfg = *context_->cfg();

  // The "do-while" form does not cause issues: the algorithm already takes
  // the first iteration into account.
  if (!do_while_form_) {
    uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
    std::unordered_set<uint32_t> blocks_in_path;

    blocks_in_path.insert(condition_block_id);
    GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(),
                    &blocks_in_path, cfg);

    for (uint32_t bb_id : blocks_in_path) {
      BasicBlock* bb = cfg.block(bb_id);
      if (!bb->WhileEachInst([this](Instruction* insn) {
            if (insn->IsBranch()) return true;
            switch (insn->opcode()) {
              case spv::Op::OpLabel:
              case spv::Op::OpSelectionMerge:
              case spv::Op::OpLoopMerge:
                return true;
              default:
                break;
            }
            return context_->IsCombinatorInstruction(insn);
          })) {
        return false;
      }
    }
  }

  return true;
}

void LoopPeeling::GetIteratingExitValues() {
  CFG& cfg = *context_->cfg();

  loop_->GetHeaderBlock()->ForEachPhiInst(
      [this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; });

  if (!loop_->GetMergeBlock()) {
    return;
  }
  if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
    return;
  }
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];

  auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id());
  do_while_form_ = std::find(header_pred.begin(), header_pred.end(),
                             condition_block_id) != header_pred.end();
  if (do_while_form_) {
    loop_->GetHeaderBlock()->ForEachPhiInst(
        [condition_block_id, def_use_mgr, this](Instruction* phi) {
          for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
            if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) {
              exit_value_[phi->result_id()] =
                  def_use_mgr->GetDef(phi->GetSingleWordInOperand(i));
            }
          }
        });
  } else {
    DominatorTree* dom_tree =
        &context_->GetDominatorAnalysis(loop_utils_.GetFunction())
             ->GetDomTree();
    BasicBlock* condition_block = cfg.block(condition_block_id);

    loop_->GetHeaderBlock()->ForEachPhiInst(
        [dom_tree, condition_block, this](Instruction* phi) {
          std::unordered_set<Instruction*> operations;

          // This is not the back-edge value: check if the phi instruction is
          // the only possible candidate.
          GetIteratorUpdateOperations(loop_, phi, &operations);

          for (Instruction* insn : operations) {
            if (insn == phi) {
              continue;
            }
            if (dom_tree->Dominates(context_->get_instr_block(insn),
                                    condition_block)) {
              return;
            }
          }
          exit_value_[phi->result_id()] = phi;
        });
  }
}

void LoopPeeling::FixExitCondition(
    const std::function<uint32_t(Instruction*)>& condition_builder) {
  CFG& cfg = *context_->cfg();

  uint32_t condition_block_id = 0;
  for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) {
    if (GetClonedLoop()->IsInsideLoop(id)) {
      condition_block_id = id;
      break;
    }
  }
  assert(condition_block_id != 0 && "2nd loop is improperly connected");

  BasicBlock* condition_block = cfg.block(condition_block_id);
  Instruction* exit_condition = condition_block->terminator();
  assert(exit_condition->opcode() == spv::Op::OpBranchConditional);
  BasicBlock::iterator insert_point = condition_block->tail();
  if (condition_block->GetMergeInst()) {
    --insert_point;
  }

  exit_condition->SetInOperand(0, {condition_builder(&*insert_point)});

  uint32_t to_continue_block_idx =
      GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1))
          ? 1
          : 2;
  exit_condition->SetInOperand(
      1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)});
  exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()});
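
  // After this rewrite, a branch like (illustrative IDs)
  //   OpBranchConditional %old_cond %merge %continue
  // reads
  //   OpBranchConditional %new_cond %continue %merge
  // i.e. the loop keeps iterating while the new condition holds and exits to
  // the merge block once it becomes false.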

  // Update the def/use manager.
  context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition);
}

BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  CFG& cfg = *context_->cfg();
  assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor");

  // TODO(1841): Handle id overflow.
  std::unique_ptr<BasicBlock> new_bb =
      MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction(
          context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})));
  // Update the loop descriptor.
  Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb];
  if (in_loop) {
    in_loop->AddBasicBlock(new_bb.get());
    loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop);
  }

  context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get());
  def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst());

  BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]);
  bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) {
    if (*id == bb->id()) {
      *id = new_bb->id();
    }
  });
  cfg.RemoveEdge(bb_pred->id(), bb->id());
  cfg.AddEdge(bb_pred->id(), new_bb->id());
  def_use_mgr->AnalyzeInstUse(&*bb_pred->tail());

  // Update the phi instructions: their incoming block is now |new_bb|.
  bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) {
    phi->SetInOperand(1, {new_bb->id()});
    def_use_mgr->AnalyzeInstUse(phi);
  });

  InstructionBuilder(
      context_, new_bb.get(),
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
      .AddBranch(bb->id());
  cfg.RegisterBlock(new_bb.get());

  // Add the basic block to the function.
  Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id());
  assert(it != loop_utils_.GetFunction()->end() &&
         "Basic block not found in the function.");
  BasicBlock* ret = new_bb.get();
  loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it);
  return ret;
}

BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition,
                                     BasicBlock* if_merge) {
  // TODO(1841): Handle failure to create pre-header.
  BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock();
  // Will no longer be a pre-header because of the if.
  loop->SetPreHeaderBlock(nullptr);
  // Kill the branch to the header.
  context_->KillInst(&*if_block->tail());

  InstructionBuilder builder(
      context_, if_block,
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  builder.AddConditionalBranch(condition->result_id(),
                               loop->GetHeaderBlock()->id(), if_merge->id(),
                               if_merge->id());

  return if_block;
}

void LoopPeeling::PeelBefore(uint32_t peel_factor) {
  assert(CanPeelLoop() && "Cannot peel loop");

  LoopUtils::LoopCloningResult clone_results;

  // Clone the loop and insert the cloned one before the loop.
  DuplicateAndConnectLoop(&clone_results);

  // Add a canonical induction variable "canonical_induction_variable_".
  InsertCanonicalInductionVariable(&clone_results);

  InstructionBuilder builder(
      context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* factor =
      builder.GetIntConstant(peel_factor, int_type_->IsSigned());

  Instruction* has_remaining_iteration = builder.AddLessThan(
      factor->result_id(), loop_iteration_count_->result_id());
  Instruction* max_iteration = builder.AddSelect(
      factor->type_id(), has_remaining_iteration->result_id(),
      factor->result_id(), loop_iteration_count_->result_id());

  // Change the exit condition of the cloned loop so that it exits once the
  // following becomes false:
  //   "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
  FixExitCondition([max_iteration, this](Instruction* insert_before_point) {
    return InstructionBuilder(context_, insert_before_point,
                              IRContext::kAnalysisDefUse |
                                  IRContext::kAnalysisInstrToBlockMapping)
        .AddLessThan(canonical_induction_variable_->result_id(),
                     max_iteration->result_id())
        ->result_id();
  });

  // "Protect" the second loop: it can only be executed if
  // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_).
  BasicBlock* if_merge_block = loop_->GetMergeBlock();
  loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock()));
  // Prevent the second loop from being executed if we already executed all the
  // required iterations.
  BasicBlock* if_block =
      ProtectLoop(loop_, has_remaining_iteration, if_merge_block);
  // Patch the phi of the merge block.
  if_merge_block->ForEachPhiInst(
      [&clone_results, if_block, this](Instruction* phi) {
        // |if_merge_block| previously had only one predecessor.
        uint32_t incoming_value = phi->GetSingleWordInOperand(0);
        auto def_in_loop = clone_results.value_map_.find(incoming_value);
        if (def_in_loop != clone_results.value_map_.end())
          incoming_value = def_in_loop->second;
        phi->AddOperand(
            {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}});
        phi->AddOperand(
            {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}});
        context_->get_def_use_mgr()->AnalyzeInstUse(phi);
      });

  context_->InvalidateAnalysesExceptFor(
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
      IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
}

void LoopPeeling::PeelAfter(uint32_t peel_factor) {
  assert(CanPeelLoop() && "Cannot peel loop");

  LoopUtils::LoopCloningResult clone_results;

  // Clone the loop and insert the cloned one before the loop.
  DuplicateAndConnectLoop(&clone_results);

  // Add a canonical induction variable "canonical_induction_variable_".
  InsertCanonicalInductionVariable(&clone_results);

  InstructionBuilder builder(
      context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* factor =
      builder.GetIntConstant(peel_factor, int_type_->IsSigned());

  Instruction* has_remaining_iteration = builder.AddLessThan(
      factor->result_id(), loop_iteration_count_->result_id());

  // Change the exit condition of the cloned loop so that it exits once the
  // following becomes false:
  //   "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
  FixExitCondition([factor, this](Instruction* insert_before_point) {
    InstructionBuilder cond_builder(
        context_, insert_before_point,
        IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
    // Build the following check:
    //   canonical_induction_variable_ + factor < iteration_count
    return cond_builder
        .AddLessThan(cond_builder
                         .AddIAdd(canonical_induction_variable_->type_id(),
                                  canonical_induction_variable_->result_id(),
                                  factor->result_id())
                         ->result_id(),
                     loop_iteration_count_->result_id())
        ->result_id();
  });

  // "Protect" the first loop: it can only be executed if
  // factor < loop_iteration_count_.
  // The original loop's pre-header was the cloned loop's merge block.
  GetClonedLoop()->SetMergeBlock(
      CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock()));
  // Use the second loop's preheader as the if-merge block.

  // Skip the first loop entirely when the peeled iterations already cover the
  // whole trip count.
  BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration,
                                     GetOriginalLoop()->GetPreHeaderBlock());

  // Patch the phi of the header block.
  // We added an if to enclose the first loop, and because the phi nodes are
  // connected to the exit values of the first loop, those definitions no
  // longer dominate the preheader.
  // We have to add the required phi instructions to the preheader (our
  // if-merge block) and patch the header phi.
  GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst(
      [&clone_results, if_block, this](Instruction* phi) {
        analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

        auto find_value_idx = [](Instruction* phi_inst, Loop* loop) {
          uint32_t preheader_value_idx =
              !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2;
          return preheader_value_idx;
        };

        Instruction* cloned_phi =
            def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id()));
        uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand(
            find_value_idx(cloned_phi, GetClonedLoop()));

        Instruction* new_phi =
            InstructionBuilder(context_,
                               &*GetOriginalLoop()->GetPreHeaderBlock()->tail(),
                               IRContext::kAnalysisDefUse |
                                   IRContext::kAnalysisInstrToBlockMapping)
                .AddPhi(phi->type_id(),
                        {phi->GetSingleWordInOperand(
                             find_value_idx(phi, GetOriginalLoop())),
                         GetClonedLoop()->GetMergeBlock()->id(),
                         cloned_preheader_value, if_block->id()});

        phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()),
                          {new_phi->result_id()});
        def_use_mgr->AnalyzeInstUse(phi);
      });

  context_->InvalidateAnalysesExceptFor(
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
      IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
}

Pass::Status LoopPeelingPass::Process() {
  bool modified = false;
  Module* module = context()->module();

  // Process each function in the module.
  for (Function& f : *module) {
    modified |= ProcessFunction(&f);
  }

  return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}

bool LoopPeelingPass::ProcessFunction(Function* f) {
  bool modified = false;
  LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f);

  std::vector<Loop*> to_process_loop;
  to_process_loop.reserve(loop_descriptor.NumLoops());
  for (Loop& l : loop_descriptor) {
    to_process_loop.push_back(&l);
  }

  ScalarEvolutionAnalysis scev_analysis(context());

  for (Loop* loop : to_process_loop) {
    CodeMetrics loop_size;
    loop_size.Analyze(*loop);
    auto try_peel = [&loop_size, &modified, this](Loop* loop_to_peel) -> Loop* {
      if (!loop_to_peel->IsLCSSA()) {
        LoopUtils(context(), loop_to_peel).MakeLoopClosedSSA();
      }

      bool peeled_loop;
      Loop* still_peelable_loop;
      std::tie(peeled_loop, still_peelable_loop) =
          ProcessLoop(loop_to_peel, &loop_size);

      if (peeled_loop) {
        modified = true;
      }

      return still_peelable_loop;
    };

    Loop* still_peelable_loop = try_peel(loop);
    // The pass works out the maximum factor by which a loop can be peeled.
    // If the loop can potentially be peeled again, then there is only one
    // other possible direction, so at most one more call is needed.
    if (still_peelable_loop) {
      try_peel(loop);
    }
  }

  return modified;
}

std::pair<bool, Loop*> LoopPeelingPass::ProcessLoop(Loop* loop,
                                                    CodeMetrics* loop_size) {
  ScalarEvolutionAnalysis* scev_analysis =
      context()->GetScalarEvolutionAnalysis();
  // Default values for bailing out.
  std::pair<bool, Loop*> bail_out{false, nullptr};

  BasicBlock* exit_block = loop->FindConditionBlock();
  if (!exit_block) {
    return bail_out;
  }

  Instruction* exiting_iv = loop->FindConditionVariable(exit_block);
  if (!exiting_iv) {
    return bail_out;
  }
  size_t iterations = 0;
  if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(),
                                    &iterations)) {
    return bail_out;
  }
  if (!iterations) {
    return bail_out;
  }

  Instruction* canonical_induction_variable = nullptr;

  loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable,
                                            scev_analysis,
                                            this](Instruction* insn) {
    if (const SERecurrentNode* iv =
            scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) {
      const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode();
      const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode();
      if (offset && coeff && offset->FoldToSingleValue() == 0 &&
          coeff->FoldToSingleValue() == 1) {
        if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) {
          canonical_induction_variable = insn;
          return false;
        }
      }
    }
    return true;
  });
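  // (A canonical induction variable is one whose scalar evolution is
  // {0,+,1}: it starts at 0 and steps by 1 each iteration, as "i" does in
  // "for (int i = 0; i < n; ++i)".)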

  bool is_signed = canonical_induction_variable
                       ? context()
                             ->get_type_mgr()
                             ->GetType(canonical_induction_variable->type_id())
                             ->AsInteger()
                             ->IsSigned()
                       : false;

  LoopPeeling peeler(
      loop,
      InstructionBuilder(
          context(), loop->GetHeaderBlock(),
          IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
          .GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations),
                                    is_signed),
      canonical_induction_variable);

  if (!peeler.CanPeelLoop()) {
    return bail_out;
  }

  // For each basic block in the loop, check if it can be peeled. If it
  // can, get the direction (before/after) and by which factor.
  LoopPeelingInfo peel_info(loop, iterations, scev_analysis);

  uint32_t peel_before_factor = 0;
  uint32_t peel_after_factor = 0;

  for (uint32_t block : loop->GetBlocks()) {
    if (block == exit_block->id()) {
      continue;
    }
    BasicBlock* bb = cfg()->block(block);
    PeelDirection direction;
    uint32_t factor;
    std::tie(direction, factor) = peel_info.GetPeelingInfo(bb);

    if (direction == PeelDirection::kNone) {
      continue;
    }
    if (direction == PeelDirection::kBefore) {
      peel_before_factor = std::max(peel_before_factor, factor);
    } else {
      assert(direction == PeelDirection::kAfter);
      peel_after_factor = std::max(peel_after_factor, factor);
    }
  }
  PeelDirection direction = PeelDirection::kNone;
  uint32_t factor = 0;

  // Find in which direction we should peel.
  if (peel_before_factor) {
    factor = peel_before_factor;
    direction = PeelDirection::kBefore;
  }
  if (peel_after_factor) {
    if (peel_before_factor < peel_after_factor) {
      // Favor a peel-after here and give the peel-before another shot later.
      factor = peel_after_factor;
      direction = PeelDirection::kAfter;
    }
  }

  // Do the peel if we can.
  if (direction == PeelDirection::kNone) return bail_out;

  // This does not take into account branch elimination opportunities or
  // unrolling. It assumes the peeled loop will be unrolled as well.
  if (factor * loop_size->roi_size_ > code_grow_threshold_) {
    return bail_out;
  }
  loop_size->roi_size_ *= factor;

  // Find out whether the loop should be peeled again.
  Loop* extra_opportunity = nullptr;

  if (direction == PeelDirection::kBefore) {
    peeler.PeelBefore(factor);
    if (stats_) {
      stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor);
    }
    if (peel_after_factor) {
      // We could have peeled after; give it another try.
      extra_opportunity = peeler.GetOriginalLoop();
    }
  } else {
    peeler.PeelAfter(factor);
    if (stats_) {
      stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor);
    }
    if (peel_before_factor) {
      // We could have peeled before; give it another try.
      extra_opportunity = peeler.GetClonedLoop();
    }
  }

  return {true, extra_opportunity};
}

uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand(
    Instruction* condition) const {
  for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
    BasicBlock* bb =
        context_->get_instr_block(condition->GetSingleWordInOperand(i));
    if (bb && loop_->IsInsideLoop(bb)) {
      return condition->GetSingleWordInOperand(i);
    }
  }

  return 0;
}

uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand(
    Instruction* condition) const {
  for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
    BasicBlock* bb =
        context_->get_instr_block(condition->GetSingleWordInOperand(i));
    if (!bb || !loop_->IsInsideLoop(bb)) {
      return condition->GetSingleWordInOperand(i);
    }
  }

  return 0;
}

static bool IsHandledCondition(spv::Op opcode) {
  switch (opcode) {
    case spv::Op::OpIEqual:
    case spv::Op::OpINotEqual:
    case spv::Op::OpUGreaterThan:
    case spv::Op::OpSGreaterThan:
    case spv::Op::OpUGreaterThanEqual:
    case spv::Op::OpSGreaterThanEqual:
    case spv::Op::OpULessThan:
    case spv::Op::OpSLessThan:
    case spv::Op::OpULessThanEqual:
    case spv::Op::OpSLessThanEqual:
      return true;
    default:
      return false;
  }
}

LoopPeelingPass::LoopPeelingInfo::Direction
LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const {
  if (bb->terminator()->opcode() != spv::Op::OpBranchConditional) {
    return GetNoneDirection();
  }

  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  Instruction* condition =
      def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(0));

  if (!IsHandledCondition(condition->opcode())) {
    return GetNoneDirection();
  }

  if (!GetFirstLoopInvariantOperand(condition)) {
    // No loop-invariant operand: the condition cannot be peeled by this pass.
    return GetNoneDirection();
  }
  if (!GetFirstNonLoopInvariantOperand(condition)) {
    // This seems to be a job for the loop-unswitch pass.
    return GetNoneDirection();
  }

  // Left-hand side.
  SExpression lhs = scev_analysis_->AnalyzeInstruction(
      def_use_mgr->GetDef(condition->GetSingleWordInOperand(0)));
  if (lhs->GetType() == SENode::CanNotCompute) {
    // We can't make any conclusion.
    return GetNoneDirection();
  }

  // Right-hand side.
  SExpression rhs = scev_analysis_->AnalyzeInstruction(
      def_use_mgr->GetDef(condition->GetSingleWordInOperand(1)));
  if (rhs->GetType() == SENode::CanNotCompute) {
    // We can't make any conclusion.
    return GetNoneDirection();
  }

  // Only take into account recurrent expressions over the current loop.
  bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs);
  bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs);

  if ((is_lhs_rec && is_rhs_rec) || (!is_lhs_rec && !is_rhs_rec)) {
    return GetNoneDirection();
  }

  if (is_lhs_rec) {
    if (!lhs->AsSERecurrentNode() ||
        lhs->AsSERecurrentNode()->GetLoop() != loop_) {
      return GetNoneDirection();
    }
  }
  if (is_rhs_rec) {
    if (!rhs->AsSERecurrentNode() ||
        rhs->AsSERecurrentNode()->GetLoop() != loop_) {
      return GetNoneDirection();
    }
  }

  // If the opcode is ==, then we try a peel before or after.
  // If the opcode is not <, >, <= or >=, we bail out.
  //
  // For the remaining cases, we canonicalize the expression so that the
  // constant expression is on the left-hand side and the recurrent expression
  // is on the right-hand side. If we swap the operands, then < becomes >,
  // <= becomes >=, etc.
  // If the opcode is <=, then we add 1 to the right-hand side and do the peel
  // check on <.
  // If the opcode is >=, then we add 1 to the left-hand side and do the peel
  // check on >.
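  // For example (illustrative): "i < 10" with recurrent "i" on the left is
  // canonicalized to "10 > i", so the constant 10 ends up on the left-hand
  // side and the comparison operator is mirrored accordingly.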
  CmpOperator cmp_operator;
  switch (condition->opcode()) {
    default:
      return GetNoneDirection();
    case spv::Op::OpIEqual:
    case spv::Op::OpINotEqual:
      return HandleEquality(lhs, rhs);
    case spv::Op::OpUGreaterThan:
    case spv::Op::OpSGreaterThan: {
      cmp_operator = CmpOperator::kGT;
      break;
    }
    case spv::Op::OpULessThan:
    case spv::Op::OpSLessThan: {
      cmp_operator = CmpOperator::kLT;
      break;
    }
    // We add one to transform >= into > and <= into <.
    case spv::Op::OpUGreaterThanEqual:
    case spv::Op::OpSGreaterThanEqual: {
      cmp_operator = CmpOperator::kGE;
      break;
    }
    case spv::Op::OpULessThanEqual:
    case spv::Op::OpSLessThanEqual: {
      cmp_operator = CmpOperator::kLE;
      break;
    }
  }

  // Force the left-hand side to be the non-recurrent expression.
  if (is_lhs_rec) {
    std::swap(lhs, rhs);
    switch (cmp_operator) {
      case CmpOperator::kLT: {
        cmp_operator = CmpOperator::kGT;
        break;
      }
      case CmpOperator::kGT: {
        cmp_operator = CmpOperator::kLT;
        break;
      }
      case CmpOperator::kLE: {
        cmp_operator = CmpOperator::kGE;
        break;
      }
      case CmpOperator::kGE: {
        cmp_operator = CmpOperator::kLE;
        break;
      }
    }
  }
  return HandleInequality(cmp_operator, lhs, rhs->AsSERecurrentNode());
}

SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration(
    SERecurrentNode* rec) const {
  return rec->GetOffset();
}

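// For a recurrence rec(x) = coeff * x + offset, the value at iteration
// |iteration| is coeff * iteration + offset. E.g. (illustrative numbers)
// with coeff = 2 and offset = 3, iteration 4 yields 2 * 4 + 3 = 11.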
SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration(
    SERecurrentNode* rec, int64_t iteration) const {
  SExpression coeff = rec->GetCoefficient();
  SExpression offset = rec->GetOffset();

  return (coeff * iteration) + offset;
}

SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration(
    SERecurrentNode* rec) const {
  return GetValueAtIteration(rec, loop_max_iterations_ - 1);
}

bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op,
                                                    SExpression lhs,
                                                    SExpression rhs,
                                                    bool* result) const {
  assert(scev_analysis_->IsLoopInvariant(loop_, lhs));
  assert(scev_analysis_->IsLoopInvariant(loop_, rhs));
  // We perform the test: 0 cmp_op rhs - lhs.
  // What is left is then to determine the sign of the expression.
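  // E.g. for kLT: "lhs < rhs" holds exactly when "0 < rhs - lhs", i.e. when
  // "rhs - lhs" is always greater than zero.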
  switch (cmp_op) {
    case CmpOperator::kLT: {
      return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result);
    }
    case CmpOperator::kGT: {
      return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result);
    }
    case CmpOperator::kLE: {
      return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result);
    }
    case CmpOperator::kGE: {
      return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result);
    }
  }
  return false;
}

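// Example (illustrative): for a condition "i == 0" with i = {0,+,1}, the
// offsets compare equal only at the first iteration, so one iteration is
// peeled before the loop; symmetrically, "i == N - 1" (with N the trip
// count) holds only at the last iteration and leads to a peel-after of one.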
LoopPeelingPass::LoopPeelingInfo::Direction
LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs,
                                                 SExpression rhs) const {
  {
    // Try a peel-before opportunity.
    SExpression lhs_cst = lhs;
    if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
      lhs_cst = rec_node->GetOffset();
    }
    SExpression rhs_cst = rhs;
    if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
      rhs_cst = rec_node->GetOffset();
    }

    if (lhs_cst == rhs_cst) {
      return Direction{LoopPeelingPass::PeelDirection::kBefore, 1};
    }
  }

  {
    // Try a peel-after opportunity.
    SExpression lhs_cst = lhs;
    if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
      // rec_node(x) = a * x + b
      // assign to lhs_cst: a * (loop_max_iterations_ - 1) + b
      lhs_cst = GetValueAtLastIteration(rec_node);
    }
    SExpression rhs_cst = rhs;
    if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
      // rec_node(x) = a * x + b
      // assign to rhs_cst: a * (loop_max_iterations_ - 1) + b
      rhs_cst = GetValueAtLastIteration(rec_node);
    }

    if (lhs_cst == rhs_cst) {
      return Direction{LoopPeelingPass::PeelDirection::kAfter, 1};
    }
  }

  return GetNoneDirection();
}

LoopPeelingPass::LoopPeelingInfo::Direction
LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op,
                                                   SExpression lhs,
                                                   SERecurrentNode* rhs) const {
  SExpression offset = rhs->GetOffset();
  SExpression coefficient = rhs->GetCoefficient();
  // Compute (cst - B) / A.
  std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient;
  if (!flip_iteration.first->AsSEConstantNode()) {
    return GetNoneDirection();
  }

  // Note: !!flip_iteration.second normalizes the remainder to 0/1 (via a bool
  // cast), so an inexact division rounds the flip iteration up.
  int64_t iteration =
      flip_iteration.first->AsSEConstantNode()->FoldToSingleValue() +
      !!flip_iteration.second;
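  // Worked example (illustrative numbers): for "i < 7" with i = {0,+,1}, we
  // get (7 - 0) / 1 = 7 with remainder 0, so the condition flips at
  // iteration 7.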
  if (iteration <= 0 ||
      loop_max_iterations_ <= static_cast<uint64_t>(iteration)) {
    // The condition is always true or always false within the loop bounds.
    return GetNoneDirection();
  }
  // If this is a <= or >= operator, make sure |iteration| is the one flipping
  // the condition.
  // If (cst - B) is not divisible by A, this is equivalent to a < or > check,
  // so we skip this test.
  if (!flip_iteration.second &&
      (cmp_op == CmpOperator::kLE || cmp_op == CmpOperator::kGE)) {
    bool first_iteration;
    bool current_iteration;
    if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) ||
        !EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration),
                      &current_iteration)) {
      return GetNoneDirection();
    }
    // If the condition did not flip at this iteration, the next one will.
    if (first_iteration == current_iteration) {
      iteration++;
    }
  }

  uint32_t cast_iteration = 0;
  // Integrity check: can we fit |iteration| in a uint32_t?
  if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) {
    cast_iteration = static_cast<uint32_t>(iteration);
  }

  if (cast_iteration) {
    // Peel before if we are closer to the start, after if closer to the end.
    if (loop_max_iterations_ / 2 > cast_iteration) {
      return Direction{LoopPeelingPass::PeelDirection::kBefore, cast_iteration};
    } else {
      return Direction{
          LoopPeelingPass::PeelDirection::kAfter,
          static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)};
    }
  }

  return GetNoneDirection();
}

}  // namespace opt
}  // namespace spvtools