loop_fusion.cpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. // Copyright (c) 2018 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "source/opt/loop_fusion.h"
  15. #include <algorithm>
  16. #include <vector>
  17. #include "source/opt/ir_context.h"
  18. #include "source/opt/loop_dependence.h"
  19. #include "source/opt/loop_descriptor.h"
  20. namespace spvtools {
  21. namespace opt {
  22. namespace {
  23. // Append all the loops nested in |loop| to |loops|.
  24. void CollectChildren(Loop* loop, std::vector<const Loop*>* loops) {
  25. for (auto child : *loop) {
  26. loops->push_back(child);
  27. if (child->NumImmediateChildren() != 0) {
  28. CollectChildren(child, loops);
  29. }
  30. }
  31. }
  32. // Return the set of locations accessed by |stores| and |loads|.
  33. std::set<Instruction*> GetLocationsAccessed(
  34. const std::map<Instruction*, std::vector<Instruction*>>& stores,
  35. const std::map<Instruction*, std::vector<Instruction*>>& loads) {
  36. std::set<Instruction*> locations{};
  37. for (const auto& kv : stores) {
  38. locations.insert(std::get<0>(kv));
  39. }
  40. for (const auto& kv : loads) {
  41. locations.insert(std::get<0>(kv));
  42. }
  43. return locations;
  44. }
  45. // Append all dependences from |sources| to |destinations| to |dependences|.
  46. void GetDependences(std::vector<DistanceVector>* dependences,
  47. LoopDependenceAnalysis* analysis,
  48. const std::vector<Instruction*>& sources,
  49. const std::vector<Instruction*>& destinations,
  50. size_t num_entries) {
  51. for (auto source : sources) {
  52. for (auto destination : destinations) {
  53. DistanceVector dist(num_entries);
  54. if (!analysis->GetDependence(source, destination, &dist)) {
  55. dependences->push_back(dist);
  56. }
  57. }
  58. }
  59. }
  60. // Apped all instructions in |block| to |instructions|.
  61. void AddInstructionsInBlock(std::vector<Instruction*>* instructions,
  62. BasicBlock* block) {
  63. for (auto& inst : *block) {
  64. instructions->push_back(&inst);
  65. }
  66. instructions->push_back(block->GetLabelInst());
  67. }
  68. } // namespace
  69. bool LoopFusion::UsedInContinueOrConditionBlock(Instruction* phi_instruction,
  70. Loop* loop) {
  71. auto condition_block = loop->FindConditionBlock()->id();
  72. auto continue_block = loop->GetContinueBlock()->id();
  73. auto not_used = context_->get_def_use_mgr()->WhileEachUser(
  74. phi_instruction,
  75. [this, condition_block, continue_block](Instruction* instruction) {
  76. auto block_id = context_->get_instr_block(instruction)->id();
  77. return block_id != condition_block && block_id != continue_block;
  78. });
  79. return !not_used;
  80. }
  81. void LoopFusion::RemoveIfNotUsedContinueOrConditionBlock(
  82. std::vector<Instruction*>* instructions, Loop* loop) {
  83. instructions->erase(
  84. std::remove_if(std::begin(*instructions), std::end(*instructions),
  85. [this, loop](Instruction* instruction) {
  86. return !UsedInContinueOrConditionBlock(instruction,
  87. loop);
  88. }),
  89. std::end(*instructions));
  90. }
  91. bool LoopFusion::AreCompatible() {
  92. // Check that the loops are in the same function.
  93. if (loop_0_->GetHeaderBlock()->GetParent() !=
  94. loop_1_->GetHeaderBlock()->GetParent()) {
  95. return false;
  96. }
  97. // Check that both loops have pre-header blocks.
  98. if (!loop_0_->GetPreHeaderBlock() || !loop_1_->GetPreHeaderBlock()) {
  99. return false;
  100. }
  101. // Check there are no breaks.
  102. if (context_->cfg()->preds(loop_0_->GetMergeBlock()->id()).size() != 1 ||
  103. context_->cfg()->preds(loop_1_->GetMergeBlock()->id()).size() != 1) {
  104. return false;
  105. }
  106. // Check there are no continues.
  107. if (context_->cfg()->preds(loop_0_->GetContinueBlock()->id()).size() != 1 ||
  108. context_->cfg()->preds(loop_1_->GetContinueBlock()->id()).size() != 1) {
  109. return false;
  110. }
  111. // |GetInductionVariables| returns all OpPhi in the header. Check that both
  112. // loops have exactly one that is used in the continue and condition blocks.
  113. std::vector<Instruction*> inductions_0{}, inductions_1{};
  114. loop_0_->GetInductionVariables(inductions_0);
  115. RemoveIfNotUsedContinueOrConditionBlock(&inductions_0, loop_0_);
  116. if (inductions_0.size() != 1) {
  117. return false;
  118. }
  119. induction_0_ = inductions_0.front();
  120. loop_1_->GetInductionVariables(inductions_1);
  121. RemoveIfNotUsedContinueOrConditionBlock(&inductions_1, loop_1_);
  122. if (inductions_1.size() != 1) {
  123. return false;
  124. }
  125. induction_1_ = inductions_1.front();
  126. if (!CheckInit()) {
  127. return false;
  128. }
  129. if (!CheckCondition()) {
  130. return false;
  131. }
  132. if (!CheckStep()) {
  133. return false;
  134. }
  135. // Check adjacency, |loop_0_| should come just before |loop_1_|.
  136. // There is always at least one block between loops, even if it's empty.
  137. // We'll check at most 2 preceding blocks.
  138. auto pre_header_1 = loop_1_->GetPreHeaderBlock();
  139. std::vector<BasicBlock*> block_to_check{};
  140. block_to_check.push_back(pre_header_1);
  141. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) {
  142. // Follow CFG for one more block.
  143. auto preds = context_->cfg()->preds(pre_header_1->id());
  144. if (preds.size() == 1) {
  145. auto block = &*containing_function_->FindBlock(preds.front());
  146. if (block == loop_0_->GetMergeBlock()) {
  147. block_to_check.push_back(block);
  148. } else {
  149. return false;
  150. }
  151. } else {
  152. return false;
  153. }
  154. }
  155. // Check that the separating blocks are either empty or only contains a store
  156. // to a local variable that is never read (left behind by
  157. // '--eliminate-local-multi-store'). Also allow OpPhi, since the loop could be
  158. // in LCSSA form.
  159. for (auto block : block_to_check) {
  160. for (auto& inst : *block) {
  161. if (inst.opcode() == spv::Op::OpStore) {
  162. // Get the definition of the target to check it's function scope so
  163. // there are no observable side effects.
  164. auto variable =
  165. context_->get_def_use_mgr()->GetDef(inst.GetSingleWordInOperand(0));
  166. if (variable->opcode() != spv::Op::OpVariable ||
  167. spv::StorageClass(variable->GetSingleWordInOperand(0)) !=
  168. spv::StorageClass::Function) {
  169. return false;
  170. }
  171. // Check the target is never loaded.
  172. auto is_used = false;
  173. context_->get_def_use_mgr()->ForEachUse(
  174. inst.GetSingleWordInOperand(0),
  175. [&is_used](Instruction* use_inst, uint32_t) {
  176. if (use_inst->opcode() == spv::Op::OpLoad) {
  177. is_used = true;
  178. }
  179. });
  180. if (is_used) {
  181. return false;
  182. }
  183. } else if (inst.opcode() == spv::Op::OpPhi) {
  184. if (inst.NumInOperands() != 2) {
  185. return false;
  186. }
  187. } else if (inst.opcode() != spv::Op::OpBranch) {
  188. return false;
  189. }
  190. }
  191. }
  192. return true;
  193. } // namespace opt
  194. bool LoopFusion::ContainsBarriersOrFunctionCalls(Loop* loop) {
  195. for (const auto& block : loop->GetBlocks()) {
  196. for (const auto& inst : *containing_function_->FindBlock(block)) {
  197. auto opcode = inst.opcode();
  198. if (opcode == spv::Op::OpFunctionCall ||
  199. opcode == spv::Op::OpControlBarrier ||
  200. opcode == spv::Op::OpMemoryBarrier ||
  201. opcode == spv::Op::OpTypeNamedBarrier ||
  202. opcode == spv::Op::OpNamedBarrierInitialize ||
  203. opcode == spv::Op::OpMemoryNamedBarrier) {
  204. return true;
  205. }
  206. }
  207. }
  208. return false;
  209. }
  210. bool LoopFusion::CheckInit() {
  211. int64_t loop_0_init;
  212. if (!loop_0_->GetInductionInitValue(induction_0_, &loop_0_init)) {
  213. return false;
  214. }
  215. int64_t loop_1_init;
  216. if (!loop_1_->GetInductionInitValue(induction_1_, &loop_1_init)) {
  217. return false;
  218. }
  219. if (loop_0_init != loop_1_init) {
  220. return false;
  221. }
  222. return true;
  223. }
  224. bool LoopFusion::CheckCondition() {
  225. auto condition_0 = loop_0_->GetConditionInst();
  226. auto condition_1 = loop_1_->GetConditionInst();
  227. if (!loop_0_->IsSupportedCondition(condition_0->opcode()) ||
  228. !loop_1_->IsSupportedCondition(condition_1->opcode())) {
  229. return false;
  230. }
  231. if (condition_0->opcode() != condition_1->opcode()) {
  232. return false;
  233. }
  234. for (uint32_t i = 0; i < condition_0->NumInOperandWords(); ++i) {
  235. auto arg_0 = context_->get_def_use_mgr()->GetDef(
  236. condition_0->GetSingleWordInOperand(i));
  237. auto arg_1 = context_->get_def_use_mgr()->GetDef(
  238. condition_1->GetSingleWordInOperand(i));
  239. if (arg_0 == induction_0_ && arg_1 == induction_1_) {
  240. continue;
  241. }
  242. if (arg_0 == induction_0_ && arg_1 != induction_1_) {
  243. return false;
  244. }
  245. if (arg_1 == induction_1_ && arg_0 != induction_0_) {
  246. return false;
  247. }
  248. if (arg_0 != arg_1) {
  249. return false;
  250. }
  251. }
  252. return true;
  253. }
  254. bool LoopFusion::CheckStep() {
  255. auto scalar_analysis = context_->GetScalarEvolutionAnalysis();
  256. SENode* induction_node_0 = scalar_analysis->SimplifyExpression(
  257. scalar_analysis->AnalyzeInstruction(induction_0_));
  258. if (!induction_node_0->AsSERecurrentNode()) {
  259. return false;
  260. }
  261. SENode* induction_step_0 =
  262. induction_node_0->AsSERecurrentNode()->GetCoefficient();
  263. if (!induction_step_0->AsSEConstantNode()) {
  264. return false;
  265. }
  266. SENode* induction_node_1 = scalar_analysis->SimplifyExpression(
  267. scalar_analysis->AnalyzeInstruction(induction_1_));
  268. if (!induction_node_1->AsSERecurrentNode()) {
  269. return false;
  270. }
  271. SENode* induction_step_1 =
  272. induction_node_1->AsSERecurrentNode()->GetCoefficient();
  273. if (!induction_step_1->AsSEConstantNode()) {
  274. return false;
  275. }
  276. if (*induction_step_0 != *induction_step_1) {
  277. return false;
  278. }
  279. return true;
  280. }
  281. std::map<Instruction*, std::vector<Instruction*>> LoopFusion::LocationToMemOps(
  282. const std::vector<Instruction*>& mem_ops) {
  283. std::map<Instruction*, std::vector<Instruction*>> location_map{};
  284. for (auto instruction : mem_ops) {
  285. auto access_location = context_->get_def_use_mgr()->GetDef(
  286. instruction->GetSingleWordInOperand(0));
  287. while (access_location->opcode() == spv::Op::OpAccessChain) {
  288. access_location = context_->get_def_use_mgr()->GetDef(
  289. access_location->GetSingleWordInOperand(0));
  290. }
  291. location_map[access_location].push_back(instruction);
  292. }
  293. return location_map;
  294. }
  295. std::pair<std::vector<Instruction*>, std::vector<Instruction*>>
  296. LoopFusion::GetLoadsAndStoresInLoop(Loop* loop) {
  297. std::vector<Instruction*> loads{};
  298. std::vector<Instruction*> stores{};
  299. for (auto block_id : loop->GetBlocks()) {
  300. if (block_id == loop->GetContinueBlock()->id()) {
  301. continue;
  302. }
  303. for (auto& instruction : *containing_function_->FindBlock(block_id)) {
  304. if (instruction.opcode() == spv::Op::OpLoad) {
  305. loads.push_back(&instruction);
  306. } else if (instruction.opcode() == spv::Op::OpStore) {
  307. stores.push_back(&instruction);
  308. }
  309. }
  310. }
  311. return std::make_pair(loads, stores);
  312. }
  313. bool LoopFusion::IsUsedInLoop(Instruction* instruction, Loop* loop) {
  314. auto not_used = context_->get_def_use_mgr()->WhileEachUser(
  315. instruction, [this, loop](Instruction* user) {
  316. auto block_id = context_->get_instr_block(user)->id();
  317. return !loop->IsInsideLoop(block_id);
  318. });
  319. return !not_used;
  320. }
  321. bool LoopFusion::IsLegal() {
  322. assert(AreCompatible() && "Fusion can't be legal, loops are not compatible.");
  323. // Bail out if there are function calls as they could have side-effects that
  324. // cause dependencies or if there are any barriers.
  325. if (ContainsBarriersOrFunctionCalls(loop_0_) ||
  326. ContainsBarriersOrFunctionCalls(loop_1_)) {
  327. return false;
  328. }
  329. std::vector<Instruction*> phi_instructions{};
  330. loop_0_->GetInductionVariables(phi_instructions);
  331. // Check no OpPhi in |loop_0_| is used in |loop_1_|.
  332. for (auto phi_instruction : phi_instructions) {
  333. if (IsUsedInLoop(phi_instruction, loop_1_)) {
  334. return false;
  335. }
  336. }
  337. // Check no LCSSA OpPhi in merge block of |loop_0_| is used in |loop_1_|.
  338. auto phi_used = false;
  339. loop_0_->GetMergeBlock()->ForEachPhiInst(
  340. [this, &phi_used](Instruction* phi_instruction) {
  341. phi_used |= IsUsedInLoop(phi_instruction, loop_1_);
  342. });
  343. if (phi_used) {
  344. return false;
  345. }
  346. // Grab loads & stores from both loops.
  347. auto loads_stores_0 = GetLoadsAndStoresInLoop(loop_0_);
  348. auto loads_stores_1 = GetLoadsAndStoresInLoop(loop_1_);
  349. // Build memory location to operation maps.
  350. auto load_locs_0 = LocationToMemOps(std::get<0>(loads_stores_0));
  351. auto store_locs_0 = LocationToMemOps(std::get<1>(loads_stores_0));
  352. auto load_locs_1 = LocationToMemOps(std::get<0>(loads_stores_1));
  353. auto store_locs_1 = LocationToMemOps(std::get<1>(loads_stores_1));
  354. // Get the locations accessed in both loops.
  355. auto locations_0 = GetLocationsAccessed(store_locs_0, load_locs_0);
  356. auto locations_1 = GetLocationsAccessed(store_locs_1, load_locs_1);
  357. std::vector<Instruction*> potential_clashes{};
  358. std::set_intersection(std::begin(locations_0), std::end(locations_0),
  359. std::begin(locations_1), std::end(locations_1),
  360. std::back_inserter(potential_clashes));
  361. // If the loops don't access the same variables, the fusion is legal.
  362. if (potential_clashes.empty()) {
  363. return true;
  364. }
  365. // Find variables that have at least one store.
  366. std::vector<Instruction*> potential_clashes_with_stores{};
  367. for (auto location : potential_clashes) {
  368. if (store_locs_0.find(location) != std::end(store_locs_0) ||
  369. store_locs_1.find(location) != std::end(store_locs_1)) {
  370. potential_clashes_with_stores.push_back(location);
  371. }
  372. }
  373. // If there are only loads to the same variables, the fusion is legal.
  374. if (potential_clashes_with_stores.empty()) {
  375. return true;
  376. }
  377. // Else if loads and at least one store (across loops) to the same variable
  378. // there is a potential dependence and we need to check the dependence
  379. // distance.
  380. // Find all the loops in this loop nest for the dependency analysis.
  381. std::vector<const Loop*> loops{};
  382. // Find the parents.
  383. for (auto current_loop = loop_0_; current_loop != nullptr;
  384. current_loop = current_loop->GetParent()) {
  385. loops.push_back(current_loop);
  386. }
  387. auto this_loop_position = loops.size() - 1;
  388. std::reverse(std::begin(loops), std::end(loops));
  389. // Find the children.
  390. CollectChildren(loop_0_, &loops);
  391. CollectChildren(loop_1_, &loops);
  392. // Check that any dependes created are legal. That means the fused loops do
  393. // not have any dependencies with dependence distance greater than 0 that did
  394. // not exist in the original loops.
  395. LoopDependenceAnalysis analysis(context_, loops);
  396. analysis.GetScalarEvolution()->AddLoopsToPretendAreTheSame(
  397. {loop_0_, loop_1_});
  398. for (auto location : potential_clashes_with_stores) {
  399. // Analyse dependences from |loop_0_| to |loop_1_|.
  400. std::vector<DistanceVector> dependences;
  401. // Read-After-Write.
  402. GetDependences(&dependences, &analysis, store_locs_0[location],
  403. load_locs_1[location], loops.size());
  404. // Write-After-Read.
  405. GetDependences(&dependences, &analysis, load_locs_0[location],
  406. store_locs_1[location], loops.size());
  407. // Write-After-Write.
  408. GetDependences(&dependences, &analysis, store_locs_0[location],
  409. store_locs_1[location], loops.size());
  410. // Check that the induction variables either don't appear in the subscripts
  411. // or the dependence distance is negative.
  412. for (const auto& dependence : dependences) {
  413. const auto& entry = dependence.GetEntries()[this_loop_position];
  414. if ((entry.dependence_information ==
  415. DistanceEntry::DependenceInformation::DISTANCE &&
  416. entry.distance < 1) ||
  417. (entry.dependence_information ==
  418. DistanceEntry::DependenceInformation::IRRELEVANT)) {
  419. continue;
  420. } else {
  421. return false;
  422. }
  423. }
  424. }
  425. return true;
  426. }
  427. void ReplacePhiParentWith(Instruction* inst, uint32_t orig_block,
  428. uint32_t new_block) {
  429. if (inst->GetSingleWordInOperand(1) == orig_block) {
  430. inst->SetInOperand(1, {new_block});
  431. } else {
  432. inst->SetInOperand(3, {new_block});
  433. }
  434. }
  435. void LoopFusion::Fuse() {
  436. assert(AreCompatible() && "Can't fuse, loops aren't compatible");
  437. assert(IsLegal() && "Can't fuse, illegal");
  438. // Save the pointers/ids, won't be found in the middle of doing modifications.
  439. auto header_1 = loop_1_->GetHeaderBlock()->id();
  440. auto condition_1 = loop_1_->FindConditionBlock()->id();
  441. auto continue_1 = loop_1_->GetContinueBlock()->id();
  442. auto continue_0 = loop_0_->GetContinueBlock()->id();
  443. auto condition_block_of_0 = loop_0_->FindConditionBlock();
  444. // Find the blocks whose branches need updating.
  445. auto first_block_of_1 = &*(++containing_function_->FindBlock(condition_1));
  446. auto last_block_of_1 = &*(--containing_function_->FindBlock(continue_1));
  447. auto last_block_of_0 = &*(--containing_function_->FindBlock(continue_0));
  448. // Update the branch for |last_block_of_loop_0| to go to |first_block_of_1|.
  449. last_block_of_0->ForEachSuccessorLabel(
  450. [first_block_of_1](uint32_t* succ) { *succ = first_block_of_1->id(); });
  451. // Update the branch for the |last_block_of_loop_1| to go to the continue
  452. // block of |loop_0_|.
  453. last_block_of_1->ForEachSuccessorLabel(
  454. [this](uint32_t* succ) { *succ = loop_0_->GetContinueBlock()->id(); });
  455. // Update merge block id in the header of |loop_0_| to the merge block of
  456. // |loop_1_|.
  457. loop_0_->GetHeaderBlock()->ForEachInst([this](Instruction* inst) {
  458. if (inst->opcode() == spv::Op::OpLoopMerge) {
  459. inst->SetInOperand(0, {loop_1_->GetMergeBlock()->id()});
  460. }
  461. });
  462. // Update condition branch target in |loop_0_| to the merge block of
  463. // |loop_1_|.
  464. condition_block_of_0->ForEachInst([this](Instruction* inst) {
  465. if (inst->opcode() == spv::Op::OpBranchConditional) {
  466. auto loop_0_merge_block_id = loop_0_->GetMergeBlock()->id();
  467. if (inst->GetSingleWordInOperand(1) == loop_0_merge_block_id) {
  468. inst->SetInOperand(1, {loop_1_->GetMergeBlock()->id()});
  469. } else {
  470. inst->SetInOperand(2, {loop_1_->GetMergeBlock()->id()});
  471. }
  472. }
  473. });
  474. // Move OpPhi instructions not corresponding to the induction variable from
  475. // the header of |loop_1_| to the header of |loop_0_|.
  476. std::vector<Instruction*> instructions_to_move{};
  477. for (auto& instruction : *loop_1_->GetHeaderBlock()) {
  478. if (instruction.opcode() == spv::Op::OpPhi &&
  479. &instruction != induction_1_) {
  480. instructions_to_move.push_back(&instruction);
  481. }
  482. }
  483. for (auto& it : instructions_to_move) {
  484. it->RemoveFromList();
  485. it->InsertBefore(induction_0_);
  486. }
  487. // Update the OpPhi parents to the correct blocks in |loop_0_|.
  488. loop_0_->GetHeaderBlock()->ForEachPhiInst([this](Instruction* i) {
  489. ReplacePhiParentWith(i, loop_1_->GetPreHeaderBlock()->id(),
  490. loop_0_->GetPreHeaderBlock()->id());
  491. ReplacePhiParentWith(i, loop_1_->GetContinueBlock()->id(),
  492. loop_0_->GetContinueBlock()->id());
  493. });
  494. // Update instruction to block mapping & DefUseManager.
  495. for (auto& phi_instruction : instructions_to_move) {
  496. context_->set_instr_block(phi_instruction, loop_0_->GetHeaderBlock());
  497. context_->get_def_use_mgr()->AnalyzeInstUse(phi_instruction);
  498. }
  499. // Replace the uses of the induction variable of |loop_1_| with that the
  500. // induction variable of |loop_0_|.
  501. context_->ReplaceAllUsesWith(induction_1_->result_id(),
  502. induction_0_->result_id());
  503. // Replace LCSSA OpPhi in merge block of |loop_0_|.
  504. loop_0_->GetMergeBlock()->ForEachPhiInst([this](Instruction* instruction) {
  505. context_->ReplaceAllUsesWith(instruction->result_id(),
  506. instruction->GetSingleWordInOperand(0));
  507. });
  508. // Update LCSSA OpPhi in merge block of |loop_1_|.
  509. loop_1_->GetMergeBlock()->ForEachPhiInst(
  510. [condition_block_of_0](Instruction* instruction) {
  511. instruction->SetInOperand(1, {condition_block_of_0->id()});
  512. });
  513. // Move the continue block of |loop_0_| after the last block of |loop_1_|.
  514. containing_function_->MoveBasicBlockToAfter(continue_0, last_block_of_1);
  515. // Gather all instructions to be killed from |loop_1_| (induction variable
  516. // initialisation, header, condition and continue blocks).
  517. std::vector<Instruction*> instr_to_delete{};
  518. AddInstructionsInBlock(&instr_to_delete, loop_1_->GetPreHeaderBlock());
  519. AddInstructionsInBlock(&instr_to_delete, loop_1_->GetHeaderBlock());
  520. AddInstructionsInBlock(&instr_to_delete, loop_1_->FindConditionBlock());
  521. AddInstructionsInBlock(&instr_to_delete, loop_1_->GetContinueBlock());
  522. // There was an additional empty block between the loops, kill that too.
  523. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) {
  524. AddInstructionsInBlock(&instr_to_delete, loop_0_->GetMergeBlock());
  525. }
  526. // Update the CFG, so it wouldn't need invalidating.
  527. auto cfg = context_->cfg();
  528. cfg->ForgetBlock(loop_1_->GetPreHeaderBlock());
  529. cfg->ForgetBlock(loop_1_->GetHeaderBlock());
  530. cfg->ForgetBlock(loop_1_->FindConditionBlock());
  531. cfg->ForgetBlock(loop_1_->GetContinueBlock());
  532. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) {
  533. cfg->ForgetBlock(loop_0_->GetMergeBlock());
  534. }
  535. cfg->RemoveEdge(last_block_of_0->id(), loop_0_->GetContinueBlock()->id());
  536. cfg->AddEdge(last_block_of_0->id(), first_block_of_1->id());
  537. cfg->AddEdge(last_block_of_1->id(), loop_0_->GetContinueBlock()->id());
  538. cfg->AddEdge(loop_0_->GetContinueBlock()->id(),
  539. loop_1_->GetHeaderBlock()->id());
  540. cfg->AddEdge(condition_block_of_0->id(), loop_1_->GetMergeBlock()->id());
  541. // Update DefUseManager.
  542. auto def_use_mgr = context_->get_def_use_mgr();
  543. // Uses of labels that are in updated branches need analysing.
  544. def_use_mgr->AnalyzeInstUse(last_block_of_0->terminator());
  545. def_use_mgr->AnalyzeInstUse(last_block_of_1->terminator());
  546. def_use_mgr->AnalyzeInstUse(loop_0_->GetHeaderBlock()->GetLoopMergeInst());
  547. def_use_mgr->AnalyzeInstUse(condition_block_of_0->terminator());
  548. // Update the LoopDescriptor, so it wouldn't need invalidating.
  549. auto ld = context_->GetLoopDescriptor(containing_function_);
  550. // Create a copy, so the iterator wouldn't be invalidated.
  551. std::vector<Loop*> loops_to_add_remove{};
  552. for (auto child_loop : *loop_1_) {
  553. loops_to_add_remove.push_back(child_loop);
  554. }
  555. for (auto child_loop : loops_to_add_remove) {
  556. loop_1_->RemoveChildLoop(child_loop);
  557. loop_0_->AddNestedLoop(child_loop);
  558. }
  559. auto loop_1_blocks = loop_1_->GetBlocks();
  560. for (auto block : loop_1_blocks) {
  561. loop_1_->RemoveBasicBlock(block);
  562. if (block != header_1 && block != condition_1 && block != continue_1) {
  563. loop_0_->AddBasicBlock(block);
  564. if ((*ld)[block] == loop_1_) {
  565. ld->SetBasicBlockToLoop(block, loop_0_);
  566. }
  567. }
  568. if ((*ld)[block] == loop_1_) {
  569. ld->ForgetBasicBlock(block);
  570. }
  571. }
  572. loop_1_->RemoveBasicBlock(loop_1_->GetPreHeaderBlock()->id());
  573. ld->ForgetBasicBlock(loop_1_->GetPreHeaderBlock()->id());
  574. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) {
  575. loop_0_->RemoveBasicBlock(loop_0_->GetMergeBlock()->id());
  576. ld->ForgetBasicBlock(loop_0_->GetMergeBlock()->id());
  577. }
  578. loop_0_->SetMergeBlock(loop_1_->GetMergeBlock());
  579. loop_1_->ClearBlocks();
  580. ld->RemoveLoop(loop_1_);
  581. // Kill unnecessary instructions and remove all empty blocks.
  582. for (auto inst : instr_to_delete) {
  583. context_->KillInst(inst);
  584. }
  585. containing_function_->RemoveEmptyBlocks();
  586. // Invalidate analyses.
  587. context_->InvalidateAnalysesExceptFor(
  588. IRContext::Analysis::kAnalysisInstrToBlockMapping |
  589. IRContext::Analysis::kAnalysisLoopAnalysis |
  590. IRContext::Analysis::kAnalysisDefUse | IRContext::Analysis::kAnalysisCFG);
  591. }
  592. } // namespace opt
  593. } // namespace spvtools