loop_fusion.cpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730
  1. // Copyright (c) 2018 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "source/opt/loop_fusion.h"
  15. #include <algorithm>
  16. #include <vector>
  17. #include "source/opt/ir_context.h"
  18. #include "source/opt/loop_dependence.h"
  19. #include "source/opt/loop_descriptor.h"
  20. namespace spvtools {
  21. namespace opt {
  22. namespace {
  23. // Append all the loops nested in |loop| to |loops|.
  24. void CollectChildren(Loop* loop, std::vector<const Loop*>* loops) {
  25. for (auto child : *loop) {
  26. loops->push_back(child);
  27. if (child->NumImmediateChildren() != 0) {
  28. CollectChildren(child, loops);
  29. }
  30. }
  31. }
  32. // Return the set of locations accessed by |stores| and |loads|.
  33. std::set<Instruction*> GetLocationsAccessed(
  34. const std::map<Instruction*, std::vector<Instruction*>>& stores,
  35. const std::map<Instruction*, std::vector<Instruction*>>& loads) {
  36. std::set<Instruction*> locations{};
  37. for (const auto& kv : stores) {
  38. locations.insert(std::get<0>(kv));
  39. }
  40. for (const auto& kv : loads) {
  41. locations.insert(std::get<0>(kv));
  42. }
  43. return locations;
  44. }
  45. // Append all dependences from |sources| to |destinations| to |dependences|.
  46. void GetDependences(std::vector<DistanceVector>* dependences,
  47. LoopDependenceAnalysis* analysis,
  48. const std::vector<Instruction*>& sources,
  49. const std::vector<Instruction*>& destinations,
  50. size_t num_entries) {
  51. for (auto source : sources) {
  52. for (auto destination : destinations) {
  53. DistanceVector dist(num_entries);
  54. if (!analysis->GetDependence(source, destination, &dist)) {
  55. dependences->push_back(dist);
  56. }
  57. }
  58. }
  59. }
  60. // Apped all instructions in |block| to |instructions|.
  61. void AddInstructionsInBlock(std::vector<Instruction*>* instructions,
  62. BasicBlock* block) {
  63. for (auto& inst : *block) {
  64. instructions->push_back(&inst);
  65. }
  66. instructions->push_back(block->GetLabelInst());
  67. }
  68. } // namespace
  69. bool LoopFusion::UsedInContinueOrConditionBlock(Instruction* phi_instruction,
  70. Loop* loop) {
  71. auto condition_block = loop->FindConditionBlock()->id();
  72. auto continue_block = loop->GetContinueBlock()->id();
  73. auto not_used = context_->get_def_use_mgr()->WhileEachUser(
  74. phi_instruction,
  75. [this, condition_block, continue_block](Instruction* instruction) {
  76. auto block_id = context_->get_instr_block(instruction)->id();
  77. return block_id != condition_block && block_id != continue_block;
  78. });
  79. return !not_used;
  80. }
  81. void LoopFusion::RemoveIfNotUsedContinueOrConditionBlock(
  82. std::vector<Instruction*>* instructions, Loop* loop) {
  83. instructions->erase(
  84. std::remove_if(std::begin(*instructions), std::end(*instructions),
  85. [this, loop](Instruction* instruction) {
  86. return !UsedInContinueOrConditionBlock(instruction,
  87. loop);
  88. }),
  89. std::end(*instructions));
  90. }
  91. bool LoopFusion::AreCompatible() {
  92. // Check that the loops are in the same function.
  93. if (loop_0_->GetHeaderBlock()->GetParent() !=
  94. loop_1_->GetHeaderBlock()->GetParent()) {
  95. return false;
  96. }
  97. // Check that both loops have pre-header blocks.
  98. if (!loop_0_->GetPreHeaderBlock() || !loop_1_->GetPreHeaderBlock()) {
  99. return false;
  100. }
  101. // Check there are no breaks.
  102. if (context_->cfg()->preds(loop_0_->GetMergeBlock()->id()).size() != 1 ||
  103. context_->cfg()->preds(loop_1_->GetMergeBlock()->id()).size() != 1) {
  104. return false;
  105. }
  106. // Check there are no continues.
  107. if (context_->cfg()->preds(loop_0_->GetContinueBlock()->id()).size() != 1 ||
  108. context_->cfg()->preds(loop_1_->GetContinueBlock()->id()).size() != 1) {
  109. return false;
  110. }
  111. // |GetInductionVariables| returns all OpPhi in the header. Check that both
  112. // loops have exactly one that is used in the continue and condition blocks.
  113. std::vector<Instruction*> inductions_0{}, inductions_1{};
  114. loop_0_->GetInductionVariables(inductions_0);
  115. RemoveIfNotUsedContinueOrConditionBlock(&inductions_0, loop_0_);
  116. if (inductions_0.size() != 1) {
  117. return false;
  118. }
  119. induction_0_ = inductions_0.front();
  120. loop_1_->GetInductionVariables(inductions_1);
  121. RemoveIfNotUsedContinueOrConditionBlock(&inductions_1, loop_1_);
  122. if (inductions_1.size() != 1) {
  123. return false;
  124. }
  125. induction_1_ = inductions_1.front();
  126. if (!CheckInit()) {
  127. return false;
  128. }
  129. if (!CheckCondition()) {
  130. return false;
  131. }
  132. if (!CheckStep()) {
  133. return false;
  134. }
  135. // Check adjacency, |loop_0_| should come just before |loop_1_|.
  136. // There is always at least one block between loops, even if it's empty.
  137. // We'll check at most 2 preceeding blocks.
  138. auto pre_header_1 = loop_1_->GetPreHeaderBlock();
  139. std::vector<BasicBlock*> block_to_check{};
  140. block_to_check.push_back(pre_header_1);
  141. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) {
  142. // Follow CFG for one more block.
  143. auto preds = context_->cfg()->preds(pre_header_1->id());
  144. if (preds.size() == 1) {
  145. auto block = &*containing_function_->FindBlock(preds.front());
  146. if (block == loop_0_->GetMergeBlock()) {
  147. block_to_check.push_back(block);
  148. } else {
  149. return false;
  150. }
  151. } else {
  152. return false;
  153. }
  154. }
  155. // Check that the separating blocks are either empty or only contains a store
  156. // to a local variable that is never read (left behind by
  157. // '--eliminate-local-multi-store'). Also allow OpPhi, since the loop could be
  158. // in LCSSA form.
  159. for (auto block : block_to_check) {
  160. for (auto& inst : *block) {
  161. if (inst.opcode() == SpvOpStore) {
  162. // Get the definition of the target to check it's function scope so
  163. // there are no observable side effects.
  164. auto variable =
  165. context_->get_def_use_mgr()->GetDef(inst.GetSingleWordInOperand(0));
  166. if (variable->opcode() != SpvOpVariable ||
  167. variable->GetSingleWordInOperand(0) != SpvStorageClassFunction) {
  168. return false;
  169. }
  170. // Check the target is never loaded.
  171. auto is_used = false;
  172. context_->get_def_use_mgr()->ForEachUse(
  173. inst.GetSingleWordInOperand(0),
  174. [&is_used](Instruction* use_inst, uint32_t) {
  175. if (use_inst->opcode() == SpvOpLoad) {
  176. is_used = true;
  177. }
  178. });
  179. if (is_used) {
  180. return false;
  181. }
  182. } else if (inst.opcode() == SpvOpPhi) {
  183. if (inst.NumInOperands() != 2) {
  184. return false;
  185. }
  186. } else if (inst.opcode() != SpvOpBranch) {
  187. return false;
  188. }
  189. }
  190. }
  191. return true;
  192. } // namespace opt
  193. bool LoopFusion::ContainsBarriersOrFunctionCalls(Loop* loop) {
  194. for (const auto& block : loop->GetBlocks()) {
  195. for (const auto& inst : *containing_function_->FindBlock(block)) {
  196. auto opcode = inst.opcode();
  197. if (opcode == SpvOpFunctionCall || opcode == SpvOpControlBarrier ||
  198. opcode == SpvOpMemoryBarrier || opcode == SpvOpTypeNamedBarrier ||
  199. opcode == SpvOpNamedBarrierInitialize ||
  200. opcode == SpvOpMemoryNamedBarrier) {
  201. return true;
  202. }
  203. }
  204. }
  205. return false;
  206. }
  207. bool LoopFusion::CheckInit() {
  208. int64_t loop_0_init;
  209. if (!loop_0_->GetInductionInitValue(induction_0_, &loop_0_init)) {
  210. return false;
  211. }
  212. int64_t loop_1_init;
  213. if (!loop_1_->GetInductionInitValue(induction_1_, &loop_1_init)) {
  214. return false;
  215. }
  216. if (loop_0_init != loop_1_init) {
  217. return false;
  218. }
  219. return true;
  220. }
  221. bool LoopFusion::CheckCondition() {
  222. auto condition_0 = loop_0_->GetConditionInst();
  223. auto condition_1 = loop_1_->GetConditionInst();
  224. if (!loop_0_->IsSupportedCondition(condition_0->opcode()) ||
  225. !loop_1_->IsSupportedCondition(condition_1->opcode())) {
  226. return false;
  227. }
  228. if (condition_0->opcode() != condition_1->opcode()) {
  229. return false;
  230. }
  231. for (uint32_t i = 0; i < condition_0->NumInOperandWords(); ++i) {
  232. auto arg_0 = context_->get_def_use_mgr()->GetDef(
  233. condition_0->GetSingleWordInOperand(i));
  234. auto arg_1 = context_->get_def_use_mgr()->GetDef(
  235. condition_1->GetSingleWordInOperand(i));
  236. if (arg_0 == induction_0_ && arg_1 == induction_1_) {
  237. continue;
  238. }
  239. if (arg_0 == induction_0_ && arg_1 != induction_1_) {
  240. return false;
  241. }
  242. if (arg_1 == induction_1_ && arg_0 != induction_0_) {
  243. return false;
  244. }
  245. if (arg_0 != arg_1) {
  246. return false;
  247. }
  248. }
  249. return true;
  250. }
  251. bool LoopFusion::CheckStep() {
  252. auto scalar_analysis = context_->GetScalarEvolutionAnalysis();
  253. SENode* induction_node_0 = scalar_analysis->SimplifyExpression(
  254. scalar_analysis->AnalyzeInstruction(induction_0_));
  255. if (!induction_node_0->AsSERecurrentNode()) {
  256. return false;
  257. }
  258. SENode* induction_step_0 =
  259. induction_node_0->AsSERecurrentNode()->GetCoefficient();
  260. if (!induction_step_0->AsSEConstantNode()) {
  261. return false;
  262. }
  263. SENode* induction_node_1 = scalar_analysis->SimplifyExpression(
  264. scalar_analysis->AnalyzeInstruction(induction_1_));
  265. if (!induction_node_1->AsSERecurrentNode()) {
  266. return false;
  267. }
  268. SENode* induction_step_1 =
  269. induction_node_1->AsSERecurrentNode()->GetCoefficient();
  270. if (!induction_step_1->AsSEConstantNode()) {
  271. return false;
  272. }
  273. if (*induction_step_0 != *induction_step_1) {
  274. return false;
  275. }
  276. return true;
  277. }
  278. std::map<Instruction*, std::vector<Instruction*>> LoopFusion::LocationToMemOps(
  279. const std::vector<Instruction*>& mem_ops) {
  280. std::map<Instruction*, std::vector<Instruction*>> location_map{};
  281. for (auto instruction : mem_ops) {
  282. auto access_location = context_->get_def_use_mgr()->GetDef(
  283. instruction->GetSingleWordInOperand(0));
  284. while (access_location->opcode() == SpvOpAccessChain) {
  285. access_location = context_->get_def_use_mgr()->GetDef(
  286. access_location->GetSingleWordInOperand(0));
  287. }
  288. location_map[access_location].push_back(instruction);
  289. }
  290. return location_map;
  291. }
  292. std::pair<std::vector<Instruction*>, std::vector<Instruction*>>
  293. LoopFusion::GetLoadsAndStoresInLoop(Loop* loop) {
  294. std::vector<Instruction*> loads{};
  295. std::vector<Instruction*> stores{};
  296. for (auto block_id : loop->GetBlocks()) {
  297. if (block_id == loop->GetContinueBlock()->id()) {
  298. continue;
  299. }
  300. for (auto& instruction : *containing_function_->FindBlock(block_id)) {
  301. if (instruction.opcode() == SpvOpLoad) {
  302. loads.push_back(&instruction);
  303. } else if (instruction.opcode() == SpvOpStore) {
  304. stores.push_back(&instruction);
  305. }
  306. }
  307. }
  308. return std::make_pair(loads, stores);
  309. }
  310. bool LoopFusion::IsUsedInLoop(Instruction* instruction, Loop* loop) {
  311. auto not_used = context_->get_def_use_mgr()->WhileEachUser(
  312. instruction, [this, loop](Instruction* user) {
  313. auto block_id = context_->get_instr_block(user)->id();
  314. return !loop->IsInsideLoop(block_id);
  315. });
  316. return !not_used;
  317. }
  318. bool LoopFusion::IsLegal() {
  319. assert(AreCompatible() && "Fusion can't be legal, loops are not compatible.");
  320. // Bail out if there are function calls as they could have side-effects that
  321. // cause dependencies or if there are any barriers.
  322. if (ContainsBarriersOrFunctionCalls(loop_0_) ||
  323. ContainsBarriersOrFunctionCalls(loop_1_)) {
  324. return false;
  325. }
  326. std::vector<Instruction*> phi_instructions{};
  327. loop_0_->GetInductionVariables(phi_instructions);
  328. // Check no OpPhi in |loop_0_| is used in |loop_1_|.
  329. for (auto phi_instruction : phi_instructions) {
  330. if (IsUsedInLoop(phi_instruction, loop_1_)) {
  331. return false;
  332. }
  333. }
  334. // Check no LCSSA OpPhi in merge block of |loop_0_| is used in |loop_1_|.
  335. auto phi_used = false;
  336. loop_0_->GetMergeBlock()->ForEachPhiInst(
  337. [this, &phi_used](Instruction* phi_instruction) {
  338. phi_used |= IsUsedInLoop(phi_instruction, loop_1_);
  339. });
  340. if (phi_used) {
  341. return false;
  342. }
  343. // Grab loads & stores from both loops.
  344. auto loads_stores_0 = GetLoadsAndStoresInLoop(loop_0_);
  345. auto loads_stores_1 = GetLoadsAndStoresInLoop(loop_1_);
  346. // Build memory location to operation maps.
  347. auto load_locs_0 = LocationToMemOps(std::get<0>(loads_stores_0));
  348. auto store_locs_0 = LocationToMemOps(std::get<1>(loads_stores_0));
  349. auto load_locs_1 = LocationToMemOps(std::get<0>(loads_stores_1));
  350. auto store_locs_1 = LocationToMemOps(std::get<1>(loads_stores_1));
  351. // Get the locations accessed in both loops.
  352. auto locations_0 = GetLocationsAccessed(store_locs_0, load_locs_0);
  353. auto locations_1 = GetLocationsAccessed(store_locs_1, load_locs_1);
  354. std::vector<Instruction*> potential_clashes{};
  355. std::set_intersection(std::begin(locations_0), std::end(locations_0),
  356. std::begin(locations_1), std::end(locations_1),
  357. std::back_inserter(potential_clashes));
  358. // If the loops don't access the same variables, the fusion is legal.
  359. if (potential_clashes.empty()) {
  360. return true;
  361. }
  362. // Find variables that have at least one store.
  363. std::vector<Instruction*> potential_clashes_with_stores{};
  364. for (auto location : potential_clashes) {
  365. if (store_locs_0.find(location) != std::end(store_locs_0) ||
  366. store_locs_1.find(location) != std::end(store_locs_1)) {
  367. potential_clashes_with_stores.push_back(location);
  368. }
  369. }
  370. // If there are only loads to the same variables, the fusion is legal.
  371. if (potential_clashes_with_stores.empty()) {
  372. return true;
  373. }
  374. // Else if loads and at least one store (across loops) to the same variable
  375. // there is a potential dependence and we need to check the dependence
  376. // distance.
  377. // Find all the loops in this loop nest for the dependency analysis.
  378. std::vector<const Loop*> loops{};
  379. // Find the parents.
  380. for (auto current_loop = loop_0_; current_loop != nullptr;
  381. current_loop = current_loop->GetParent()) {
  382. loops.push_back(current_loop);
  383. }
  384. auto this_loop_position = loops.size() - 1;
  385. std::reverse(std::begin(loops), std::end(loops));
  386. // Find the children.
  387. CollectChildren(loop_0_, &loops);
  388. CollectChildren(loop_1_, &loops);
  389. // Check that any dependes created are legal. That means the fused loops do
  390. // not have any dependencies with dependence distance greater than 0 that did
  391. // not exist in the original loops.
  392. LoopDependenceAnalysis analysis(context_, loops);
  393. analysis.GetScalarEvolution()->AddLoopsToPretendAreTheSame(
  394. {loop_0_, loop_1_});
  395. for (auto location : potential_clashes_with_stores) {
  396. // Analyse dependences from |loop_0_| to |loop_1_|.
  397. std::vector<DistanceVector> dependences;
  398. // Read-After-Write.
  399. GetDependences(&dependences, &analysis, store_locs_0[location],
  400. load_locs_1[location], loops.size());
  401. // Write-After-Read.
  402. GetDependences(&dependences, &analysis, load_locs_0[location],
  403. store_locs_1[location], loops.size());
  404. // Write-After-Write.
  405. GetDependences(&dependences, &analysis, store_locs_0[location],
  406. store_locs_1[location], loops.size());
  407. // Check that the induction variables either don't appear in the subscripts
  408. // or the dependence distance is negative.
  409. for (const auto& dependence : dependences) {
  410. const auto& entry = dependence.GetEntries()[this_loop_position];
  411. if ((entry.dependence_information ==
  412. DistanceEntry::DependenceInformation::DISTANCE &&
  413. entry.distance < 1) ||
  414. (entry.dependence_information ==
  415. DistanceEntry::DependenceInformation::IRRELEVANT)) {
  416. continue;
  417. } else {
  418. return false;
  419. }
  420. }
  421. }
  422. return true;
  423. }
  424. void ReplacePhiParentWith(Instruction* inst, uint32_t orig_block,
  425. uint32_t new_block) {
  426. if (inst->GetSingleWordInOperand(1) == orig_block) {
  427. inst->SetInOperand(1, {new_block});
  428. } else {
  429. inst->SetInOperand(3, {new_block});
  430. }
  431. }
  432. void LoopFusion::Fuse() {
  433. assert(AreCompatible() && "Can't fuse, loops aren't compatible");
  434. assert(IsLegal() && "Can't fuse, illegal");
  435. // Save the pointers/ids, won't be found in the middle of doing modifications.
  436. auto header_1 = loop_1_->GetHeaderBlock()->id();
  437. auto condition_1 = loop_1_->FindConditionBlock()->id();
  438. auto continue_1 = loop_1_->GetContinueBlock()->id();
  439. auto continue_0 = loop_0_->GetContinueBlock()->id();
  440. auto condition_block_of_0 = loop_0_->FindConditionBlock();
  441. // Find the blocks whose branches need updating.
  442. auto first_block_of_1 = &*(++containing_function_->FindBlock(condition_1));
  443. auto last_block_of_1 = &*(--containing_function_->FindBlock(continue_1));
  444. auto last_block_of_0 = &*(--containing_function_->FindBlock(continue_0));
  445. // Update the branch for |last_block_of_loop_0| to go to |first_block_of_1|.
  446. last_block_of_0->ForEachSuccessorLabel(
  447. [first_block_of_1](uint32_t* succ) { *succ = first_block_of_1->id(); });
  448. // Update the branch for the |last_block_of_loop_1| to go to the continue
  449. // block of |loop_0_|.
  450. last_block_of_1->ForEachSuccessorLabel(
  451. [this](uint32_t* succ) { *succ = loop_0_->GetContinueBlock()->id(); });
  452. // Update merge block id in the header of |loop_0_| to the merge block of
  453. // |loop_1_|.
  454. loop_0_->GetHeaderBlock()->ForEachInst([this](Instruction* inst) {
  455. if (inst->opcode() == SpvOpLoopMerge) {
  456. inst->SetInOperand(0, {loop_1_->GetMergeBlock()->id()});
  457. }
  458. });
  459. // Update condition branch target in |loop_0_| to the merge block of
  460. // |loop_1_|.
  461. condition_block_of_0->ForEachInst([this](Instruction* inst) {
  462. if (inst->opcode() == SpvOpBranchConditional) {
  463. auto loop_0_merge_block_id = loop_0_->GetMergeBlock()->id();
  464. if (inst->GetSingleWordInOperand(1) == loop_0_merge_block_id) {
  465. inst->SetInOperand(1, {loop_1_->GetMergeBlock()->id()});
  466. } else {
  467. inst->SetInOperand(2, {loop_1_->GetMergeBlock()->id()});
  468. }
  469. }
  470. });
  471. // Move OpPhi instructions not corresponding to the induction variable from
  472. // the header of |loop_1_| to the header of |loop_0_|.
  473. std::vector<Instruction*> instructions_to_move{};
  474. for (auto& instruction : *loop_1_->GetHeaderBlock()) {
  475. if (instruction.opcode() == SpvOpPhi && &instruction != induction_1_) {
  476. instructions_to_move.push_back(&instruction);
  477. }
  478. }
  479. for (auto& it : instructions_to_move) {
  480. it->RemoveFromList();
  481. it->InsertBefore(induction_0_);
  482. }
  483. // Update the OpPhi parents to the correct blocks in |loop_0_|.
  484. loop_0_->GetHeaderBlock()->ForEachPhiInst([this](Instruction* i) {
  485. ReplacePhiParentWith(i, loop_1_->GetPreHeaderBlock()->id(),
  486. loop_0_->GetPreHeaderBlock()->id());
  487. ReplacePhiParentWith(i, loop_1_->GetContinueBlock()->id(),
  488. loop_0_->GetContinueBlock()->id());
  489. });
  490. // Update instruction to block mapping & DefUseManager.
  491. for (auto& phi_instruction : instructions_to_move) {
  492. context_->set_instr_block(phi_instruction, loop_0_->GetHeaderBlock());
  493. context_->get_def_use_mgr()->AnalyzeInstUse(phi_instruction);
  494. }
  495. // Replace the uses of the induction variable of |loop_1_| with that the
  496. // induction variable of |loop_0_|.
  497. context_->ReplaceAllUsesWith(induction_1_->result_id(),
  498. induction_0_->result_id());
  499. // Replace LCSSA OpPhi in merge block of |loop_0_|.
  500. loop_0_->GetMergeBlock()->ForEachPhiInst([this](Instruction* instruction) {
  501. context_->ReplaceAllUsesWith(instruction->result_id(),
  502. instruction->GetSingleWordInOperand(0));
  503. });
  504. // Update LCSSA OpPhi in merge block of |loop_1_|.
  505. loop_1_->GetMergeBlock()->ForEachPhiInst(
  506. [condition_block_of_0](Instruction* instruction) {
  507. instruction->SetInOperand(1, {condition_block_of_0->id()});
  508. });
  509. // Move the continue block of |loop_0_| after the last block of |loop_1_|.
  510. containing_function_->MoveBasicBlockToAfter(continue_0, last_block_of_1);
  511. // Gather all instructions to be killed from |loop_1_| (induction variable
  512. // initialisation, header, condition and continue blocks).
  513. std::vector<Instruction*> instr_to_delete{};
  514. AddInstructionsInBlock(&instr_to_delete, loop_1_->GetPreHeaderBlock());
  515. AddInstructionsInBlock(&instr_to_delete, loop_1_->GetHeaderBlock());
  516. AddInstructionsInBlock(&instr_to_delete, loop_1_->FindConditionBlock());
  517. AddInstructionsInBlock(&instr_to_delete, loop_1_->GetContinueBlock());
  518. // There was an additional empty block between the loops, kill that too.
  519. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) {
  520. AddInstructionsInBlock(&instr_to_delete, loop_0_->GetMergeBlock());
  521. }
  522. // Update the CFG, so it wouldn't need invalidating.
  523. auto cfg = context_->cfg();
  524. cfg->ForgetBlock(loop_1_->GetPreHeaderBlock());
  525. cfg->ForgetBlock(loop_1_->GetHeaderBlock());
  526. cfg->ForgetBlock(loop_1_->FindConditionBlock());
  527. cfg->ForgetBlock(loop_1_->GetContinueBlock());
  528. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) {
  529. cfg->ForgetBlock(loop_0_->GetMergeBlock());
  530. }
  531. cfg->RemoveEdge(last_block_of_0->id(), loop_0_->GetContinueBlock()->id());
  532. cfg->AddEdge(last_block_of_0->id(), first_block_of_1->id());
  533. cfg->AddEdge(last_block_of_1->id(), loop_0_->GetContinueBlock()->id());
  534. cfg->AddEdge(loop_0_->GetContinueBlock()->id(),
  535. loop_1_->GetHeaderBlock()->id());
  536. cfg->AddEdge(condition_block_of_0->id(), loop_1_->GetMergeBlock()->id());
  537. // Update DefUseManager.
  538. auto def_use_mgr = context_->get_def_use_mgr();
  539. // Uses of labels that are in updated branches need analysing.
  540. def_use_mgr->AnalyzeInstUse(last_block_of_0->terminator());
  541. def_use_mgr->AnalyzeInstUse(last_block_of_1->terminator());
  542. def_use_mgr->AnalyzeInstUse(loop_0_->GetHeaderBlock()->GetLoopMergeInst());
  543. def_use_mgr->AnalyzeInstUse(condition_block_of_0->terminator());
  544. // Update the LoopDescriptor, so it wouldn't need invalidating.
  545. auto ld = context_->GetLoopDescriptor(containing_function_);
  546. // Create a copy, so the iterator wouldn't be invalidated.
  547. std::vector<Loop*> loops_to_add_remove{};
  548. for (auto child_loop : *loop_1_) {
  549. loops_to_add_remove.push_back(child_loop);
  550. }
  551. for (auto child_loop : loops_to_add_remove) {
  552. loop_1_->RemoveChildLoop(child_loop);
  553. loop_0_->AddNestedLoop(child_loop);
  554. }
  555. auto loop_1_blocks = loop_1_->GetBlocks();
  556. for (auto block : loop_1_blocks) {
  557. loop_1_->RemoveBasicBlock(block);
  558. if (block != header_1 && block != condition_1 && block != continue_1) {
  559. loop_0_->AddBasicBlock(block);
  560. if ((*ld)[block] == loop_1_) {
  561. ld->SetBasicBlockToLoop(block, loop_0_);
  562. }
  563. }
  564. if ((*ld)[block] == loop_1_) {
  565. ld->ForgetBasicBlock(block);
  566. }
  567. }
  568. loop_1_->RemoveBasicBlock(loop_1_->GetPreHeaderBlock()->id());
  569. ld->ForgetBasicBlock(loop_1_->GetPreHeaderBlock()->id());
  570. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) {
  571. loop_0_->RemoveBasicBlock(loop_0_->GetMergeBlock()->id());
  572. ld->ForgetBasicBlock(loop_0_->GetMergeBlock()->id());
  573. }
  574. loop_0_->SetMergeBlock(loop_1_->GetMergeBlock());
  575. loop_1_->ClearBlocks();
  576. ld->RemoveLoop(loop_1_);
  577. // Kill unnessecary instructions and remove all empty blocks.
  578. for (auto inst : instr_to_delete) {
  579. context_->KillInst(inst);
  580. }
  581. containing_function_->RemoveEmptyBlocks();
  582. // Invalidate analyses.
  583. context_->InvalidateAnalysesExceptFor(
  584. IRContext::Analysis::kAnalysisInstrToBlockMapping |
  585. IRContext::Analysis::kAnalysisLoopAnalysis |
  586. IRContext::Analysis::kAnalysisDefUse | IRContext::Analysis::kAnalysisCFG);
  587. }
  588. } // namespace opt
  589. } // namespace spvtools