// Copyright (c) 2017 The Khronos Group Inc.
// Copyright (c) 2017 Valve Corporation
// Copyright (c) 2017 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "source/opt/inline_pass.h"

#include <list>
#include <unordered_set>
#include <utility>

#include "source/cfa.h"
#include "source/util/make_unique.h"

// Indices of operands in SPIR-V instructions
static const int kSpvFunctionCallFunctionId = 2;
static const int kSpvFunctionCallArgumentId = 3;
static const int kSpvReturnValueId = 0;
static const int kSpvLoopMergeMergeBlockId = 0;
static const int kSpvLoopMergeContinueTargetIdInIdx = 1;
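// Note on indexing: the kSpvFunctionCall* constants index
// Instruction::GetSingleWordOperand, which counts the result type and result
// id as operands 0 and 1, so the callee id of an OpFunctionCall is operand 2
// and its first argument is operand 3. OpReturnValue and OpLoopMerge produce
// no result, so for them whole-operand and in-operand ("InIdx") indices
// coincide.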

namespace spvtools {
namespace opt {

uint32_t InlinePass::AddPointerToType(uint32_t type_id,
                                      SpvStorageClass storage_class) {
  uint32_t resultId = TakeNextId();
  std::unique_ptr<Instruction> type_inst(
      new Instruction(context(), SpvOpTypePointer, 0, resultId,
                      {{spv_operand_type_t::SPV_OPERAND_TYPE_STORAGE_CLASS,
                        {uint32_t(storage_class)}},
                       {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {type_id}}}));
  context()->AddType(std::move(type_inst));

  analysis::Type* pointeeTy;
  std::unique_ptr<analysis::Pointer> pointerTy;
  std::tie(pointeeTy, pointerTy) =
      context()->get_type_mgr()->GetTypeAndPointerType(type_id,
                                                       SpvStorageClassFunction);
  context()->get_type_mgr()->RegisterType(resultId, *pointerTy);
  return resultId;
}

void InlinePass::AddBranch(uint32_t label_id,
                           std::unique_ptr<BasicBlock>* block_ptr) {
  std::unique_ptr<Instruction> newBranch(
      new Instruction(context(), SpvOpBranch, 0, 0,
                      {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {label_id}}}));
  (*block_ptr)->AddInstruction(std::move(newBranch));
}

void InlinePass::AddBranchCond(uint32_t cond_id, uint32_t true_id,
                               uint32_t false_id,
                               std::unique_ptr<BasicBlock>* block_ptr) {
  std::unique_ptr<Instruction> newBranch(
      new Instruction(context(), SpvOpBranchConditional, 0, 0,
                      {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {cond_id}},
                       {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {true_id}},
                       {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {false_id}}}));
  (*block_ptr)->AddInstruction(std::move(newBranch));
}

void InlinePass::AddLoopMerge(uint32_t merge_id, uint32_t continue_id,
                              std::unique_ptr<BasicBlock>* block_ptr) {
  std::unique_ptr<Instruction> newLoopMerge(new Instruction(
      context(), SpvOpLoopMerge, 0, 0,
      {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {merge_id}},
       {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {continue_id}},
       {spv_operand_type_t::SPV_OPERAND_TYPE_LOOP_CONTROL, {0}}}));
  (*block_ptr)->AddInstruction(std::move(newLoopMerge));
}

void InlinePass::AddStore(uint32_t ptr_id, uint32_t val_id,
                          std::unique_ptr<BasicBlock>* block_ptr) {
  std::unique_ptr<Instruction> newStore(
      new Instruction(context(), SpvOpStore, 0, 0,
                      {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {ptr_id}},
                       {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {val_id}}}));
  (*block_ptr)->AddInstruction(std::move(newStore));
}

void InlinePass::AddLoad(uint32_t type_id, uint32_t resultId, uint32_t ptr_id,
                         std::unique_ptr<BasicBlock>* block_ptr) {
  std::unique_ptr<Instruction> newLoad(
      new Instruction(context(), SpvOpLoad, type_id, resultId,
                      {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {ptr_id}}}));
  (*block_ptr)->AddInstruction(std::move(newLoad));
}

std::unique_ptr<Instruction> InlinePass::NewLabel(uint32_t label_id) {
  std::unique_ptr<Instruction> newLabel(
      new Instruction(context(), SpvOpLabel, 0, label_id, {}));
  return newLabel;
}
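
// The false constant returned here is the branch condition that ties the
// synthesized single-trip loop together: the loop's continue block ends with
// OpBranchConditional %false %header %merge (see GenInlineCode), so the back
// edge exists structurally but is never taken at runtime. The bool type and
// the constant are created on demand if the module lacks them.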
uint32_t InlinePass::GetFalseId() {
  if (false_id_ != 0) return false_id_;
  false_id_ = get_module()->GetGlobalValue(SpvOpConstantFalse);
  if (false_id_ != 0) return false_id_;
  uint32_t boolId = get_module()->GetGlobalValue(SpvOpTypeBool);
  if (boolId == 0) {
    boolId = TakeNextId();
    get_module()->AddGlobalValue(SpvOpTypeBool, boolId, 0);
  }
  false_id_ = TakeNextId();
  get_module()->AddGlobalValue(SpvOpConstantFalse, false_id_, boolId);
  return false_id_;
}
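
// For example, given a call
//   %r = OpFunctionCall %int %foo %a %b
// and a callee declaring parameters %p0 and %p1, MapParams below records
// %p0 -> %a and %p1 -> %b, so uses of the formals are rewritten to the
// actual arguments when the callee body is cloned into the caller.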
void InlinePass::MapParams(
    Function* calleeFn, BasicBlock::iterator call_inst_itr,
    std::unordered_map<uint32_t, uint32_t>* callee2caller) {
  int param_idx = 0;
  calleeFn->ForEachParam([&call_inst_itr, &param_idx,
                          &callee2caller](const Instruction* cpi) {
    const uint32_t pid = cpi->result_id();
    (*callee2caller)[pid] = call_inst_itr->GetSingleWordOperand(
        kSpvFunctionCallArgumentId + param_idx);
    ++param_idx;
  });
}

void InlinePass::CloneAndMapLocals(
    Function* calleeFn, std::vector<std::unique_ptr<Instruction>>* new_vars,
    std::unordered_map<uint32_t, uint32_t>* callee2caller) {
  auto callee_block_itr = calleeFn->begin();
  auto callee_var_itr = callee_block_itr->begin();
  while (callee_var_itr->opcode() == SpvOp::SpvOpVariable) {
    std::unique_ptr<Instruction> var_inst(callee_var_itr->Clone(context()));
    uint32_t newId = TakeNextId();
    get_decoration_mgr()->CloneDecorations(callee_var_itr->result_id(), newId);
    var_inst->SetResultId(newId);
    (*callee2caller)[callee_var_itr->result_id()] = newId;
    new_vars->push_back(std::move(var_inst));
    ++callee_var_itr;
  }
}

uint32_t InlinePass::CreateReturnVar(
    Function* calleeFn, std::vector<std::unique_ptr<Instruction>>* new_vars) {
  uint32_t returnVarId = 0;
  const uint32_t calleeTypeId = calleeFn->type_id();
  analysis::Type* calleeType = context()->get_type_mgr()->GetType(calleeTypeId);
  if (calleeType->AsVoid() == nullptr) {
    // Find or create ptr to callee return type.
    uint32_t returnVarTypeId = context()->get_type_mgr()->FindPointerToType(
        calleeTypeId, SpvStorageClassFunction);
    if (returnVarTypeId == 0)
      returnVarTypeId = AddPointerToType(calleeTypeId, SpvStorageClassFunction);
    // Add return var to new function scope variables.
    returnVarId = TakeNextId();
    std::unique_ptr<Instruction> var_inst(
        new Instruction(context(), SpvOpVariable, returnVarTypeId, returnVarId,
                        {{spv_operand_type_t::SPV_OPERAND_TYPE_STORAGE_CLASS,
                          {SpvStorageClassFunction}}}));
    new_vars->push_back(std::move(var_inst));
  }
  get_decoration_mgr()->CloneDecorations(calleeFn->result_id(), returnVarId);
  return returnVarId;
}

bool InlinePass::IsSameBlockOp(const Instruction* inst) const {
  return inst->opcode() == SpvOpSampledImage || inst->opcode() == SpvOpImage;
}
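
// SPIR-V requires the result of OpSampledImage to be consumed in the same
// basic block in which it is defined (this pass treats OpImage the same way,
// as IsSameBlockOp above shows). Inlining can split a block at the call
// site, separating such a definition from a later use; CloneSameBlockOps
// re-creates the defining instruction in the block of the use, recursively
// cloning any same-block operands it depends on, so the rule still holds.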
void InlinePass::CloneSameBlockOps(
    std::unique_ptr<Instruction>* inst,
    std::unordered_map<uint32_t, uint32_t>* postCallSB,
    std::unordered_map<uint32_t, Instruction*>* preCallSB,
    std::unique_ptr<BasicBlock>* block_ptr) {
  (*inst)->ForEachInId(
      [&postCallSB, &preCallSB, &block_ptr, this](uint32_t* iid) {
        const auto mapItr = (*postCallSB).find(*iid);
        if (mapItr == (*postCallSB).end()) {
          const auto mapItr2 = (*preCallSB).find(*iid);
          if (mapItr2 != (*preCallSB).end()) {
            // Clone pre-call same-block ops, map result id.
            const Instruction* inInst = mapItr2->second;
            std::unique_ptr<Instruction> sb_inst(inInst->Clone(context()));
            CloneSameBlockOps(&sb_inst, postCallSB, preCallSB, block_ptr);
            const uint32_t rid = sb_inst->result_id();
            const uint32_t nid = this->TakeNextId();
            get_decoration_mgr()->CloneDecorations(rid, nid);
            sb_inst->SetResultId(nid);
            (*postCallSB)[rid] = nid;
            *iid = nid;
            (*block_ptr)->AddInstruction(std::move(sb_inst));
          }
        } else {
          // Reset same-block op operand.
          *iid = mapItr->second;
        }
      });
}
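
// Schematically (a simplified sketch; the real output also remaps ids and
// hoists callee locals into the caller), inlining a callee with one return:
//
//   %r = OpFunctionCall %int %foo %a        ; original call
//
// becomes, in the caller:
//
//   OpStore %foo_result_var %<mapped value> ; from the callee's OpReturnValue
//   %r = OpLoad %int %foo_result_var        ; at the call site
//
// with the callee's blocks spliced in between. If the callee has multiple
// returns, the cloned body is additionally wrapped in a single-trip loop:
// every return becomes a branch to the loop's merge block, and the continue
// block branches back to the header on a constant-false condition.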
void InlinePass::GenInlineCode(
    std::vector<std::unique_ptr<BasicBlock>>* new_blocks,
    std::vector<std::unique_ptr<Instruction>>* new_vars,
    BasicBlock::iterator call_inst_itr,
    UptrVectorIterator<BasicBlock> call_block_itr) {
  // Map from all ids in the callee to their equivalent id in the caller
  // as callee instructions are copied into caller.
  std::unordered_map<uint32_t, uint32_t> callee2caller;
  // Pre-call same-block insts
  std::unordered_map<uint32_t, Instruction*> preCallSB;
  // Post-call same-block op ids
  std::unordered_map<uint32_t, uint32_t> postCallSB;

  // Invalidate the def-use chains. They are not kept up to date while
  // inlining. However, certain calls try to keep them up-to-date if they are
  // valid. These operations can fail.
  context()->InvalidateAnalyses(IRContext::kAnalysisDefUse);

  Function* calleeFn = id2function_[call_inst_itr->GetSingleWordOperand(
      kSpvFunctionCallFunctionId)];

  // Check for multiple returns in the callee.
  auto fi = multi_return_funcs_.find(calleeFn->result_id());
  const bool multiReturn = fi != multi_return_funcs_.end();

  // Map parameters to actual arguments.
  MapParams(calleeFn, call_inst_itr, &callee2caller);

  // Define caller local variables for all callee variables and create map to
  // them.
  CloneAndMapLocals(calleeFn, new_vars, &callee2caller);

  // Create return var if needed.
  uint32_t returnVarId = CreateReturnVar(calleeFn, new_vars);

  // Create set of callee result ids. Used to detect forward references.
  std::unordered_set<uint32_t> callee_result_ids;
  calleeFn->ForEachInst([&callee_result_ids](const Instruction* cpi) {
    const uint32_t rid = cpi->result_id();
    if (rid != 0) callee_result_ids.insert(rid);
  });

  // If the caller is in a single-block loop, and the callee has multiple
  // blocks, then the normal inlining logic will place the OpLoopMerge in
  // the last of several blocks in the loop. Instead, it should be placed
  // at the end of the first block. First determine if the caller is in a
  // single block loop. We'll wait to move the OpLoopMerge until the end
  // of the regular inlining logic, and only if necessary.
  bool caller_is_single_block_loop = false;
  bool caller_is_loop_header = false;
  if (auto* loop_merge = call_block_itr->GetLoopMergeInst()) {
    caller_is_loop_header = true;
    caller_is_single_block_loop =
        call_block_itr->id() ==
        loop_merge->GetSingleWordInOperand(kSpvLoopMergeContinueTargetIdInIdx);
  }

  bool callee_begins_with_structured_header =
      (*(calleeFn->begin())).GetMergeInst() != nullptr;

  // Clone and map callee code. Copy caller block code to beginning of
  // first block and end of last block.
  bool prevInstWasReturn = false;
  uint32_t singleTripLoopHeaderId = 0;
  uint32_t singleTripLoopContinueId = 0;
  uint32_t returnLabelId = 0;
  bool multiBlocks = false;
  const uint32_t calleeTypeId = calleeFn->type_id();
  // new_blk_ptr is a new basic block in the caller. New instructions are
  // written to it. It is created when we encounter the OpLabel
  // of the first callee block. It is appended to new_blocks only when
  // it is complete.
  std::unique_ptr<BasicBlock> new_blk_ptr;
  calleeFn->ForEachInst([&new_blocks, &callee2caller, &call_block_itr,
                         &call_inst_itr, &new_blk_ptr, &prevInstWasReturn,
                         &returnLabelId, &returnVarId, caller_is_loop_header,
                         callee_begins_with_structured_header, &calleeTypeId,
                         &multiBlocks, &postCallSB, &preCallSB, multiReturn,
                         &singleTripLoopHeaderId, &singleTripLoopContinueId,
                         &callee_result_ids, this](const Instruction* cpi) {
    switch (cpi->opcode()) {
      case SpvOpFunction:
      case SpvOpFunctionParameter:
        // Already processed
        break;
      case SpvOpVariable:
        if (cpi->NumInOperands() == 2) {
          assert(callee2caller.count(cpi->result_id()) &&
                 "Expected the variable to have already been mapped.");
          uint32_t new_var_id = callee2caller.at(cpi->result_id());
          // The initializer must be a constant or global value. No mapped
          // id should be used.
          uint32_t val_id = cpi->GetSingleWordInOperand(1);
          AddStore(new_var_id, val_id, &new_blk_ptr);
        }
        break;
      case SpvOpUnreachable:
      case SpvOpKill: {
        // Generate a return label so that we split the block with the function
        // call. Copy the terminator into the new block.
        if (returnLabelId == 0) returnLabelId = this->TakeNextId();
        std::unique_ptr<Instruction> terminator(
            new Instruction(context(), cpi->opcode(), 0, 0, {}));
        new_blk_ptr->AddInstruction(std::move(terminator));
        break;
      }
      case SpvOpLabel: {
        // If previous instruction was early return, insert branch
        // instruction to return block.
        if (prevInstWasReturn) {
          if (returnLabelId == 0) returnLabelId = this->TakeNextId();
          AddBranch(returnLabelId, &new_blk_ptr);
          prevInstWasReturn = false;
        }
        // Finish current block (if it exists) and get label for next block.
        uint32_t labelId;
        bool firstBlock = false;
        if (new_blk_ptr != nullptr) {
          new_blocks->push_back(std::move(new_blk_ptr));
          // If result id is already mapped, use it, otherwise get a new
          // one.
          const uint32_t rid = cpi->result_id();
          const auto mapItr = callee2caller.find(rid);
          labelId = (mapItr != callee2caller.end()) ? mapItr->second
                                                    : this->TakeNextId();
        } else {
          // First block needs to use label of original block
          // but map callee label in case of phi reference.
          labelId = call_block_itr->id();
          callee2caller[cpi->result_id()] = labelId;
          firstBlock = true;
        }
        // Create first/next block.
        new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(labelId));
        if (firstBlock) {
          // Copy contents of original caller block up to call instruction.
          for (auto cii = call_block_itr->begin(); cii != call_inst_itr;
               cii = call_block_itr->begin()) {
            Instruction* inst = &*cii;
            inst->RemoveFromList();
            std::unique_ptr<Instruction> cp_inst(inst);
            // Remember same-block ops for possible regeneration.
            if (IsSameBlockOp(&*cp_inst)) {
              auto* sb_inst_ptr = cp_inst.get();
              preCallSB[cp_inst->result_id()] = sb_inst_ptr;
            }
            new_blk_ptr->AddInstruction(std::move(cp_inst));
          }
          if (caller_is_loop_header && callee_begins_with_structured_header) {
            // We can't place both the caller's merge instruction and another
            // merge instruction in the same block. So split the calling block.
            // Insert an unconditional branch to a new guard block. Later,
            // once we know the ID of the last block, we will move the caller's
            // OpLoopMerge from the last generated block into the first block.
            // We also wait to avoid invalidating various iterators.
            const auto guard_block_id = this->TakeNextId();
            AddBranch(guard_block_id, &new_blk_ptr);
            new_blocks->push_back(std::move(new_blk_ptr));
            // Start the next block.
            new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(guard_block_id));
            // Reset the mapping of the callee's entry block to point to
            // the guard block. Do this so we can fix up phis later on to
            // satisfy dominance.
            callee2caller[cpi->result_id()] = guard_block_id;
          }
          // If callee has multiple returns, insert a header block for
          // single-trip loop that will encompass callee code. Start postheader
          // block.
          //
          // Note: Consider the following combination:
          //  - the caller is a single block loop
          //  - the callee does not begin with a structure header
          //  - the callee has multiple returns.
          // We still need to split the caller block and insert a guard block.
          // But we only need to do it once. We haven't done it yet, but the
          // single-trip loop header will serve the same purpose.
          if (multiReturn) {
            singleTripLoopHeaderId = this->TakeNextId();
            AddBranch(singleTripLoopHeaderId, &new_blk_ptr);
            new_blocks->push_back(std::move(new_blk_ptr));
            new_blk_ptr =
                MakeUnique<BasicBlock>(NewLabel(singleTripLoopHeaderId));
            returnLabelId = this->TakeNextId();
            singleTripLoopContinueId = this->TakeNextId();
            AddLoopMerge(returnLabelId, singleTripLoopContinueId, &new_blk_ptr);
            uint32_t postHeaderId = this->TakeNextId();
            AddBranch(postHeaderId, &new_blk_ptr);
            new_blocks->push_back(std::move(new_blk_ptr));
            new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(postHeaderId));
            multiBlocks = true;
            // Reset the mapping of the callee's entry block to point to
            // the post-header block. Do this so we can fix up phis later
            // on to satisfy dominance.
            callee2caller[cpi->result_id()] = postHeaderId;
          }
        } else {
          multiBlocks = true;
        }
      } break;
      case SpvOpReturnValue: {
        // Store return value to return variable.
        assert(returnVarId != 0);
        uint32_t valId = cpi->GetInOperand(kSpvReturnValueId).words[0];
        const auto mapItr = callee2caller.find(valId);
        if (mapItr != callee2caller.end()) {
          valId = mapItr->second;
        }
        AddStore(returnVarId, valId, &new_blk_ptr);
        // Remember we saw a return; if followed by a label, will need to
        // insert branch.
        prevInstWasReturn = true;
      } break;
      case SpvOpReturn: {
        // Remember we saw a return; if followed by a label, will need to
        // insert branch.
        prevInstWasReturn = true;
      } break;
      case SpvOpFunctionEnd: {
        // If there was an early return, we generated a return label id
        // for it. Now we have to generate the return block with that Id.
        if (returnLabelId != 0) {
          // If previous instruction was return, insert branch instruction
          // to return block.
          if (prevInstWasReturn) AddBranch(returnLabelId, &new_blk_ptr);
          if (multiReturn) {
            // If we generated a loop header for the single-trip loop
            // to accommodate multiple returns, insert the continue
            // target block now, with a false branch back to the loop header.
            new_blocks->push_back(std::move(new_blk_ptr));
            new_blk_ptr =
                MakeUnique<BasicBlock>(NewLabel(singleTripLoopContinueId));
            AddBranchCond(GetFalseId(), singleTripLoopHeaderId, returnLabelId,
                          &new_blk_ptr);
          }
          // Generate the return block.
          new_blocks->push_back(std::move(new_blk_ptr));
          new_blk_ptr = MakeUnique<BasicBlock>(NewLabel(returnLabelId));
          multiBlocks = true;
        }
        // Load return value into result id of call, if it exists.
        if (returnVarId != 0) {
          const uint32_t resId = call_inst_itr->result_id();
          assert(resId != 0);
          AddLoad(calleeTypeId, resId, returnVarId, &new_blk_ptr);
        }
        // Copy remaining instructions from caller block.
        for (Instruction* inst = call_inst_itr->NextNode(); inst;
             inst = call_inst_itr->NextNode()) {
          inst->RemoveFromList();
          std::unique_ptr<Instruction> cp_inst(inst);
          // If multiple blocks generated, regenerate any same-block
          // instruction that has not been seen in this last block.
          if (multiBlocks) {
            CloneSameBlockOps(&cp_inst, &postCallSB, &preCallSB, &new_blk_ptr);
            // Remember same-block ops in this block.
            if (IsSameBlockOp(&*cp_inst)) {
              const uint32_t rid = cp_inst->result_id();
              postCallSB[rid] = rid;
            }
          }
          new_blk_ptr->AddInstruction(std::move(cp_inst));
        }
        // Finalize inline code.
        new_blocks->push_back(std::move(new_blk_ptr));
      } break;
      default: {
        // Copy callee instruction and remap all input Ids.
        std::unique_ptr<Instruction> cp_inst(cpi->Clone(context()));
        cp_inst->ForEachInId([&callee2caller, &callee_result_ids,
                              this](uint32_t* iid) {
          const auto mapItr = callee2caller.find(*iid);
          if (mapItr != callee2caller.end()) {
            *iid = mapItr->second;
          } else if (callee_result_ids.find(*iid) != callee_result_ids.end()) {
            // Forward reference. Allocate a new id, map it,
            // use it and check for it when remapping result ids.
            const uint32_t nid = this->TakeNextId();
            callee2caller[*iid] = nid;
            *iid = nid;
          }
        });
        // If result id is non-zero, remap it. If already mapped, use mapped
        // value, else use next id.
        const uint32_t rid = cp_inst->result_id();
        if (rid != 0) {
          const auto mapItr = callee2caller.find(rid);
          uint32_t nid;
          if (mapItr != callee2caller.end()) {
            nid = mapItr->second;
          } else {
            nid = this->TakeNextId();
            callee2caller[rid] = nid;
          }
          cp_inst->SetResultId(nid);
          get_decoration_mgr()->CloneDecorations(rid, nid);
        }
        new_blk_ptr->AddInstruction(std::move(cp_inst));
      } break;
    }
  });

  if (caller_is_loop_header && (new_blocks->size() > 1)) {
    // Move the OpLoopMerge from the last block back to the first, where
    // it belongs.
    auto& first = new_blocks->front();
    auto& last = new_blocks->back();
    assert(first != last);

    // Insert a modified copy of the loop merge into the first block.
    auto loop_merge_itr = last->tail();
    --loop_merge_itr;
    assert(loop_merge_itr->opcode() == SpvOpLoopMerge);
    std::unique_ptr<Instruction> cp_inst(loop_merge_itr->Clone(context()));
    if (caller_is_single_block_loop) {
      // Also, update its continue target to point to the last block.
      cp_inst->SetInOperand(kSpvLoopMergeContinueTargetIdInIdx, {last->id()});
    }
    first->tail().InsertBefore(std::move(cp_inst));

    // Remove the loop merge from the last block.
    loop_merge_itr->RemoveFromList();
    delete &*loop_merge_itr;
  }

  // Update block map given replacement blocks.
  for (auto& blk : *new_blocks) {
    id2block_[blk->id()] = &*blk;
  }
}

bool InlinePass::IsInlinableFunctionCall(const Instruction* inst) {
  if (inst->opcode() != SpvOp::SpvOpFunctionCall) return false;
  const uint32_t calleeFnId =
      inst->GetSingleWordOperand(kSpvFunctionCallFunctionId);
  const auto ci = inlinable_.find(calleeFnId);
  return ci != inlinable_.cend();
}
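
// After inlining, the caller block that held the call has been split: its
// label now begins the first replacement block, while its original
// terminator ends the last one. Any OpPhi in a successor block still names
// the original block as an incoming predecessor, so those incoming-block
// operands must be rewritten from the first block's id to the last block's
// id, which is what UpdateSucceedingPhis does.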
void InlinePass::UpdateSucceedingPhis(
    std::vector<std::unique_ptr<BasicBlock>>& new_blocks) {
  const auto firstBlk = new_blocks.begin();
  const auto lastBlk = new_blocks.end() - 1;
  const uint32_t firstId = (*firstBlk)->id();
  const uint32_t lastId = (*lastBlk)->id();
  const BasicBlock& const_last_block = *lastBlk->get();
  const_last_block.ForEachSuccessorLabel(
      [&firstId, &lastId, this](const uint32_t succ) {
        BasicBlock* sbp = this->id2block_[succ];
        sbp->ForEachPhiInst([&firstId, &lastId](Instruction* phi) {
          phi->ForEachInId([&firstId, &lastId](uint32_t* id) {
            if (*id == firstId) *id = lastId;
          });
        });
      });
}

bool InlinePass::HasMultipleReturns(Function* func) {
  bool seenReturn = false;
  bool multipleReturns = false;
  for (auto& blk : *func) {
    auto terminal_ii = blk.cend();
    --terminal_ii;
    if (terminal_ii->opcode() == SpvOpReturn ||
        terminal_ii->opcode() == SpvOpReturnValue) {
      if (seenReturn) {
        multipleReturns = true;
        break;
      }
      seenReturn = true;
    }
  }
  return multipleReturns;
}
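
// For example, a loop header ending in
//   OpLoopMerge %merge %continue None
//   OpBranchConditional %c %body %merge
// gets structured successors [%merge, %body, %merge]. Listing %merge first
// makes the depth-first traversal in HasNoReturnInLoop finish it before the
// loop body, so in the resulting order the merge block comes after every
// block inside the loop construct, as that function's comment requires.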
void InlinePass::ComputeStructuredSuccessors(Function* func) {
  // If header, make merge block first successor.
  for (auto& blk : *func) {
    uint32_t mbid = blk.MergeBlockIdIfAny();
    if (mbid != 0) {
      block2structured_succs_[&blk].push_back(id2block_[mbid]);
    }

    // Add true successors.
    const auto& const_blk = blk;
    const_blk.ForEachSuccessorLabel([&blk, this](const uint32_t sbid) {
      block2structured_succs_[&blk].push_back(id2block_[sbid]);
    });
  }
}

InlinePass::GetBlocksFunction InlinePass::StructuredSuccessorsFunction() {
  return [this](const BasicBlock* block) {
    return &(block2structured_succs_[block]);
  };
}

bool InlinePass::HasNoReturnInLoop(Function* func) {
  // If control flow is not structured, do not do loop/return analysis.
  // TODO: Analyze returns in non-structured control flow.
  if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader))
    return false;
  // Compute structured block order. This order has the property
  // that dominators are before all blocks they dominate and merge blocks
  // are after all blocks that are in the control constructs of their header.
  ComputeStructuredSuccessors(func);
  auto ignore_block = [](cbb_ptr) {};
  auto ignore_edge = [](cbb_ptr, cbb_ptr) {};
  std::list<const BasicBlock*> structuredOrder;
  CFA<BasicBlock>::DepthFirstTraversal(
      &*func->begin(), StructuredSuccessorsFunction(), ignore_block,
      [&](cbb_ptr b) { structuredOrder.push_front(b); }, ignore_edge);
  // Search for returns in loops. Only need to track outermost loop.
  bool return_in_loop = false;
  uint32_t outerLoopMergeId = 0;
  for (auto& blk : structuredOrder) {
    // Exiting current outer loop.
    if (blk->id() == outerLoopMergeId) outerLoopMergeId = 0;
    // Return block.
    auto terminal_ii = blk->cend();
    --terminal_ii;
    if (terminal_ii->opcode() == SpvOpReturn ||
        terminal_ii->opcode() == SpvOpReturnValue) {
      if (outerLoopMergeId != 0) {
        return_in_loop = true;
        break;
      }
    } else if (terminal_ii != blk->cbegin()) {
      auto merge_ii = terminal_ii;
      --merge_ii;
      // Entering outermost loop.
      if (merge_ii->opcode() == SpvOpLoopMerge && outerLoopMergeId == 0)
        outerLoopMergeId =
            merge_ii->GetSingleWordOperand(kSpvLoopMergeMergeBlockId);
    }
  }
  return !return_in_loop;
}

void InlinePass::AnalyzeReturns(Function* func) {
  // Look for multiple returns.
  if (!HasMultipleReturns(func)) {
    no_return_in_loop_.insert(func->result_id());
    return;
  }
  multi_return_funcs_.insert(func->result_id());

  // If multiple returns, see if any are in a loop.
  if (HasNoReturnInLoop(func)) no_return_in_loop_.insert(func->result_id());
}

bool InlinePass::IsInlinableFunction(Function* func) {
  // We can only inline a function if it has blocks.
  if (func->cbegin() == func->cend()) return false;
  // Do not inline functions with returns in loops. Currently early return
  // functions are inlined by wrapping them in a one-trip loop and implementing
  // the returns as a branch to the loop's merge block. However, this can only
  // be done validly if the return was not in a loop in the original function.
  // Also remember functions with multiple (early) returns.
  AnalyzeReturns(func);
  return no_return_in_loop_.find(func->result_id()) !=
         no_return_in_loop_.cend();
}

void InlinePass::InitializeInline() {
  false_id_ = 0;

  // Clear collections.
  id2function_.clear();
  id2block_.clear();
  block2structured_succs_.clear();
  inlinable_.clear();
  no_return_in_loop_.clear();
  multi_return_funcs_.clear();

  for (auto& fn : *get_module()) {
    // Initialize function and block maps.
    id2function_[fn.result_id()] = &fn;
    for (auto& blk : fn) {
      id2block_[blk.id()] = &blk;
    }
    // Compute inlinability.
    if (IsInlinableFunction(&fn)) inlinable_.insert(fn.result_id());
  }
}

InlinePass::InlinePass() {}

}  // namespace opt
}  // namespace spvtools