// Copyright (c) 2023 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "source/opt/invocation_interlock_placement_pass.h"

#include <algorithm>
#include <array>
#include <cassert>
#include <deque>
#include <functional>
#include <optional>
#include <queue>
#include <stack>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "source/enum_set.h"
#include "source/enum_string_mapping.h"
#include "source/opt/ir_context.h"
#include "source/opt/reflect.h"
#include "source/spirv_target_env.h"
#include "source/util/string_utils.h"
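
// Overview of the implementation below: begin and end interlock instructions
// found in non-entry-point functions are removed and re-created around the
// corresponding call sites in each fragment shader entry point; duplicate
// instructions are then pruned so that a block keeps at most its first begin
// and its last end; finally, new begin and end instructions are placed on the
// control-flow edges that cross the critical section boundary, splitting
// edges where needed.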

namespace spvtools {
namespace opt {
namespace {

constexpr uint32_t kEntryPointExecutionModelInIdx = 0;
constexpr uint32_t kEntryPointFunctionIdInIdx = 1;
constexpr uint32_t kFunctionCallFunctionIdInIdx = 0;

}  // namespace
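
// Returns whether the block with id block_id has exactly one "next" block in
// the direction of traversal: a single successor when reverse_cfg is true
// (forward traversal), or a single predecessor when reverse_cfg is false
// (backward traversal).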
bool InvocationInterlockPlacementPass::hasSingleNextBlock(uint32_t block_id,
                                                          bool reverse_cfg) {
  if (reverse_cfg) {
    // We are traversing forward, so check whether there is a single successor.
    BasicBlock* block = cfg()->block(block_id);

    switch (block->tail()->opcode()) {
      case spv::Op::OpBranchConditional:
        return false;
      case spv::Op::OpSwitch:
        return block->tail()->NumInOperandWords() == 1;
      default:
        return !block->tail()->IsReturnOrAbort();
    }
  } else {
    // We are traversing backward, so check whether there is a single
    // predecessor.
    return cfg()->preds(block_id).size() == 1;
  }
}
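
// Calls f on the id of each "next" block of block_id in the direction of
// traversal: every successor when reverse_cfg is true, every predecessor
// otherwise.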
void InvocationInterlockPlacementPass::forEachNext(
    uint32_t block_id, bool reverse_cfg, std::function<void(uint32_t)> f) {
  if (reverse_cfg) {
    BasicBlock* block = cfg()->block(block_id);
    block->ForEachSuccessorLabel([f](uint32_t succ_id) { f(succ_id); });
  } else {
    for (uint32_t pred_id : cfg()->preds(block_id)) {
      f(pred_id);
    }
  }
}
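
// Creates a new instruction with the given opcode and inserts it either
// immediately before the block's terminator (at_end == true) or at the very
// beginning of the block (at_end == false).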
void InvocationInterlockPlacementPass::addInstructionAtBlockBoundary(
    BasicBlock* block, spv::Op opcode, bool at_end) {
  if (at_end) {
    assert(block->begin()->opcode() != spv::Op::OpPhi &&
           "addInstructionAtBlockBoundary expects to be called with at_end == "
           "true only if there is a single successor to block");
    // Insert a begin instruction at the end of the block.
    Instruction* begin_inst = new Instruction(context(), opcode);
    begin_inst->InsertAfter(&*--block->tail());
  } else {
    assert(block->begin()->opcode() != spv::Op::OpPhi &&
           "addInstructionAtBlockBoundary expects to be called with at_end == "
           "false only if there is a single predecessor to block");
    // Insert an end instruction at the beginning of the block.
    Instruction* end_inst = new Instruction(context(), opcode);
    end_inst->InsertBefore(&*block->begin());
  }
}
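
// Removes every OpBeginInvocationInterlockEXT in the block except the first
// one. Returns whether any instruction was removed.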
bool InvocationInterlockPlacementPass::killDuplicateBegin(BasicBlock* block) {
  bool found = false;

  return context()->KillInstructionIf(
      block->begin(), block->end(), [&found](Instruction* inst) {
        if (inst->opcode() == spv::Op::OpBeginInvocationInterlockEXT) {
          if (found) {
            return true;
          }
          found = true;
        }
        return false;
      });
}
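
// Removes every OpEndInvocationInterlockEXT in the block except the last one.
// Returns whether any instruction was removed.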
bool InvocationInterlockPlacementPass::killDuplicateEnd(BasicBlock* block) {
  std::vector<Instruction*> to_kill;
  block->ForEachInst([&to_kill](Instruction* inst) {
    if (inst->opcode() == spv::Op::OpEndInvocationInterlockEXT) {
      to_kill.push_back(inst);
    }
  });

  if (to_kill.size() <= 1) {
    return false;
  }

  to_kill.pop_back();

  for (Instruction* inst : to_kill) {
    context()->KillInst(inst);
  }
  return true;
}
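
// Records in extracted_functions_ whether func contains a begin or end
// instruction, either directly or transitively through the functions it
// calls. Callees are processed recursively, and results are memoized.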
void InvocationInterlockPlacementPass::recordBeginOrEndInFunction(
    Function* func) {
  if (extracted_functions_.count(func)) {
    return;
  }

  bool had_begin = false;
  bool had_end = false;

  func->ForEachInst([this, &had_begin, &had_end](Instruction* inst) {
    switch (inst->opcode()) {
      case spv::Op::OpBeginInvocationInterlockEXT:
        had_begin = true;
        break;
      case spv::Op::OpEndInvocationInterlockEXT:
        had_end = true;
        break;
      case spv::Op::OpFunctionCall: {
        uint32_t function_id =
            inst->GetSingleWordInOperand(kFunctionCallFunctionIdInIdx);
        Function* inner_func = context()->GetFunction(function_id);
        recordBeginOrEndInFunction(inner_func);
        ExtractionResult result = extracted_functions_[inner_func];
        had_begin = had_begin || result.had_begin;
        had_end = had_end || result.had_end;
        break;
      }
      default:
        break;
    }
  });

  ExtractionResult result = {had_begin, had_end};
  extracted_functions_[func] = result;
}
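
// Removes every begin and end instruction from func. Returns whether the
// function was modified.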
bool InvocationInterlockPlacementPass::
    removeBeginAndEndInstructionsFromFunction(Function* func) {
  bool modified = false;
  func->ForEachInst([this, &modified](Instruction* inst) {
    switch (inst->opcode()) {
      case spv::Op::OpBeginInvocationInterlockEXT:
        context()->KillInst(inst);
        modified = true;
        break;
      case spv::Op::OpEndInvocationInterlockEXT:
        context()->KillInst(inst);
        modified = true;
        break;
      default:
        break;
    }
  });
  return modified;
}
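
// For each function call in the given blocks, inserts a begin instruction
// immediately before the call if the callee (transitively) contained one, and
// an end instruction immediately after the call if the callee contained one.
// Returns whether any instruction was inserted.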
bool InvocationInterlockPlacementPass::extractInstructionsFromCalls(
    std::vector<BasicBlock*> blocks) {
  bool modified = false;

  for (BasicBlock* block : blocks) {
    block->ForEachInst([this, &modified](Instruction* inst) {
      if (inst->opcode() == spv::Op::OpFunctionCall) {
        uint32_t function_id =
            inst->GetSingleWordInOperand(kFunctionCallFunctionIdInIdx);
        Function* func = context()->GetFunction(function_id);
        ExtractionResult result = extracted_functions_[func];

        if (result.had_begin) {
          Instruction* new_inst = new Instruction(
              context(), spv::Op::OpBeginInvocationInterlockEXT);
          new_inst->InsertBefore(inst);
          modified = true;
        }
        if (result.had_end) {
          Instruction* new_inst =
              new Instruction(context(), spv::Op::OpEndInvocationInterlockEXT);
          new_inst->InsertAfter(inst);
          modified = true;
        }
      }
    });
  }
  return modified;
}
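
// Records the ids of blocks that already contain a begin instruction in
// begin_, and of blocks that already contain an end instruction in end_.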
void InvocationInterlockPlacementPass::recordExistingBeginAndEndBlock(
    std::vector<BasicBlock*> blocks) {
  for (BasicBlock* block : blocks) {
    block->ForEachInst([this, block](Instruction* inst) {
      switch (inst->opcode()) {
        case spv::Op::OpBeginInvocationInterlockEXT:
          begin_.insert(block->id());
          break;
        case spv::Op::OpEndInvocationInterlockEXT:
          end_.insert(block->id());
          break;
        default:
          break;
      }
    });
  }
}
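
// Returns the set of blocks reachable from starting_nodes (including the
// starting nodes themselves), following successors when reverse_cfg is true
// and predecessors otherwise. Every block reached through at least one edge
// from a block in the returned set is also added to previous_inside.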
InvocationInterlockPlacementPass::BlockSet
InvocationInterlockPlacementPass::computeReachableBlocks(
    BlockSet& previous_inside, const BlockSet& starting_nodes,
    bool reverse_cfg) {
  BlockSet inside = starting_nodes;

  std::deque<uint32_t> worklist;
  worklist.insert(worklist.begin(), starting_nodes.begin(),
                  starting_nodes.end());

  while (!worklist.empty()) {
    uint32_t block_id = worklist.front();
    worklist.pop_front();

    forEachNext(block_id, reverse_cfg,
                [&inside, &previous_inside, &worklist](uint32_t next_id) {
                  previous_inside.insert(next_id);
                  if (inside.insert(next_id).second) {
                    worklist.push_back(next_id);
                  }
                });
  }

  return inside;
}
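
// Removes begin and end instructions in the block that the reachability
// analysis shows to be redundant, keeping at most the first begin and the
// last end where the critical section actually starts or ends in this block.
// Returns whether the block was modified.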
bool InvocationInterlockPlacementPass::removeUnneededInstructions(
    BasicBlock* block) {
  bool modified = false;
  if (!predecessors_after_begin_.count(block->id()) &&
      after_begin_.count(block->id())) {
    // None of the previous blocks are in the critical section, but this block
    // is. This can only happen if this block already has at least one begin
    // instruction. Leave the first begin instruction, and remove any others.
    modified |= killDuplicateBegin(block);
  } else if (predecessors_after_begin_.count(block->id())) {
    // At least one previous block is in the critical section; remove all
    // begin instructions in this block.
    modified |= context()->KillInstructionIf(
        block->begin(), block->end(), [](Instruction* inst) {
          return inst->opcode() == spv::Op::OpBeginInvocationInterlockEXT;
        });
  }

  if (!successors_before_end_.count(block->id()) &&
      before_end_.count(block->id())) {
    // None of the following blocks are in the critical section, but this block
    // is. This can only happen if this block already has at least one end
    // instruction. Leave the last end instruction, and remove any others.
    modified |= killDuplicateEnd(block);
  } else if (successors_before_end_.count(block->id())) {
    // At least one following block is in the critical section; remove all
    // end instructions in this block.
    modified |= context()->KillInstructionIf(
        block->begin(), block->end(), [](Instruction* inst) {
          return inst->opcode() == spv::Op::OpEndInvocationInterlockEXT;
        });
  }
  return modified;
}
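
// Splits the edge from block to its successor with id succ_id: creates a new
// block that branches unconditionally to succ_id, and redirects the first
// matching label in block's terminator to the new block. Returns the new
// block.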
BasicBlock* InvocationInterlockPlacementPass::splitEdge(BasicBlock* block,
                                                        uint32_t succ_id) {
  // Create a new block to replace the critical edge.
  auto new_succ_temp = MakeUnique<BasicBlock>(
      MakeUnique<Instruction>(context(), spv::Op::OpLabel, 0, TakeNextId(),
                              std::initializer_list<Operand>{}));
  auto* new_succ = new_succ_temp.get();

  // Insert the new block into the function.
  block->GetParent()->InsertBasicBlockAfter(std::move(new_succ_temp), block);

  // Make the new block branch unconditionally to the original successor.
  new_succ->AddInstruction(MakeUnique<Instruction>(
      context(), spv::Op::OpBranch, 0, 0,
      std::initializer_list<Operand>{
          Operand(spv_operand_type_t::SPV_OPERAND_TYPE_ID, {succ_id})}));

  assert(block->tail()->opcode() == spv::Op::OpBranchConditional ||
         block->tail()->opcode() == spv::Op::OpSwitch);

  // Update the first branch to `succ_id` so that it branches to the new
  // successor instead. If there are multiple edges to `succ_id`, we
  // arbitrarily pick the first one that appears in the operand list; the
  // other edges will have to be split by another call to `splitEdge`.
  block->tail()->WhileEachInId([new_succ, succ_id](uint32_t* branch_id) {
    if (*branch_id == succ_id) {
      *branch_id = new_succ->id();
      return false;
    }
    return true;
  });

  return new_succ;
}
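
// If the edge from block to next_id crosses into the critical section (i.e.
// next_id has a previous block inside the section while block is outside),
// adds an instruction with the given opcode on that edge: at block's boundary
// if block has a single next block, otherwise in a new block created by
// splitting the edge. Returns whether an instruction was added.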
bool InvocationInterlockPlacementPass::placeInstructionsForEdge(
    BasicBlock* block, uint32_t next_id, BlockSet& inside,
    BlockSet& previous_inside, spv::Op opcode, bool reverse_cfg) {
  bool modified = false;

  if (previous_inside.count(next_id) && !inside.count(block->id())) {
    // This block is not in the critical section, but the next block has at
    // least one other previous block that is. The critical section boundary
    // therefore crosses this edge, so we need to add a begin or end
    // instruction to it.

    modified = true;

    if (hasSingleNextBlock(block->id(), reverse_cfg)) {
      // This is the only next block.
      // Additionally, because `next_id` is in `previous_inside`, we know that
      // `next_id` has at least one previous block in `inside`. And because
      // `block` is not in `inside`, that means `next_id` has to have at least
      // one other previous block in `inside`.

      // This is solely for a debug assertion. It is essentially recomputing
      // the value of `previous_inside` to verify that it was computed
      // correctly such that the above statement is true.
      bool next_has_previous_inside = false;
      // By passing !reverse_cfg to forEachNext, we are actually iterating over
      // the previous blocks.
      forEachNext(next_id, !reverse_cfg,
                  [&next_has_previous_inside, inside](uint32_t previous_id) {
                    if (inside.count(previous_id)) {
                      next_has_previous_inside = true;
                    }
                  });
      assert(next_has_previous_inside &&
             "`previous_inside` must be the set of blocks with at least one "
             "previous block in `inside`");

      addInstructionAtBlockBoundary(block, opcode, reverse_cfg);
    } else {
      // This block has multiple next blocks. Split the edge and insert the
      // instruction in the new next block.
      BasicBlock* new_branch;
      if (reverse_cfg) {
        new_branch = splitEdge(block, next_id);
      } else {
        new_branch = splitEdge(cfg()->block(next_id), block->id());
      }

      auto inst = new Instruction(context(), opcode);
      inst->InsertBefore(&*new_branch->tail());
    }
  }

  return modified;
}
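
// Places any begin and end instructions required on the edges between block
// and its successors, using the forward traversal for begin instructions and
// the backward traversal for end instructions. Returns whether anything was
// added.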
bool InvocationInterlockPlacementPass::placeInstructions(BasicBlock* block) {
  bool modified = false;

  block->ForEachSuccessorLabel([this, block, &modified](uint32_t succ_id) {
    modified |= placeInstructionsForEdge(
        block, succ_id, after_begin_, predecessors_after_begin_,
        spv::Op::OpBeginInvocationInterlockEXT, /* reverse_cfg= */ true);
    modified |= placeInstructionsForEdge(cfg()->block(succ_id), block->id(),
                                         before_end_, successors_before_end_,
                                         spv::Op::OpEndInvocationInterlockEXT,
                                         /* reverse_cfg= */ false);
  });

  return modified;
}
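
// Processes a single fragment shader entry point: re-creates begin and end
// instructions around calls to functions that contained them, records the
// blocks that already hold begin or end instructions, computes the
// reachability sets describing the critical section, and finally removes
// unneeded instructions and places new ones on the boundary edges. Returns
// whether the function was modified.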
bool InvocationInterlockPlacementPass::processFragmentShaderEntry(
    Function* entry_func) {
  bool modified = false;

  // Save the original order of blocks in the function, so we don't iterate
  // over newly-added blocks.
  std::vector<BasicBlock*> original_blocks;
  for (auto bi = entry_func->begin(); bi != entry_func->end(); ++bi) {
    original_blocks.push_back(&*bi);
  }

  modified |= extractInstructionsFromCalls(original_blocks);
  recordExistingBeginAndEndBlock(original_blocks);

  after_begin_ = computeReachableBlocks(predecessors_after_begin_, begin_,
                                        /* reverse_cfg= */ true);
  before_end_ = computeReachableBlocks(successors_before_end_, end_,
                                       /* reverse_cfg= */ false);

  for (BasicBlock* block : original_blocks) {
    modified |= removeUnneededInstructions(block);
    modified |= placeInstructions(block);
  }
  return modified;
}
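
// Returns true if the module declares the SPV_EXT_fragment_shader_interlock
// extension together with at least one of its interlock capabilities.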
bool InvocationInterlockPlacementPass::isFragmentShaderInterlockEnabled() {
  if (!context()->get_feature_mgr()->HasExtension(
          kSPV_EXT_fragment_shader_interlock)) {
    return false;
  }

  if (context()->get_feature_mgr()->HasCapability(
          spv::Capability::FragmentShaderSampleInterlockEXT)) {
    return true;
  }

  if (context()->get_feature_mgr()->HasCapability(
          spv::Capability::FragmentShaderPixelInterlockEXT)) {
    return true;
  }

  if (context()->get_feature_mgr()->HasCapability(
          spv::Capability::FragmentShaderShadingRateInterlockEXT)) {
    return true;
  }

  return false;
}

Pass::Status InvocationInterlockPlacementPass::Process() {
  // Skip this pass if the necessary extension or capability is missing.
  if (!isFragmentShaderInterlockEnabled()) {
    return Status::SuccessWithoutChange;
  }

  bool modified = false;

  std::unordered_set<Function*> entry_points;
  for (Instruction& entry_inst : context()->module()->entry_points()) {
    uint32_t entry_id =
        entry_inst.GetSingleWordInOperand(kEntryPointFunctionIdInIdx);
    entry_points.insert(context()->GetFunction(entry_id));
  }

  // Record which functions (transitively) contain begin or end instructions,
  // and remove those instructions from every non-entry-point function; they
  // are re-created around the corresponding call sites in the entry points.
  for (auto fi = context()->module()->begin();
       fi != context()->module()->end(); ++fi) {
    Function* func = &*fi;
    recordBeginOrEndInFunction(func);
    if (!entry_points.count(func) && extracted_functions_.count(func)) {
      modified |= removeBeginAndEndInstructionsFromFunction(func);
    }
  }

  // Place the instructions in each fragment shader entry point.
  for (Instruction& entry_inst : context()->module()->entry_points()) {
    uint32_t entry_id =
        entry_inst.GetSingleWordInOperand(kEntryPointFunctionIdInIdx);
    Function* entry_func = context()->GetFunction(entry_id);

    auto execution_model = spv::ExecutionModel(
        entry_inst.GetSingleWordInOperand(kEntryPointExecutionModelInIdx));

    if (execution_model != spv::ExecutionModel::Fragment) {
      continue;
    }

    modified |= processFragmentShaderEntry(entry_func);
  }

  return modified ? Pass::Status::SuccessWithChange
                  : Pass::Status::SuccessWithoutChange;
}

}  // namespace opt
}  // namespace spvtools