//
// Copyright (C) 2018 Google, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//    Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//
//    Redistributions in binary form must reproduce the above
//    copyright notice, this list of conditions and the following
//    disclaimer in the documentation and/or other materials provided
//    with the distribution.
//
//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Post-processing for SPIR-V IR, in internal form, not standard binary form.
//

#include <cassert>
#include <cstdlib>

#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <map>

#include "SPIRV/spvIR.h"
#include "SpvBuilder.h"
#include "spirv.hpp11"
#include "spvUtil.h"

namespace spv {
    #include "GLSL.std.450.h"
    #include "GLSL.ext.KHR.h"
    #include "GLSL.ext.EXT.h"
    #include "GLSL.ext.AMD.h"
    #include "GLSL.ext.NV.h"
    #include "GLSL.ext.ARM.h"
    #include "GLSL.ext.QCOM.h"
}

namespace spv {

// Hook to visit each operand type and result type of an instruction.
// Will be called multiple times for one instruction, once for each typed
// operand and the result.
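// For example (illustrative): postProcess() below invokes this once for an
// instruction's result type and once per typed id operand, so a single
// instruction can add several capabilities here.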
void Builder::postProcessType(const Instruction& inst, Id typeId)
{
    // Characterize the type being questioned
    Op basicTypeOp = getMostBasicTypeClass(typeId);
    int width = 0;
    if (basicTypeOp == Op::OpTypeFloat || basicTypeOp == Op::OpTypeInt)
        width = getScalarTypeWidth(typeId);

    // Do opcode-specific checks
    switch (inst.getOpCode()) {
    case Op::OpLoad:
    case Op::OpStore:
        if (basicTypeOp == Op::OpTypeStruct) {
            if (containsType(typeId, Op::OpTypeInt, 8))
                addCapability(Capability::Int8);
            if (containsType(typeId, Op::OpTypeInt, 16))
                addCapability(Capability::Int16);
            if (containsType(typeId, Op::OpTypeFloat, 16))
                addCapability(Capability::Float16);
        } else {
            StorageClass storageClass = StorageClass::Max;
            if (module.getInstruction(inst.getIdOperand(0))->getOpCode() != Op::OpUntypedAccessChainKHR) {
                storageClass = getStorageClass(inst.getIdOperand(0));
            }
            if (width == 8) {
                switch (storageClass) {
                case StorageClass::PhysicalStorageBufferEXT:
                case StorageClass::Uniform:
                case StorageClass::StorageBuffer:
                case StorageClass::PushConstant:
                    break;
                default:
                    addCapability(Capability::Int8);
                    break;
                }
            } else if (width == 16) {
                switch (storageClass) {
                case StorageClass::PhysicalStorageBufferEXT:
                case StorageClass::Uniform:
                case StorageClass::StorageBuffer:
                case StorageClass::PushConstant:
                case StorageClass::Input:
                case StorageClass::Output:
                    break;
                default:
                    if (basicTypeOp == Op::OpTypeInt)
                        addCapability(Capability::Int16);
                    if (basicTypeOp == Op::OpTypeFloat)
                        addCapability(Capability::Float16);
                    break;
                }
            }
        }
        break;
    case Op::OpCopyObject:
        break;
    case Op::OpFConvert:
    case Op::OpSConvert:
    case Op::OpUConvert:
        // Look for any 8/16-bit storage capabilities. If there are none, assume that
        // the convert instruction requires the Float16/Int8/16 capability.
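        // Illustrative example: if the module already declares StorageUniform16,
        // a conversion to/from 16-bit types adds nothing here; with no 16-bit
        // storage capability present at all, the same conversion adds
        // Float16/Int16 instead.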
        if (containsType(typeId, Op::OpTypeFloat, 16) || containsType(typeId, Op::OpTypeInt, 16)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::Capability::StorageInputOutput16 ||
                    cap == spv::Capability::StoragePushConstant16 ||
                    cap == spv::Capability::StorageUniformBufferBlock16 ||
                    cap == spv::Capability::StorageUniform16) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                if (containsType(typeId, Op::OpTypeFloat, 16))
                    addCapability(Capability::Float16);
                if (containsType(typeId, Op::OpTypeInt, 16))
                    addCapability(Capability::Int16);
            }
        }
        if (containsType(typeId, Op::OpTypeInt, 8)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::Capability::StoragePushConstant8 ||
                    cap == spv::Capability::UniformAndStorageBuffer8BitAccess ||
                    cap == spv::Capability::StorageBuffer8BitAccess) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                addCapability(Capability::Int8);
            }
        }
        break;
    case Op::OpExtInst:
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450Frexp:
        case GLSLstd450FrexpStruct:
            if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, Op::OpTypeInt, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_int16);
            break;
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, Op::OpTypeFloat, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
            break;
        default:
            break;
        }
        break;
    case Op::OpAccessChain:
    case Op::OpPtrAccessChain:
        if (isPointerType(typeId))
            break;
        if (basicTypeOp == Op::OpTypeInt) {
            if (width == 16)
                addCapability(Capability::Int16);
            else if (width == 8)
                addCapability(Capability::Int8);
        }
        break;
    default:
        if (basicTypeOp == Op::OpTypeInt) {
            if (width == 16)
                addCapability(Capability::Int16);
            else if (width == 8)
                addCapability(Capability::Int8);
            else if (width == 64)
                addCapability(Capability::Int64);
        } else if (basicTypeOp == Op::OpTypeFloat) {
            if (width == 16)
                addCapability(Capability::Float16);
            else if (width == 64)
                addCapability(Capability::Float64);
        }
        break;
    }
}
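
// Return the byte size of the largest scalar found anywhere inside the given
// type instruction, or 0 if there is none. For example (illustrative): a struct
// holding a vec3 of 32-bit floats and one 64-bit float yields 8.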
unsigned int Builder::postProcessGetLargestScalarSize(const Instruction& type)
{
    switch (type.getOpCode()) {
    case Op::OpTypeBool:
        return 1;
    case Op::OpTypeInt:
    case Op::OpTypeFloat:
        return type.getImmediateOperand(0) / 8;
    case Op::OpTypePointer:
        return 8;
    case Op::OpTypeVector:
    case Op::OpTypeMatrix:
    case Op::OpTypeArray:
    case Op::OpTypeRuntimeArray: {
        const Instruction* elem_type = module.getInstruction(type.getIdOperand(0));
        return postProcessGetLargestScalarSize(*elem_type);
    }
    case Op::OpTypeStruct: {
        unsigned int largest = 0;
        for (int i = 0; i < type.getNumOperands(); ++i) {
            const Instruction* elem_type = module.getInstruction(type.getIdOperand(i));
            unsigned int elem_size = postProcessGetLargestScalarSize(*elem_type);
            largest = std::max(largest, elem_size);
        }
        return largest;
    }
    default:
        return 0;
    }
}

// Called for each instruction that resides in a block.
void Builder::postProcess(Instruction& inst)
{
    // Add capabilities based simply on the opcode.
    switch (inst.getOpCode()) {
    case Op::OpExtInst:
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            addCapability(Capability::InterpolationFunction);
            break;
        default:
            break;
        }
        break;
    case Op::OpDPdxFine:
    case Op::OpDPdyFine:
    case Op::OpFwidthFine:
    case Op::OpDPdxCoarse:
    case Op::OpDPdyCoarse:
    case Op::OpFwidthCoarse:
        addCapability(Capability::DerivativeControl);
        break;
    case Op::OpImageQueryLod:
    case Op::OpImageQuerySize:
    case Op::OpImageQuerySizeLod:
    case Op::OpImageQuerySamples:
    case Op::OpImageQueryLevels:
        addCapability(Capability::ImageQuery);
        break;
    case Op::OpGroupNonUniformPartitionNV:
        addExtension(E_SPV_NV_shader_subgroup_partitioned);
        addCapability(Capability::GroupNonUniformPartitionedNV);
        break;
    case Op::OpLoad:
    case Op::OpStore:
    {
        // For any load/store to a PhysicalStorageBufferEXT, walk the access chain
        // index list to compute the misalignment. The pre-existing alignment value
        // (set via Builder::AccessChain::alignment) only accounts for the base of
        // the reference type and any scalar component selection in the access chain,
        // and this function computes the rest from the SPIR-V Offset decorations.
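        // Illustrative example (values not from any particular shader): an access
        // chain that indexes an array with ArrayStride 24 and then member 1 of the
        // element struct with Offset 4 ORs 24 | 4 into the misalignment; the least
        // significant set bit of the merged value (here 4) becomes the conservative
        // alignment below.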
        Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));
        if (accessChain->getOpCode() == Op::OpAccessChain) {
            const Instruction* base = module.getInstruction(accessChain->getIdOperand(0));
            // Get the type of the base of the access chain. It must be a pointer type.
            Id typeId = base->getTypeId();
            Instruction *type = module.getInstruction(typeId);
            assert(type->getOpCode() == Op::OpTypePointer);
            if (type->getImmediateOperand(0) != (unsigned)StorageClass::PhysicalStorageBuffer) {
                break;
            }

            // Get the pointee type.
            typeId = type->getIdOperand(1);
            type = module.getInstruction(typeId);

            // Walk the index list for the access chain. For each index, find any
            // misalignment that can apply when accessing the member/element via
            // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all
            // together.
            int alignment = 0;
            bool first_struct_elem = false;
            for (int i = 1; i < accessChain->getNumOperands(); ++i) {
                Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));
                if (type->getOpCode() == Op::OpTypeStruct) {
                    assert(idx->getOpCode() == Op::OpConstant);
                    unsigned int c = idx->getImmediateOperand(0);

                    const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                        if (decoration.get()->getOpCode() == Op::OpMemberDecorate &&
                            decoration.get()->getIdOperand(0) == typeId &&
                            decoration.get()->getImmediateOperand(1) == c &&
                            (decoration.get()->getImmediateOperand(2) == (unsigned)Decoration::Offset ||
                             decoration.get()->getImmediateOperand(2) == (unsigned)Decoration::MatrixStride)) {
                            unsigned int operand_value = decoration.get()->getImmediateOperand(3);
                            alignment |= operand_value;
                            if (operand_value == 0 &&
                                decoration.get()->getImmediateOperand(2) == (unsigned)Decoration::Offset) {
                                first_struct_elem = true;
                            }
                        }
                    };
                    std::for_each(decorations.begin(), decorations.end(), function);

                    // get the next member type
                    typeId = type->getIdOperand(c);
                    type = module.getInstruction(typeId);
                } else if (type->getOpCode() == Op::OpTypeArray ||
                           type->getOpCode() == Op::OpTypeRuntimeArray) {
                    const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                        if (decoration.get()->getOpCode() == Op::OpDecorate &&
                            decoration.get()->getIdOperand(0) == typeId &&
                            decoration.get()->getImmediateOperand(1) == (unsigned)Decoration::ArrayStride) {
                            alignment |= decoration.get()->getImmediateOperand(2);
                        }
                    };
                    std::for_each(decorations.begin(), decorations.end(), function);

                    // Get the element type
                    typeId = type->getIdOperand(0);
                    type = module.getInstruction(typeId);
                } else {
                    // Once we get to any non-aggregate type, we're done.
                    break;
                }
            }
            assert(inst.getNumOperands() >= 3);
            const bool is_store = inst.getOpCode() == Op::OpStore;
            auto const memoryAccess = (MemoryAccessMask)inst.getImmediateOperand(is_store ? 2 : 1);
            assert(anySet(memoryAccess, MemoryAccessMask::Aligned));
            static_cast<void>(memoryAccess);

            // Compute the index of the alignment operand.
            int alignmentIdx = 2;
            if (is_store)
                alignmentIdx++;

            // Merge new and old (mis)alignment
            alignment |= inst.getImmediateOperand(alignmentIdx);

            if (!is_store) {
                Instruction* inst_type = module.getInstruction(inst.getTypeId());
                if (inst_type->getOpCode() == Op::OpTypePointer &&
                    inst_type->getImmediateOperand(0) == (unsigned)StorageClass::PhysicalStorageBuffer) {
                    // We are loading a pointer, so we need to ensure it is at least 8-byte aligned.
                    // See https://github.com/KhronosGroup/glslang/issues/4084
                    // In case the alignment is currently 4, need to ensure it is 8 before grabbing the LSB
                    alignment |= 8;
                    alignment &= 8;
                }
            }

            // Pick the LSB
            alignment = alignment & ~(alignment & (alignment-1));
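            // Worked example (illustrative): alignment == 0b0110 (6) gives
            // alignment & (alignment-1) == 4, so 6 & ~4 == 0b0010, i.e. a
            // conservative 2-byte alignment.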
            // An edge case arises when copying a struct to another struct: the
            // alignment is never found anywhere, so in this case fall back to a
            // full size lookup on the type.
            if (alignment == 0 && first_struct_elem) {
                // Get the struct type back
                const Instruction* pointer_type = module.getInstruction(base->getTypeId());
                const Instruction* struct_type = module.getInstruction(pointer_type->getIdOperand(1));
                assert(struct_type->getOpCode() == Op::OpTypeStruct);
                const Instruction* elem_type = module.getInstruction(struct_type->getIdOperand(0));
                unsigned int largest_scalar = postProcessGetLargestScalarSize(*elem_type);
                if (largest_scalar != 0) {
                    alignment = largest_scalar;
                } else {
                    alignment = 16; // fallback if we can't determine a good alignment
                }
            }

            // update the Aligned operand
            assert(alignment != 0);
            inst.setImmediateOperand(alignmentIdx, alignment);
        }
        break;
    }
    default:
        break;
    }

    // Checks based on type
    if (inst.getTypeId() != NoType)
        postProcessType(inst, inst.getTypeId());
    for (int op = 0; op < inst.getNumOperands(); ++op) {
        if (inst.isIdOperand(op)) {
            // In blocks, these are always result ids, but we are relying on
            // getTypeId() to return NoType for things like OpLabel.
            if (getTypeId(inst.getIdOperand(op)) != NoType)
                postProcessType(inst, getTypeId(inst.getIdOperand(op)));
        }
    }
}

// comment in header
void Builder::postProcessCFG()
{
    // reachableBlocks is the set of blocks reached via control flow, or which are
    // unreachable continue targets or unreachable merges.
    std::unordered_set<const Block*> reachableBlocks;
    std::unordered_map<Block*, Block*> headerForUnreachableContinue;
    std::unordered_set<Block*> unreachableMerges;
    std::unordered_set<Id> unreachableDefinitions;
    // Collect IDs defined in unreachable blocks. For each function, label the
    // reachable blocks first. Then for each unreachable block, collect the
    // result IDs of the instructions in it.
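    // Illustrative example: a block whose only predecessor ends in OpReturn is
    // unreachable; the result ids of its instructions are collected here so that
    // their decorations can be erased below.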
    for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
        Function* f = *fi;
        Block* entry = f->getEntryBlock();
        inReadableOrder(entry,
            [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue]
            (Block* b, ReachReason why, Block* header) {
                reachableBlocks.insert(b);
                if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header;
                if (why == ReachDeadMerge) unreachableMerges.insert(b);
            });
        for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
            Block* b = *bi;
            if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) {
                auto ii = b->getInstructions().cbegin();
                ++ii; // Keep potential decorations on the label.
                for (; ii != b->getInstructions().cend(); ++ii)
                    unreachableDefinitions.insert(ii->get()->getResultId());
            } else if (reachableBlocks.count(b) == 0) {
                // The normal case for unreachable code. All definitions are considered dead.
                for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii)
                    unreachableDefinitions.insert(ii->get()->getResultId());
            }
        }
    }

    // Modify unreachable merge blocks and unreachable continue targets.
    // Delete their contents.
    for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) {
        (*mergeIter)->rewriteAsCanonicalUnreachableMerge();
    }
    for (auto continueIter = headerForUnreachableContinue.begin();
         continueIter != headerForUnreachableContinue.end();
         ++continueIter) {
        Block* continue_target = continueIter->first;
        Block* header = continueIter->second;
        continue_target->rewriteAsCanonicalUnreachableContinue(header);
    }

    // Remove unneeded decorations for unreachable instructions
    for (auto decorationIter = decorations.begin(); decorationIter != decorations.end();) {
        Id decorationId = (*decorationIter)->getIdOperand(0);
        if (unreachableDefinitions.count(decorationId) != 0) {
            decorationIter = decorations.erase(decorationIter);
        } else {
            ++decorationIter;
        }
    }
}

// comment in header
void Builder::postProcessFeatures() {
    // Add per-instruction capabilities, extensions, etc.

    // Look for any 8/16 bit type in physical storage buffer class, and set the
    // appropriate capability. This happens in createSpvVariable for other storage
    // classes, but there isn't always a variable for physical storage buffer.
    for (int t = 0; t < (int)groupedTypes[enumCast(Op::OpTypePointer)].size(); ++t) {
        Instruction* type = groupedTypes[enumCast(Op::OpTypePointer)][t];
        if (type->getImmediateOperand(0) == (unsigned)StorageClass::PhysicalStorageBufferEXT) {
            if (containsType(type->getIdOperand(1), Op::OpTypeInt, 8)) {
                addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
                addCapability(spv::Capability::StorageBuffer8BitAccess);
            }
            if (containsType(type->getIdOperand(1), Op::OpTypeInt, 16) ||
                containsType(type->getIdOperand(1), Op::OpTypeFloat, 16)) {
                addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
                addCapability(spv::Capability::StorageBuffer16BitAccess);
            }
        }
    }

    // process all block-contained instructions
    for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
        Function* f = *fi;
        for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
            Block* b = *bi;
            for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)
                postProcess(*ii->get());

            // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether
            // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the
            // default.
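            // Illustrative example: a function-local variable holding a
            // buffer_reference pointer that the front end left unqualified is
            // decorated AliasedPointerEXT here.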
            for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {
                const Instruction& inst = *vi->get();
                Id resultId = inst.getResultId();
                if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {
                    bool foundDecoration = false;
                    const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                        if (decoration.get()->getIdOperand(0) == resultId &&
                            decoration.get()->getOpCode() == Op::OpDecorate &&
                            (decoration.get()->getImmediateOperand(1) == (unsigned)spv::Decoration::AliasedPointerEXT ||
                             decoration.get()->getImmediateOperand(1) == (unsigned)spv::Decoration::RestrictPointerEXT)) {
                            foundDecoration = true;
                        }
                    };
                    std::for_each(decorations.begin(), decorations.end(), function);
                    if (!foundDecoration) {
                        addDecoration(resultId, spv::Decoration::AliasedPointerEXT);
                    }
                }
            }
        }
    }

    // If any Vulkan memory model-specific functionality is used, update the
    // OpMemoryModel to match.
    if (capabilities.find(spv::Capability::VulkanMemoryModelKHR) != capabilities.end()) {
        memoryModel = spv::MemoryModel::VulkanKHR;
        addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5);
    }

    // Add Aliased decoration if there's more than one Workgroup Block variable.
    if (capabilities.find(spv::Capability::WorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) {
        assert(entryPoints.size() == 1);
        auto &ep = entryPoints[0];

        std::vector<Id> workgroup_variables;
        for (int i = 0; i < (int)ep->getNumOperands(); i++) {
            if (!ep->isIdOperand(i))
                continue;

            const Id id = ep->getIdOperand(i);
            const Instruction *instr = module.getInstruction(id);
            if (instr->getOpCode() != spv::Op::OpVariable)
                continue;

            if (instr->getImmediateOperand(0) == (unsigned)spv::StorageClass::Workgroup)
                workgroup_variables.push_back(id);
        }

        if (workgroup_variables.size() > 1) {
            for (size_t i = 0; i < workgroup_variables.size(); i++)
                addDecoration(workgroup_variables[i], spv::Decoration::Aliased);
        }
    }
}

// SPIR-V requires that any instruction consuming the result of an OpSampledImage
// be in the same block as the OpSampledImage instruction. This pass finds
// uses of OpSampledImage where that is not the case and duplicates the
// OpSampledImage to be immediately before the instruction that consumes it.
// The old OpSampledImage is left in place, potentially with no users.
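// Illustrative example (hypothetical ids):
//   %A:  %si = OpSampledImage %type %image %sampler
//        OpBranch %B
//   %B:  %r  = OpImageSampleImplicitLod %v4float %si %coord
// is rewritten so that %B gets its own copy:
//   %B:  %si2 = OpSampledImage %type %image %sampler
//        %r   = OpImageSampleImplicitLod %v4float %si2 %coord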
void Builder::postProcessSamplers()
{
    // first, find all OpSampledImage instructions and store them in a map.
    std::map<Id, Instruction*> sampledImageInstrs;
    for (auto f: module.getFunctions()) {
        for (auto b: f->getBlocks()) {
            for (auto &i: b->getInstructions()) {
                if (i->getOpCode() == spv::Op::OpSampledImage) {
                    sampledImageInstrs[i->getResultId()] = i.get();
                }
            }
        }
    }
    // next find all uses of the given ids and rewrite them if needed.
    for (auto f: module.getFunctions()) {
        for (auto b: f->getBlocks()) {
            auto &instrs = b->getInstructions();
            for (size_t idx = 0; idx < instrs.size(); idx++) {
                Instruction *i = instrs[idx].get();
                for (int opnum = 0; opnum < i->getNumOperands(); opnum++) {
                    // Is this operand of the current instruction the result of an OpSampledImage?
                    if (i->isIdOperand(opnum) &&
                        sampledImageInstrs.count(i->getIdOperand(opnum)))
                    {
                        Instruction *opSampImg = sampledImageInstrs[i->getIdOperand(opnum)];
                        if (i->getBlock() != opSampImg->getBlock()) {
                            Instruction *newInstr = new Instruction(getUniqueId(),
                                                                    opSampImg->getTypeId(),
                                                                    spv::Op::OpSampledImage);
                            newInstr->addIdOperand(opSampImg->getIdOperand(0));
                            newInstr->addIdOperand(opSampImg->getIdOperand(1));
                            newInstr->setBlock(b);

                            // rewrite the user of the OpSampledImage to use the new instruction.
                            i->setIdOperand(opnum, newInstr->getResultId());
                            // insert the new OpSampledImage right before the current instruction.
                            instrs.insert(instrs.begin() + idx,
                                          std::unique_ptr<Instruction>(newInstr));
                            idx++;
                        }
                    }
                }
            }
        }
    }
}

// comment in header
void Builder::postProcess(bool compileOnly)
{
    // postProcessCFG needs an entrypoint to determine what is reachable, but if we are not
    // creating an "executable" shader, we don't have an entrypoint.
    if (!compileOnly)
        postProcessCFG();

    postProcessFeatures();
    postProcessSamplers();
}

} // end spv namespace