// SpvPostProcess.cpp
  1. //
  2. // Copyright (C) 2018 Google, Inc.
  3. //
  4. // All rights reserved.
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions
  8. // are met:
  9. //
  10. // Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. //
  13. // Redistributions in binary form must reproduce the above
  14. // copyright notice, this list of conditions and the following
  15. // disclaimer in the documentation and/or other materials provided
  16. // with the distribution.
  17. //
  18. // Neither the name of 3Dlabs Inc. Ltd. nor the names of its
  19. // contributors may be used to endorse or promote products derived
  20. // from this software without specific prior written permission.
  21. //
  22. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  25. // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  26. // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  27. // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  28. // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  29. // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  30. // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31. // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  32. // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. // POSSIBILITY OF SUCH DAMAGE.
  34. //
  35. // Post-processing for SPIR-V IR, in internal form, not standard binary form.
  36. //
  37. #include <cassert>
  38. #include <cstdlib>
  39. #include <unordered_set>
  40. #include <algorithm>
  41. #include "SpvBuilder.h"
  42. #include "spirv.hpp"
  43. #include "GlslangToSpv.h"
  44. #include "SpvBuilder.h"
  45. namespace spv {
  46. #include "GLSL.std.450.h"
  47. #include "GLSL.ext.KHR.h"
  48. #include "GLSL.ext.EXT.h"
  49. #ifdef AMD_EXTENSIONS
  50. #include "GLSL.ext.AMD.h"
  51. #endif
  52. #ifdef NV_EXTENSIONS
  53. #include "GLSL.ext.NV.h"
  54. #endif
  55. }
  56. namespace spv {
  57. // Hook to visit each operand type and result type of an instruction.
  58. // Will be called multiple times for one instruction, once for each typed
  59. // operand and the result.
  60. void Builder::postProcessType(const Instruction& inst, Id typeId)
  61. {
  62. // Characterize the type being questioned
  63. Id basicTypeOp = getMostBasicTypeClass(typeId);
  64. int width = 0;
  65. if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt)
  66. width = getScalarTypeWidth(typeId);
  67. // Do opcode-specific checks
  68. switch (inst.getOpCode()) {
  69. case OpLoad:
  70. case OpStore:
  71. if (basicTypeOp == OpTypeStruct) {
  72. if (containsType(typeId, OpTypeInt, 8))
  73. addCapability(CapabilityInt8);
  74. if (containsType(typeId, OpTypeInt, 16))
  75. addCapability(CapabilityInt16);
  76. if (containsType(typeId, OpTypeFloat, 16))
  77. addCapability(CapabilityFloat16);
  78. } else {
  79. StorageClass storageClass = getStorageClass(inst.getIdOperand(0));
  80. if (width == 8) {
  81. switch (storageClass) {
  82. case StorageClassPhysicalStorageBufferEXT:
  83. case StorageClassUniform:
  84. case StorageClassStorageBuffer:
  85. case StorageClassPushConstant:
  86. break;
  87. default:
  88. addCapability(CapabilityInt8);
  89. break;
  90. }
  91. } else if (width == 16) {
  92. switch (storageClass) {
  93. case StorageClassPhysicalStorageBufferEXT:
  94. case StorageClassUniform:
  95. case StorageClassStorageBuffer:
  96. case StorageClassPushConstant:
  97. case StorageClassInput:
  98. case StorageClassOutput:
  99. break;
  100. default:
  101. if (basicTypeOp == OpTypeInt)
  102. addCapability(CapabilityInt16);
  103. if (basicTypeOp == OpTypeFloat)
  104. addCapability(CapabilityFloat16);
  105. break;
  106. }
  107. }
  108. }
  109. break;
  110. case OpAccessChain:
  111. case OpPtrAccessChain:
  112. case OpCopyObject:
  113. break;
  114. case OpFConvert:
  115. case OpSConvert:
  116. case OpUConvert:
  117. // Look for any 8/16-bit storage capabilities. If there are none, assume that
  118. // the convert instruction requires the Float16/Int8/16 capability.
  119. if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) {
  120. bool foundStorage = false;
  121. for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
  122. spv::Capability cap = *it;
  123. if (cap == spv::CapabilityStorageInputOutput16 ||
  124. cap == spv::CapabilityStoragePushConstant16 ||
  125. cap == spv::CapabilityStorageUniformBufferBlock16 ||
  126. cap == spv::CapabilityStorageUniform16) {
  127. foundStorage = true;
  128. break;
  129. }
  130. }
  131. if (!foundStorage) {
  132. if (containsType(typeId, OpTypeFloat, 16))
  133. addCapability(CapabilityFloat16);
  134. if (containsType(typeId, OpTypeInt, 16))
  135. addCapability(CapabilityInt16);
  136. }
  137. }
  138. if (containsType(typeId, OpTypeInt, 8)) {
  139. bool foundStorage = false;
  140. for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
  141. spv::Capability cap = *it;
  142. if (cap == spv::CapabilityStoragePushConstant8 ||
  143. cap == spv::CapabilityUniformAndStorageBuffer8BitAccess ||
  144. cap == spv::CapabilityStorageBuffer8BitAccess) {
  145. foundStorage = true;
  146. break;
  147. }
  148. }
  149. if (!foundStorage) {
  150. addCapability(CapabilityInt8);
  151. }
  152. }
  153. break;
  154. case OpExtInst:
  155. #if AMD_EXTENSIONS
  156. switch (inst.getImmediateOperand(1)) {
  157. case GLSLstd450Frexp:
  158. case GLSLstd450FrexpStruct:
  159. if (getSpvVersion() < glslang::EShTargetSpv_1_3 && containsType(typeId, OpTypeInt, 16))
  160. addExtension(spv::E_SPV_AMD_gpu_shader_int16);
  161. break;
  162. case GLSLstd450InterpolateAtCentroid:
  163. case GLSLstd450InterpolateAtSample:
  164. case GLSLstd450InterpolateAtOffset:
  165. if (getSpvVersion() < glslang::EShTargetSpv_1_3 && containsType(typeId, OpTypeFloat, 16))
  166. addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
  167. break;
  168. default:
  169. break;
  170. }
  171. #endif
  172. break;
  173. default:
  174. if (basicTypeOp == OpTypeFloat && width == 16)
  175. addCapability(CapabilityFloat16);
  176. if (basicTypeOp == OpTypeInt && width == 16)
  177. addCapability(CapabilityInt16);
  178. if (basicTypeOp == OpTypeInt && width == 8)
  179. addCapability(CapabilityInt8);
  180. break;
  181. }
  182. }
  183. // Called for each instruction that resides in a block.
  184. void Builder::postProcess(Instruction& inst)
  185. {
  186. // Add capabilities based simply on the opcode.
  187. switch (inst.getOpCode()) {
  188. case OpExtInst:
  189. switch (inst.getImmediateOperand(1)) {
  190. case GLSLstd450InterpolateAtCentroid:
  191. case GLSLstd450InterpolateAtSample:
  192. case GLSLstd450InterpolateAtOffset:
  193. addCapability(CapabilityInterpolationFunction);
  194. break;
  195. default:
  196. break;
  197. }
  198. break;
  199. case OpDPdxFine:
  200. case OpDPdyFine:
  201. case OpFwidthFine:
  202. case OpDPdxCoarse:
  203. case OpDPdyCoarse:
  204. case OpFwidthCoarse:
  205. addCapability(CapabilityDerivativeControl);
  206. break;
  207. case OpImageQueryLod:
  208. case OpImageQuerySize:
  209. case OpImageQuerySizeLod:
  210. case OpImageQuerySamples:
  211. case OpImageQueryLevels:
  212. addCapability(CapabilityImageQuery);
  213. break;
  214. #ifdef NV_EXTENSIONS
  215. case OpGroupNonUniformPartitionNV:
  216. addExtension(E_SPV_NV_shader_subgroup_partitioned);
  217. addCapability(CapabilityGroupNonUniformPartitionedNV);
  218. break;
  219. #endif
  220. case OpLoad:
  221. case OpStore:
  222. {
  223. // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain
  224. // index list to compute the misalignment. The pre-existing alignment value
  225. // (set via Builder::AccessChain::alignment) only accounts for the base of
  226. // the reference type and any scalar component selection in the accesschain,
  227. // and this function computes the rest from the SPIR-V Offset decorations.
  228. Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));
  229. if (accessChain->getOpCode() == OpAccessChain) {
  230. Instruction *base = module.getInstruction(accessChain->getIdOperand(0));
  231. // Get the type of the base of the access chain. It must be a pointer type.
  232. Id typeId = base->getTypeId();
  233. Instruction *type = module.getInstruction(typeId);
  234. assert(type->getOpCode() == OpTypePointer);
  235. if (type->getImmediateOperand(0) != StorageClassPhysicalStorageBufferEXT) {
  236. break;
  237. }
  238. // Get the pointee type.
  239. typeId = type->getIdOperand(1);
  240. type = module.getInstruction(typeId);
  241. // Walk the index list for the access chain. For each index, find any
  242. // misalignment that can apply when accessing the member/element via
  243. // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all
  244. // together.
  245. int alignment = 0;
  246. for (int i = 1; i < accessChain->getNumOperands(); ++i) {
  247. Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));
  248. if (type->getOpCode() == OpTypeStruct) {
  249. assert(idx->getOpCode() == OpConstant);
  250. unsigned int c = idx->getImmediateOperand(0);
  251. const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
  252. if (decoration.get()->getOpCode() == OpMemberDecorate &&
  253. decoration.get()->getIdOperand(0) == typeId &&
  254. decoration.get()->getImmediateOperand(1) == c &&
  255. (decoration.get()->getImmediateOperand(2) == DecorationOffset ||
  256. decoration.get()->getImmediateOperand(2) == DecorationMatrixStride)) {
  257. alignment |= decoration.get()->getImmediateOperand(3);
  258. }
  259. };
  260. std::for_each(decorations.begin(), decorations.end(), function);
  261. // get the next member type
  262. typeId = type->getIdOperand(c);
  263. type = module.getInstruction(typeId);
  264. } else if (type->getOpCode() == OpTypeArray ||
  265. type->getOpCode() == OpTypeRuntimeArray) {
  266. const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
  267. if (decoration.get()->getOpCode() == OpDecorate &&
  268. decoration.get()->getIdOperand(0) == typeId &&
  269. decoration.get()->getImmediateOperand(1) == DecorationArrayStride) {
  270. alignment |= decoration.get()->getImmediateOperand(2);
  271. }
  272. };
  273. std::for_each(decorations.begin(), decorations.end(), function);
  274. // Get the element type
  275. typeId = type->getIdOperand(0);
  276. type = module.getInstruction(typeId);
  277. } else {
  278. // Once we get to any non-aggregate type, we're done.
  279. break;
  280. }
  281. }
  282. assert(inst.getNumOperands() >= 3);
  283. unsigned int memoryAccess = inst.getImmediateOperand((inst.getOpCode() == OpStore) ? 2 : 1);
  284. assert(memoryAccess & MemoryAccessAlignedMask);
  285. static_cast<void>(memoryAccess);
  286. // Compute the index of the alignment operand.
  287. int alignmentIdx = 2;
  288. if (inst.getOpCode() == OpStore)
  289. alignmentIdx++;
  290. // Merge new and old (mis)alignment
  291. alignment |= inst.getImmediateOperand(alignmentIdx);
  292. // Pick the LSB
  293. alignment = alignment & ~(alignment & (alignment-1));
  294. // update the Aligned operand
  295. inst.setImmediateOperand(alignmentIdx, alignment);
  296. }
  297. break;
  298. }
  299. default:
  300. break;
  301. }
  302. // Checks based on type
  303. if (inst.getTypeId() != NoType)
  304. postProcessType(inst, inst.getTypeId());
  305. for (int op = 0; op < inst.getNumOperands(); ++op) {
  306. if (inst.isIdOperand(op)) {
  307. // In blocks, these are always result ids, but we are relying on
  308. // getTypeId() to return NoType for things like OpLabel.
  309. if (getTypeId(inst.getIdOperand(op)) != NoType)
  310. postProcessType(inst, getTypeId(inst.getIdOperand(op)));
  311. }
  312. }
  313. }
  314. // Called for each instruction in a reachable block.
  315. void Builder::postProcessReachable(const Instruction&)
  316. {
  317. // did have code here, but questionable to do so without deleting the instructions
  318. }
  319. // comment in header
  320. void Builder::postProcess()
  321. {
  322. std::unordered_set<const Block*> reachableBlocks;
  323. std::unordered_set<Id> unreachableDefinitions;
  324. // Collect IDs defined in unreachable blocks. For each function, label the
  325. // reachable blocks first. Then for each unreachable block, collect the
  326. // result IDs of the instructions in it.
  327. for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
  328. Function* f = *fi;
  329. Block* entry = f->getEntryBlock();
  330. inReadableOrder(entry, [&reachableBlocks](const Block* b) { reachableBlocks.insert(b); });
  331. for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
  332. Block* b = *bi;
  333. if (reachableBlocks.count(b) == 0) {
  334. for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)
  335. unreachableDefinitions.insert(ii->get()->getResultId());
  336. }
  337. }
  338. }
  339. // Remove unneeded decorations, for unreachable instructions
  340. decorations.erase(std::remove_if(decorations.begin(), decorations.end(),
  341. [&unreachableDefinitions](std::unique_ptr<Instruction>& I) -> bool {
  342. Id decoration_id = I.get()->getIdOperand(0);
  343. return unreachableDefinitions.count(decoration_id) != 0;
  344. }),
  345. decorations.end());
  346. // Add per-instruction capabilities, extensions, etc.,
  347. // Look for any 8/16 bit type in physical storage buffer class, and set the
  348. // appropriate capability. This happens in createSpvVariable for other storage
  349. // classes, but there isn't always a variable for physical storage buffer.
  350. for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {
  351. Instruction* type = groupedTypes[OpTypePointer][t];
  352. if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
  353. if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
  354. addExtension(spv::E_SPV_KHR_8bit_storage);
  355. addCapability(spv::CapabilityStorageBuffer8BitAccess);
  356. }
  357. if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
  358. containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
  359. addExtension(spv::E_SPV_KHR_16bit_storage);
  360. addCapability(spv::CapabilityStorageBuffer16BitAccess);
  361. }
  362. }
  363. }
  364. // process all reachable instructions...
  365. for (auto bi = reachableBlocks.cbegin(); bi != reachableBlocks.cend(); ++bi) {
  366. const Block* block = *bi;
  367. const auto function = [this](const std::unique_ptr<Instruction>& inst) { postProcessReachable(*inst.get()); };
  368. std::for_each(block->getInstructions().begin(), block->getInstructions().end(), function);
  369. }
  370. // process all block-contained instructions
  371. for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
  372. Function* f = *fi;
  373. for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
  374. Block* b = *bi;
  375. for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)
  376. postProcess(*ii->get());
  377. // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether
  378. // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the
  379. // default.
  380. for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {
  381. const Instruction& inst = *vi->get();
  382. Id resultId = inst.getResultId();
  383. if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {
  384. bool foundDecoration = false;
  385. const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
  386. if (decoration.get()->getIdOperand(0) == resultId &&
  387. decoration.get()->getOpCode() == OpDecorate &&
  388. (decoration.get()->getImmediateOperand(1) == spv::DecorationAliasedPointerEXT ||
  389. decoration.get()->getImmediateOperand(1) == spv::DecorationRestrictPointerEXT)) {
  390. foundDecoration = true;
  391. }
  392. };
  393. std::for_each(decorations.begin(), decorations.end(), function);
  394. if (!foundDecoration) {
  395. addDecoration(resultId, spv::DecorationAliasedPointerEXT);
  396. }
  397. }
  398. }
  399. }
  400. }
  401. }
  402. }; // end spv namespace