linkValidate.cpp

//
// Copyright (C) 2013 LunarG, Inc.
// Copyright (C) 2017 ARM Limited.
// Copyright (C) 2015-2018 Google, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//    Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//
//    Redistributions in binary form must reproduce the above
//    copyright notice, this list of conditions and the following
//    disclaimer in the documentation and/or other materials provided
//    with the distribution.
//
//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//
// Do link-time merging and validation of intermediate representations.
//
// Basic model is that during compilation, each compilation unit (shader) is
// compiled into one TIntermediate instance. Then, at link time, multiple
// units for the same stage can be merged together, which can generate errors.
// Then, after all merging, a single instance of TIntermediate represents
// the whole stage. A final error check can be done on the resulting stage,
// even if no merging was done (i.e., the stage was only one compilation unit).
//
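// A minimal sketch of the intended flow (assumed driver code; real callers go
// through the public glslang API, which wraps these TIntermediate methods):
//
//     TIntermediate& stage = *firstUnit;               // one instance per unit
//     stage.merge(infoSink, *secondUnit);              // repeat per extra unit
//     stage.finalCheck(infoSink, /*keepUncalled=*/false);
//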
#include "localintermediate.h"
#include "../Include/InfoSink.h"

namespace glslang {

//
// Link-time error emitter.
//
void TIntermediate::error(TInfoSink& infoSink, const char* message)
{
#ifndef GLSLANG_WEB
    infoSink.info.prefix(EPrefixError);
    infoSink.info << "Linking " << StageName(language) << " stage: " << message << "\n";
#endif

    ++numErrors;
}

// Link-time warning.
void TIntermediate::warn(TInfoSink& infoSink, const char* message)
{
#ifndef GLSLANG_WEB
    infoSink.info.prefix(EPrefixWarning);
    infoSink.info << "Linking " << StageName(language) << " stage: " << message << "\n";
#endif
}

// TODO: 4.4 offset/align: "Two blocks linked together in the same program with the same block
// name must have the exact same set of members qualified with offset and their integral-constant
// expression values must be the same, or a link-time error results."

//
// Merge the information from 'unit' into 'this'
//
void TIntermediate::merge(TInfoSink& infoSink, TIntermediate& unit)
{
#if !defined(GLSLANG_WEB) && !defined(GLSLANG_ANGLE)
    mergeCallGraphs(infoSink, unit);
    mergeModes(infoSink, unit);
    mergeTrees(infoSink, unit);
#endif
}

void TIntermediate::mergeCallGraphs(TInfoSink& infoSink, TIntermediate& unit)
{
    if (unit.getNumEntryPoints() > 0) {
        if (getNumEntryPoints() > 0)
            error(infoSink, "can't handle multiple entry points per stage");
        else {
            entryPointName = unit.getEntryPointName();
            entryPointMangledName = unit.getEntryPointMangledName();
        }
    }
    numEntryPoints += unit.getNumEntryPoints();

    callGraph.insert(callGraph.end(), unit.callGraph.begin(), unit.callGraph.end());
}

#if !defined(GLSLANG_WEB) && !defined(GLSLANG_ANGLE)

#define MERGE_MAX(member) member = std::max(member, unit.member)
#define MERGE_TRUE(member) if (unit.member) member = unit.member;
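// For example, MERGE_MAX(spvVersion.spv) expands to
//     spvVersion.spv = std::max(spvVersion.spv, unit.spvVersion.spv);
// and MERGE_TRUE(earlyFragmentTests) copies the unit's flag only when it is set.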
void TIntermediate::mergeModes(TInfoSink& infoSink, TIntermediate& unit)
{
    if (language != unit.language)
        error(infoSink, "stages must match when linking into a single stage");

    if (getSource() == EShSourceNone)
        setSource(unit.getSource());
    if (getSource() != unit.getSource())
        error(infoSink, "can't link compilation units from different source languages");

    if (treeRoot == nullptr) {
        profile = unit.profile;
        version = unit.version;
        requestedExtensions = unit.requestedExtensions;
    } else {
        if ((isEsProfile()) != (unit.isEsProfile()))
            error(infoSink, "Cannot cross link ES and desktop profiles");
        else if (unit.profile == ECompatibilityProfile)
            profile = ECompatibilityProfile;
        version = std::max(version, unit.version);
        requestedExtensions.insert(unit.requestedExtensions.begin(), unit.requestedExtensions.end());
    }

    MERGE_MAX(spvVersion.spv);
    MERGE_MAX(spvVersion.vulkanGlsl);
    MERGE_MAX(spvVersion.vulkan);
    MERGE_MAX(spvVersion.openGl);

    numErrors += unit.getNumErrors();
    // Only one push_constant is allowed, mergeLinkerObjects() will ensure the push_constant
    // is the same for all units.
    if (numPushConstants > 1 || unit.numPushConstants > 1)
        error(infoSink, "Only one push_constant block is allowed per stage");
    numPushConstants = std::min(numPushConstants + unit.numPushConstants, 1);

    if (unit.invocations != TQualifier::layoutNotSet) {
        if (invocations == TQualifier::layoutNotSet)
            invocations = unit.invocations;
        else if (invocations != unit.invocations)
            error(infoSink, "number of invocations must match between compilation units");
    }

    if (vertices == TQualifier::layoutNotSet)
        vertices = unit.vertices;
    else if (unit.vertices != TQualifier::layoutNotSet && vertices != unit.vertices) {
        if (language == EShLangGeometry || language == EShLangMeshNV)
            error(infoSink, "Contradictory layout max_vertices values");
        else if (language == EShLangTessControl)
            error(infoSink, "Contradictory layout vertices values");
        else
            assert(0);
    }
    if (primitives == TQualifier::layoutNotSet)
        primitives = unit.primitives;
    else if (primitives != unit.primitives) {
        if (language == EShLangMeshNV)
            error(infoSink, "Contradictory layout max_primitives values");
        else
            assert(0);
    }

    if (inputPrimitive == ElgNone)
        inputPrimitive = unit.inputPrimitive;
    else if (unit.inputPrimitive != ElgNone && inputPrimitive != unit.inputPrimitive)
        error(infoSink, "Contradictory input layout primitives");

    if (outputPrimitive == ElgNone)
        outputPrimitive = unit.outputPrimitive;
    else if (unit.outputPrimitive != ElgNone && outputPrimitive != unit.outputPrimitive)
        error(infoSink, "Contradictory output layout primitives");

    if (originUpperLeft != unit.originUpperLeft || pixelCenterInteger != unit.pixelCenterInteger)
        error(infoSink, "gl_FragCoord redeclarations must match across shaders");

    if (vertexSpacing == EvsNone)
        vertexSpacing = unit.vertexSpacing;
    else if (vertexSpacing != unit.vertexSpacing)
        error(infoSink, "Contradictory input vertex spacing");

    if (vertexOrder == EvoNone)
        vertexOrder = unit.vertexOrder;
    else if (vertexOrder != unit.vertexOrder)
        error(infoSink, "Contradictory triangle ordering");

    MERGE_TRUE(pointMode);

    for (int i = 0; i < 3; ++i) {
        if (unit.localSizeNotDefault[i]) {
            if (!localSizeNotDefault[i]) {
                localSize[i] = unit.localSize[i];
                localSizeNotDefault[i] = true;
            }
            else if (localSize[i] != unit.localSize[i])
                error(infoSink, "Contradictory local size");
        }

        if (localSizeSpecId[i] == TQualifier::layoutNotSet)
            localSizeSpecId[i] = unit.localSizeSpecId[i];
        else if (localSizeSpecId[i] != unit.localSizeSpecId[i])
            error(infoSink, "Contradictory local size specialization ids");
    }

    MERGE_TRUE(earlyFragmentTests);
    MERGE_TRUE(postDepthCoverage);

    if (depthLayout == EldNone)
        depthLayout = unit.depthLayout;
    else if (depthLayout != unit.depthLayout)
        error(infoSink, "Contradictory depth layouts");

    MERGE_TRUE(depthReplacing);
    MERGE_TRUE(hlslFunctionality1);

    blendEquations |= unit.blendEquations;

    MERGE_TRUE(xfbMode);

    for (size_t b = 0; b < xfbBuffers.size(); ++b) {
        if (xfbBuffers[b].stride == TQualifier::layoutXfbStrideEnd)
            xfbBuffers[b].stride = unit.xfbBuffers[b].stride;
        else if (xfbBuffers[b].stride != unit.xfbBuffers[b].stride)
            error(infoSink, "Contradictory xfb_stride");
        xfbBuffers[b].implicitStride = std::max(xfbBuffers[b].implicitStride, unit.xfbBuffers[b].implicitStride);
        if (unit.xfbBuffers[b].contains64BitType)
            xfbBuffers[b].contains64BitType = true;
        if (unit.xfbBuffers[b].contains32BitType)
            xfbBuffers[b].contains32BitType = true;
        if (unit.xfbBuffers[b].contains16BitType)
            xfbBuffers[b].contains16BitType = true;
        // TODO: 4.4 link: enhanced layouts: compare ranges
    }

    MERGE_TRUE(multiStream);
    MERGE_TRUE(layoutOverrideCoverage);
    MERGE_TRUE(geoPassthroughEXT);

    for (unsigned int i = 0; i < unit.shiftBinding.size(); ++i) {
        if (unit.shiftBinding[i] > 0)
            setShiftBinding((TResourceType)i, unit.shiftBinding[i]);
    }

    for (unsigned int i = 0; i < unit.shiftBindingForSet.size(); ++i) {
        for (auto it = unit.shiftBindingForSet[i].begin(); it != unit.shiftBindingForSet[i].end(); ++it)
            setShiftBindingForSet((TResourceType)i, it->second, it->first);
    }

    resourceSetBinding.insert(resourceSetBinding.end(), unit.resourceSetBinding.begin(), unit.resourceSetBinding.end());

    MERGE_TRUE(autoMapBindings);
    MERGE_TRUE(autoMapLocations);
    MERGE_TRUE(invertY);
    MERGE_TRUE(flattenUniformArrays);
    MERGE_TRUE(useUnknownFormat);
    MERGE_TRUE(hlslOffsets);
    MERGE_TRUE(useStorageBuffer);
    MERGE_TRUE(hlslIoMapping);

    // TODO: sourceFile
    // TODO: sourceText
    // TODO: processes

    MERGE_TRUE(needToLegalize);
    MERGE_TRUE(binaryDoubleOutput);
    MERGE_TRUE(usePhysicalStorageBuffer);
}

//
// Merge the 'unit' AST into 'this' AST.
// That includes rationalizing the unique IDs, which were set up independently,
// and might have overlaps that are not the same symbol, or might have different
// IDs for what should be the same shared symbol.
//
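// Illustrative example (hypothetical IDs): if both trees declare gl_Position,
// the seeded map supplies this tree's ID for it, so the unit's copy is changed
// to match; a unit-local symbol whose ID merely clashes numerically is instead
// shifted past this tree's max ID into a fresh range.
//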
void TIntermediate::mergeTrees(TInfoSink& infoSink, TIntermediate& unit)
{
    if (unit.treeRoot == nullptr)
        return;

    if (treeRoot == nullptr) {
        treeRoot = unit.treeRoot;
        return;
    }

    // Getting this far means we have two existing trees to merge...
    numShaderRecordBlocks += unit.numShaderRecordBlocks;
    numTaskNVBlocks += unit.numTaskNVBlocks;

    // Get the top-level globals of each unit
    TIntermSequence& globals = treeRoot->getAsAggregate()->getSequence();
    TIntermSequence& unitGlobals = unit.treeRoot->getAsAggregate()->getSequence();

    // Get the linker-object lists
    TIntermSequence& linkerObjects = findLinkerObjects()->getSequence();
    const TIntermSequence& unitLinkerObjects = unit.findLinkerObjects()->getSequence();

    // Map by global name to unique ID to rationalize the same object having
    // differing IDs in different trees.
    TIdMaps idMaps;
    int maxId;
    seedIdMap(idMaps, maxId);
    remapIds(idMaps, maxId + 1, unit);

    mergeBodies(infoSink, globals, unitGlobals);
    mergeLinkerObjects(infoSink, linkerObjects, unitLinkerObjects);
    ioAccessed.insert(unit.ioAccessed.begin(), unit.ioAccessed.end());
}

#endif

static const TString& getNameForIdMap(TIntermSymbol* symbol)
{
    TShaderInterface si = symbol->getType().getShaderInterface();
    if (si == EsiNone)
        return symbol->getName();
    else
        return symbol->getType().getTypeName();
}

// Traverser that seeds an ID map with all built-ins, and tracks the
// maximum ID used.
// (It would be nice to put this in a function, but that causes warnings
// on having no bodies for the copy-constructor/operator=.)
class TBuiltInIdTraverser : public TIntermTraverser {
public:
    TBuiltInIdTraverser(TIdMaps& idMaps) : idMaps(idMaps), maxId(0) { }
    // If it's a built in, add it to the map.
    // Track the max ID.
    virtual void visitSymbol(TIntermSymbol* symbol)
    {
        const TQualifier& qualifier = symbol->getType().getQualifier();
        if (qualifier.builtIn != EbvNone) {
            TShaderInterface si = symbol->getType().getShaderInterface();
            idMaps[si][getNameForIdMap(symbol)] = symbol->getId();
        }
        maxId = std::max(maxId, symbol->getId());
    }
    int getMaxId() const { return maxId; }
protected:
    TBuiltInIdTraverser(TBuiltInIdTraverser&);
    TBuiltInIdTraverser& operator=(TBuiltInIdTraverser&);
    TIdMaps& idMaps;
    int maxId;
};

// Traverser that seeds an ID map with non-builtins.
// (It would be nice to put this in a function, but that causes warnings
// on having no bodies for the copy-constructor/operator=.)
class TUserIdTraverser : public TIntermTraverser {
public:
    TUserIdTraverser(TIdMaps& idMaps) : idMaps(idMaps) { }
    // If it's a non-built-in global, add it to the map.
    virtual void visitSymbol(TIntermSymbol* symbol)
    {
        const TQualifier& qualifier = symbol->getType().getQualifier();
        if (qualifier.builtIn == EbvNone) {
            TShaderInterface si = symbol->getType().getShaderInterface();
            idMaps[si][getNameForIdMap(symbol)] = symbol->getId();
        }
    }
protected:
    TUserIdTraverser(TUserIdTraverser&);
    TUserIdTraverser& operator=(TUserIdTraverser&);
    TIdMaps& idMaps; // over biggest id
};
// Initialize the ID map with what we know of 'this' AST.
void TIntermediate::seedIdMap(TIdMaps& idMaps, int& maxId)
{
    // all built-ins everywhere need to align on IDs and contribute to the max ID
    TBuiltInIdTraverser builtInIdTraverser(idMaps);
    treeRoot->traverse(&builtInIdTraverser);
    maxId = builtInIdTraverser.getMaxId();

    // user variables in the linker object list need to align on ids
    TUserIdTraverser userIdTraverser(idMaps);
    findLinkerObjects()->traverse(&userIdTraverser);
}

// Traverser to map an AST ID to what was known from the seeding AST.
// (It would be nice to put this in a function, but that causes warnings
// on having no bodies for the copy-constructor/operator=.)
class TRemapIdTraverser : public TIntermTraverser {
public:
    TRemapIdTraverser(const TIdMaps& idMaps, int idShift) : idMaps(idMaps), idShift(idShift) { }
    // Do the mapping:
    //  - if the same symbol, adopt the 'this' ID
    //  - otherwise, ensure a unique ID by shifting to a new space
    virtual void visitSymbol(TIntermSymbol* symbol)
    {
        const TQualifier& qualifier = symbol->getType().getQualifier();
        bool remapped = false;
        if (qualifier.isLinkable() || qualifier.builtIn != EbvNone) {
            TShaderInterface si = symbol->getType().getShaderInterface();
            auto it = idMaps[si].find(getNameForIdMap(symbol));
            if (it != idMaps[si].end()) {
                symbol->changeId(it->second);
                remapped = true;
            }
        }
        if (!remapped)
            symbol->changeId(symbol->getId() + idShift);
    }
protected:
    TRemapIdTraverser(TRemapIdTraverser&);
    TRemapIdTraverser& operator=(TRemapIdTraverser&);
    const TIdMaps& idMaps;
    int idShift;
};

void TIntermediate::remapIds(const TIdMaps& idMaps, int idShift, TIntermediate& unit)
{
    // Remap all IDs to either share or be unique, as dictated by the idMap and idShift.
    TRemapIdTraverser idTraverser(idMaps, idShift);
    unit.getTreeRoot()->traverse(&idTraverser);
}

//
// Merge the function bodies and global-level initializers from unitGlobals into globals.
// Will error check duplication of function bodies for the same signature.
//
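// Illustrative: if two units each define `void foo(float)`, both bodies sit at
// the global level under the same mangled name, so the check below reports the
// duplicate-signature error (mangled names encode the parameter types).
//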
void TIntermediate::mergeBodies(TInfoSink& infoSink, TIntermSequence& globals, const TIntermSequence& unitGlobals)
{
    // TODO: link-time performance: Processing in alphabetical order will be faster

    // Error check the global objects, not including the linker objects
    for (unsigned int child = 0; child < globals.size() - 1; ++child) {
        for (unsigned int unitChild = 0; unitChild < unitGlobals.size() - 1; ++unitChild) {
            TIntermAggregate* body = globals[child]->getAsAggregate();
            TIntermAggregate* unitBody = unitGlobals[unitChild]->getAsAggregate();
            if (body && unitBody && body->getOp() == EOpFunction && unitBody->getOp() == EOpFunction && body->getName() == unitBody->getName()) {
                error(infoSink, "Multiple function bodies in multiple compilation units for the same signature in the same stage:");
                infoSink.info << " " << globals[child]->getAsAggregate()->getName() << "\n";
            }
        }
    }

    // Merge the global objects, just in front of the linker objects
    globals.insert(globals.end() - 1, unitGlobals.begin(), unitGlobals.end() - 1);
}

//
// Merge the linker objects from unitLinkerObjects into linkerObjects.
// Duplication is expected and filtered out, but contradictions are an error.
//
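// Illustrative: `uniform Buf { vec4 a; } x;` in one unit and
// `uniform Buf { vec4 a; } y;` in another are treated as the same linker
// object, since blocks in the same shader interface match on the block name
// "Buf", not on the instance names.
//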
void TIntermediate::mergeLinkerObjects(TInfoSink& infoSink, TIntermSequence& linkerObjects, const TIntermSequence& unitLinkerObjects)
{
    // Error check and merge the linker objects (duplicates should not be created)
    std::size_t initialNumLinkerObjects = linkerObjects.size();
    for (unsigned int unitLinkObj = 0; unitLinkObj < unitLinkerObjects.size(); ++unitLinkObj) {
        bool merge = true;
        for (std::size_t linkObj = 0; linkObj < initialNumLinkerObjects; ++linkObj) {
            TIntermSymbol* symbol = linkerObjects[linkObj]->getAsSymbolNode();
            TIntermSymbol* unitSymbol = unitLinkerObjects[unitLinkObj]->getAsSymbolNode();
            assert(symbol && unitSymbol);

            bool isSameSymbol = false;
            // If they are both blocks in the same shader interface,
            // match by the block-name, not the identifier name.
            if (symbol->getType().getBasicType() == EbtBlock && unitSymbol->getType().getBasicType() == EbtBlock) {
                if (symbol->getType().getShaderInterface() == unitSymbol->getType().getShaderInterface()) {
                    isSameSymbol = symbol->getType().getTypeName() == unitSymbol->getType().getTypeName();
                }
            }
            else if (symbol->getName() == unitSymbol->getName())
                isSameSymbol = true;

            if (isSameSymbol) {
                // filter out copy
                merge = false;

                // but if one has an initializer and the other does not, update
                // the initializer
                if (symbol->getConstArray().empty() && ! unitSymbol->getConstArray().empty())
                    symbol->setConstArray(unitSymbol->getConstArray());

                // Similarly for binding
                if (! symbol->getQualifier().hasBinding() && unitSymbol->getQualifier().hasBinding())
                    symbol->getQualifier().layoutBinding = unitSymbol->getQualifier().layoutBinding;

                // Update implicit array sizes
                mergeImplicitArraySizes(symbol->getWritableType(), unitSymbol->getType());

                // Check for consistent types/qualification/initializers etc.
                mergeErrorCheck(infoSink, *symbol, *unitSymbol, false);
            }
            // If different symbols, verify they aren't push_constant since there can only be one per stage
            else if (symbol->getQualifier().isPushConstant() && unitSymbol->getQualifier().isPushConstant())
                error(infoSink, "Only one push_constant block is allowed per stage");
        }
        if (merge)
            linkerObjects.push_back(unitLinkerObjects[unitLinkObj]);
    }
}

// TODO 4.5 link functionality: cull distance array size checking

// Recursively merge the implicit array sizes through the objects' respective type trees.
void TIntermediate::mergeImplicitArraySizes(TType& type, const TType& unitType)
{
    if (type.isUnsizedArray()) {
        if (unitType.isUnsizedArray()) {
            type.updateImplicitArraySize(unitType.getImplicitArraySize());
            if (unitType.isArrayVariablyIndexed())
                type.setArrayVariablyIndexed();
        } else if (unitType.isSizedArray())
            type.changeOuterArraySize(unitType.getOuterArraySize());
    }

    // Type mismatches are caught and reported after this, just be careful for now.
    if (! type.isStruct() || ! unitType.isStruct() || type.getStruct()->size() != unitType.getStruct()->size())
        return;

    for (int i = 0; i < (int)type.getStruct()->size(); ++i)
        mergeImplicitArraySizes(*(*type.getStruct())[i].type, *(*unitType.getStruct())[i].type);
}

//
// Compare two global objects from two compilation units and see if they match
// well enough. Rules can be different for intra- vs. cross-stage matching.
//
// This function only does one of intra- or cross-stage matching per call.
//
void TIntermediate::mergeErrorCheck(TInfoSink& infoSink, const TIntermSymbol& symbol, const TIntermSymbol& unitSymbol, bool crossStage)
{
#if !defined(GLSLANG_WEB) && !defined(GLSLANG_ANGLE)
    bool writeTypeComparison = false;

    // Types have to match
    if (symbol.getType() != unitSymbol.getType()) {
        // but, we make an exception if one is an implicit array and the other is sized
        if (! (symbol.getType().isArray() && unitSymbol.getType().isArray() &&
               symbol.getType().sameElementType(unitSymbol.getType()) &&
               (symbol.getType().isUnsizedArray() || unitSymbol.getType().isUnsizedArray()))) {
            error(infoSink, "Types must match:");
            writeTypeComparison = true;
        }
    }

    // Qualifiers have to (almost) match

    // Storage...
    if (symbol.getQualifier().storage != unitSymbol.getQualifier().storage) {
        error(infoSink, "Storage qualifiers must match:");
        writeTypeComparison = true;
    }

    // Uniform and buffer blocks must either both have an instance name, or
    // must both be anonymous. The names don't need to match though.
    if (symbol.getQualifier().isUniformOrBuffer() &&
        (IsAnonymous(symbol.getName()) != IsAnonymous(unitSymbol.getName()))) {
        error(infoSink, "Matched Uniform or Storage blocks must all be anonymous,"
                        " or all be named:");
        writeTypeComparison = true;
    }

    if (symbol.getQualifier().storage == unitSymbol.getQualifier().storage &&
        (IsAnonymous(symbol.getName()) != IsAnonymous(unitSymbol.getName()) ||
         (!IsAnonymous(symbol.getName()) && symbol.getName() != unitSymbol.getName()))) {
        warn(infoSink, "Matched shader interfaces are using different instance names.");
        writeTypeComparison = true;
    }

    // Precision...
    if (symbol.getQualifier().precision != unitSymbol.getQualifier().precision) {
        error(infoSink, "Precision qualifiers must match:");
        writeTypeComparison = true;
    }

    // Invariance...
    if (! crossStage && symbol.getQualifier().invariant != unitSymbol.getQualifier().invariant) {
        error(infoSink, "Presence of invariant qualifier must match:");
        writeTypeComparison = true;
    }

    // Precise...
    if (! crossStage && symbol.getQualifier().isNoContraction() != unitSymbol.getQualifier().isNoContraction()) {
        error(infoSink, "Presence of precise qualifier must match:");
        writeTypeComparison = true;
    }

    // Auxiliary and interpolation...
    if (symbol.getQualifier().centroid != unitSymbol.getQualifier().centroid ||
        symbol.getQualifier().smooth != unitSymbol.getQualifier().smooth ||
        symbol.getQualifier().flat != unitSymbol.getQualifier().flat ||
        symbol.getQualifier().isSample()!= unitSymbol.getQualifier().isSample() ||
        symbol.getQualifier().isPatch() != unitSymbol.getQualifier().isPatch() ||
        symbol.getQualifier().isNonPerspective() != unitSymbol.getQualifier().isNonPerspective()) {
        error(infoSink, "Interpolation and auxiliary storage qualifiers must match:");
        writeTypeComparison = true;
    }

    // Memory...
    if (symbol.getQualifier().coherent != unitSymbol.getQualifier().coherent ||
        symbol.getQualifier().devicecoherent != unitSymbol.getQualifier().devicecoherent ||
        symbol.getQualifier().queuefamilycoherent != unitSymbol.getQualifier().queuefamilycoherent ||
        symbol.getQualifier().workgroupcoherent != unitSymbol.getQualifier().workgroupcoherent ||
        symbol.getQualifier().subgroupcoherent != unitSymbol.getQualifier().subgroupcoherent ||
        symbol.getQualifier().shadercallcoherent!= unitSymbol.getQualifier().shadercallcoherent ||
        symbol.getQualifier().nonprivate != unitSymbol.getQualifier().nonprivate ||
        symbol.getQualifier().volatil != unitSymbol.getQualifier().volatil ||
        symbol.getQualifier().restrict != unitSymbol.getQualifier().restrict ||
        symbol.getQualifier().readonly != unitSymbol.getQualifier().readonly ||
        symbol.getQualifier().writeonly != unitSymbol.getQualifier().writeonly) {
        error(infoSink, "Memory qualifiers must match:");
        writeTypeComparison = true;
    }

    // Layouts...
    // TODO: 4.4 enhanced layouts: Generalize to include offset/align: current spec
    //       requires separate user-supplied offset from actual computed offset, but
    //       current implementation only has one offset.
    if (symbol.getQualifier().layoutMatrix != unitSymbol.getQualifier().layoutMatrix ||
        symbol.getQualifier().layoutPacking != unitSymbol.getQualifier().layoutPacking ||
        symbol.getQualifier().layoutLocation != unitSymbol.getQualifier().layoutLocation ||
        symbol.getQualifier().layoutComponent != unitSymbol.getQualifier().layoutComponent ||
        symbol.getQualifier().layoutIndex != unitSymbol.getQualifier().layoutIndex ||
        symbol.getQualifier().layoutBinding != unitSymbol.getQualifier().layoutBinding ||
        (symbol.getQualifier().hasBinding() && (symbol.getQualifier().layoutOffset != unitSymbol.getQualifier().layoutOffset))) {
        error(infoSink, "Layout qualification must match:");
        writeTypeComparison = true;
    }

    // Initializers have to match, if both are present, and if we don't already know the types don't match
    if (! writeTypeComparison) {
        if (! symbol.getConstArray().empty() && ! unitSymbol.getConstArray().empty()) {
            if (symbol.getConstArray() != unitSymbol.getConstArray()) {
                error(infoSink, "Initializers must match:");
                infoSink.info << " " << symbol.getName() << "\n";
            }
        }
    }

    if (writeTypeComparison) {
        infoSink.info << " " << symbol.getName() << ": \"" << symbol.getType().getCompleteString() << "\" versus ";
        if (symbol.getName() != unitSymbol.getName())
            infoSink.info << unitSymbol.getName() << ": ";
        infoSink.info << "\"" << unitSymbol.getType().getCompleteString() << "\"\n";
    }
#endif
}

void TIntermediate::sharedBlockCheck(TInfoSink& infoSink)
{
    bool has_shared_block = false;
    bool has_shared_non_block = false;
    TIntermSequence& linkObjects = findLinkerObjects()->getSequence();
    for (size_t i = 0; i < linkObjects.size(); ++i) {
        const TType& type = linkObjects[i]->getAsTyped()->getType();
        const TQualifier& qualifier = type.getQualifier();
        if (qualifier.storage == glslang::EvqShared) {
            if (type.getBasicType() == glslang::EbtBlock)
                has_shared_block = true;
            else
                has_shared_non_block = true;
        }
    }
    if (has_shared_block && has_shared_non_block)
        error(infoSink, "cannot mix use of shared variables inside and outside blocks");
}

//
// Do final link-time error checking of a complete (merged) intermediate representation.
// (Much error checking was done during merging).
//
// Also, lock in defaults of things not set, including array sizes.
//
void TIntermediate::finalCheck(TInfoSink& infoSink, bool keepUncalled)
{
    if (getTreeRoot() == nullptr)
        return;

    if (numEntryPoints < 1) {
        if (getSource() == EShSourceGlsl)
            error(infoSink, "Missing entry point: Each stage requires one entry point");
        else
            warn(infoSink, "Entry point not found");
    }

    // recursion and missing body checking
    checkCallGraphCycles(infoSink);
    checkCallGraphBodies(infoSink, keepUncalled);

    // overlap/alias/missing I/O, etc.
    inOutLocationCheck(infoSink);

#ifndef GLSLANG_WEB
    if (getNumPushConstants() > 1)
        error(infoSink, "Only one push_constant block is allowed per stage");

    // invocations
    if (invocations == TQualifier::layoutNotSet)
        invocations = 1;

    if (inIoAccessed("gl_ClipDistance") && inIoAccessed("gl_ClipVertex"))
        error(infoSink, "Can only use one of gl_ClipDistance or gl_ClipVertex (gl_ClipDistance is preferred)");
    if (inIoAccessed("gl_CullDistance") && inIoAccessed("gl_ClipVertex"))
        error(infoSink, "Can only use one of gl_CullDistance or gl_ClipVertex (gl_ClipDistance is preferred)");

    if (userOutputUsed() && (inIoAccessed("gl_FragColor") || inIoAccessed("gl_FragData")))
        error(infoSink, "Cannot use gl_FragColor or gl_FragData when using user-defined outputs");
    if (inIoAccessed("gl_FragColor") && inIoAccessed("gl_FragData"))
        error(infoSink, "Cannot use both gl_FragColor and gl_FragData");

    for (size_t b = 0; b < xfbBuffers.size(); ++b) {
        if (xfbBuffers[b].contains64BitType)
            RoundToPow2(xfbBuffers[b].implicitStride, 8);
        else if (xfbBuffers[b].contains32BitType)
            RoundToPow2(xfbBuffers[b].implicitStride, 4);
        else if (xfbBuffers[b].contains16BitType)
            RoundToPow2(xfbBuffers[b].implicitStride, 2);

        // "It is a compile-time or link-time error to have
        // any xfb_offset that overflows xfb_stride, whether stated on declarations before or after the xfb_stride, or
        // in different compilation units. While xfb_stride can be declared multiple times for the same buffer, it is a
        // compile-time or link-time error to have different values specified for the stride for the same buffer."
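        // Worked example (hypothetical buffer): capturing a float at xfb_offset 0
        // and a double at xfb_offset 8 needs 16 bytes; the 64-bit component also
        // rounds implicitStride up to a multiple of 8 above, so an explicit
        // xfb_stride of 12 would trigger the "too small" error below.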
        if (xfbBuffers[b].stride != TQualifier::layoutXfbStrideEnd && xfbBuffers[b].implicitStride > xfbBuffers[b].stride) {
            error(infoSink, "xfb_stride is too small to hold all buffer entries:");
            infoSink.info.prefix(EPrefixError);
            infoSink.info << " xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << ", minimum stride needed: " << xfbBuffers[b].implicitStride << "\n";
        }
        if (xfbBuffers[b].stride == TQualifier::layoutXfbStrideEnd)
            xfbBuffers[b].stride = xfbBuffers[b].implicitStride;

        // "If the buffer is capturing any
        // outputs with double-precision or 64-bit integer components, the stride must be a multiple of 8, otherwise it must be a
        // multiple of 4, or a compile-time or link-time error results."
        if (xfbBuffers[b].contains64BitType && ! IsMultipleOfPow2(xfbBuffers[b].stride, 8)) {
            error(infoSink, "xfb_stride must be multiple of 8 for buffer holding a double or 64-bit integer:");
            infoSink.info.prefix(EPrefixError);
            infoSink.info << " xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << "\n";
        } else if (xfbBuffers[b].contains32BitType && ! IsMultipleOfPow2(xfbBuffers[b].stride, 4)) {
            error(infoSink, "xfb_stride must be multiple of 4:");
            infoSink.info.prefix(EPrefixError);
            infoSink.info << " xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << "\n";
        }
        // "If the buffer is capturing any
        // outputs with half-precision or 16-bit integer components, the stride must be a multiple of 2"
        else if (xfbBuffers[b].contains16BitType && ! IsMultipleOfPow2(xfbBuffers[b].stride, 2)) {
            error(infoSink, "xfb_stride must be multiple of 2 for buffer holding a half float or 16-bit integer:");
            infoSink.info.prefix(EPrefixError);
            infoSink.info << " xfb_buffer " << (unsigned int)b << ", xfb_stride " << xfbBuffers[b].stride << "\n";
        }

        // "The resulting stride (implicit or explicit), when divided by 4, must be less than or equal to the
        // implementation-dependent constant gl_MaxTransformFeedbackInterleavedComponents."
        if (xfbBuffers[b].stride > (unsigned int)(4 * resources->maxTransformFeedbackInterleavedComponents)) {
            error(infoSink, "xfb_stride is too large:");
            infoSink.info.prefix(EPrefixError);
            infoSink.info << " xfb_buffer " << (unsigned int)b << ", components (1/4 stride) needed are " << xfbBuffers[b].stride/4 << ", gl_MaxTransformFeedbackInterleavedComponents is " << resources->maxTransformFeedbackInterleavedComponents << "\n";
        }
    }

    switch (language) {
    case EShLangVertex:
        break;
    case EShLangTessControl:
        if (vertices == TQualifier::layoutNotSet)
            error(infoSink, "At least one shader must specify an output layout(vertices=...)");
        break;
    case EShLangTessEvaluation:
        if (getSource() == EShSourceGlsl) {
            if (inputPrimitive == ElgNone)
                error(infoSink, "At least one shader must specify an input layout primitive");
            if (vertexSpacing == EvsNone)
                vertexSpacing = EvsEqual;
            if (vertexOrder == EvoNone)
                vertexOrder = EvoCcw;
        }
        break;
    case EShLangGeometry:
        if (inputPrimitive == ElgNone)
            error(infoSink, "At least one shader must specify an input layout primitive");
        if (outputPrimitive == ElgNone)
            error(infoSink, "At least one shader must specify an output layout primitive");
        if (vertices == TQualifier::layoutNotSet)
            error(infoSink, "At least one shader must specify a layout(max_vertices = value)");
        break;
    case EShLangFragment:
        // for GL_ARB_post_depth_coverage, EarlyFragmentTest is set automatically in
        // ParseHelper.cpp. So if we reach here, this must be GL_EXT_post_depth_coverage
        // requiring explicit early_fragment_tests
        if (getPostDepthCoverage() && !getEarlyFragmentTests())
            error(infoSink, "post_depth_coverage requires early_fragment_tests");
        break;
    case EShLangCompute:
        sharedBlockCheck(infoSink);
        break;
    case EShLangRayGen:
    case EShLangIntersect:
    case EShLangAnyHit:
    case EShLangClosestHit:
    case EShLangMiss:
    case EShLangCallable:
        if (numShaderRecordBlocks > 1)
            error(infoSink, "Only one shaderRecordNV buffer block is allowed per stage");
        break;
    case EShLangMeshNV:
        // NV_mesh_shader doesn't allow use of both single-view and per-view builtins.
        if (inIoAccessed("gl_Position") && inIoAccessed("gl_PositionPerViewNV"))
            error(infoSink, "Can only use one of gl_Position or gl_PositionPerViewNV");
        if (inIoAccessed("gl_ClipDistance") && inIoAccessed("gl_ClipDistancePerViewNV"))
            error(infoSink, "Can only use one of gl_ClipDistance or gl_ClipDistancePerViewNV");
        if (inIoAccessed("gl_CullDistance") && inIoAccessed("gl_CullDistancePerViewNV"))
            error(infoSink, "Can only use one of gl_CullDistance or gl_CullDistancePerViewNV");
        if (inIoAccessed("gl_Layer") && inIoAccessed("gl_LayerPerViewNV"))
            error(infoSink, "Can only use one of gl_Layer or gl_LayerPerViewNV");
        if (inIoAccessed("gl_ViewportMask") && inIoAccessed("gl_ViewportMaskPerViewNV"))
            error(infoSink, "Can only use one of gl_ViewportMask or gl_ViewportMaskPerViewNV");
        if (outputPrimitive == ElgNone)
            error(infoSink, "At least one shader must specify an output layout primitive");
        if (vertices == TQualifier::layoutNotSet)
            error(infoSink, "At least one shader must specify a layout(max_vertices = value)");
        if (primitives == TQualifier::layoutNotSet)
            error(infoSink, "At least one shader must specify a layout(max_primitives = value)");
        // fall through
    case EShLangTaskNV:
        if (numTaskNVBlocks > 1)
            error(infoSink, "Only one taskNV interface block is allowed per shader");
        sharedBlockCheck(infoSink);
        break;
    default:
        error(infoSink, "Unknown Stage.");
        break;
    }

    // Process the tree for any node-specific work.
    class TFinalLinkTraverser : public TIntermTraverser {
    public:
        TFinalLinkTraverser() { }
        virtual ~TFinalLinkTraverser() { }

        virtual void visitSymbol(TIntermSymbol* symbol)
        {
            // Implicitly size arrays.
            // If an unsized array is left as unsized, it effectively
            // becomes run-time sized.
            symbol->getWritableType().adoptImplicitArraySizes(false);
        }
    } finalLinkTraverser;

    treeRoot->traverse(&finalLinkTraverser);
#endif
}

//
// See if the call graph contains any static recursion, which is disallowed
// by the specification.
//
void TIntermediate::checkCallGraphCycles(TInfoSink& infoSink)
{
    // Clear fields we'll use for this.
    for (TGraph::iterator call = callGraph.begin(); call != callGraph.end(); ++call) {
        call->visited = false;
        call->currentPath = false;
        call->errorGiven = false;
    }

    //
    // Loop, looking for a new connected subgraph. One subgraph is handled per loop iteration.
    //
    TCall* newRoot;
    do {
        // See if we have unvisited parts of the graph.
        newRoot = 0;
        for (TGraph::iterator call = callGraph.begin(); call != callGraph.end(); ++call) {
            if (! call->visited) {
                newRoot = &(*call);
                break;
            }
        }

        // If not, we are done.
        if (! newRoot)
            break;

        // Otherwise, we found a new subgraph, process it:
        // See what all can be reached by this new root, and if any of
        // that is recursive. This is done by depth-first traversals, seeing
        // if a new call is found that was already in the currentPath (a back edge),
        // thereby detecting recursion.
        std::list<TCall*> stack;
        newRoot->currentPath = true; // currentPath will be true iff it is on the stack
        stack.push_back(newRoot);
        while (! stack.empty()) {
            // get a caller
            TCall* call = stack.back();

            // Add to the stack just one callee.
            // This algorithm always terminates, because only !visited and !currentPath causes a push
            // and all pushes change currentPath to true, and all pops change visited to true.
            TGraph::iterator child = callGraph.begin();
            for (; child != callGraph.end(); ++child) {

                // If we already visited this node, its whole subgraph has already been processed, so skip it.
                if (child->visited)
                    continue;

                if (call->callee == child->caller) {
                    if (child->currentPath) {
                        // Then, we found a back edge
                        if (! child->errorGiven) {
                            error(infoSink, "Recursion detected:");
                            infoSink.info << " " << call->callee << " calling " << child->callee << "\n";
                            child->errorGiven = true;
                            recursive = true;
                        }
                    } else {
                        child->currentPath = true;
                        stack.push_back(&(*child));
                        break;
                    }
                }
            }
            if (child == callGraph.end()) {
                // no more callees, we bottomed out, never look at this node again
                stack.back()->currentPath = false;
                stack.back()->visited = true;
                stack.pop_back();
            }
        } // end while, meaning nothing left to process in this subtree

    } while (newRoot); // redundant loop check; should always exit via the 'break' above
}

//
// See which functions are reachable from the entry point and which have bodies.
// Reachable ones with missing bodies are errors.
// Unreachable bodies are dead code.
//
void TIntermediate::checkCallGraphBodies(TInfoSink& infoSink, bool keepUncalled)
{
    // Clear fields we'll use for this.
    for (TGraph::iterator call = callGraph.begin(); call != callGraph.end(); ++call) {
        call->visited = false;
        call->calleeBodyPosition = -1;
    }

    // The top level of the AST includes function definitions (bodies).
    // Compare these to function calls in the call graph.
    // We'll end up knowing which have bodies, and if so,
    // how to map the call-graph node to the location in the AST.
    TIntermSequence &functionSequence = getTreeRoot()->getAsAggregate()->getSequence();
    std::vector<bool> reachable(functionSequence.size(), true); // so that non-functions are reachable
    for (int f = 0; f < (int)functionSequence.size(); ++f) {
        glslang::TIntermAggregate* node = functionSequence[f]->getAsAggregate();
        if (node && (node->getOp() == glslang::EOpFunction)) {
            if (node->getName().compare(getEntryPointMangledName().c_str()) != 0)
                reachable[f] = false; // so that function bodies are unreachable, until proven otherwise
            for (TGraph::iterator call = callGraph.begin(); call != callGraph.end(); ++call) {
                if (call->callee == node->getName())
                    call->calleeBodyPosition = f;
            }
        }
    }

    // Start call-graph traversal by visiting the entry point nodes.
    for (TGraph::iterator call = callGraph.begin(); call != callGraph.end(); ++call) {
        if (call->caller.compare(getEntryPointMangledName().c_str()) == 0)
            call->visited = true;
    }

    // Propagate 'visited' through the call-graph to every part of the graph it
    // can reach (seeded with the entry-point setting above).
    bool changed;
    do {
        changed = false;
        for (auto call1 = callGraph.begin(); call1 != callGraph.end(); ++call1) {
            if (call1->visited) {
                for (TGraph::iterator call2 = callGraph.begin(); call2 != callGraph.end(); ++call2) {
                    if (! call2->visited) {
                        if (call1->callee == call2->caller) {
                            changed = true;
                            call2->visited = true;
                        }
                    }
                }
            }
        }
    } while (changed);

    // Any call-graph node set to visited but without a callee body is an error.
    for (TGraph::iterator call = callGraph.begin(); call != callGraph.end(); ++call) {
        if (call->visited) {
            if (call->calleeBodyPosition == -1) {
                error(infoSink, "No function definition (body) found: ");
                infoSink.info << " " << call->callee << "\n";
            } else
                reachable[call->calleeBodyPosition] = true;
        }
    }

    // Bodies in the AST not reached by the call graph are dead;
    // clear them out, since they can't be reached and also can't
    // be translated further due to possibility of being ill defined.
    if (! keepUncalled) {
        for (int f = 0; f < (int)functionSequence.size(); ++f) {
            if (! reachable[f])
                functionSequence[f] = nullptr;
        }
        functionSequence.erase(std::remove(functionSequence.begin(), functionSequence.end(), nullptr), functionSequence.end());
    }
}

//
// Satisfy rules for location qualifiers on inputs and outputs
//
void TIntermediate::inOutLocationCheck(TInfoSink& infoSink)
{
    // ES 3.0 requires all outputs to have location qualifiers if there is more than one output
    bool fragOutWithNoLocation = false;
    int numFragOut = 0;

    // TODO: linker functionality: location collision checking

    TIntermSequence& linkObjects = findLinkerObjects()->getSequence();
    for (size_t i = 0; i < linkObjects.size(); ++i) {
        const TType& type = linkObjects[i]->getAsTyped()->getType();
        const TQualifier& qualifier = type.getQualifier();
        if (language == EShLangFragment) {
            if (qualifier.storage == EvqVaryingOut && qualifier.builtIn == EbvNone) {
                ++numFragOut;
                if (!qualifier.hasAnyLocation())
                    fragOutWithNoLocation = true;
            }
        }
    }

    if (isEsProfile()) {
        if (numFragOut > 1 && fragOutWithNoLocation)
            error(infoSink, "when more than one fragment shader output, all must have location qualifiers");
    }
}

TIntermAggregate* TIntermediate::findLinkerObjects() const
{
    // Get the top-level globals
    TIntermSequence& globals = treeRoot->getAsAggregate()->getSequence();

    // Get the last member of the sequences, expected to be the linker-object lists
    assert(globals.back()->getAsAggregate()->getOp() == EOpLinkerObjects);

    return globals.back()->getAsAggregate();
}

// See if a variable was both a user-declared output and used.
// Note: the spec discusses writing to one, but this looks at read or write, which
// is more useful, and perhaps the spec should be changed to reflect that.
bool TIntermediate::userOutputUsed() const
{
    const TIntermSequence& linkerObjects = findLinkerObjects()->getSequence();

    bool found = false;
    for (size_t i = 0; i < linkerObjects.size(); ++i) {
        const TIntermSymbol& symbolNode = *linkerObjects[i]->getAsSymbolNode();
        if (symbolNode.getQualifier().storage == EvqVaryingOut &&
            symbolNode.getName().compare(0, 3, "gl_") != 0 &&
            inIoAccessed(symbolNode.getName())) {
            found = true;
            break;
        }
    }

    return found;
}

// Accumulate locations used for inputs, outputs, and uniforms, payload and callable data
// and check for collisions as the accumulation is done.
//
// Returns < 0 if no collision, >= 0 if collision and the value returned is a colliding value.
//
// typeCollision is set to true if there is no direct collision, but the types in the same location
// are different.
//
int TIntermediate::addUsedLocation(const TQualifier& qualifier, const TType& type, bool& typeCollision)
{
    typeCollision = false;

    int set;
    int setRT;
    if (qualifier.isPipeInput())
        set = 0;
    else if (qualifier.isPipeOutput())
        set = 1;
    else if (qualifier.storage == EvqUniform)
        set = 2;
    else if (qualifier.storage == EvqBuffer)
        set = 3;
    else if (qualifier.isAnyPayload())
        setRT = 0;
    else if (qualifier.isAnyCallable())
        setRT = 1;
    else
        return -1;

    int size;
    if (qualifier.isAnyPayload() || qualifier.isAnyCallable()) {
        size = 1;
    } else if (qualifier.isUniformOrBuffer() || qualifier.isTaskMemory()) {
        if (type.isSizedArray())
            size = type.getCumulativeArraySize();
        else
            size = 1;
    } else {
        // Strip off the outer array dimension for those having an extra one.
        if (type.isArray() && qualifier.isArrayedIo(language)) {
            TType elementType(type, 0);
            size = computeTypeLocationSize(elementType, language);
        } else
            size = computeTypeLocationSize(type, language);
    }

    // Locations, and components within locations.
    //
    // Almost always, dealing with components means a single location is involved.
    // The exception is a dvec3. From the spec:
    //
    // "A dvec3 will consume all four components of the first location and components 0 and 1 of
    // the second location. This leaves components 2 and 3 available for other component-qualified
    // declarations."
    //
    // That means, without ever mentioning a component, a component range
    // for a different location gets specified, if it's not a vertex shader input. (!)
    // (A vertex shader input will show using only one location, even for a dvec3/4.)
    //
    // So, for the case of dvec3, we need two independent ioRanges.
    //
    // For raytracing IO (payloads and callabledata) each declaration occupies a single
    // slot irrespective of type.
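    // Illustrative: `layout(location = 2) out dvec3 v;` lands in location 2,
    // components 0..3, plus location 3, components 0..1, so it is recorded as
    // the two TIoRanges built below.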
  1011. int collision = -1; // no collision
  1012. #ifndef GLSLANG_WEB
  1013. if (qualifier.isAnyPayload() || qualifier.isAnyCallable()) {
  1014. TRange range(qualifier.layoutLocation, qualifier.layoutLocation);
  1015. collision = checkLocationRT(setRT, qualifier.layoutLocation);
  1016. if (collision < 0)
  1017. usedIoRT[setRT].push_back(range);
  1018. } else if (size == 2 && type.getBasicType() == EbtDouble && type.getVectorSize() == 3 &&
  1019. (qualifier.isPipeInput() || qualifier.isPipeOutput())) {
  1020. // Dealing with dvec3 in/out split across two locations.
  1021. // Need two io-ranges.
  1022. // The case where the dvec3 doesn't start at component 0 was previously caught as overflow.
  1023. // First range:
  1024. TRange locationRange(qualifier.layoutLocation, qualifier.layoutLocation);
  1025. TRange componentRange(0, 3);
  1026. TIoRange range(locationRange, componentRange, type.getBasicType(), 0);
  1027. // check for collisions
  1028. collision = checkLocationRange(set, range, type, typeCollision);
  1029. if (collision < 0) {
  1030. usedIo[set].push_back(range);
  1031. // Second range:
  1032. TRange locationRange2(qualifier.layoutLocation + 1, qualifier.layoutLocation + 1);
  1033. TRange componentRange2(0, 1);
  1034. TIoRange range2(locationRange2, componentRange2, type.getBasicType(), 0);
  1035. // check for collisions
  1036. collision = checkLocationRange(set, range2, type, typeCollision);
  1037. if (collision < 0)
  1038. usedIo[set].push_back(range2);
  1039. }
  1040. } else
  1041. #endif
  1042. {
  1043. // Not a dvec3 in/out split across two locations, generic path.
  1044. // Need a single IO-range block.
  1045. TRange locationRange(qualifier.layoutLocation, qualifier.layoutLocation + size - 1);
  1046. TRange componentRange(0, 3);
  1047. if (qualifier.hasComponent() || type.getVectorSize() > 0) {
  1048. int consumedComponents = type.getVectorSize() * (type.getBasicType() == EbtDouble ? 2 : 1);
  1049. if (qualifier.hasComponent())
  1050. componentRange.start = qualifier.layoutComponent;
  1051. componentRange.last = componentRange.start + consumedComponents - 1;
  1052. }
  1053. // combine location and component ranges
  1054. TIoRange range(locationRange, componentRange, type.getBasicType(), qualifier.hasIndex() ? qualifier.getIndex() : 0);
  1055. // check for collisions, except for vertex inputs on desktop targeting OpenGL
  1056. if (! (!isEsProfile() && language == EShLangVertex && qualifier.isPipeInput()) || spvVersion.vulkan > 0)
  1057. collision = checkLocationRange(set, range, type, typeCollision);
  1058. if (collision < 0)
  1059. usedIo[set].push_back(range);
  1060. }
  1061. return collision;
  1062. }

// Compare a new (the passed in) 'range' against the existing set, and see
// if there are any collisions.
//
// Returns < 0 if no collision, >= 0 if collision and the value returned is a colliding value.
//
int TIntermediate::checkLocationRange(int set, const TIoRange& range, const TType& type, bool& typeCollision)
{
    for (size_t r = 0; r < usedIo[set].size(); ++r) {
        if (range.overlap(usedIo[set][r])) {
            // there is a collision; pick one
            return std::max(range.location.start, usedIo[set][r].location.start);
        } else if (range.location.overlap(usedIo[set][r].location) && type.getBasicType() != usedIo[set][r].basicType) {
            // aliased-type mismatch
            typeCollision = true;
            return std::max(range.location.start, usedIo[set][r].location.start);
        }
    }

    return -1; // no collision
}
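
// Illustrative example (hypothetical GLSL): if location 2 already holds
//     layout(location = 2) out vec2 v;                  // components 0..1
// then a subsequent
//     layout(location = 2) out ivec2 w;                 // components 0..1
// overlaps it outright and 2 is returned, while
//     layout(location = 2, component = 2) out ivec2 u;  // components 2..3
// shares the location without overlapping components, but the differing basic
// types (float vs. int) still set typeCollision and return 2.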

int TIntermediate::checkLocationRT(int set, int location) {
    TRange range(location, location);
    for (size_t r = 0; r < usedIoRT[set].size(); ++r) {
        if (range.overlap(usedIoRT[set][r])) {
            return range.start;
        }
    }
    return -1; // no collision
}

// Accumulate bindings and offsets, and check for collisions
// as the accumulation is done.
//
// Returns < 0 if no collision, >= 0 if collision and the value returned is a colliding value.
//
int TIntermediate::addUsedOffsets(int binding, int offset, int numOffsets)
{
    TRange bindingRange(binding, binding);
    TRange offsetRange(offset, offset + numOffsets - 1);
    TOffsetRange range(bindingRange, offsetRange);

    // check for collisions
    for (size_t r = 0; r < usedAtomics.size(); ++r) {
        if (range.overlap(usedAtomics[r])) {
            // there is a collision; pick one
            return std::max(offset, usedAtomics[r].offset.start);
        }
    }

    usedAtomics.push_back(range);

    return -1; // no collision
}
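
// Illustrative example (hypothetical GLSL; assumes offsets are tracked per
// declared byte offset): two counters declared at the same offset,
//     layout(binding = 0, offset = 4) uniform atomic_uint a;
//     layout(binding = 0, offset = 4) uniform atomic_uint b;
// produce overlapping ranges on binding 0, so the second call returns 4.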

// Accumulate used constant_id values.
//
// Return false if one was already used.
bool TIntermediate::addUsedConstantId(int id)
{
    if (usedConstantId.find(id) != usedConstantId.end())
        return false;

    usedConstantId.insert(id);

    return true;
}

// Recursively figure out how many locations are used up by an input or output type.
// Return the size of type, as measured by "locations".
int TIntermediate::computeTypeLocationSize(const TType& type, EShLanguage stage)
{
    // "If the declared input is an array of size n and each element takes m locations, it will be assigned m * n
    // consecutive locations..."
    if (type.isArray()) {
        // TODO: perf: this can be flattened by using getCumulativeArraySize(), and a deref that discards all arrayness
        // TODO: are there valid cases of having an unsized array with a location? If so, this code is running too early.
        TType elementType(type, 0);
        if (type.isSizedArray() && !type.getQualifier().isPerView())
            return type.getOuterArraySize() * computeTypeLocationSize(elementType, stage);
        else {
#ifndef GLSLANG_WEB
            // unset perViewNV attributes for arrayed per-view outputs: "perviewNV vec4 v[MAX_VIEWS][3];"
            elementType.getQualifier().perViewNV = false;
#endif
            return computeTypeLocationSize(elementType, stage);
        }
    }

    // "The locations consumed by block and structure members are determined by applying the rules above
    // recursively..."
    if (type.isStruct()) {
        int size = 0;
        for (int member = 0; member < (int)type.getStruct()->size(); ++member) {
            TType memberType(type, member);
            size += computeTypeLocationSize(memberType, stage);
        }
        return size;
    }

    // ES: "If a shader input is any scalar or vector type, it will consume a single location."
    //
    // Desktop: "If a vertex shader input is any scalar or vector type, it will consume a single location. If a non-vertex
    // shader input is a scalar or vector type other than dvec3 or dvec4, it will consume a single location, while
    // types dvec3 or dvec4 will consume two consecutive locations. Inputs of type double and dvec2 will
    // consume only a single location, in all stages."
    if (type.isScalar())
        return 1;
    if (type.isVector()) {
        if (stage == EShLangVertex && type.getQualifier().isPipeInput())
            return 1;
        if (type.getBasicType() == EbtDouble && type.getVectorSize() > 2)
            return 2;
        else
            return 1;
    }

    // "If the declared input is an n x m single- or double-precision matrix, ...
    // The number of locations assigned for each matrix will be the same as
    // for an n-element array of m-component vectors..."
    if (type.isMatrix()) {
        TType columnType(type, 0);
        return type.getMatrixCols() * computeTypeLocationSize(columnType, stage);
    }

    assert(0);
    return 1;
}
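
// Illustrative examples of the rules above, outside of vertex shader inputs:
//     float f;    // 1 location
//     dvec4 d;    // 2 locations (double-precision 3- and 4-vectors take two)
//     mat4x3 m;   // 4 locations (4 columns of vec3, 1 location each)
//     vec2 a[8];  // 8 locations (8 elements * 1 location)
// As a vertex shader input, dvec4 d would consume only 1 location.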

// Same as computeTypeLocationSize but for uniforms
int TIntermediate::computeTypeUniformLocationSize(const TType& type)
{
    // "Individual elements of a uniform array are assigned
    // consecutive locations with the first element taking location
    // location."
    if (type.isArray()) {
        // TODO: perf: this can be flattened by using getCumulativeArraySize(), and a deref that discards all arrayness
        TType elementType(type, 0);
        if (type.isSizedArray()) {
            return type.getOuterArraySize() * computeTypeUniformLocationSize(elementType);
        } else {
            // TODO: are there valid cases of having an implicitly-sized array with a location? If so, this code is running too early.
            return computeTypeUniformLocationSize(elementType);
        }
    }

    // "Each subsequent inner-most member or element gets incremental
    // locations for the entire structure or array."
    if (type.isStruct()) {
        int size = 0;
        for (int member = 0; member < (int)type.getStruct()->size(); ++member) {
            TType memberType(type, member);
            size += computeTypeUniformLocationSize(memberType);
        }
        return size;
    }

    return 1;
}
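
// Illustrative example (hypothetical GLSL): for
//     struct S { vec3 v; float f[4]; };
//     uniform S u;
// the struct consumes 1 (vec3) + 4 (float[4]) = 5 consecutive uniform
// locations, since every scalar or vector leaf takes exactly one.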

#ifndef GLSLANG_WEB
// Accumulate xfb buffer ranges and check for collisions as the accumulation is done.
//
// Returns < 0 if no collision, >= 0 if collision and the value returned is a colliding value.
//
int TIntermediate::addXfbBufferOffset(const TType& type)
{
    const TQualifier& qualifier = type.getQualifier();

    assert(qualifier.hasXfbOffset() && qualifier.hasXfbBuffer());
    TXfbBuffer& buffer = xfbBuffers[qualifier.layoutXfbBuffer];

    // compute the range
    unsigned int size = computeTypeXfbSize(type, buffer.contains64BitType, buffer.contains32BitType, buffer.contains16BitType);
    buffer.implicitStride = std::max(buffer.implicitStride, qualifier.layoutXfbOffset + size);
    TRange range(qualifier.layoutXfbOffset, qualifier.layoutXfbOffset + size - 1);

    // check for collisions
    for (size_t r = 0; r < buffer.ranges.size(); ++r) {
        if (range.overlap(buffer.ranges[r])) {
            // there is a collision; pick an example to return
            return std::max(range.start, buffer.ranges[r].start);
        }
    }

    buffer.ranges.push_back(range);

    return -1; // no collision
}

// Recursively figure out how many bytes of xfb buffer are used by the given type.
// Return the size of type, in bytes.
// Sets contains64BitType to true if the type contains a 64-bit data type.
// Sets contains32BitType to true if the type contains a 32-bit data type.
// Sets contains16BitType to true if the type contains a 16-bit data type.
// N.B. Caller must set contains64BitType, contains32BitType, and contains16BitType to false before calling.
unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& contains64BitType, bool& contains32BitType, bool& contains16BitType) const
{
    // "...if applied to an aggregate containing a double or 64-bit integer, the offset must also be a multiple of 8,
    // and the space taken in the buffer will be a multiple of 8.
    // ...within the qualified entity, subsequent components are each
    // assigned, in order, to the next available offset aligned to a multiple of
    // that component's size. Aggregate types are flattened down to the component
    // level to get this sequence of components."

    if (type.isSizedArray()) {
        // TODO: perf: this can be flattened by using getCumulativeArraySize(), and a deref that discards all arrayness
        // Use of an unsized array with xfb should be a compile error.
        TType elementType(type, 0);
        return type.getOuterArraySize() * computeTypeXfbSize(elementType, contains64BitType, contains32BitType, contains16BitType);
    }

    if (type.isStruct()) {
        unsigned int size = 0;
        bool structContains64BitType = false;
        bool structContains32BitType = false;
        bool structContains16BitType = false;
        for (int member = 0; member < (int)type.getStruct()->size(); ++member) {
            TType memberType(type, member);
            // "... if applied to
            // an aggregate containing a double or 64-bit integer, the offset must also be a multiple of 8,
            // and the space taken in the buffer will be a multiple of 8."
            bool memberContains64BitType = false;
            bool memberContains32BitType = false;
            bool memberContains16BitType = false;
            int memberSize = computeTypeXfbSize(memberType, memberContains64BitType, memberContains32BitType, memberContains16BitType);
            if (memberContains64BitType) {
                structContains64BitType = true;
                RoundToPow2(size, 8);
            } else if (memberContains32BitType) {
                structContains32BitType = true;
                RoundToPow2(size, 4);
            } else if (memberContains16BitType) {
                structContains16BitType = true;
                RoundToPow2(size, 2);
            }
            size += memberSize;
        }

        if (structContains64BitType) {
            contains64BitType = true;
            RoundToPow2(size, 8);
        } else if (structContains32BitType) {
            contains32BitType = true;
            RoundToPow2(size, 4);
        } else if (structContains16BitType) {
            contains16BitType = true;
            RoundToPow2(size, 2);
        }
        return size;
    }

    int numComponents;
    if (type.isScalar())
        numComponents = 1;
    else if (type.isVector())
        numComponents = type.getVectorSize();
    else if (type.isMatrix())
        numComponents = type.getMatrixCols() * type.getMatrixRows();
    else {
        assert(0);
        numComponents = 1;
    }

    if (type.getBasicType() == EbtDouble || type.getBasicType() == EbtInt64 || type.getBasicType() == EbtUint64) {
        contains64BitType = true;
        return 8 * numComponents;
    } else if (type.getBasicType() == EbtFloat16 || type.getBasicType() == EbtInt16 || type.getBasicType() == EbtUint16) {
        contains16BitType = true;
        return 2 * numComponents;
    } else if (type.getBasicType() == EbtInt8 || type.getBasicType() == EbtUint8)
        return numComponents;
    else {
        contains32BitType = true;
        return 4 * numComponents;
    }
}
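
// Illustrative example (hypothetical GLSL): for an xfb-captured
//     struct T { float f; double d; };
// f contributes 4 bytes, the offset is then rounded up to 8 before the
// 8-byte double, and the struct total is rounded up to a multiple of 8,
// yielding 16 bytes with contains64BitType and contains32BitType both set.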
#endif

const int baseAlignmentVec4Std140 = 16;

// Return the size and alignment of a component of the given type.
// The size is returned in the 'size' parameter.
// Return value is the alignment.
int TIntermediate::getBaseAlignmentScalar(const TType& type, int& size)
{
#ifdef GLSLANG_WEB
    size = 4; return 4;
#endif

    switch (type.getBasicType()) {
    case EbtInt64:
    case EbtUint64:
    case EbtDouble:    size = 8; return 8;
    case EbtFloat16:   size = 2; return 2;
    case EbtInt8:
    case EbtUint8:     size = 1; return 1;
    case EbtInt16:
    case EbtUint16:    size = 2; return 2;
    case EbtReference: size = 8; return 8;
    default:           size = 4; return 4;
    }
}

// Implement base-alignment and size rules from section 7.6.2.2 Standard Uniform Block Layout
// Operates recursively.
//
// If std140 is true, it does the rounding up to vec4 size required by std140,
// otherwise it does not, yielding std430 rules.
//
// The size is returned in the 'size' parameter
//
// The stride is only non-0 for arrays or matrices, and is the stride of the
// top-level object nested within the type. E.g., for an array of matrices,
// it is the distance needed between matrices, despite the rules saying the
// stride comes from the flattening down to vectors.
//
// Return value is the alignment of the type.
int TIntermediate::getBaseAlignment(const TType& type, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor)
{
    int alignment;

    bool std140 = layoutPacking == glslang::ElpStd140;
    // When using the std140 storage layout, structures will be laid out in buffer
    // storage with its members stored in monotonically increasing order based on their
    // location in the declaration. A structure and each structure member have a base
    // offset and a base alignment, from which an aligned offset is computed by rounding
    // the base offset up to a multiple of the base alignment. The base offset of the first
    // member of a structure is taken from the aligned offset of the structure itself. The
    // base offset of all other structure members is derived by taking the offset of the
    // last basic machine unit consumed by the previous member and adding one. Each
    // structure member is stored in memory at its aligned offset. The members of a top-
    // level uniform block are laid out in buffer storage by treating the uniform block as
    // a structure with a base offset of zero.
    //
    //   1. If the member is a scalar consuming N basic machine units, the base alignment is N.
    //
    //   2. If the member is a two- or four-component vector with components consuming N basic
    //      machine units, the base alignment is 2N or 4N, respectively.
    //
    //   3. If the member is a three-component vector with components consuming N
    //      basic machine units, the base alignment is 4N.
    //
    //   4. If the member is an array of scalars or vectors, the base alignment and array
    //      stride are set to match the base alignment of a single array element, according
    //      to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
    //      array may have padding at the end; the base offset of the member following
    //      the array is rounded up to the next multiple of the base alignment.
    //
    //   5. If the member is a column-major matrix with C columns and R rows, the
    //      matrix is stored identically to an array of C column vectors with R
    //      components each, according to rule (4).
    //
    //   6. If the member is an array of S column-major matrices with C columns and
    //      R rows, the matrix is stored identically to a row of S X C column vectors
    //      with R components each, according to rule (4).
    //
    //   7. If the member is a row-major matrix with C columns and R rows, the matrix
    //      is stored identically to an array of R row vectors with C components each,
    //      according to rule (4).
    //
    //   8. If the member is an array of S row-major matrices with C columns and R
    //      rows, the matrix is stored identically to a row of S X R row vectors with C
    //      components each, according to rule (4).
    //
    //   9. If the member is a structure, the base alignment of the structure is N, where
    //      N is the largest base alignment value of any of its members, and rounded
    //      up to the base alignment of a vec4. The individual members of this substructure
    //      are then assigned offsets by applying this set of rules recursively,
    //      where the base offset of the first member of the sub-structure is equal to the
    //      aligned offset of the structure. The structure may have padding at the end;
    //      the base offset of the member following the sub-structure is rounded up to
    //      the next multiple of the base alignment of the structure.
    //
    //   10. If the member is an array of S structures, the S elements of the array are laid
    //       out in order, according to rule (9).
    //
    // Assuming, for rule 10: the stride is the same as the size of an element.

    stride = 0;
    int dummyStride;

    // rules 4, 6, 8, and 10
    if (type.isArray()) {
        // TODO: perf: this might be flattened by using getCumulativeArraySize(), and a deref that discards all arrayness
        TType derefType(type, 0);
        alignment = getBaseAlignment(derefType, size, dummyStride, layoutPacking, rowMajor);
        if (std140)
            alignment = std::max(baseAlignmentVec4Std140, alignment);
        RoundToPow2(size, alignment);
        stride = size;  // uses full matrix size for stride of an array of matrices (not quite what rules 6/8 say, but what's expected)
                        // uses the assumption for rule 10 in the comment above
        // use one element to represent the last member of an SSBO that is an unsized array
        int arraySize = (type.isUnsizedArray() && (type.getOuterArraySize() == 0)) ? 1 : type.getOuterArraySize();
        size = stride * arraySize;
        return alignment;
    }

    // rule 9
    if (type.getBasicType() == EbtStruct) {
        const TTypeList& memberList = *type.getStruct();

        size = 0;
        int maxAlignment = std140 ? baseAlignmentVec4Std140 : 0;
        for (size_t m = 0; m < memberList.size(); ++m) {
            int memberSize;
            // modify just the children's view of matrix layout, if there is one for this member
            TLayoutMatrix subMatrixLayout = memberList[m].type->getQualifier().layoutMatrix;
            int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, dummyStride, layoutPacking,
                                                   (subMatrixLayout != ElmNone) ? (subMatrixLayout == ElmRowMajor) : rowMajor);
            maxAlignment = std::max(maxAlignment, memberAlignment);
            RoundToPow2(size, memberAlignment);
            size += memberSize;
        }

        // The structure may have padding at the end; the base offset of
        // the member following the sub-structure is rounded up to the next
        // multiple of the base alignment of the structure.
        RoundToPow2(size, maxAlignment);

        return maxAlignment;
    }

    // rule 1
    if (type.isScalar())
        return getBaseAlignmentScalar(type, size);

    // rules 2 and 3
    if (type.isVector()) {
        int scalarAlign = getBaseAlignmentScalar(type, size);
        switch (type.getVectorSize()) {
        case 1: // HLSL has this, GLSL does not
            return scalarAlign;
        case 2:
            size *= 2;
            return 2 * scalarAlign;
        default:
            size *= type.getVectorSize();
            return 4 * scalarAlign;
        }
    }

    // rules 5 and 7
    if (type.isMatrix()) {
        // rule 5: deref to row, not to column, meaning the size of vector is num columns instead of num rows
        TType derefType(type, 0, rowMajor);
        alignment = getBaseAlignment(derefType, size, dummyStride, layoutPacking, rowMajor);
        if (std140)
            alignment = std::max(baseAlignmentVec4Std140, alignment);
        RoundToPow2(size, alignment);
        stride = size;  // use intra-matrix stride for stride of just a matrix
        if (rowMajor)
            size = stride * type.getMatrixRows();
        else
            size = stride * type.getMatrixCols();

        return alignment;
    }

    assert(0);  // all cases should be covered above
    size = baseAlignmentVec4Std140;
    return baseAlignmentVec4Std140;
}
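
// Illustrative std140 example (hypothetical GLSL): for
//     layout(std140) uniform B { float f; vec3 v; };
// f has base alignment 4 and sits at offset 0 (rule 1); v has base alignment
// 16 (rule 3), so it occupies offsets 16..27; the block's base alignment is 16
// and its size is rounded up from 28 to 32 (rule 9).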

// To aid the basic HLSL rule about crossing vec4 boundaries.
bool TIntermediate::improperStraddle(const TType& type, int size, int offset)
{
    if (! type.isVector() || type.isArray())
        return false;

    return size <= 16 ? offset / 16 != (offset + size - 1) / 16
                      : offset % 16 != 0;
}
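
// Illustrative example: a vec2 of floats (size 8) at offset 12 spans bytes
// 12..19 and crosses a 16-byte boundary (12/16 == 0 but 19/16 == 1), so it
// improperly straddles; the same vec2 at offset 8 (bytes 8..15) does not.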

int TIntermediate::getScalarAlignment(const TType& type, int& size, int& stride, bool rowMajor)
{
    int alignment;

    stride = 0;
    int dummyStride;

    if (type.isArray()) {
        TType derefType(type, 0);
        alignment = getScalarAlignment(derefType, size, dummyStride, rowMajor);

        stride = size;
        RoundToPow2(stride, alignment);

        size = stride * (type.getOuterArraySize() - 1) + size;
        return alignment;
    }

    if (type.getBasicType() == EbtStruct) {
        const TTypeList& memberList = *type.getStruct();

        size = 0;
        int maxAlignment = 0;
        for (size_t m = 0; m < memberList.size(); ++m) {
            int memberSize;
            // modify just the children's view of matrix layout, if there is one for this member
            TLayoutMatrix subMatrixLayout = memberList[m].type->getQualifier().layoutMatrix;
            int memberAlignment = getScalarAlignment(*memberList[m].type, memberSize, dummyStride,
                                                     (subMatrixLayout != ElmNone) ? (subMatrixLayout == ElmRowMajor) : rowMajor);
            maxAlignment = std::max(maxAlignment, memberAlignment);
            RoundToPow2(size, memberAlignment);
            size += memberSize;
        }

        return maxAlignment;
    }

    if (type.isScalar())
        return getBaseAlignmentScalar(type, size);

    if (type.isVector()) {
        int scalarAlign = getBaseAlignmentScalar(type, size);
        size *= type.getVectorSize();
        return scalarAlign;
    }

    if (type.isMatrix()) {
        TType derefType(type, 0, rowMajor);
        alignment = getScalarAlignment(derefType, size, dummyStride, rowMajor);

        stride = size;  // use intra-matrix stride for stride of just a matrix
        if (rowMajor)
            size = stride * type.getMatrixRows();
        else
            size = stride * type.getMatrixCols();

        return alignment;
    }

    assert(0);  // all cases should be covered above
    size = 1;
    return 1;
}
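
// Illustrative scalar-layout example (hypothetical GLSL, using
// GL_EXT_scalar_block_layout): for
//     layout(scalar) buffer B { float f; vec3 v; };
// v only needs the 4-byte alignment of its float component, so it can start
// at offset 4, and the block occupies 16 bytes instead of std140's 32.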

int TIntermediate::getMemberAlignment(const TType& type, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor)
{
    if (layoutPacking == glslang::ElpScalar) {
        return getScalarAlignment(type, size, stride, rowMajor);
    } else {
        return getBaseAlignment(type, size, stride, layoutPacking, rowMajor);
    }
}

// shared calculation by getOffset and getOffsets
void TIntermediate::updateOffset(const TType& parentType, const TType& memberType, int& offset, int& memberSize)
{
    int dummyStride;

    // modify just the children's view of matrix layout, if there is one for this member
    TLayoutMatrix subMatrixLayout = memberType.getQualifier().layoutMatrix;
    int memberAlignment = getMemberAlignment(memberType, memberSize, dummyStride,
                                             parentType.getQualifier().layoutPacking,
                                             subMatrixLayout != ElmNone
                                                 ? subMatrixLayout == ElmRowMajor
                                                 : parentType.getQualifier().layoutMatrix == ElmRowMajor);
    RoundToPow2(offset, memberAlignment);
}

// Look up or calculate the offset of a block member, using the recursively
// defined block offset rules.
int TIntermediate::getOffset(const TType& type, int index)
{
    const TTypeList& memberList = *type.getStruct();

    // Don't calculate an offset if one is present; it could be user supplied
    // and different from what would be calculated. That is, this is faster,
    // but not just an optimization.
    if (memberList[index].type->getQualifier().hasOffset())
        return memberList[index].type->getQualifier().layoutOffset;

    int memberSize = 0;
    int offset = 0;
    for (int m = 0; m <= index; ++m) {
        updateOffset(type, *memberList[m].type, offset, memberSize);

        if (m < index)
            offset += memberSize;
    }

    return offset;
}

// Calculate the block data size.
// Block arrayness is not taken into account, as each element is backed by a separate buffer.
int TIntermediate::getBlockSize(const TType& blockType)
{
    const TTypeList& memberList = *blockType.getStruct();
    int lastIndex = (int)memberList.size() - 1;
    int lastOffset = getOffset(blockType, lastIndex);

    int lastMemberSize;
    int dummyStride;
    getMemberAlignment(*memberList[lastIndex].type, lastMemberSize, dummyStride,
                       blockType.getQualifier().layoutPacking,
                       blockType.getQualifier().layoutMatrix == ElmRowMajor);

    return lastOffset + lastMemberSize;
}
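
// Illustrative example (hypothetical GLSL): for
//     layout(std140) uniform B { float f; vec3 v; };
// the last member v lands at offset 16 with size 12, so the block data size
// is 28; trailing padding up to the block's alignment is not added here.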

int TIntermediate::computeBufferReferenceTypeSize(const TType& type)
{
    assert(type.isReference());
    int size = getBlockSize(*type.getReferentType());

    int align = type.getBufferReferenceAlignment();

    if (align) {
        size = (size + align - 1) & ~(align - 1);
    }

    return size;
}
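
// Illustrative example: a referent block of size 28 with a declared
// buffer-reference alignment of 16 is rounded up by the expression above to
// (28 + 15) & ~15 == 32.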

} // end namespace glslang