BsGpuParamsSet.cpp 29 KB


  1. //********************************** Banshee Engine (www.banshee3d.com) **************************************************//
  2. //**************** Copyright (c) 2016 Marko Pintera (marko@banshee3d.com). All rights reserved. **********************//
  3. #include "BsGpuParamsSet.h"
  4. #include "BsShader.h"
  5. #include "BsTechnique.h"
  6. #include "BsPass.h"
  7. #include "BsGpuProgram.h"
  8. #include "BsMaterialParams.h"
  9. #include "BsGpuParamDesc.h"
  10. #include "BsRenderAPI.h"
  11. #include "BsGpuParamBlockBuffer.h"
  12. namespace BansheeEngine
  13. {
  14. struct ShaderBlockDesc
  15. {
  16. String name;
  17. GpuParamBlockUsage usage;
  18. int size;
  19. bool external;
  20. UINT32 sequentialIdx;
  21. };
  22. Vector<SPtr<GpuParamDesc>> getAllParamDescs(const SPtr<Technique>& technique)
  23. {
  24. Vector<SPtr<GpuParamDesc>> allParamDescs;
  25. // Make sure all gpu programs are fully loaded
  26. for (UINT32 i = 0; i < technique->getNumPasses(); i++)
  27. {
  28. SPtr<Pass> curPass = technique->getPass(i);
  29. SPtr<GpuProgram> vertProgram = curPass->getVertexProgram();
  30. if (vertProgram)
  31. {
  32. vertProgram->blockUntilCoreInitialized();
  33. allParamDescs.push_back(vertProgram->getParamDesc());
  34. }
  35. SPtr<GpuProgram> fragProgram = curPass->getFragmentProgram();
  36. if (fragProgram)
  37. {
  38. fragProgram->blockUntilCoreInitialized();
  39. allParamDescs.push_back(fragProgram->getParamDesc());
  40. }
  41. SPtr<GpuProgram> geomProgram = curPass->getGeometryProgram();
  42. if (geomProgram)
  43. {
  44. geomProgram->blockUntilCoreInitialized();
  45. allParamDescs.push_back(geomProgram->getParamDesc());
  46. }
  47. SPtr<GpuProgram> hullProgram = curPass->getHullProgram();
  48. if (hullProgram)
  49. {
  50. hullProgram->blockUntilCoreInitialized();
  51. allParamDescs.push_back(hullProgram->getParamDesc());
  52. }
  53. SPtr<GpuProgram> domainProgram = curPass->getDomainProgram();
  54. if (domainProgram)
  55. {
  56. domainProgram->blockUntilCoreInitialized();
  57. allParamDescs.push_back(domainProgram->getParamDesc());
  58. }
  59. SPtr<GpuProgram> computeProgram = curPass->getComputeProgram();
  60. if (computeProgram)
  61. {
  62. computeProgram->blockUntilCoreInitialized();
  63. allParamDescs.push_back(computeProgram->getParamDesc());
  64. }
  65. }
  66. return allParamDescs;
  67. }
  68. Vector<SPtr<GpuParamDesc>> getAllParamDescs(const SPtr<TechniqueCore>& technique)
  69. {
  70. Vector<SPtr<GpuParamDesc>> allParamDescs;
  71. // Make sure all gpu programs are fully loaded
  72. for (UINT32 i = 0; i < technique->getNumPasses(); i++)
  73. {
  74. SPtr<PassCore> curPass = technique->getPass(i);
  75. SPtr<GpuProgramCore> vertProgram = curPass->getVertexProgram();
  76. if (vertProgram)
  77. allParamDescs.push_back(vertProgram->getParamDesc());
  78. SPtr<GpuProgramCore> fragProgram = curPass->getFragmentProgram();
  79. if (fragProgram)
  80. allParamDescs.push_back(fragProgram->getParamDesc());
  81. SPtr<GpuProgramCore> geomProgram = curPass->getGeometryProgram();
  82. if (geomProgram)
  83. allParamDescs.push_back(geomProgram->getParamDesc());
  84. SPtr<GpuProgramCore> hullProgram = curPass->getHullProgram();
  85. if (hullProgram)
  86. allParamDescs.push_back(hullProgram->getParamDesc());
  87. SPtr<GpuProgramCore> domainProgram = curPass->getDomainProgram();
  88. if (domainProgram)
  89. allParamDescs.push_back(domainProgram->getParamDesc());
  90. SPtr<GpuProgramCore> computeProgram = curPass->getComputeProgram();
  91. if (computeProgram)
  92. allParamDescs.push_back(computeProgram->getParamDesc());
  93. }
  94. return allParamDescs;
  95. }
  96. bool areParamsEqual(const GpuParamDataDesc& paramA, const GpuParamDataDesc& paramB, bool ignoreBufferOffsets)
  97. {
  98. bool equal = paramA.arraySize == paramB.arraySize && paramA.elementSize == paramB.elementSize
  99. && paramA.type == paramB.type && paramA.arrayElementStride == paramB.arrayElementStride;
  100. if (!ignoreBufferOffsets)
  101. equal &= paramA.cpuMemOffset == paramB.cpuMemOffset && paramA.gpuMemOffset == paramB.gpuMemOffset;
  102. return equal;
  103. }
  104. Vector<ShaderBlockDesc> determineValidShareableParamBlocks(const Vector<SPtr<GpuParamDesc>>& paramDescs,
  105. const Map<String, SHADER_PARAM_BLOCK_DESC>& shaderDesc)
  106. {
  107. struct BlockInfo
  108. {
  109. BlockInfo() { }
  110. BlockInfo(const String& name, const SPtr<GpuParamDesc>& paramDesc, bool isValid = true)
  111. :name(name), paramDesc(paramDesc), isValid(isValid)
  112. { }
  113. String name;
  114. SPtr<GpuParamDesc> paramDesc;
  115. bool isValid;
  116. };
  117. // Make sure param blocks with the same name actually contain the same fields
  118. Map<String, BlockInfo> uniqueParamBlocks;
  119. for (auto iter = paramDescs.begin(); iter != paramDescs.end(); ++iter)
  120. {
  121. const GpuParamDesc& curDesc = **iter;
  122. for (auto blockIter = curDesc.paramBlocks.begin(); blockIter != curDesc.paramBlocks.end(); ++blockIter)
  123. {
  124. bool isBlockValid = true;
  125. const GpuParamBlockDesc& curBlock = blockIter->second;
  126. if (!curBlock.isShareable) // Non-shareable buffers are handled differently, they're allowed same names
  127. continue;
  128. auto iterFind = uniqueParamBlocks.find(blockIter->first);
  129. if (iterFind == uniqueParamBlocks.end())
  130. {
  131. uniqueParamBlocks[blockIter->first] = BlockInfo(blockIter->first, *iter);
  132. continue;
  133. }
  134. // The block was already determined as invalid, no need to check further
  135. if (!iterFind->second.isValid)
  136. continue;
  137. String otherBlockName = iterFind->second.name;
  138. SPtr<GpuParamDesc> otherDesc = iterFind->second.paramDesc;
  139. for (auto myParamIter = curDesc.params.begin(); myParamIter != curDesc.params.end(); ++myParamIter)
  140. {
  141. const GpuParamDataDesc& myParam = myParamIter->second;
  142. if (myParam.paramBlockSlot != curBlock.slot)
  143. continue; // Param is in another block, so we will check it when its time for that block
  144. auto otherParamFind = otherDesc->params.find(myParamIter->first);
  145. // Cannot find other param, blocks aren't equal
  146. if (otherParamFind == otherDesc->params.end())
  147. {
  148. isBlockValid = false;
  149. break;
  150. }
  151. const GpuParamDataDesc& otherParam = otherParamFind->second;
  152. if (!areParamsEqual(myParam, otherParam, false) || curBlock.name != otherBlockName)
  153. {
  154. isBlockValid = false;
  155. break;
  156. }
  157. }
  158. if (!isBlockValid)
  159. {
  160. LOGWRN("Found two param blocks with the same name but different contents: " + blockIter->first);
  161. uniqueParamBlocks[blockIter->first] = BlockInfo(blockIter->first, nullptr, false);
  162. }
  163. }
  164. }
  165. Vector<ShaderBlockDesc> output;
  166. for (auto& entry : uniqueParamBlocks)
  167. {
  168. if (!entry.second.isValid)
  169. continue;
  170. ShaderBlockDesc shaderBlockDesc;
  171. shaderBlockDesc.external = false;
  172. shaderBlockDesc.usage = GPBU_STATIC;
  173. shaderBlockDesc.size = 0;
  174. shaderBlockDesc.name = entry.first;
  175. auto iterFind = shaderDesc.find(entry.first);
  176. if (iterFind != shaderDesc.end())
  177. {
  178. shaderBlockDesc.external = iterFind->second.shared || iterFind->second.rendererSemantic != StringID::NONE;
  179. shaderBlockDesc.usage = iterFind->second.usage;
  180. }
  181. for (auto iter2 = paramDescs.begin(); iter2 != paramDescs.end(); ++iter2)
  182. {
  183. auto findParamBlockDesc = (*iter2)->paramBlocks.find(entry.first);
  184. if (findParamBlockDesc != (*iter2)->paramBlocks.end())
  185. {
  186. shaderBlockDesc.size = findParamBlockDesc->second.blockSize * sizeof(UINT32);
  187. break;
  188. }
  189. }
  190. output.push_back(shaderBlockDesc);
  191. }
  192. return output;
  193. }
  194. Map<String, const GpuParamDataDesc*> determineValidDataParameters(const Vector<SPtr<GpuParamDesc>>& paramDescs)
  195. {
  196. Map<String, const GpuParamDataDesc*> foundDataParams;
  197. Map<String, bool> validParams;
  198. for (auto iter = paramDescs.begin(); iter != paramDescs.end(); ++iter)
  199. {
  200. const GpuParamDesc& curDesc = **iter;
  201. // Check regular data params
  202. for (auto iter2 = curDesc.params.begin(); iter2 != curDesc.params.end(); ++iter2)
  203. {
  204. const GpuParamDataDesc& curParam = iter2->second;
  205. auto dataFindIter = validParams.find(iter2->first);
  206. if (dataFindIter == validParams.end())
  207. {
  208. validParams[iter2->first] = true;
  209. foundDataParams[iter2->first] = &curParam;
  210. }
  211. else
  212. {
  213. if (validParams[iter2->first])
  214. {
  215. auto dataFindIter2 = foundDataParams.find(iter2->first);
  216. const GpuParamDataDesc* otherParam = dataFindIter2->second;
  217. if (!areParamsEqual(curParam, *otherParam, true))
  218. {
  219. validParams[iter2->first] = false;
  220. foundDataParams.erase(dataFindIter2);
  221. }
  222. }
  223. }
  224. }
  225. }
  226. return foundDataParams;
  227. }
  228. Vector<const GpuParamObjectDesc*> determineValidObjectParameters(const Vector<SPtr<GpuParamDesc>>& paramDescs)
  229. {
  230. Vector<const GpuParamObjectDesc*> validParams;
  231. for (auto iter = paramDescs.begin(); iter != paramDescs.end(); ++iter)
  232. {
  233. const GpuParamDesc& curDesc = **iter;
  234. // Check sampler params
  235. for (auto iter2 = curDesc.samplers.begin(); iter2 != curDesc.samplers.end(); ++iter2)
  236. {
  237. validParams.push_back(&iter2->second);
  238. }
  239. // Check texture params
  240. for (auto iter2 = curDesc.textures.begin(); iter2 != curDesc.textures.end(); ++iter2)
  241. {
  242. validParams.push_back(&iter2->second);
  243. }
  244. // Check load-store texture params
  245. for (auto iter2 = curDesc.loadStoreTextures.begin(); iter2 != curDesc.loadStoreTextures.end(); ++iter2)
  246. {
  247. validParams.push_back(&iter2->second);
  248. }
  249. // Check buffer params
  250. for (auto iter2 = curDesc.buffers.begin(); iter2 != curDesc.buffers.end(); ++iter2)
  251. {
  252. validParams.push_back(&iter2->second);
  253. }
  254. }
  255. return validParams;
  256. }
  257. Map<String, String> determineParameterToBlockMapping(const Vector<SPtr<GpuParamDesc>>& paramDescs)
  258. {
  259. Map<String, String> paramToParamBlock;
  260. for (auto iter = paramDescs.begin(); iter != paramDescs.end(); ++iter)
  261. {
  262. const GpuParamDesc& curDesc = **iter;
  263. for (auto iter2 = curDesc.params.begin(); iter2 != curDesc.params.end(); ++iter2)
  264. {
  265. const GpuParamDataDesc& curParam = iter2->second;
  266. auto iterFind = paramToParamBlock.find(curParam.name);
  267. if (iterFind != paramToParamBlock.end())
  268. continue;
  269. for (auto iterBlock = curDesc.paramBlocks.begin(); iterBlock != curDesc.paramBlocks.end(); ++iterBlock)
  270. {
  271. if (iterBlock->second.slot == curParam.paramBlockSlot)
  272. {
  273. paramToParamBlock[curParam.name] = iterBlock->second.name;
  274. break;
  275. }
  276. }
  277. }
  278. }
  279. return paramToParamBlock;
  280. }
  281. UnorderedSet<String> determineValidParameters(const Vector<SPtr<GpuParamDesc>>& paramDescs,
  282. const Map<String, SHADER_DATA_PARAM_DESC>& dataParams,
  283. const Map<String, SHADER_OBJECT_PARAM_DESC>& textureParams,
  284. const Map<String, SHADER_OBJECT_PARAM_DESC>& bufferParams,
  285. const Map<String, SHADER_OBJECT_PARAM_DESC>& samplerParams)
  286. {
  287. UnorderedSet<String> validParams;
  288. Map<String, const GpuParamDataDesc*> validDataParameters = determineValidDataParameters(paramDescs);
  289. Vector<const GpuParamObjectDesc*> validObjectParameters = determineValidObjectParameters(paramDescs);
  290. Map<String, String> paramToParamBlockMap = determineParameterToBlockMapping(paramDescs);
  291. // Create data param mappings
  292. for (auto iter = dataParams.begin(); iter != dataParams.end(); ++iter)
  293. {
  294. auto findIter = validDataParameters.find(iter->second.gpuVariableName);
  295. // Not valid so we skip it
  296. if (findIter == validDataParameters.end())
  297. continue;
  298. if (findIter->second->type != iter->second.type && !(iter->second.type == GPDT_COLOR && findIter->second->type == GPDT_FLOAT4))
  299. {
  300. LOGWRN("Ignoring shader parameter \"" + iter->first + "\". Type doesn't match the one defined in the gpu program. "
  301. + "Shader defined type: " + toString(iter->second.type) + " - Gpu program defined type: " + toString(findIter->second->type));
  302. continue;
  303. }
  304. if (findIter->second->arraySize != iter->second.arraySize)
  305. {
  306. LOGWRN("Ignoring shader parameter \"" + iter->first + "\". Array size doesn't match the one defined in the gpu program."
  307. + "Shader defined array size: " + toString(iter->second.arraySize) + " - Gpu program defined array size: " + toString(findIter->second->arraySize));
  308. continue;
  309. }
  310. auto findBlockIter = paramToParamBlockMap.find(iter->second.gpuVariableName);
  311. if (findBlockIter == paramToParamBlockMap.end())
  312. BS_EXCEPT(InternalErrorException, "Parameter doesn't exist in param to param block map but exists in valid param map.");
  313. validParams.insert(iter->first);
  314. }
  315. // Create object param mappings
  316. auto determineObjectMappings = [&](const Map<String, SHADER_OBJECT_PARAM_DESC>& params)
  317. {
  318. for (auto iter = params.begin(); iter != params.end(); ++iter)
  319. {
  320. const Vector<String>& gpuVariableNames = iter->second.gpuVariableNames;
  321. for (auto iter2 = gpuVariableNames.begin(); iter2 != gpuVariableNames.end(); ++iter2)
  322. {
  323. for (auto iter3 = validObjectParameters.begin(); iter3 != validObjectParameters.end(); ++iter3)
  324. {
  325. if ((*iter3)->name == (*iter2) && (*iter3)->type == iter->second.type)
  326. {
  327. validParams.insert(iter->first);
  328. break;
  329. }
  330. }
  331. }
  332. }
  333. };
  334. determineObjectMappings(textureParams);
  335. determineObjectMappings(samplerParams);
  336. determineObjectMappings(bufferParams);
  337. return validParams;
  338. }
/** Number of GPU program stages stored per pass. Used as the upper bound when iterating stages by index. */
template<bool Core>
const UINT32 TGpuParamsSet<Core>::NUM_STAGES = 6;
  341. template<bool Core>
  342. TGpuParamsSet<Core>::TGpuParamsSet(const SPtr<TechniqueType>& technique, const ShaderType& shader,
  343. const SPtr<MaterialParamsType>& params)
  344. :mPassParams(technique->getNumPasses())
  345. {
  346. UINT32 numPasses = technique->getNumPasses();
  347. // Create GpuParams for each pass and shader stage
  348. for (UINT32 i = 0; i < numPasses; i++)
  349. {
  350. SPtr<PassType> curPass = technique->getPass(i);
  351. GpuProgramPtrType vertProgram = curPass->getVertexProgram();
  352. if (vertProgram)
  353. mPassParams[i].vertex = vertProgram->createParameters();
  354. GpuProgramPtrType fragProgram = curPass->getFragmentProgram();
  355. if (fragProgram)
  356. mPassParams[i].fragment = fragProgram->createParameters();
  357. GpuProgramPtrType geomProgram = curPass->getGeometryProgram();
  358. if (geomProgram)
  359. mPassParams[i].geometry = geomProgram->createParameters();
  360. GpuProgramPtrType hullProgram = curPass->getHullProgram();
  361. if (hullProgram)
  362. mPassParams[i].hull = hullProgram->createParameters();
  363. GpuProgramPtrType domainProgram = curPass->getDomainProgram();
  364. if (domainProgram)
  365. mPassParams[i].domain = domainProgram->createParameters();
  366. GpuProgramPtrType computeProgram = curPass->getComputeProgram();
  367. if (computeProgram)
  368. mPassParams[i].compute = computeProgram->createParameters();
  369. }
  370. // Create and assign parameter block buffers
  371. Vector<SPtr<GpuParamDesc>> allParamDescs = getAllParamDescs(technique);
  372. //// Fill out various helper structures
  373. Vector<ShaderBlockDesc> paramBlockData = determineValidShareableParamBlocks(allParamDescs, shader->getParamBlocks());
  374. UnorderedSet<String> validParams = determineValidParameters(
  375. allParamDescs,
  376. shader->getDataParams(),
  377. shader->getTextureParams(),
  378. shader->getBufferParams(),
  379. shader->getSamplerParams());
  380. Map<String, ParamBlockPtrType> paramBlockBuffers;
  381. //// Create param blocks
  382. for (auto& paramBlock : paramBlockData)
  383. {
  384. ParamBlockPtrType newParamBlockBuffer;
  385. if (!paramBlock.external)
  386. newParamBlockBuffer = ParamBlockType::create(paramBlock.size, paramBlock.usage);
  387. paramBlock.sequentialIdx = (UINT32)mBlocks.size();
  388. paramBlockBuffers[paramBlock.name] = newParamBlockBuffer;
  389. mBlocks.push_back(BlockInfo(paramBlock.name, newParamBlockBuffer, true));
  390. }
  391. //// Assign param block buffers and generate information about data parameters
  392. for (UINT32 i = 0; i < numPasses; i++)
  393. {
  394. for (UINT32 j = 0; j < NUM_STAGES; j++)
  395. {
  396. GpuParamsType paramPtr = getParamByIdx(j, i);
  397. if (paramPtr != nullptr)
  398. {
  399. // Assign shareable buffers
  400. UINT32 paramBlockIdx = 0;
  401. for (auto& block : paramBlockData)
  402. {
  403. const String& paramBlockName = block.name;
  404. if (paramPtr->hasParamBlock(paramBlockName))
  405. {
  406. ParamBlockPtrType blockBuffer = paramBlockBuffers[paramBlockName];
  407. paramPtr->setParamBlockBuffer(paramBlockName, blockBuffer);
  408. }
  409. paramBlockIdx++;
  410. }
  411. // Create non-shareable ones (these are buffers defined by default by the RHI usually)
  412. const GpuParamDesc& desc = paramPtr->getParamDesc();
  413. for (auto iterBlockDesc = desc.paramBlocks.begin(); iterBlockDesc != desc.paramBlocks.end(); ++iterBlockDesc)
  414. {
  415. const GpuParamBlockDesc& blockDesc = iterBlockDesc->second;
  416. UINT32 globalBlockIdx = (UINT32)-1;
  417. if (!blockDesc.isShareable)
  418. {
  419. ParamBlockPtrType newParamBlockBuffer = ParamBlockType::create(blockDesc.blockSize * sizeof(UINT32));
  420. globalBlockIdx = (UINT32)mBlocks.size();
  421. paramPtr->setParamBlockBuffer(iterBlockDesc->first, newParamBlockBuffer);
  422. mBlocks.push_back(BlockInfo(iterBlockDesc->first, newParamBlockBuffer, false));
  423. }
  424. else
  425. {
  426. auto iterFind = std::find_if(paramBlockData.begin(), paramBlockData.end(), [&](const auto& x)
  427. {
  428. return x.name == iterBlockDesc->first;
  429. });
  430. if(iterFind != paramBlockData.end())
  431. globalBlockIdx = iterFind->sequentialIdx;
  432. }
  433. // If this parameter block is valid, create data/struct mappings for it
  434. if (globalBlockIdx == (UINT32)-1)
  435. continue;
  436. for(auto& dataParam : desc.params)
  437. {
  438. if (dataParam.second.paramBlockSlot != blockDesc.slot)
  439. continue;
  440. if (validParams.count(dataParam.first) == 0)
  441. continue;
  442. UINT32 paramIdx = params->getParamIndex(dataParam.first);
  443. // Parameter shouldn't be in the valid parameter list if it cannot be found
  444. assert(paramIdx != -1);
  445. mDataParamInfos.push_back(DataParamInfo());
  446. DataParamInfo& paramInfo = mDataParamInfos.back();
  447. paramInfo.paramIdx = paramIdx;
  448. paramInfo.blockIdx = globalBlockIdx;
  449. paramInfo.offset = dataParam.second.cpuMemOffset;
  450. }
  451. }
  452. }
  453. }
  454. }
  455. // Add buffers defined in shader but not actually used by GPU programs (so we can check if user is providing a
  456. // valid buffer name)
  457. auto& allParamBlocks = shader->getParamBlocks();
  458. for (auto& entry : allParamBlocks)
  459. {
  460. auto iterFind = std::find_if(mBlocks.begin(), mBlocks.end(),
  461. [&](auto& x)
  462. {
  463. return x.name == entry.first;
  464. });
  465. if(iterFind == mBlocks.end())
  466. {
  467. mBlocks.push_back(BlockInfo(entry.first, nullptr, true));
  468. mBlocks.back().isUsed = false;
  469. }
  470. }
  471. // Generate information about object parameters
  472. bs_frame_mark();
  473. {
  474. FrameVector<ObjectParamInfo> objParamInfos;
  475. UINT32 offsetsSize = numPasses * NUM_STAGES * 4 * sizeof(UINT32);
  476. UINT32* offsets = (UINT32*)bs_frame_alloc(offsetsSize);
  477. memset(offsets, 0, offsetsSize);
  478. // First store all objects in temporary arrays since we don't know how many of them are
  479. UINT32 totalNumObjects = 0;
  480. UINT32* stageOffsets = offsets;
  481. for (UINT32 i = 0; i < numPasses; i++)
  482. {
  483. for (UINT32 j = 0; j < NUM_STAGES; j++)
  484. {
  485. GpuParamsType paramPtr = getParamByIdx(j, i);
  486. if (paramPtr == nullptr)
  487. {
  488. stageOffsets += 4;
  489. continue;
  490. }
  491. auto processObjectParams = [&](const Map<String, GpuParamObjectDesc>& gpuParams,
  492. UINT32 stageIdx, MaterialParams::ParamType paramType)
  493. {
  494. for (auto& param : gpuParams)
  495. {
  496. if (validParams.count(param.first) == 0)
  497. continue;
  498. UINT32 paramIdx;
  499. auto result = params->getParamIndex(param.first, paramType, GPDT_UNKNOWN, 0, paramIdx);
  500. // Parameter shouldn't be in the valid parameter list if it cannot be found
  501. assert(result == MaterialParams::GetParamResult::Success);
  502. objParamInfos.push_back(ObjectParamInfo());
  503. ObjectParamInfo& paramInfo = objParamInfos.back();
  504. paramInfo.paramIdx = paramIdx;
  505. paramInfo.slotIdx = param.second.slot;
  506. stageOffsets[stageIdx]++;
  507. totalNumObjects++;
  508. }
  509. };
  510. const GpuParamDesc& desc = paramPtr->getParamDesc();
  511. processObjectParams(desc.textures, 0, MaterialParams::ParamType::Texture);
  512. processObjectParams(desc.loadStoreTextures, 1, MaterialParams::ParamType::Texture);
  513. processObjectParams(desc.buffers, 2, MaterialParams::ParamType::Buffer);
  514. processObjectParams(desc.samplers, 3, MaterialParams::ParamType::Sampler);
  515. stageOffsets += 4;
  516. }
  517. }
  518. // Transfer all objects into their permanent storage
  519. UINT32 objectParamInfosSize = totalNumObjects * sizeof(ObjectParamInfo) + numPasses * sizeof(PassParamInfo);
  520. mPassParamInfos = (PassParamInfo*)bs_alloc(objectParamInfosSize);
  521. memset(mPassParamInfos, 0, objectParamInfosSize);
  522. StageParamInfo* stageInfos = (StageParamInfo*)mPassParamInfos;
  523. ObjectParamInfo* objInfos = (ObjectParamInfo*)(mPassParamInfos + numPasses);
  524. memcpy(objInfos, objParamInfos.data(), totalNumObjects * sizeof(ObjectParamInfo));
  525. UINT32 objInfoOffset = 0;
  526. stageOffsets = offsets;
  527. for (UINT32 i = 0; i < numPasses; i++)
  528. {
  529. for (UINT32 j = 0; j < NUM_STAGES; j++)
  530. {
  531. StageParamInfo& stage = stageInfos[i * NUM_STAGES + j];
  532. if(stageOffsets[0] > 0)
  533. {
  534. UINT32 numEntries = stageOffsets[0];
  535. stage.textures = objInfos + objInfoOffset;
  536. stage.numTextures = numEntries;
  537. objInfoOffset += numEntries;
  538. }
  539. if (stageOffsets[1] > 0)
  540. {
  541. UINT32 numEntries = stageOffsets[1];
  542. stage.loadStoreTextures = objInfos + objInfoOffset;
  543. stage.numLoadStoreTextures = numEntries;
  544. objInfoOffset += numEntries;
  545. }
  546. if (stageOffsets[2] > 0)
  547. {
  548. UINT32 numEntries = stageOffsets[2];
  549. stage.buffers = objInfos + objInfoOffset;
  550. stage.numBuffers = numEntries;
  551. objInfoOffset += numEntries;
  552. }
  553. if (stageOffsets[3] > 0)
  554. {
  555. UINT32 numEntries = stageOffsets[3];
  556. stage.samplerStates = objInfos + objInfoOffset;
  557. stage.numSamplerStates = numEntries;
  558. objInfoOffset += numEntries;
  559. }
  560. stageOffsets += 4;
  561. }
  562. }
  563. bs_frame_free(offsets);
  564. }
  565. bs_frame_clear();
  566. }
/** Releases the memory holding the per-pass and per-stage object parameter information. */
template<bool Core>
TGpuParamsSet<Core>::~TGpuParamsSet()
{
	// All allocations share the same memory, so we just clear it all at once
	bs_free(mPassParamInfos);
}
  573. template<bool Core>
  574. typename TGpuParamsSet<Core>::GpuParamsType TGpuParamsSet<Core>::getGpuParams(GpuProgramType type, UINT32 passIdx)
  575. {
  576. if (passIdx >= mPassParams.size())
  577. return nullptr;
  578. switch (type)
  579. {
  580. case GPT_VERTEX_PROGRAM:
  581. return mPassParams[passIdx].vertex;
  582. case GPT_FRAGMENT_PROGRAM:
  583. return mPassParams[passIdx].fragment;
  584. case GPT_GEOMETRY_PROGRAM:
  585. return mPassParams[passIdx].geometry;
  586. case GPT_HULL_PROGRAM:
  587. return mPassParams[passIdx].hull;
  588. case GPT_DOMAIN_PROGRAM:
  589. return mPassParams[passIdx].domain;
  590. case GPT_COMPUTE_PROGRAM:
  591. return mPassParams[passIdx].compute;
  592. }
  593. return nullptr;
  594. }
  595. template<bool Core>
  596. void TGpuParamsSet<Core>::setParamBlockBuffer(const String& name, const ParamBlockPtrType& paramBlock,
  597. bool ignoreInUpdate)
  598. {
  599. UINT32 foundIdx = (UINT32)-1;
  600. for(UINT32 i = 0; i < (UINT32)mBlocks.size(); i++)
  601. {
  602. BlockInfo& block = mBlocks[i];
  603. if(block.name == name)
  604. {
  605. if (!block.shareable)
  606. {
  607. LOGERR("Cannot set parameter block buffer with the name \"" + name + "\". Buffer is not assignable. ");
  608. return;
  609. }
  610. foundIdx = i;
  611. }
  612. }
  613. if(foundIdx == (UINT32)-1)
  614. {
  615. LOGERR("Cannot set parameter block buffer with the name \"" + name + "\". Buffer name not found. ");
  616. return;
  617. }
  618. if (!mBlocks[foundIdx].isUsed)
  619. return;
  620. mBlocks[foundIdx].buffer = paramBlock;
  621. mBlocks[foundIdx].allowUpdate = !ignoreInUpdate;
  622. UINT32 numPasses = (UINT32)mPassParams.size();
  623. for (UINT32 j = 0; j < numPasses; j++)
  624. {
  625. for (UINT32 i = 0; i < NUM_STAGES; i++)
  626. {
  627. GpuParamsType paramPtr = getParamByIdx(i);
  628. if (paramPtr != nullptr)
  629. {
  630. if (paramPtr->hasParamBlock(name))
  631. paramPtr->setParamBlockBuffer(name, paramBlock);
  632. }
  633. }
  634. }
  635. }
  636. template<bool Core>
  637. void TGpuParamsSet<Core>::update(const SPtr<MaterialParamsType>& params, UINT32 dirtyBitIdx, bool updateAll)
  638. {
  639. // Note: Instead of iterating over every single parameter, it might be more efficient for @p params to keep
  640. // a ring buffer and a version number. Then we could just iterate over the ring buffer and only access dirty
  641. // parameters. If the version number is too high (larger than ring buffer can store), then we force update for all.
  642. // Maximum of 31 techniques are supported. Bit 32 is reserved.
  643. assert(dirtyBitIdx < 31);
  644. UINT32 dirtyFlagMask = 1 << dirtyBitIdx;
  645. // Update data params
  646. for(auto& paramInfo : mDataParamInfos)
  647. {
  648. ParamBlockPtrType paramBlock = mBlocks[paramInfo.blockIdx].buffer;
  649. if (paramBlock == nullptr || !mBlocks[paramInfo.blockIdx].allowUpdate)
  650. continue;
  651. const MaterialParams::ParamData* materialParamInfo = params->getParamData(paramInfo.paramIdx);
  652. if ((materialParamInfo->dirtyFlags & dirtyFlagMask) == 0 && !updateAll)
  653. continue;
  654. UINT32 arraySize = materialParamInfo->arraySize == 0 ? 1 : materialParamInfo->arraySize;
  655. const GpuParamDataTypeInfo& typeInfo = GpuParams::PARAM_SIZES.lookup[(int)materialParamInfo->dataType];
  656. UINT32 paramSize = typeInfo.numColumns * typeInfo.numRows * typeInfo.baseTypeSize;
  657. UINT8* data = params->getData(materialParamInfo->index);
  658. bool transposeMatrices = RenderAPICore::instance().getAPIInfo().getGpuProgramHasColumnMajorMatrices();
  659. if (transposeMatrices)
  660. {
  661. auto writeTransposed = [&](auto& temp)
  662. {
  663. for (UINT32 i = 0; i < arraySize; i++)
  664. {
  665. UINT32 arrayOffset = i * paramSize;
  666. memcpy(&temp, data + arrayOffset, paramSize);
  667. temp.transpose();
  668. paramBlock->write((paramInfo.offset + arrayOffset) * sizeof(UINT32), &temp, paramSize);
  669. }
  670. };
  671. switch (materialParamInfo->dataType)
  672. {
  673. case GPDT_MATRIX_2X2:
  674. {
  675. MatrixNxM<2, 2> matrix;
  676. writeTransposed(matrix);
  677. }
  678. break;
  679. case GPDT_MATRIX_2X3:
  680. {
  681. MatrixNxM<2, 3> matrix;
  682. writeTransposed(matrix);
  683. }
  684. break;
  685. case GPDT_MATRIX_2X4:
  686. {
  687. MatrixNxM<2, 4> matrix;
  688. writeTransposed(matrix);
  689. }
  690. break;
  691. case GPDT_MATRIX_3X2:
  692. {
  693. MatrixNxM<3, 2> matrix;
  694. writeTransposed(matrix);
  695. }
  696. break;
  697. case GPDT_MATRIX_3X3:
  698. {
  699. Matrix3 matrix;
  700. writeTransposed(matrix);
  701. }
  702. break;
  703. case GPDT_MATRIX_3X4:
  704. {
  705. MatrixNxM<3, 4> matrix;
  706. writeTransposed(matrix);
  707. }
  708. break;
  709. case GPDT_MATRIX_4X2:
  710. {
  711. MatrixNxM<4, 2> matrix;
  712. writeTransposed(matrix);
  713. }
  714. break;
  715. case GPDT_MATRIX_4X3:
  716. {
  717. MatrixNxM<4, 3> matrix;
  718. writeTransposed(matrix);
  719. }
  720. break;
  721. case GPDT_MATRIX_4X4:
  722. {
  723. Matrix4 matrix;
  724. writeTransposed(matrix);
  725. }
  726. break;
  727. default:
  728. {
  729. paramBlock->write(paramInfo.offset * sizeof(UINT32), data, paramSize * arraySize);
  730. break;
  731. }
  732. }
  733. }
  734. else
  735. paramBlock->write(paramInfo.offset * sizeof(UINT32), data, paramSize * arraySize);
  736. }
  737. // Update object params
  738. UINT32 numPasses = (UINT32)mPassParams.size();
  739. for(UINT32 i = 0; i < numPasses; i++)
  740. {
  741. for(UINT32 j = 0; j < NUM_STAGES; j++)
  742. {
  743. GpuParamsType paramsPtr = getParamByIdx(j, i);
  744. if(paramsPtr != nullptr)
  745. {
  746. const StageParamInfo& stageInfo = mPassParamInfos[i].stages[j];
  747. for(UINT32 k = 0; k < stageInfo.numTextures; k++)
  748. {
  749. const ObjectParamInfo& paramInfo = stageInfo.textures[k];
  750. const MaterialParams::ParamData* materialParamInfo = params->getParamData(paramInfo.paramIdx);
  751. if ((materialParamInfo->dirtyFlags & dirtyFlagMask) == 0 && !updateAll)
  752. continue;
  753. TextureType texture;
  754. params->getTexture(materialParamInfo->index, texture);
  755. paramsPtr->setTexture(paramInfo.slotIdx, texture);
  756. }
  757. for (UINT32 k = 0; k < stageInfo.numLoadStoreTextures; k++)
  758. {
  759. const ObjectParamInfo& paramInfo = stageInfo.loadStoreTextures[k];
  760. const MaterialParams::ParamData* materialParamInfo = params->getParamData(paramInfo.paramIdx);
  761. if ((materialParamInfo->dirtyFlags & dirtyFlagMask) == 0 && !updateAll)
  762. continue;
  763. TextureSurface surface;
  764. TextureType texture;
  765. params->getLoadStoreTexture(materialParamInfo->index, texture, surface);
  766. paramsPtr->setLoadStoreTexture(paramInfo.slotIdx, texture, surface);
  767. }
  768. for (UINT32 k = 0; k < stageInfo.numBuffers; k++)
  769. {
  770. const ObjectParamInfo& paramInfo = stageInfo.buffers[k];
  771. const MaterialParams::ParamData* materialParamInfo = params->getParamData(paramInfo.paramIdx);
  772. if ((materialParamInfo->dirtyFlags & dirtyFlagMask) == 0 && !updateAll)
  773. continue;
  774. BufferType buffer;
  775. params->getBuffer(materialParamInfo->index, buffer);
  776. paramsPtr->setBuffer(paramInfo.slotIdx, buffer);
  777. }
  778. for (UINT32 k = 0; k < stageInfo.numSamplerStates; k++)
  779. {
  780. const ObjectParamInfo& paramInfo = stageInfo.samplerStates[k];
  781. const MaterialParams::ParamData* materialParamInfo = params->getParamData(paramInfo.paramIdx);
  782. if ((materialParamInfo->dirtyFlags & dirtyFlagMask) == 0 && !updateAll)
  783. continue;
  784. SamplerStateType samplerState;
  785. params->getSamplerState(materialParamInfo->index, samplerState);
  786. paramsPtr->setSamplerState(paramInfo.slotIdx, samplerState);
  787. }
  788. paramsPtr->_markCoreDirty();
  789. }
  790. }
  791. }
  792. }
// Explicitly instantiate both variants of the template (Core = false and Core = true), since the member
// definitions live in this source file
template class TGpuParamsSet <false>;
template class TGpuParamsSet <true>;
  795. }