Shader.cpp

  1. /******************************************************************************
  2. Shaders that use 'SV_SampleIndex' execute on a per-sample basis,
  3. all others execute on a per-pixel basis.
  4. Depth/Stencil tests, however, are always performed on a per-sample basis.
  5. TODO: !! All GLSL shaders need to be optimized either by hand or with a better converter, this could give a performance boost of up to 2x !!
  6. /******************************************************************************/
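/* Editorial note - an illustrative sketch in plain HLSL (not code from this engine): a pixel shader
   becomes per-sample simply by declaring an 'SV_SampleIndex' input, otherwise it runs once per pixel
   while depth/stencil testing still happens per sample:

      // forced to run once per MSAA sample because of the SV_SampleIndex input
      float4 PS_PerSample(float4 pos:SV_Position, uint sample_index:SV_SampleIndex):SV_Target {...}

      // runs once per pixel; depth/stencil tests remain per-sample
      float4 PS_PerPixel (float4 pos:SV_Position                                  ):SV_Target {...}
*/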
  7. #include "stdafx.h"
  8. #include "../Shaders/!Header CPU.h"
  9. namespace EE{
  10. #if DEBUG
  11. #define FORCE_TEX 0
  12. #define FORCE_BUF 0
  13. #else
  14. #define FORCE_TEX 0
  15. #define FORCE_BUF 0
  16. #endif
  17. #define ALLOW_PARTIAL_BUFFERS 0 // using partial buffers (1) actually made things slower, 100fps(1) vs 102fps(0), so use default value (0), TODO: check on newer hardware
  18. #define BUFFER_DYNAMIC 0 // for ALLOW_PARTIAL_BUFFERS=0, using 1 made no difference in performance, so use 0 to reduce API calls. But for ALLOW_PARTIAL_BUFFERS=1 using 1 was slower
  19. /******************************************************************************/
  20. #if DX9
  21. static IDirect3DBaseTexture9 *Tex[MAX_DX9_TEXTURES];
  22. #elif DX11
  23. static ID3D11ShaderResourceView *VSTex[MAX_TEXTURES], *HSTex[MAX_TEXTURES], *DSTex[MAX_TEXTURES], *PSTex[MAX_TEXTURES];
  24. #elif GL
  25. static UInt Tex[MAX_TEXTURES];
  26. #endif
  27. INLINE void DisplayState::texVS(Int index, GPU_API(IDirect3DBaseTexture9*, ID3D11ShaderResourceView*, UInt) tex)
  28. {
  29. #if DX11
  30. if(VSTex[index]!=tex)D3DC->VSSetShaderResources(index, 1, &(VSTex[index]=tex));
  31. #endif
  32. }
  33. INLINE void DisplayState::texHS(Int index, GPU_API(IDirect3DBaseTexture9*, ID3D11ShaderResourceView*, UInt) tex)
  34. {
  35. #if DX11
  36. if(HSTex[index]!=tex)D3DC->HSSetShaderResources(index, 1, &(HSTex[index]=tex));
  37. #endif
  38. }
  39. INLINE void DisplayState::texDS(Int index, GPU_API(IDirect3DBaseTexture9*, ID3D11ShaderResourceView*, UInt) tex)
  40. {
  41. #if DX11
  42. if(DSTex[index]!=tex)D3DC->DSSetShaderResources(index, 1, &(DSTex[index]=tex));
  43. #endif
  44. }
  45. INLINE void DisplayState::texPS(Int index, GPU_API(IDirect3DBaseTexture9*, ID3D11ShaderResourceView*, UInt) tex)
  46. {
  47. #if DX9
  48. if(Tex[index]!=tex || FORCE_TEX)D3D->SetTexture(index, Tex[index]=tex);
  49. #elif DX11
  50. if(PSTex[index]!=tex || FORCE_TEX)D3DC->PSSetShaderResources(index, 1, &(PSTex[index]=tex));
  51. #endif
  52. }
  53. void DisplayState::texClear(GPU_API(IDirect3DBaseTexture9*, ID3D11ShaderResourceView*, UInt) tex)
  54. {
  55. #if DX9
  56. if(tex)REPA(Tex)if(Tex[i]==tex)Tex[i]=null;
  57. #elif DX11
  58. if(tex)REPA(PSTex)if(PSTex[i]==tex)PSTex[i]=null;
  59. #elif GL
  60. if(tex)REPA(Tex)if(Tex[i]==tex)Tex[i]=~0;
  61. #endif
  62. }
  63. #if GL
  64. static UInt ActiveTexture=0;
  65. INLINE static void ActivateTexture(Int index)
  66. {
  67. if(ActiveTexture!=index || FORCE_TEX)
  68. {
  69. ActiveTexture=index;
  70. glActiveTexture(GL_TEXTURE0+index);
  71. }
  72. }
  73. void DisplayState::texBind(UInt mode, UInt tex) // this should be called instead of 'glBindTexture'
  74. {
  75. if(GetThreadId()==App.threadID()) // textures are bound per-context, so remember them only on the main thread
  76. {
  77. if(Tex[ActiveTexture]==tex)return;
  78. Tex[ActiveTexture]= tex;
  79. }
  80. glBindTexture(mode, tex);
  81. }
  82. INLINE static void TexBind(UInt mode, UInt tex)
  83. {
  84. Tex[ActiveTexture]=tex;
  85. glBindTexture(mode, tex);
  86. }
  87. static void SetTexture(Int index, C Image *image, ShaderImage::Sampler *sampler) // this is called only on the Main thread
  88. {
  89. #if 0
  90. glBindMultiTextureEXT(GL_TEXTURE0+index, GL_TEXTURE_2D, txtr); // not supported on ATI (tested on Radeon 5850)
  91. #else
  92. UInt txtr=(image ? image->_txtr : 0);
  93. if(Tex[index]!=txtr || FORCE_TEX)
  94. {
  95. ActivateTexture(index);
  96. if(!txtr) // clear all modes
  97. {
  98. Tex[index]=0;
  99. glBindTexture(GL_TEXTURE_2D , 0);
  100. glBindTexture(GL_TEXTURE_3D , 0);
  101. glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
  102. }else
  103. switch(image->mode())
  104. {
  105. case IMAGE_2D:
  106. case IMAGE_RT:
  107. case IMAGE_DS_RT:
  108. case IMAGE_SHADOW_MAP:
  109. {
  110. TexBind(GL_TEXTURE_2D, image->_txtr);
  111. UInt s, t;
  112. if(!sampler)s=t=D._sampler_address;else // use default
  113. {
  114. s=sampler->address[0];
  115. t=sampler->address[1];
  116. }
  117. if(image->_w_s!=s)glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, ConstCast(image->_w_s)=s);
  118. if(image->_w_t!=t)glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, ConstCast(image->_w_t)=t);
  119. }break;
  120. case IMAGE_3D:
  121. {
  122. TexBind(GL_TEXTURE_3D, image->_txtr);
  123. UInt s, t, r;
  124. if(!sampler)s=t=r=D._sampler_address;else
  125. {
  126. s=sampler->address[0];
  127. t=sampler->address[1];
  128. r=sampler->address[2];
  129. }
  130. if(image->_w_s!=s)glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, ConstCast(image->_w_s)=s);
  131. if(image->_w_t!=t)glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, ConstCast(image->_w_t)=t);
  132. if(image->_w_r!=r)glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, ConstCast(image->_w_r)=r);
  133. }break;
  134. case IMAGE_CUBE:
  135. {
  136. TexBind(GL_TEXTURE_CUBE_MAP, image->_txtr);
  137. }break;
  138. }
  139. }else
  140. if(txtr)switch(image->mode()) // check if sampler states need to be adjusted
  141. {
  142. case IMAGE_2D:
  143. case IMAGE_RT:
  144. case IMAGE_DS_RT:
  145. case IMAGE_SHADOW_MAP:
  146. {
  147. UInt s, t;
  148. if(!sampler)s=t=D._sampler_address;else
  149. {
  150. s=sampler->address[0];
  151. t=sampler->address[1];
  152. }
  153. if(image->_w_s!=s || image->_w_t!=t)
  154. {
  155. ActivateTexture(index); TexBind(GL_TEXTURE_2D, image->_txtr);
  156. if(image->_w_s!=s)glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, ConstCast(image->_w_s)=s);
  157. if(image->_w_t!=t)glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, ConstCast(image->_w_t)=t);
  158. }
  159. }break;
  160. case IMAGE_3D:
  161. {
  162. UInt s, t, r;
  163. if(!sampler)s=t=r=D._sampler_address;else
  164. {
  165. s=sampler->address[0];
  166. t=sampler->address[1];
  167. r=sampler->address[2];
  168. }
  169. if(image->_w_s!=s || image->_w_t!=t || image->_w_r!=r)
  170. {
  171. ActivateTexture(index); TexBind(GL_TEXTURE_3D, image->_txtr);
  172. if(image->_w_s!=s)glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, ConstCast(image->_w_s)=s);
  173. if(image->_w_t!=t)glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, ConstCast(image->_w_t)=t);
  174. if(image->_w_r!=r)glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, ConstCast(image->_w_r)=r);
  175. }
  176. }break;
  177. }
  178. #endif
  179. }
  180. #endif
  181. /******************************************************************************/
  182. #if DX11
  183. static ID3D11Buffer *vs_buf[MAX_SHADER_BUFFERS], *hs_buf[MAX_SHADER_BUFFERS], *ds_buf[MAX_SHADER_BUFFERS], *ps_buf[MAX_SHADER_BUFFERS];
  184. static INLINE void BufVS(Int index, ID3D11Buffer *buf) {if(vs_buf[index]!=buf || FORCE_BUF)D3DC->VSSetConstantBuffers(index, 1, &(vs_buf[index]=buf));}
  185. static INLINE void BufHS(Int index, ID3D11Buffer *buf) {if(hs_buf[index]!=buf || FORCE_BUF)D3DC->HSSetConstantBuffers(index, 1, &(hs_buf[index]=buf));}
  186. static INLINE void BufDS(Int index, ID3D11Buffer *buf) {if(ds_buf[index]!=buf || FORCE_BUF)D3DC->DSSetConstantBuffers(index, 1, &(ds_buf[index]=buf));}
  187. static INLINE void BufPS(Int index, ID3D11Buffer *buf) {if(ps_buf[index]!=buf || FORCE_BUF)D3DC->PSSetConstantBuffers(index, 1, &(ps_buf[index]=buf));}
  188. #endif
  189. /******************************************************************************/
  190. Cache<ShaderFile> ShaderFiles("Shader");
  191. static Byte RestoreSamplerIndex[256],
  192. RestoreSamplers;
  193. GPU_API(Shader9, Shader11, ShaderGL) *ShaderCur;
  194. /******************************************************************************/
  195. INLINE static void COPY(Ptr dest, CPtr src, UInt size)
  196. {
  197. U32 *d=(U32*)dest,
  198. *s=(U32*)src ;
  199. REP(DivCeil4(size))*d++=*s++;
  200. }
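/* Editorial note: 'COPY' above transfers whole 32-bit words and rounds the size up, e.g.
   COPY(dest, src, 13) moves DivCeil4(13)=4 words = 16 bytes. That is why the shader-data
   allocations below ('ShaderBuffer::create') round with 'Ceil4' and add padding - the tail of
   the last word must be valid memory on both sides of the copy. */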
  201. /******************************************************************************/
  202. // SHADER IMAGE
  203. /******************************************************************************/
  204. ThreadSafeMap<Str8, ShaderImage> ShaderImages(CompareCS);
  205. /******************************************************************************/
  206. #if DX9
  207. void ShaderImage::Sampler::set(Int index)
  208. {
  209. RestoreSamplerIndex[RestoreSamplers++]=index;
  210. D3D->SetSamplerState(index, D3DSAMP_MINFILTER, filter [0]);
  211. D3D->SetSamplerState(index, D3DSAMP_MAGFILTER, filter [1]);
  212. D3D->SetSamplerState(index, D3DSAMP_ADDRESSU , address[0]);
  213. D3D->SetSamplerState(index, D3DSAMP_ADDRESSV , address[1]);
  214. D3D->SetSamplerState(index, D3DSAMP_ADDRESSW , address[2]);
  215. }
  216. #elif DX11
  217. void ShaderImage::Sampler::del()
  218. {
  219. if(state)
  220. {
  221. //SyncLocker locker(D._lock); if(state) lock not needed for DX11 'Release'
  222. {if(D.created())state->Release(); state=null;} // clear while in lock
  223. }
  224. }
  225. Bool ShaderImage::Sampler::createTry(D3D11_SAMPLER_DESC &desc)
  226. {
  227. //SyncLocker locker(D._lock); lock not needed for DX11 'D3D'
  228. del();
  229. if(D3D)D3D->CreateSamplerState(&desc, &state);
  230. return state!=null;
  231. }
  232. void ShaderImage::Sampler::create(D3D11_SAMPLER_DESC &desc)
  233. {
  234. if(!createTry(desc))Exit(S+"Can't create Sampler State\n"
  235. "Filter: "+desc.Filter+"\n"
  236. "Address: "+desc.AddressU+','+desc.AddressV+','+desc.AddressW+"\n"
  237. "MipLODBias: "+desc.MipLODBias+"\n"
  238. "Anisotropy: "+desc.MaxAnisotropy+"\n"
  239. "ComparisonFunc: "+desc.ComparisonFunc+"\n"
  240. "MinMaxLOD: "+desc.MinLOD+','+desc.MaxLOD);
  241. }
  242. void ShaderImage::Sampler::setVS(Int index) {D3DC->VSSetSamplers(index, 1, &state);}
  243. void ShaderImage::Sampler::setHS(Int index) {D3DC->HSSetSamplers(index, 1, &state);}
  244. void ShaderImage::Sampler::setDS(Int index) {D3DC->DSSetSamplers(index, 1, &state);}
  245. void ShaderImage::Sampler::setPS(Int index) {D3DC->PSSetSamplers(index, 1, &state);}
  246. void ShaderImage::Sampler::set (Int index) {setVS(index); setHS(index); setDS(index); setPS(index);}
  247. #endif
  248. /******************************************************************************/
  249. // SHADER BUFFER
  250. /******************************************************************************/
  251. ThreadSafeMap<Str8, ShaderBuffer> ShaderBuffers(CompareCS);
  252. /******************************************************************************/
  253. void ShaderBuffer::Buffer::del()
  254. {
  255. if(buffer)
  256. {
  257. #if DX11
  258. //SyncLocker locker(D._lock); if(buffer) lock not needed for DX11 'Release'
  259. {if(D.created())buffer->Release(); buffer=null;} // clear while in lock
  260. #endif
  261. }
  262. size=0;
  263. }
  264. void ShaderBuffer::Buffer::create(Int size)
  265. {
  266. //if(T.size!=size) can't check for this, because buffers can be dynamically resized
  267. {
  268. del();
  269. T.size=size;
  270. #if DX11
  271. //SyncLocker lock(D._lock); lock not needed for DX11 'D3D'
  272. if(D3D)
  273. {
  274. D3D11_BUFFER_DESC desc;
  275. desc.ByteWidth =size;
  276. desc.Usage =(BUFFER_DYNAMIC ? D3D11_USAGE_DYNAMIC : D3D11_USAGE_DEFAULT);
  277. desc.CPUAccessFlags =(BUFFER_DYNAMIC ? D3D11_CPU_ACCESS_WRITE : 0);
  278. desc.BindFlags =D3D11_BIND_CONSTANT_BUFFER;
  279. desc.MiscFlags =0;
  280. desc.StructureByteStride=0;
  281. D3D->CreateBuffer(&desc, null, &buffer);
  282. }
  283. #endif
  284. }
  285. if(!buffer)Exit("Can't create Constant Buffer");
  286. }
  287. /******************************************************************************/
  288. // !! Warning: if we have any 'parts', then 'buffer' does not own the resources, but is just a raw copy !!
  289. /******************************************************************************/
  290. ShaderBuffer::~ShaderBuffer()
  291. {
  292. if(parts.elms())buffer.zero(); // if we have any 'parts', then 'buffer' does not own the resources, so just zero it, and they will be released in the 'parts' container
  293. Free(data);
  294. }
  295. ShaderBuffer::ShaderBuffer()
  296. {
  297. changed=false;
  298. data =null;
  299. }
  300. void ShaderBuffer::create(Int size) // no locks needed because this is called only in shader loading, and there 'ShaderBuffers.lock' is called
  301. {
  302. buffer.create(size);
  303. AllocZero(data, Ceil4(size+SIZEI(Vec4))); // add an extra "Vec4 padd" at the end because, for performance reasons, all 'ShaderParam.set' methods assume there is at least SIZE(Vec4) of space; use "+" instead of "Max" in case we have "Flt p[2]" and call 'ShaderParam.set(Vec4)' on a ShaderParam created from "p[1]", which would overwrite "p[1..4]"; use 'Ceil4' because 'COPY' copies in Ceil4-rounded chunks
  304. changed=true;
  305. }
  306. void ShaderBuffer::update()
  307. {
  308. #if DX11
  309. if(BUFFER_DYNAMIC)
  310. {
  311. D3D11_MAPPED_SUBRESOURCE map;
  312. if(OK(D3DC->Map(buffer.buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map)))
  313. {
  314. COPY(map.pData, data, buffer.size);
  315. D3DC->Unmap(buffer.buffer, 0);
  316. }
  317. }else
  318. #if ALLOW_PARTIAL_BUFFERS // check for partial updates only if we may operate on partial buffers, because otherwise we always set entire buffers (which are smaller and separated into parts) and we can avoid the overhead of setting up 'D3D11_BOX'
  319. if(D3DC1) // use partial updates where available to reduce the amount of memory copied
  320. {
  321. D3D11_BOX box;
  322. box.front=box.top=box.left=0;
  323. box.right=Ceil16(buffer.size); box.back=box.bottom=1; // must be 16-byte aligned or DX will fail
  324. D3DC1->UpdateSubresource1(buffer.buffer, 0, &box, data, 0, 0, D3D11_COPY_DISCARD);
  325. }else
  326. #endif
  327. D3DC ->UpdateSubresource (buffer.buffer, 0, null, data, 0, 0);
  328. #endif
  329. changed=false;
  330. }
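/* Editorial sketch (based on the code in this file, details hedged): 'ShaderParam::set' writes only
   into CPU-side memory and marks it as changed through its '_changed' pointer - for DX11 this
   presumably aliases the owning 'ShaderBuffer::changed' flag. At draw time 'Shader11::commit' (below)
   then calls 'update' only for buffers whose 'changed' flag is set, so any number of parameter writes
   cost a single GPU upload:

      some_param->set(value); // hypothetical ShaderParam: CPU copy only, flags the data as changed
      // ... more parameter writes ...
      ShaderCur->commit();    // uploads each changed constant buffer once before the draw call
*/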
  331. void ShaderBuffer::bind(Int index)
  332. {
  333. #if DX11
  334. BufVS(index, buffer.buffer);
  335. BufHS(index, buffer.buffer);
  336. BufDS(index, buffer.buffer);
  337. BufPS(index, buffer.buffer);
  338. #endif
  339. }
  340. void ShaderBuffer::bindCheck(Int index)
  341. {
  342. if(index>=0)
  343. {
  344. if(!InRange(index, MAX_SHADER_BUFFERS))Exit("Invalid ShaderBuffer bind index");
  345. #if DX11
  346. ID3D11Buffer *buf=vs_buf[index];
  347. #else
  348. Ptr buf=null;
  349. #endif
  350. if(buffer .buffer==buf)return;
  351. REPA(parts)if(parts[i].buffer==buf)return;
  352. Exit(S+"ShaderBuffer was expected to be bound at slot "+index);
  353. }
  354. }
  355. void ShaderBuffer::setPart(Int part)
  356. {
  357. buffer =parts[part]; // perform a raw copy
  358. changed=true;
  359. }
  360. void ShaderBuffer::createParts(C Int *elms, Int elms_num)
  361. {
  362. Int elm_size=buffer.size/elms[0];
  363. parts.setNum(elms_num); parts[0]=buffer; // store a raw copy of the buffer that was already created in the first slot, so we can keep it as backup and use later
  364. for(Int i=1; i<parts.elms(); i++)parts[i].create(elm_size*elms[i]);
  365. }
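/* Editorial example with hypothetical numbers: if the buffer created earlier holds elms[0]=256
   elements and 'createParts' is called with elms={256, 60}, then elm_size=buffer.size/256,
   parts[0] keeps a raw copy of the existing full-size buffer, and parts[1] gets its own smaller
   GPU buffer of elm_size*60 bytes; 'setPart' later raw-copies the chosen part back into 'buffer'
   and marks it changed. */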
  366. /******************************************************************************/
  367. // SHADER PARAM
  368. /******************************************************************************/
  369. static Int Compare(C ShaderParam::Translation &a, C ShaderParam::Translation &b)
  370. {
  371. return Compare(a.cpu_offset, b.cpu_offset);
  372. }
  373. ThreadSafeMap<Str8, ShaderParam> ShaderParams(CompareCS);
  374. /******************************************************************************/
  375. ShaderParam::~ShaderParam()
  376. {
  377. if(_owns_data)
  378. {
  379. Free(_data );
  380. Free(_changed);
  381. }
  382. _data =null;
  383. _changed=null;
  384. _cpu_data_size=_gpu_data_size=_elements=_constant_count=0;
  385. }
  386. ShaderParam::ShaderParam()
  387. {
  388. _data =null;
  389. _changed=null;
  390. _cpu_data_size=_gpu_data_size=_elements=_constant_count=0;
  391. _owns_data=false;
  392. }
  393. /******************************************************************************/
  394. void ShaderParam::optimize()
  395. {
  396. _optimized_translation=_full_translation;
  397. _optimized_translation.sort(Compare);
  398. REPA(_optimized_translation)if(i)
  399. {
  400. Translation &prev=_optimized_translation[i-1],
  401. &next=_optimized_translation[i ];
  402. if(prev.cpu_offset+prev.elm_size==next.cpu_offset
  403. && prev.gpu_offset+prev.elm_size==next.gpu_offset)
  404. {
  405. prev.elm_size+=next.elm_size;
  406. _optimized_translation.remove(i, true);
  407. }
  408. }
  409. }
  410. void ShaderParam::initAsElement(ShaderParam &parent, Int index)
  411. {
  412. _owns_data =false;
  413. _cpu_data_size=parent._cpu_data_size/parent._elements; // set size of single element
  414. _data =parent._data;
  415. _changed =parent._changed;
  416. if( parent._full_translation.elms()%parent._elements)Exit("Shader Mod");
  417. Int elm_translations=parent._full_translation.elms()/parent._elements; // single element translations
  418. FREP(elm_translations)_full_translation.add(parent._full_translation[index*elm_translations+i]);
  419. Int offset=_full_translation[0].gpu_offset; _data+=offset; REPAO(_full_translation).gpu_offset-=offset; // apply offset
  420. offset=_full_translation[0].cpu_offset; REPAO(_full_translation).cpu_offset-=offset; // apply offset
  421. optimize();
  422. REPA(_optimized_translation)MAX(_gpu_data_size, _optimized_translation[i].gpu_offset+_optimized_translation[i].elm_size);
  423. }
  424. /******************************************************************************/
  425. void ShaderParam::set( Bool b ) {setChanged(); *(Flt *)_data=b;}
  426. void ShaderParam::set( Int i ) {setChanged(); *(Flt *)_data=i;}
  427. void ShaderParam::set( Flt f ) {setChanged(); *(Flt *)_data=f;}
  428. void ShaderParam::set( Dbl d ) {setChanged(); *(Flt *)_data=d;}
  429. void ShaderParam::set(C Vec2 &v ) {setChanged(); *(Vec2*)_data=v;}
  430. void ShaderParam::set(C VecD2 &v ) {setChanged(); *(Vec2*)_data=v;}
  431. void ShaderParam::set(C VecI2 &v ) {setChanged(); *(Vec2*)_data=v;}
  432. void ShaderParam::set(C Vec &v ) {setChanged(); *(Vec *)_data=v;}
  433. void ShaderParam::set(C VecD &v ) {setChanged(); *(Vec *)_data=v;}
  434. void ShaderParam::set(C VecI &v ) {setChanged(); *(Vec *)_data=v;}
  435. void ShaderParam::set(C Vec4 &v ) {setChanged(); *(Vec4*)_data=v;}
  436. void ShaderParam::set(C VecD4 &v ) {setChanged(); *(Vec4*)_data=v;}
  437. void ShaderParam::set(C VecI4 &v ) {setChanged(); *(Vec4*)_data=v;}
  438. void ShaderParam::set(C Rect &rect ) {setChanged(); *(Rect*)_data=rect;}
  439. void ShaderParam::set(C Color &color ) {setChanged(); (*(Vec4*)_data).set(color.r/255.0f, color.g/255.0f, color.b/255.0f, color.a/255.0f);}
  440. void ShaderParam::set(C Vec *v, Int elms)
  441. {
  442. setChanged();
  443. #if DX9 || DX11
  444. Vec4 *gpu=(Vec4*)_data;
  445. REP(Min(elms, (_gpu_data_size+SIZEU(Flt))/SIZEU(Vec4)))gpu[i].xyz=v[i]; // add SIZE(Flt) because '_gpu_data_size' may be SIZE(Vec) and div by SIZE(Vec4) would return 0 even though one Vec would fit (elements are aligned by 'Vec4' but we're writing only 'Vec')
  446. #elif GL
  447. COPY(_data, v, Min(_gpu_data_size, SIZEU(*v)*elms));
  448. #endif
  449. }
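/* Editorial worked example of the "+SIZEU(Flt)" above: if '_gpu_data_size' is SIZE(Vec)=12
   (a single Vec occupying a Vec4 register but reported without the unused tail), then
   12/SIZE(Vec4) would give 0 elements, while (12+4)/16 = 1 correctly lets the one Vec be written. */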
  450. void ShaderParam::set(C Vec4 *v, Int elms) {setChanged(); COPY(_data, v, Min(_gpu_data_size, SIZEU(*v)*elms));}
  451. void ShaderParam::set(C Matrix3 &matrix)
  452. {
  453. #if DX9 || DX11
  454. if(_gpu_data_size>=SIZE(Vec4)+SIZE(Vec4)+SIZE(Vec)) // do not test for 'SIZE(Matrix)' !! because '_gpu_data_size' may be SIZE(Matrix) minus last Flt, because it's not really used (this happens on DX10+)
  455. {
  456. setChanged();
  457. Vec4 *gpu=(Vec4*)_data;
  458. gpu[0].xyz.set(matrix.x.x, matrix.y.x, matrix.z.x); // SIZE(Vec4)
  459. gpu[1].xyz.set(matrix.x.y, matrix.y.y, matrix.z.y); // SIZE(Vec4)
  460. gpu[2].xyz.set(matrix.x.z, matrix.y.z, matrix.z.z); // SIZE(Vec )
  461. }
  462. #elif GL
  463. if(_gpu_data_size>=SIZE(matrix))
  464. {
  465. setChanged();
  466. Vec *gpu=(Vec*)_data;
  467. gpu[0].set(matrix.x.x, matrix.y.x, matrix.z.x);
  468. gpu[1].set(matrix.x.y, matrix.y.y, matrix.z.y);
  469. gpu[2].set(matrix.x.z, matrix.y.z, matrix.z.z);
  470. }
  471. #endif
  472. }
  473. void ShaderParam::set(C Matrix &matrix)
  474. {
  475. if(_gpu_data_size>=SIZE(matrix))
  476. {
  477. setChanged();
  478. Vec4 *gpu=(Vec4*)_data;
  479. gpu[0].set(matrix.x.x, matrix.y.x, matrix.z.x, matrix.pos.x);
  480. gpu[1].set(matrix.x.y, matrix.y.y, matrix.z.y, matrix.pos.y);
  481. gpu[2].set(matrix.x.z, matrix.y.z, matrix.z.z, matrix.pos.z);
  482. }
  483. }
  484. void ShaderParam::set(C MatrixM &matrix)
  485. {
  486. if(_gpu_data_size>=SIZE(Matrix)) // we're setting as 'Matrix' and not 'MatrixM'
  487. {
  488. setChanged();
  489. Vec4 *gpu=(Vec4*)_data;
  490. gpu[0].set(matrix.x.x, matrix.y.x, matrix.z.x, matrix.pos.x);
  491. gpu[1].set(matrix.x.y, matrix.y.y, matrix.z.y, matrix.pos.y);
  492. gpu[2].set(matrix.x.z, matrix.y.z, matrix.z.z, matrix.pos.z);
  493. }
  494. }
  495. void ShaderParam::set(C Matrix4 &matrix)
  496. {
  497. if(_gpu_data_size>=SIZE(matrix))
  498. {
  499. setChanged();
  500. Vec4 *gpu=(Vec4*)_data;
  501. gpu[0].set(matrix.x.x, matrix.y.x, matrix.z.x, matrix.pos.x);
  502. gpu[1].set(matrix.x.y, matrix.y.y, matrix.z.y, matrix.pos.y);
  503. gpu[2].set(matrix.x.z, matrix.y.z, matrix.z.z, matrix.pos.z);
  504. gpu[3].set(matrix.x.w, matrix.y.w, matrix.z.w, matrix.pos.w);
  505. }
  506. }
  507. void ShaderParam::set(C Matrix *matrix, Int elms)
  508. {
  509. setChanged();
  510. Vec4 *gpu=(Vec4*)_data;
  511. REP(Min(elms, UInt(_gpu_data_size)/SIZEU(*matrix)))
  512. {
  513. gpu[0].set(matrix->x.x, matrix->y.x, matrix->z.x, matrix->pos.x);
  514. gpu[1].set(matrix->x.y, matrix->y.y, matrix->z.y, matrix->pos.y);
  515. gpu[2].set(matrix->x.z, matrix->y.z, matrix->z.z, matrix->pos.z);
  516. gpu+=3;
  517. matrix++;
  518. }
  519. }
  520. void ShaderParam::set(CPtr data, Int size) // !! Warning: 'size' is ignored here for performance reasons !!
  521. {
  522. setChanged();
  523. REPA(_optimized_translation)
  524. {
  525. C ShaderParam::Translation &trans=_optimized_translation[i];
  526. COPY(T._data+trans.gpu_offset, (Byte*)data+trans.cpu_offset, trans.elm_size);
  527. }
  528. }
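/* Editorial sketch, assuming the usual HLSL cbuffer packing rules and a hypothetical layout:
   'Translation' maps the tightly packed CPU struct onto the padded GPU layout. For
   "struct { Flt f; Vec4 v; }" the CPU offsets are 0 and 4, but 'v' cannot straddle a 16-byte
   register so its GPU offset becomes 16; the translations would then be
   {cpu_offset=0, gpu_offset=0, elm_size=4} and {cpu_offset=4, gpu_offset=16, elm_size=16}.
   'optimize' (above) merges only neighbours contiguous on both the CPU and GPU side, which these
   are not, so both entries are kept and copied separately here. */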
  529. void ShaderParam::set(C Vec &v, Int elm)
  530. {
  531. #if DX9 || DX11
  532. if(_gpu_data_size>=SIZE(Vec4)*elm+SIZE(Vec)) // elements are aligned by 'Vec4' but we're writing only 'Vec'
  533. {
  534. setChanged();
  535. Vec4 *gpu=(Vec4*)_data;
  536. gpu[elm].xyz=v;
  537. }
  538. #elif GL
  539. if(_gpu_data_size>=SIZE(v)*(elm+1))
  540. {
  541. setChanged();
  542. Vec *gpu=(Vec*)_data;
  543. gpu[elm]=v;
  544. }
  545. #endif
  546. }
  547. void ShaderParam::set(C Vec4 &v, Int elm)
  548. {
  549. if(_gpu_data_size>=SIZE(v)*(elm+1))
  550. {
  551. setChanged();
  552. Vec4 *gpu=(Vec4*)_data;
  553. gpu[elm]=v;
  554. }
  555. }
  556. void ShaderParam::set(C Matrix &matrix, Int elm)
  557. {
  558. if(_gpu_data_size>=SIZE(matrix)*(elm+1))
  559. {
  560. setChanged();
  561. Vec4 *gpu=(Vec4*)&(((GpuMatrix*)_data)[elm]);
  562. gpu[0].set(matrix.x.x, matrix.y.x, matrix.z.x, matrix.pos.x);
  563. gpu[1].set(matrix.x.y, matrix.y.y, matrix.z.y, matrix.pos.y);
  564. gpu[2].set(matrix.x.z, matrix.y.z, matrix.z.z, matrix.pos.z);
  565. }
  566. }
  567. void ShaderParam::fromMul(C Matrix &a, C Matrix &b)
  568. {
  569. if(_gpu_data_size>=SIZE(GpuMatrix))
  570. {
  571. setChanged();
  572. ((GpuMatrix*)_data)->fromMul(a, b);
  573. }
  574. }
  575. void ShaderParam::fromMul(C Matrix &a, C MatrixM &b)
  576. {
  577. if(_gpu_data_size>=SIZE(GpuMatrix))
  578. {
  579. setChanged();
  580. ((GpuMatrix*)_data)->fromMul(a, b);
  581. }
  582. }
  583. void ShaderParam::fromMul(C MatrixM &a, C MatrixM &b)
  584. {
  585. if(_gpu_data_size>=SIZE(GpuMatrix))
  586. {
  587. setChanged();
  588. ((GpuMatrix*)_data)->fromMul(a, b);
  589. }
  590. }
  591. void ShaderParam::fromMul(C Matrix &a, C Matrix &b, Int elm)
  592. {
  593. if(_gpu_data_size>=SIZE(GpuMatrix)*(elm+1))
  594. {
  595. setChanged();
  596. GpuMatrix *gpu=(GpuMatrix*)_data;
  597. gpu[elm].fromMul(a, b);
  598. }
  599. }
  600. void ShaderParam::fromMul(C Matrix &a, C MatrixM &b, Int elm)
  601. {
  602. if(_gpu_data_size>=SIZE(GpuMatrix)*(elm+1))
  603. {
  604. setChanged();
  605. GpuMatrix *gpu=(GpuMatrix*)_data;
  606. gpu[elm].fromMul(a, b);
  607. }
  608. }
  609. void ShaderParam::fromMul(C MatrixM &a, C MatrixM &b, Int elm)
  610. {
  611. if(_gpu_data_size>=SIZE(GpuMatrix)*(elm+1))
  612. {
  613. setChanged();
  614. GpuMatrix *gpu=(GpuMatrix*)_data;
  615. gpu[elm].fromMul(a, b);
  616. }
  617. }
  618. void ShaderParam::set(C GpuMatrix &matrix)
  619. {
  620. if(_gpu_data_size>=SIZE(matrix))
  621. {
  622. setChanged();
  623. GpuMatrix &gpu=*(GpuMatrix*)_data;
  624. gpu=matrix;
  625. }
  626. }
  627. void ShaderParam::set(C GpuMatrix &matrix, Int elm)
  628. {
  629. if(_gpu_data_size>=SIZE(matrix)*(elm+1))
  630. {
  631. setChanged();
  632. GpuMatrix *gpu=(GpuMatrix*)_data;
  633. gpu[elm]=matrix;
  634. }
  635. }
  636. void ShaderParam::set(C GpuMatrix *matrix, Int elms)
  637. {
  638. setChanged();
  639. COPY(_data, matrix, Min(_gpu_data_size, SIZEU(*matrix)*elms));
  640. }
  641. void ShaderParam::setConditional(C Flt &f)
  642. {
  643. U32 &dest =*(U32*)_data,
  644. &src =*(U32*)&f ;
  645. if( dest!=src){setChanged(); dest=src;}
  646. }
  647. void ShaderParam::setConditional(C Vec2 &v)
  648. {
  649. Vec2 &dest =*(Vec2*)_data;
  650. if( dest!=v){setChanged(); dest=v;}
  651. }
  652. void ShaderParam::setConditional(C Vec &v)
  653. {
  654. Vec &dest =*(Vec*)_data;
  655. if( dest!=v){setChanged(); dest=v;}
  656. }
  657. void ShaderParam::setConditional(C Vec4 &v)
  658. {
  659. Vec4 &dest =*(Vec4*)_data;
  660. if( dest!=v){setChanged(); dest=v;}
  661. }
  662. void ShaderParam::setConditional(C Rect &r)
  663. {
  664. Rect &dest =*(Rect*)_data;
  665. if( dest!=r){setChanged(); dest=r;}
  666. }
  667. void ShaderParam::setConditional(C Vec &v, Int elm)
  668. {
  669. #if DX9 || DX11
  670. if(_gpu_data_size>=SIZE(Vec4)*elm+SIZE(Vec)) // elements are aligned by 'Vec4' but we're writing only 'Vec'
  671. {
  672. Vec &dest=((Vec4*)_data)[elm].xyz;
  673. if( dest!=v){setChanged(); dest=v;}
  674. }
  675. #elif GL
  676. if(_gpu_data_size>=SIZE(v)*(elm+1))
  677. {
  678. Vec &dest=((Vec*)_data)[elm];
  679. if( dest!=v){setChanged(); dest=v;}
  680. }
  681. #endif
  682. }
  683. void ShaderParam::setSafe(C Vec4 &v) {setChanged(); COPY(_data, &v, Min(_gpu_data_size, SIZEU(v)));}
  684. /******************************************************************************/
  685. // SHADERS
  686. /******************************************************************************/
  687. #if WINDOWS_OLD
  688. ShaderVS9::~ShaderVS9() {if(vs){SyncLocker locker(D._lock); if(vs){if(D.created())vs->Release(); vs=null;}}} // clear while in lock
  689. ShaderPS9::~ShaderPS9() {if(ps){SyncLocker locker(D._lock); if(ps){if(D.created())ps->Release(); ps=null;}}} // clear while in lock
  690. #endif
  691. #if DX11
  692. // lock not needed for DX11 'Release'
  693. ShaderVS11::~ShaderVS11() {if(vs){/*SyncLocker locker(D._lock); if(vs)*/{if(D.created())vs->Release(); vs=null;}}} // clear while in lock
  694. ShaderHS11::~ShaderHS11() {if(hs){/*SyncLocker locker(D._lock); if(hs)*/{if(D.created())hs->Release(); hs=null;}}} // clear while in lock
  695. ShaderDS11::~ShaderDS11() {if(ds){/*SyncLocker locker(D._lock); if(ds)*/{if(D.created())ds->Release(); ds=null;}}} // clear while in lock
  696. ShaderPS11::~ShaderPS11() {if(ps){/*SyncLocker locker(D._lock); if(ps)*/{if(D.created())ps->Release(); ps=null;}}} // clear while in lock
  697. #endif
  698. #if GL_LOCK
  699. ShaderVSGL::~ShaderVSGL() {if(vs){SyncLocker locker(D._lock); if(D.created())glDeleteShader(vs); vs=0;}} // clear while in lock
  700. ShaderPSGL::~ShaderPSGL() {if(ps){SyncLocker locker(D._lock); if(D.created())glDeleteShader(ps); ps=0;}} // clear while in lock
  701. #elif GL
  702. ShaderVSGL::~ShaderVSGL() {if(vs){if(D.created())glDeleteShader(vs); vs=0;}} // no lock needed in this configuration
  703. ShaderPSGL::~ShaderPSGL() {if(ps){if(D.created())glDeleteShader(ps); ps=0;}} // no lock needed in this configuration
  704. #endif
  705. #if DX9
  706. IDirect3DVertexShader9* ShaderVS9::create() {if(!vs && data.elms()){SyncLocker locker(D._lock); if(!vs && data.elms() && D3D){D3D->CreateVertexShader((DWORD*)data.data(), &vs); clean();}} return vs;}
  707. IDirect3DPixelShader9 * ShaderPS9::create() {if(!ps && data.elms()){SyncLocker locker(D._lock); if(!ps && data.elms() && D3D){D3D->CreatePixelShader ((DWORD*)data.data(), &ps); clean();}} return ps;}
  708. #elif DX11
  709. // a lock is not needed for the DX11 'D3D' object itself, however we still need one because this may get called from multiple threads at the same time; a separate lock (instead of 'D._lock') lets shaders be created while rendering holds 'D._lock'
  710. static SyncLock ShaderLock; // use custom lock instead of 'D._lock' to allow shader creation while rendering
  711. ID3D11VertexShader* ShaderVS11::create() {if(!vs && data.elms()){SyncLocker locker(ShaderLock); if(!vs && data.elms() && D3D){D3D->CreateVertexShader(data.data(), data.elms(), null, &vs); clean();}} return vs;}
  712. ID3D11HullShader * ShaderHS11::create() {if(!hs && data.elms()){SyncLocker locker(ShaderLock); if(!hs && data.elms() && D3D){D3D->CreateHullShader (data.data(), data.elms(), null, &hs); clean();}} return hs;}
  713. ID3D11DomainShader* ShaderDS11::create() {if(!ds && data.elms()){SyncLocker locker(ShaderLock); if(!ds && data.elms() && D3D){D3D->CreateDomainShader(data.data(), data.elms(), null, &ds); clean();}} return ds;}
  714. ID3D11PixelShader * ShaderPS11::create() {if(!ps && data.elms()){SyncLocker locker(ShaderLock); if(!ps && data.elms() && D3D){D3D->CreatePixelShader (data.data(), data.elms(), null, &ps); clean();}} return ps;}
  715. #elif GL
  716. static void SetMaxMatrix(Str8 &code)
  717. {
  718. #if VARIABLE_MAX_MATRIX
  719. #error TODO: change 'Replace' to something else because it's slow
  720. if(D.meshBoneSplit())
  721. {
  722. code=Replace(code, "MAX_MATRIX 256" , "MAX_MATRIX 60" , true); // hand written GLSL
  723. code=Replace(code, "ViewMatrix[768]", "ViewMatrix[180]", true); // from CG, 256*3, 60*3
  724. code=Replace(code, "ObjVel[256]", "ObjVel[60]" , true); // from CG
  725. code=Replace(code, "FurVel[256]", "FurVel[60]" , true); // from CG
  726. }else
  727. {
  728. #if 0 // not needed because shaders by default have these values
  729. code=Replace(code, "MAX_MATRIX 60" , "MAX_MATRIX 256" , true);
  730. code=Replace(code, "ViewMatrix[180]", "ViewMatrix[768]", true); // 60*3, 256*3
  731. code=Replace(code, "ObjVel[60]" , "ObjVel[256]", true);
  732. code=Replace(code, "FurVel[60]" , "FurVel[256]", true);
  733. #endif
  734. }
  735. #endif
  736. }
  737. CChar8* GLSLVersion()
  738. {
  739. switch(D.shaderModel())
  740. {
  741. default : return ""; // avoid null in case some drivers will crash
  742. case SM_GL : return "#version 330\n"; // needed for Mac and Win when using GL3
  743. case SM_GL_ES_3: return "#version 300 es\n";
  744. }
  745. }
  746. static SyncLock ShaderLock; // use custom lock instead of 'D._lock' to allow shader creation while rendering
  747. UInt ShaderVSGL::create(Bool clean, Str *messages)
  748. {
  749. if(!vs && data.elms())
  750. {
  751. SyncLocker locker(GL_LOCK ? D._lock : ShaderLock);
  752. if(!vs && data.elms())
  753. {
  754. UInt vs=glCreateShader(GL_VERTEX_SHADER); if(!vs)Exit("Can't create GL_VERTEX_SHADER"); // create into temp var first and set to this only after fully initialized
  755. File src, temp; src.readMem(data.data(), data.elms()); Decompress(src, temp, true); temp.pos(0); // decompress shader
  756. Str8 code; temp.getStr(code); // read code
  757. SetMaxMatrix(code);
  758. #if GL_ES
  759. for(; CChar8 *gl=TextPos(code, "gl_ClipDistance"); ){Char8 *t=(Char8*)gl; t[0]=t[1]='/';} // VS plane clipping not available on GLES 2 and 3
  760. #endif
  761. CChar8 *srcs[]={GLSLVersion(), code}; // version must be first
  762. glShaderSource(vs, Elms(srcs), srcs, null); glCompileShader(vs); // compile
  763. int ok; glGetShaderiv(vs, GL_COMPILE_STATUS, &ok);
  764. if( ok)T.vs=vs;else // set to this only after all finished, so if another thread runs this method, it will detect 'vs' presence only after it was fully initialized
  765. {
  766. if(messages)
  767. {
  768. Char8 error[64*1024]; error[0]=0; glGetShaderInfoLog(vs, Elms(error), null, error);
  769. messages->line()+=(S+"Vertex Shader compilation failed:\n"+error).line()+"Vertex Shader code:\n";
  770. FREPA(srcs)*messages+=srcs[i];
  771. messages->line();
  772. }
  773. glDeleteShader(vs); //vs=0;
  774. }
  775. if(clean)T.clean();
  776. }
  777. }
  778. return vs;
  779. }
  780. UInt ShaderPSGL::create(Bool clean, Str *messages)
  781. {
  782. if(!ps && data.elms())
  783. {
  784. SyncLocker locker(GL_LOCK ? D._lock : ShaderLock);
  785. if(!ps && data.elms())
  786. {
  787. UInt ps=glCreateShader(GL_FRAGMENT_SHADER); if(!ps)Exit("Can't create GL_FRAGMENT_SHADER"); // create into temp var first and set to this only after fully initialized
  788. File src, temp; src.readMem(data.data(), data.elms()); Decompress(src, temp, true); temp.pos(0); // decompress shader
  789. Str8 code; temp.getStr(code); // read code
  790. SetMaxMatrix(code);
  791. #if GL_ES
  792. //for(Char8 *gl=(Char8*)code(); gl=(Char8*)TextPos(gl, "gl_FragDepth", true, true); )gl[0]=gl[1]='/'; // 'gl_FragDepth' is not supported in GL_ES 2
  793. if(!D._shader_tex_lod) // if shader Tex LOD is not supported then we have to replace it with normal texture reads; do this by inserting a define that maps texture2DLodEXT->texture2D, which must be placed after all #extension directives
  794. {
  795. Char8 last='\n'; // allow inserting at the start
  796. FREPA(code)
  797. {
  798. if(last=='\n' && !Starts(code()+i, "#extension ", true)) // have to check for "#extension" and not "#", because "precision" can be used within # blocks
  799. {
  800. code.insert(i, "#define texture2DLodEXT(img, uv, i) texture2D(img, uv)\n");
  801. break;
  802. }
  803. last=code[i];
  804. }
  805. }
  806. #endif
  807. // if MRT is not supported then disable it in the shader code; replace "\nRT.." instead of "RT=" because it can also be "RT.xyz=", and check for the preceding new line so that declarations like "layout(location=1) out HP vec4 RT1;" and "#define RT1 gl_FragData[1]" are left untouched
  808. if(D._max_rt<2)for(Char8 *gl=(Char8*)code(); gl=(Char8*)TextPos(gl, "\nRT1", true, true); )gl[1]=gl[2]='/'; // start replacing with index=1, to keep '\n' and change RT into //
  809. if(D._max_rt<3)for(Char8 *gl=(Char8*)code(); gl=(Char8*)TextPos(gl, "\nRT2", true, true); )gl[1]=gl[2]='/';
  810. if(D._max_rt<4)for(Char8 *gl=(Char8*)code(); gl=(Char8*)TextPos(gl, "\nRT3", true, true); )gl[1]=gl[2]='/';
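/* Editorial example of the replacement above: since 'gl' points at the '\n', setting gl[1] and gl[2]
   to '/' turns a line such as "RT1.xyz=color;" into "//1.xyz=color;", i.e. the whole assignment is
   commented out while the leading new line (and any mid-line uses of RT1 in declarations) stays intact. */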
  811. CChar8 *srcs[]={GLSLVersion(), code}; // version must be first
  812. glShaderSource(ps, Elms(srcs), srcs, null); glCompileShader(ps); // compile
  813. int ok; glGetShaderiv(ps, GL_COMPILE_STATUS, &ok);
  814. if( ok)T.ps=ps;else // set to this only after all finished, so if another thread runs this method, it will detect 'ps' presence only after it was fully initialized
  815. {
  816. if(messages)
  817. {
  818. Char8 error[64*1024]; error[0]=0; glGetShaderInfoLog(ps, Elms(error), null, error);
  819. messages->line()+=(S+"Pixel Shader compilation failed:\n"+error).line()+"Pixel Shader code:\n";
  820. FREPA(srcs)*messages+=srcs[i];
  821. messages->line();
  822. }
  823. glDeleteShader(ps); //ps=0;
  824. }
  825. if(clean)T.clean();
  826. }
  827. }
  828. return ps;
  829. }
  830. static Str ShaderSource(UInt shader)
  831. {
  832. Char8 source[64*1024]; if(shader)glGetShaderSource(shader, SIZE(source), null, source);else source[0]=0;
  833. return source;
  834. }
  835. Str ShaderVSGL::source()
  836. {
  837. return ShaderSource(vs);
  838. }
  839. Str ShaderPSGL::source()
  840. {
  841. return ShaderSource(ps);
  842. }
  843. #endif
  844. /******************************************************************************/
  845. // SHADER TECHNIQUE
  846. /******************************************************************************/
  847. #if WINDOWS_OLD
  848. Shader9::Shader9()
  849. {
  850. vs_index=
  851. ps_index=-1;
  852. vs=null;
  853. ps=null;
  854. }
  855. #endif
  856. #if WINDOWS
  857. Shader11::Shader11()
  858. {
  859. vs_index=
  860. hs_index=
  861. ds_index=
  862. ps_index=-1;
  863. vs=null;
  864. hs=null;
  865. ds=null;
  866. ps=null;
  867. }
  868. #endif
  869. /******************************************************************************/
  870. #if DX9
  871. // these members must have native alignment because we use atomic operations to set them from multiple threads
  872. ALIGN_ASSERT(Shader9, vs);
  873. ALIGN_ASSERT(Shader9, ps);
  874. Bool Shader9::validate(ShaderFile &shader, Str *messages) // this function should be multi-threaded safe
  875. {
  876. if(!vs && InRange(vs_index, shader._vs))AtomicSet(vs, shader._vs[vs_index].create());
  877. if(!ps && InRange(ps_index, shader._ps))AtomicSet(ps, shader._ps[ps_index].create());
  878. return vs && ps;
  879. }
  880. #if CACHE_DX9_CONSTANTS
  881. static Byte VSConstantMem[MAX_DX9_SHADER_CONSTANT];
  882. static Byte PSConstantMem[MAX_DX9_SHADER_CONSTANT];
  883. static INLINE Bool SetConstantMem(Byte *mem, C Shader9::Constant &c)
  884. {
  885. Ptr dest=mem+c.start*SIZE(Vec4); Int size=*c.final_count*SIZE(Vec4);
  886. if(EqualMem(dest, c.data, size))return false;
  887. CopyFast(dest, c.data, size);return true ;
  888. }
  889. static INLINE void SetVSConstant(C Shader9::Constant &c) {if(SetConstantMem(VSConstantMem, c))D3D->SetVertexShaderConstantF(c.start, (Flt*)c.data, *c.final_count);}
  890. static INLINE void SetPSConstant(C Shader9::Constant &c) {if(SetConstantMem(PSConstantMem, c))D3D-> SetPixelShaderConstantF(c.start, (Flt*)c.data, *c.final_count);}
  891. #else
  892. static INLINE void SetVSConstant(C Shader9::Constant &c) {D3D->SetVertexShaderConstantF(c.start, (Flt*)c.data, *c.final_count);}
  893. static INLINE void SetPSConstant(C Shader9::Constant &c) {D3D-> SetPixelShaderConstantF(c.start, (Flt*)c.data, *c.final_count);}
  894. #endif
  895. void Shader9::commit()
  896. {
  897. REPA(vs_constants){Constant &c=vs_constants[i]; if(*c.changed)SetVSConstant(c);}
  898. REPA(ps_constants){Constant &c=ps_constants[i]; if(*c.changed)SetPSConstant(c);}
  899. // reset 'changed' only after all commits, in case constants point to parts of shader params (in that case setting one part and clearing 'changed' immediately would prevent the remaining parts of the same shader param from being set)
  900. REPA(vs_constants)(*vs_constants[i].changed)=false;
  901. REPA(ps_constants)(*ps_constants[i].changed)=false;
  902. }
  903. void Shader9::commitTex()
  904. {
  905. REPA(textures){C Texture &t=textures[i]; D.texPS(t.index, t.image->getBase()); if(t.image->_sampler)t.image->_sampler->set(t.index);}
  906. }
  907. void Shader9::start() // same as 'begin' but without committing constants and textures
  908. {
  909. ShaderCur=this;
  910. D3D->SetVertexShader(vs);
  911. D3D->SetPixelShader (ps);
  912. REPA(vs_constants)*vs_constants[i].changed=true; // mark all as changed to make sure next 'commit' will set them
  913. REPA(ps_constants)*ps_constants[i].changed=true; // mark all as changed to make sure next 'commit' will set them
  914. }
  915. void Shader9::begin()
  916. {
  917. ShaderCur=this;
  918. D3D->SetVertexShader(vs);
  919. D3D->SetPixelShader (ps);
  920. REPA(textures ){C Texture &t= textures[i]; D.texPS(t.index, t.image->getBase()); if(t.image->_sampler)t.image->_sampler->set(t.index);}
  921. REPA(vs_constants){ Constant &c=vs_constants[i]; SetVSConstant(c); *c.changed=false;}
  922. REPA(ps_constants){ Constant &c=ps_constants[i]; SetPSConstant(c); *c.changed=false;}
  923. }
  924. void ShaderEnd()
  925. {
  926. for(; RestoreSamplers; )
  927. {
  928. Byte index=RestoreSamplerIndex[--RestoreSamplers];
  929. D3D->SetSamplerState(index, D3DSAMP_MINFILTER, D._sampler_filter[0]);
  930. D3D->SetSamplerState(index, D3DSAMP_MAGFILTER, D._sampler_filter[1]);
  931. D3D->SetSamplerState(index, D3DSAMP_MIPFILTER, D._sampler_filter[2]);
  932. D3D->SetSamplerState(index, D3DSAMP_ADDRESSU , D._sampler_address );
  933. D3D->SetSamplerState(index, D3DSAMP_ADDRESSV , D._sampler_address );
  934. D3D->SetSamplerState(index, D3DSAMP_ADDRESSW , D._sampler_address );
  935. }
  936. }
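/* Editorial note: the loop above undoes the DX9 sampler overrides recorded earlier -
   'ShaderImage::Sampler::set' (DX9 branch near the top of this file) pushes each slot it touches
   onto 'RestoreSamplerIndex', and 'ShaderEnd' pops them and restores the default filter/address
   states from 'D._sampler_filter'/'D._sampler_address'. */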
  937. #elif DX11
  938. // these members must have native alignment because we use atomic operations to set them from multiple threads
  939. ALIGN_ASSERT(Shader11, vs);
  940. ALIGN_ASSERT(Shader11, hs);
  941. ALIGN_ASSERT(Shader11, ds);
  942. ALIGN_ASSERT(Shader11, ps);
  943. Bool Shader11::validate(ShaderFile &shader, Str *messages) // this function should be multi-threaded safe
  944. {
  945. if(!vs && InRange(vs_index, shader._vs))AtomicSet(vs, shader._vs[vs_index].create());
  946. if(!hs && InRange(hs_index, shader._hs))AtomicSet(hs, shader._hs[hs_index].create());
  947. if(!ds && InRange(ds_index, shader._ds))AtomicSet(ds, shader._ds[ds_index].create());
  948. if(!ps && InRange(ps_index, shader._ps))AtomicSet(ps, shader._ps[ps_index].create());
  949. return vs && ps;
  950. }
  951. #if 0 // did not make any performance difference (set together with 'SetPrimitiveTopology' from "Vertex Index Buffer.cpp")
  952. static ID3D11VertexShader *VS; static INLINE void SetVS(ID3D11VertexShader *shader) {if(VS!=shader || Kb.shift())D3DC->VSSetShader(VS=shader, null, 0);}
  953. static ID3D11HullShader *HS; static INLINE void SetHS(ID3D11HullShader *shader) {if(HS!=shader || Kb.shift())D3DC->HSSetShader(HS=shader, null, 0);}
  954. static ID3D11DomainShader *DS; static INLINE void SetDS(ID3D11DomainShader *shader) {if(DS!=shader || Kb.shift())D3DC->DSSetShader(DS=shader, null, 0);}
  955. static ID3D11PixelShader *PS; static INLINE void SetPS(ID3D11PixelShader *shader) {if(PS!=shader || Kb.shift())D3DC->PSSetShader(PS=shader, null, 0);}
  956. static D3D11_PRIMITIVE_TOPOLOGY PT; INLINE void SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY pt) {if(PT!=pt || Kb.shift())D3DC->IASetPrimitiveTopology(PT=pt);}
  957. #else
  958. static INLINE void SetVS(ID3D11VertexShader *shader) {D3DC->VSSetShader(shader, null, 0);}
  959. static INLINE void SetHS(ID3D11HullShader *shader) {D3DC->HSSetShader(shader, null, 0);}
  960. static INLINE void SetDS(ID3D11DomainShader *shader) {D3DC->DSSetShader(shader, null, 0);}
  961. static INLINE void SetPS(ID3D11PixelShader *shader) {D3DC->PSSetShader(shader, null, 0);}
  962. static INLINE void SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY pt) {D3DC->IASetPrimitiveTopology(pt);}
  963. #endif
  964. void Shader11::commit()
  965. {
  966. REPA(buffers){ShaderBuffer &b=*buffers[i]; if(b.changed)b.update();}
  967. }
  968. void Shader11::commitTex()
  969. {
  970. if(hs)
  971. {
  972. REPA(hs_textures){C Texture &t=hs_textures[i]; D.texHS(t.index, t.image->getSRV());}
  973. REPA(ds_textures){C Texture &t=ds_textures[i]; D.texDS(t.index, t.image->getSRV());}
  974. }
  975. REPA(vs_textures){C Texture &t=vs_textures[i]; D.texVS(t.index, t.image->getSRV());}
  976. REPA(ps_textures){C Texture &t=ps_textures[i]; D.texPS(t.index, t.image->getSRV());}
  977. }
  978. void Shader11::start() // same as 'begin' but without committing buffers and textures
  979. {
  980. SetVS(vs);
  981. SetPS(ps);
  982. if(hs/* && D.tesselationAllow()*/) // currently disabled to avoid extra overhead as tessellation isn't generally used, TODO:
  983. {
  984. SetHS(hs);
  985. SetDS(ds);
  986. SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST);
  987. REPA(hs_buffers){C Buffer &b=hs_buffers[i]; BufHS(b.index, b.buffer->buffer.buffer);}
  988. REPA(ds_buffers){C Buffer &b=ds_buffers[i]; BufDS(b.index, b.buffer->buffer.buffer);}
  989. }else
  990. {
  991. SetHS(null);
  992. SetDS(null);
  993. SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
  994. }
  995. REPA(vs_buffers){C Buffer &b=vs_buffers[i]; BufVS(b.index, b.buffer->buffer.buffer);}
  996. REPA(ps_buffers){C Buffer &b=ps_buffers[i]; BufPS(b.index, b.buffer->buffer.buffer);}
  997. }
  998. void Shader11::begin()
  999. {
  1000. SetVS(vs);
  1001. SetPS(ps);
  1002. if(hs/* && D.tesselationAllow()*/) // currently disabled to avoid extra overhead as tessellation isn't generally used, TODO:
  1003. {
  1004. SetHS(hs);
  1005. SetDS(ds);
  1006. SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST);
  1007. REPA(hs_textures){C Texture &t=hs_textures[i]; D.texHS(t.index, t.image->getSRV());}
  1008. REPA(ds_textures){C Texture &t=ds_textures[i]; D.texDS(t.index, t.image->getSRV());}
  1009. REPA(hs_buffers ){C Buffer &b=hs_buffers [i]; BufHS(b.index, b.buffer->buffer.buffer);}
  1010. REPA(ds_buffers ){C Buffer &b=ds_buffers [i]; BufDS(b.index, b.buffer->buffer.buffer);}
  1011. }else
  1012. {
  1013. SetHS(null);
  1014. SetDS(null);
  1015. SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
  1016. }
  1017. REPA(vs_textures){C Texture &t=vs_textures[i]; D.texVS(t.index, t.image->getSRV());}
  1018. REPA(ps_textures){C Texture &t=ps_textures[i]; D.texPS(t.index, t.image->getSRV());}
  1019. REPA(vs_buffers ){C Buffer &b=vs_buffers [i]; BufVS(b.index, b.buffer->buffer.buffer);}
  1020. REPA(ps_buffers ){C Buffer &b=ps_buffers [i]; BufPS(b.index, b.buffer->buffer.buffer);}
  1021. REPA( buffers ){ShaderBuffer &b= *buffers [i]; if(b.changed)b.update();}
  1022. }
  1023. #elif GL
  1024. ShaderGL::ShaderGL()
  1025. {
  1026. vs=ps=prog=0;
  1027. vs_index=ps_index=-1;
  1028. }
  1029. ShaderGL::~ShaderGL()
  1030. {
  1031. if(prog)
  1032. {
  1033. SyncLocker locker(D._lock); if(D.created())glDeleteProgram(prog); prog=0; // clear while in lock
  1034. }
  1035. }
  1036. Str ShaderGL::source()
  1037. {
  1038. return S+"Vertex Shader:\n"+ShaderSource(vs)
  1039. +"\nPixel Shader:\n"+ShaderSource(ps);
  1040. }
  1041. UInt ShaderGL::compileEx(MemPtr<ShaderVSGL> vs_array, MemPtr<ShaderPSGL> ps_array, Bool clean, ShaderFile *shader, Str *messages) // this function doesn't need to be multi-threaded safe, it's called by 'validate' where it's already surrounded by a lock, and by 'compile' during shader pre-processing (where it's called for the same object only from the same thread), GL thread-safety should be handled outside of this function
  1042. {
  1043. // prepare shaders
  1044. if(messages)messages->clear();
  1045. if(!vs && InRange(vs_index, vs_array)){if(LogInit)LogN(S+"Compiling vertex shader in technique \""+name+"\" of shader \""+ShaderFiles.name(shader)+"\""); vs=vs_array[vs_index].create(clean, messages);} // no need for 'AtomicSet' because we don't need to be multi-thread safe here
  1046. if(!ps && InRange(ps_index, ps_array)){if(LogInit)LogN(S+ "Compiling pixel shader in technique \""+name+"\" of shader \""+ShaderFiles.name(shader)+"\""); ps=ps_array[ps_index].create(clean, messages);} // no need for 'AtomicSet' because we don't need to be multi-thread safe here
  1047. // prepare program
  1048. UInt prog=0; // have to operate on temp variable, so we can return it to 'validate' which still has to do some things before setting it into 'this'
  1049. if(vs && ps)
  1050. {
  1051. if(LogInit)Log(S+"Linking vertex+pixel shader in technique \""+name+"\" of shader \""+ShaderFiles.name(shader)+"\": ");
  1052. prog=glCreateProgram(); if(!prog)Exit("Can't create GL Shader Program");
  1053. FREP(16)
  1054. {
  1055. Char8 name[16], temp[256]; Set(name, "ATTR"); Append(name, TextInt(i, temp));
  1056. glBindAttribLocation(prog, VtxSemanticToIndex(i), name);
  1057. }
  1058. glAttachShader(prog, vs);
  1059. glAttachShader(prog, ps);
  1060. glLinkProgram (prog);
  1061. int ok; glGetProgramiv(prog, GL_LINK_STATUS, &ok);
  1062. if(!ok)
  1063. {
  1064. int max_length; glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &max_length);
  1065. Mems<char> error; error.setNumZero(max_length+1); glGetProgramInfoLog(prog, max_length, null, error.data());
  1066. if(messages)messages->line()+=(S+"Error linking vertex+pixel shader in technique \""+name+"\" of shader \""+ShaderFiles.name(shader)+"\"\n"+error.data()).line()+source().line();
  1067. glDeleteProgram(prog); prog=0;
  1068. }
  1069. if(LogInit)LogN("Success");
  1070. }
  1071. return prog;
  1072. }
  1073. void ShaderGL::compile(MemPtr<ShaderVSGL> vs_array, MemPtr<ShaderPSGL> ps_array, Str *messages) // this function doesn't need to be multi-threaded safe, it's called only during shader pre-processing
  1074. {
  1075. #if GL_LOCK
  1076. SyncLocker locker(D._lock);
  1077. #endif
  1078. if(!prog)prog=compileEx(vs_array, ps_array, false, null, messages);
  1079. }
Bool ShaderGL::validate(ShaderFile &shader, Str *messages) // this function should be thread-safe
{
   if(prog || !D.canDraw())return true; // skip shader compilation if we don't need it (this is because compiling shaders on Linux with no GPU can exit the app with a message like "Xlib: extension "XFree86-VidModeExtension" missing on display ":99".")
   SyncLocker locker(GL_LOCK ? D._lock : ShaderLock);
   if(!prog)
      if(UInt prog=compileEx(shader._vs, shader._ps, true, &shader, messages)) // create into a temp var first and set into 'this' only after fully initialized
      {
         MemtN<Texture , 256> textures;
         MemtN<Constant, 256> constants;
         Int params=0; glGetProgramiv(prog, GL_ACTIVE_UNIFORMS, &params);
         FREP(params)
         {
            // GLSL name
            Char8 glsl_name[1024]; glsl_name[0]=0;
            Int size=0;
            GLenum type;
            glGetActiveUniform(prog, i, Elms(glsl_name), null, &size, &type, glsl_name);
            Bool found=false;
            if(type==GL_SAMPLER_2D || type==GL_SAMPLER_CUBE
            #ifdef GL_SAMPLER_3D
            || type==GL_SAMPLER_3D
            #endif
            #ifdef GL_SAMPLER_2D_SHADOW
            || type==GL_SAMPLER_2D_SHADOW
            #endif
            #ifdef GL_SAMPLER_2D_SHADOW_EXT
            || type==GL_SAMPLER_2D_SHADOW_EXT
            #endif
            )
            {
               Int tex_unit=textures.elms(); if(!InRange(tex_unit, Tex))Exit(S+"Texture index: "+tex_unit+", is too big");
               Int location=glGetUniformLocation(prog, glsl_name); if(location<0)
               {
               #if WEB // this can happen on MS Edge for textures that aren't actually used
                  LogN
               #else
                  Exit
               #endif
                     (S+"Invalid Uniform Location ("+location+") of GLSL Parameter \""+glsl_name+"\"");
                  continue;
               }
               textures.New().set(tex_unit, *GetShaderImage(glsl_name));
               glUseProgram(prog);
               glUniform1i (location, tex_unit); // set 'location' sampler to use the 'tex_unit' texture unit
               found=true;
            }else
            {
               REPA(glsl_params)
               {
                  GLSLParam   &gp=glsl_params[i];
                  ShaderParam &sp=*gp.param;
                  C Str8 &gp_name=ShaderParams.dataInMapToKey(sp);
                  if(Equal(gp_name     , glsl_name, true)
                  || Equal(gp.glsl_name, glsl_name, true))
                  {
                     if(gp.gpu_offset+SIZE(Flt)>sp._gpu_data_size)Exit(S+"Shader \""+name+"\" refers to Shader Param \""+gp_name+"\" with an invalid offset");
                     Int l=glGetUniformLocation(prog, glsl_name); if(l<0)Exit(S+"Invalid Uniform Location ("+l+") of GLSL Parameter \""+glsl_name+"\"");
                     Constant &c=constants.New();
                     c.set(l, size, sp._data+gp.gpu_offset, sp);
                     switch(type)
                     {
                        case GL_FLOAT     : c.uniform=glUniform1fv; break;
                        case GL_FLOAT_VEC2: c.uniform=glUniform2fv; break;
                        case GL_FLOAT_VEC3: c.uniform=glUniform3fv; break;
                        case GL_FLOAT_VEC4: c.uniform=glUniform4fv; break;
                        default           : Exit("Unrecognized Shader Parameter OpenGL Uniform Type"); break;
                     }
                     found=true; break;
                  }
               }
            }
            if(!found)
            {
               // Some OpenGL drivers (ATI or Apple) aren't very good at optimizing shaders, so they may report
               // variables that should have been optimized away; in that case just ignore them.
            #if DEBUG && !GL_ES
               Str s=S+"Unrecognized GLSL Parameter \""+glsl_name+"\"";
               LogN(s); // Exit(s);
            #endif
            }
         }
         T. textures= textures;
         T.constants=constants;
         // GL constants should not be joined/merged, because as noted in the 'glUniform*' docs: "GL_INVALID_OPERATION is generated if count is greater than 1 and the indicated uniform variable is not an array variable" (see the sketch after this function)
         // adjust the final count after creating all constants (needed because constants are created dynamically inside, and 'final_count' may point to itself)
         REPA(T.constants)
         {
            Constant &c=T.constants[i];
            c.final_count=((c.sp==Sh.h_ViewMatrix
                         || c.sp==Sh.h_ObjVel
                         || c.sp==Sh.h_FurVel) ? &c.sp->_constant_count : &c.count); // if this constant is resizable, then point to the 'ShaderParam' count because it might be resized later, otherwise use what was given; 'fullConstantCount' can't be used here because it works only for Vec4's
         }
         // release what's no longer needed
         glsl_params.del();
       //glsl_images.del();
         // !! at the end !!
         T.prog=prog; // set into 'this' only after everything is finished, so that if another thread runs this method, it will detect 'prog' presence only after it was fully initialized
      }
   return prog!=0;
}
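/* Why constants are never merged - a minimal GLSL sketch (hypothetical uniform names): 'glUniform*'
   accepts count>1 only for array uniforms, so two adjacent non-array uniforms can't be uploaded with
   a single call even if they sit next to each other in the engine-side buffer:
      uniform vec4 AmbientColor; // non-array: glUniform4fv(loc, 1, data) is the only valid count
      uniform vec4 Bones[64];    // array:     glUniform4fv(loc, 64, data) is fine
   calling glUniform4fv(loc_of_AmbientColor, 2, data) would raise GL_INVALID_OPERATION. */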
void ShaderGL::commit()
{
   REPA(constants){Constant &c=constants[i]; if(*c.changed)c.uniform(c.index, *c.final_count, (Flt*)c.data);}
   // reset 'changed' only after all commits, because constants may point to parts of the same shader param (clearing the flag while setting one part would prevent setting the remaining parts of that shader param)
   REPA(constants)(*constants[i].changed)=false;
}
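/* Why 'changed' is cleared in a second pass - a minimal sketch (hypothetical GLSL/param names):
      // one engine-side ShaderParam "Fog" may end up split into two separate GLSL uniforms:
      //    uniform vec4  FogColor;   // -> Constant A, A.changed == &Fog.changed
      //    uniform float FogDensity; // -> Constant B, B.changed == &Fog.changed
   If A cleared the shared flag right after its own upload, the loop would then skip B and the GPU
   would keep a stale FogDensity, hence: upload everything first, clear all flags afterwards. */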
void ShaderGL::commitTex()
{
   REPA(textures){Texture &t=textures[i]; SetTexture(t.index, t.image->get(), t.image->_sampler);}
}
void ShaderGL::start() // same as 'begin' but without committing constants and textures
{
   ShaderCur=this;
   glUseProgram(prog);
   REPA(constants)*constants[i].changed=true; // mark all as changed to make sure next 'commit' will set them
}
void ShaderGL::begin()
{
   ShaderCur=this;
   glUseProgram(prog);
   REPA(textures ){Texture  &t= textures[i]; SetTexture(t.index, t.image->get(), t.image->_sampler);}
   REPA(constants){Constant &c=constants[i]; c.uniform(c.index, *c.final_count, (Flt*)c.data); *c.changed=false;}
}
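/* Usage sketch (a hypothetical call site, an assumption about how callers combine these): 'begin'
   binds the program and uploads everything at once, while 'start' only binds and marks constants
   dirty, deferring uploads to later 'commit'/'commitTex' calls - useful when shader params will
   still change between binding and the draw call:
      shader->start    (); // bind the program, mark all constants as changed
      // ... set/update shader params here ...
      shader->commit   (); // now upload the constants
      shader->commitTex(); // and bind the textures
      // ... issue the draw call ... */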
#endif
/******************************************************************************/
// MANAGE
/******************************************************************************/
ShaderFile::ShaderFile()
{
   // !! keep constructor here to properly initialize containers, because type sizes and constructors are hidden !!
}
void ShaderFile::del()
{
   // !! keep this to properly delete '_shaders', because type sizes and constructors are hidden !!
   _shaders.del(); // first delete this, then individual shaders
   _vs     .del();
   _hs     .del();
   _ds     .del();
   _ps     .del();
}
/******************************************************************************/
// GET / SET
/******************************************************************************/
Shader* ShaderFile::first()
{
   if(_shaders.elms())
   {
      Shader &shader=_shaders.first(); if(shader.validate(T))return &shader;
   }
   return null;
}
Shader* ShaderFile::find(C Str8 &name, Str *messages)
{
   if(name.is())for(Int l=0, r=_shaders.elms(); l<r; ) // binary search over '_shaders', which is kept sorted by technique name
   {
      Int mid    =UInt(l+r)/2,
          compare=Compare(name, _shaders[mid].name, true);
      if(!compare  ){Shader &shader=_shaders[mid]; return shader.validate(T, messages) ? &shader : null;}
      if( compare<0)r=mid;
      else          l=mid+1;
   }
   if(messages)*messages="Technique not found in shader.";
   return null;
}
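/* Usage sketch (hypothetical shader file / technique names): 'find' returns null when the technique
   is missing or fails to validate, while 'get' exits the app with a descriptive message, so 'find'
   suits optional techniques and 'get' mandatory ones:
      if(ShaderFile *sf=ShaderFiles("My Shaders"))
      {
         Shader *optional =sf->find("Fancy Blur" ); // may return null
         Shader *mandatory=sf->get ("Simple Blur"); // exits on failure
      } */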
Shader* ShaderFile::find(C Str8 &name)
{
   return find(name, null);
}
Shader* ShaderFile::get(C Str8 &name)
{
   if(name.is())
   {
      Str messages;
      if(Shader *shader=find(name, &messages))return shader;
      Exit(S+"Error accessing Shader \""+name+"\" in ShaderFile \""+ShaderFiles.name(this)+"\"."+(messages.is() ? S+"\n"+messages : S));
   }
   return null;
}
/******************************************************************************/
// DRAW
/******************************************************************************/
void Shader::draw(C Image *image, C Rect *rect)
{
   VI.image  (image);
   VI.shader (this );
   VI.setType(VI_2D_TEX, VI_STRIP);
   if(image)Sh.h_ColSize->set(Vec4(1.0f/image->hwSize(), image->hwSize()));
   if(Vtx2DTex *v=(Vtx2DTex*)VI.addVtx(4))
   {
      if(!D._view_active.full || rect)
      {
         C RectI &viewport=D._view_active.recti; RectI recti;
         if(!rect)
         {
            recti=viewport;
            v[0].pos.set(-1,  1);
            v[1].pos.set( 1,  1);
            v[2].pos.set(-1, -1);
            v[3].pos.set( 1, -1);
         }else
         {
            recti=Renderer.screenToPixelI(*rect);
            Bool flip_x=(recti.max.x<recti.min.x),
                 flip_y=(recti.max.y<recti.min.y);
            if( flip_x)Swap(recti.min.x, recti.max.x);
            if( flip_y)Swap(recti.min.y, recti.max.y);
            if(!Cuts(recti, viewport)){VI.clear(); return;}
            Flt xm=2.0f/viewport.w(),
                ym=2.0f/viewport.h();
            Rect frac((recti.min.x-viewport.min.x)*xm-1, (viewport.max.y-recti.max.y)*ym-1,
                      (recti.max.x-viewport.min.x)*xm-1, (viewport.max.y-recti.min.y)*ym-1);
            if(flip_x)Swap(frac.min.x, frac.max.x);
            if(flip_y)Swap(frac.min.y, frac.max.y);
            v[0].pos.set(frac.min.x, frac.max.y);
            v[1].pos.set(frac.max.x, frac.max.y);
            v[2].pos.set(frac.min.x, frac.min.y);
            v[3].pos.set(frac.max.x, frac.min.y);
         }
         Rect tex(Flt(recti.min.x)/Renderer.resW(), Flt(recti.min.y)/Renderer.resH(),
                  Flt(recti.max.x)/Renderer.resW(), Flt(recti.max.y)/Renderer.resH());
         v[0].tex.set(tex.min.x, tex.min.y);
         v[1].tex.set(tex.max.x, tex.min.y);
         v[2].tex.set(tex.min.x, tex.max.y);
         v[3].tex.set(tex.max.x, tex.max.y);
      }else
      {
         v[0].pos.set(-1,  1);
         v[1].pos.set( 1,  1);
         v[2].pos.set(-1, -1);
         v[3].pos.set( 1, -1);
         v[0].tex.set(0, 0);
         v[1].tex.set(1, 0);
         v[2].tex.set(0, 1);
         v[3].tex.set(1, 1);
      }
   #if GL
      if(!D.mainFBO()) // in OpenGL when drawing to a RenderTarget the 'dest.pos.y' must be flipped
      {
         CHS(v[0].pos.y);
         CHS(v[1].pos.y);
         CHS(v[2].pos.y);
         CHS(v[3].pos.y);
      }
   #endif
   }
   VI.end();
}
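/* Usage sketch (hypothetical image/shader names, assuming a valid render context): draws a quad
   through this shader covering the whole viewport when 'rect' is null, or only the given
   screen-space rectangle otherwise; texture coordinates are derived from the covered pixels:
      my_post_process_shader->draw(&source_image, null);    // whole viewport
      Rect region(-0.5f, -0.5f, 0.5f, 0.5f);
      my_post_process_shader->draw(&source_image, &region); // only the center region */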
void Shader::draw(C Image *image, C Rect *rect, C Rect &tex)
{
   VI.image  (image);
   VI.shader (this );
   VI.setType(VI_2D_TEX, VI_STRIP);
   if(image)Sh.h_ColSize->set(Vec4(1.0f/image->hwSize(), image->hwSize()));
   if(Vtx2DTex *v=(Vtx2DTex*)VI.addVtx(4))
   {
      if(!D._view_active.full || rect)
      {
         C RectI &viewport=D._view_active.recti; RectI recti;
         if(!rect)
         {
            recti=viewport;
            v[0].pos.set(-1,  1);
            v[1].pos.set( 1,  1);
            v[2].pos.set(-1, -1);
            v[3].pos.set( 1, -1);
         }else
         {
            recti=Renderer.screenToPixelI(*rect);
            Bool flip_x=(recti.max.x<recti.min.x),
                 flip_y=(recti.max.y<recti.min.y);
            if( flip_x)Swap(recti.min.x, recti.max.x);
            if( flip_y)Swap(recti.min.y, recti.max.y);
            if(!Cuts(recti, viewport)){VI.clear(); return;}
            Flt xm=2.0f/viewport.w(),
                ym=2.0f/viewport.h();
            Rect frac((recti.min.x-viewport.min.x)*xm-1, (viewport.max.y-recti.max.y)*ym-1,
                      (recti.max.x-viewport.min.x)*xm-1, (viewport.max.y-recti.min.y)*ym-1);
            if(flip_x)Swap(frac.min.x, frac.max.x);
            if(flip_y)Swap(frac.min.y, frac.max.y);
            v[0].pos.set(frac.min.x, frac.max.y);
            v[1].pos.set(frac.max.x, frac.max.y);
            v[2].pos.set(frac.min.x, frac.min.y);
            v[3].pos.set(frac.max.x, frac.min.y);
         }
      }else
      {
         v[0].pos.set(-1,  1);
         v[1].pos.set( 1,  1);
         v[2].pos.set(-1, -1);
         v[3].pos.set( 1, -1);
      }
      v[0].tex.set(tex.min.x, tex.min.y);
      v[1].tex.set(tex.max.x, tex.min.y);
      v[2].tex.set(tex.min.x, tex.max.y);
      v[3].tex.set(tex.max.x, tex.max.y);
   #if GL
      if(!D.mainFBO()) // in OpenGL when drawing to a RenderTarget the 'dest.pos.y' must be flipped
      {
         CHS(v[0].pos.y);
         CHS(v[1].pos.y);
         CHS(v[2].pos.y);
         CHS(v[3].pos.y);
      }
   #endif
   }
   VI.end();
}
/******************************************************************************/
void DisplayState::clearShader()
{
   // set ~0 for pointers because that's the least likely value for them to have
#if DX9
   SetMem(Tex, ~0);
#if CACHE_DX9_CONSTANTS
   Zero(VSConstantMem);
   Zero(PSConstantMem);
#endif
#elif DX11
   SetMem(VSTex , ~0);
   SetMem(HSTex , ~0);
   SetMem(DSTex , ~0);
   SetMem(PSTex , ~0);
   SetMem(vs_buf, ~0);
   SetMem(hs_buf, ~0);
   SetMem(ds_buf, ~0);
   SetMem(ps_buf, ~0);
#elif GL
   SetMem(Tex, ~0);
#endif
}
/******************************************************************************/
// FORWARD RENDERER SHADER TECHNIQUE
/******************************************************************************/
static Int Compare(C FRSTKey &a, C FRSTKey &b)
{
   if(Int c=Compare(a.skin      , b.skin      ))return c;
   if(Int c=Compare(a.materials , b.materials ))return c;
   if(Int c=Compare(a.textures  , b.textures  ))return c;
   if(Int c=Compare(a.bump_mode , b.bump_mode ))return c;
   if(Int c=Compare(a.alpha_test, b.alpha_test))return c;
   if(Int c=Compare(a.light_map , b.light_map ))return c;
   if(Int c=Compare(a.detail    , b.detail    ))return c;
   if(Int c=Compare(a.rflct     , b.rflct     ))return c;
   if(Int c=Compare(a.color     , b.color     ))return c;
   if(Int c=Compare(a.mtrl_blend, b.mtrl_blend))return c;
   if(Int c=Compare(a.heightmap , b.heightmap ))return c;
   if(Int c=Compare(a.fx        , b.fx        ))return c;
   if(Int c=Compare(a.tess      , b.tess      ))return c;
   return 0;
}
static Bool Create(FRST &frst, C FRSTKey &key, Ptr)
{
   ShaderFile *shader_file=ShaderFiles("Forward");
   if(key.bump_mode==SBUMP_ZERO)
   {
      Shader *shader=shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, false, 0, 0, false, 0, false, 0, false, 0, false));
      frst.all_passes=false;
      frst.none=shader;
      frst.dir =shader;
      frst.pnt =shader;
      frst.sqr =shader;
      frst.cone=shader;
      REPAO(frst. dir_shd)=shader;
            frst. pnt_shd =shader;
            frst. sqr_shd =shader;
            frst.cone_shd =shader;
   }else
   {
      frst.all_passes=true;
      frst.none=shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, false, false, 0, false, false, false, false, false, false, key.tess));
      frst.dir =shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, true , false, 0, false, false, false, false, false, false, key.tess));
      frst.pnt =shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, false, false, 0, true , false, false, false, false, false, key.tess));
      frst.sqr =shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, false, false, 0, false, false, true , false, false, false, key.tess));
      frst.cone=shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, false, false, 0, false, false, false, false, true , false, key.tess));
      if(D.shadowSupported())
      {
         REPAO(frst. dir_shd)=shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, true , true , Ceil2(i+1), false, false, false, false, false, false, key.tess));
               frst. pnt_shd =shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, false, false, 0         , true , true , false, false, false, false, key.tess));
               frst. sqr_shd =shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, false, false, 0         , false, false, true , true , false, false, key.tess));
               frst.cone_shd =shader_file->get(TechNameForward(key.skin, key.materials, key.textures, key.bump_mode, key.alpha_test, key.light_map, key.detail, key.rflct, key.color, key.mtrl_blend, key.heightmap, key.fx, false, false, 0         , false, false, false, false, true , true , key.tess));
      }else
      {
         REPAO(frst. dir_shd)=null;
               frst. pnt_shd =null;
               frst. sqr_shd =null;
               frst.cone_shd =null;
      }
   }
   return true;
}
ThreadSafeMap<FRSTKey, FRST> Frsts(Compare, Create);
/******************************************************************************/
// BLEND LIGHT SHADER TECHNIQUE
/******************************************************************************/
static Int Compare(C BLSTKey &a, C BLSTKey &b)
{
   if(Int c=Compare(a.skin      , b.skin      ))return c;
   if(Int c=Compare(a.color     , b.color     ))return c;
   if(Int c=Compare(a.textures  , b.textures  ))return c;
   if(Int c=Compare(a.bump_mode , b.bump_mode ))return c;
   if(Int c=Compare(a.alpha_test, b.alpha_test))return c;
   if(Int c=Compare(a.alpha     , b.alpha     ))return c;
   if(Int c=Compare(a.light_map , b.light_map ))return c;
   if(Int c=Compare(a.rflct     , b.rflct     ))return c;
   if(Int c=Compare(a.fx        , b.fx        ))return c;
   if(Int c=Compare(a.per_pixel , b.per_pixel ))return c;
   return 0;
}
static Bool Create(BLST &blst, C BLSTKey &key, Ptr)
{
   ShaderFile *shader=ShaderFiles("Blend Light");
   blst.dir[0]=shader->get(TechNameBlendLight(key.skin, key.color, key.textures, key.bump_mode, key.alpha_test, key.alpha, key.light_map, key.rflct, key.fx, key.per_pixel, 0));
   if(D.shadowSupported() && key.per_pixel)
   {
      REP(6)blst.dir[i+1]=shader->get(TechNameBlendLight(key.skin, key.color, key.textures, key.bump_mode, key.alpha_test, key.alpha, key.light_map, key.rflct, key.fx, key.per_pixel, i+1));
   }else
   {
      REP(6)blst.dir[i+1]=blst.dir[0];
   }
   return true;
}
ThreadSafeMap<BLSTKey, BLST> Blsts(Compare, Create);
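/* Usage sketch (an assumption about 'ThreadSafeMap', based on how these caches are declared:
   its operator() is expected to return an existing entry matching 'Compare' or lazily build a
   new one via 'Create', from any thread):
      FRSTKey key; // fill skin / materials / textures / bump_mode / ... for the material being drawn
      if(FRST *frst=Frsts(key)) // find or create the forward-renderer technique set for this key
      {
         // pick frst->none / frst->dir / frst->pnt / frst->cone / ... depending on the light pass
      } */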
/******************************************************************************
   can't be used because in RM_PREPARE we add models to the list and lights simultaneously
Shader* FRST::getShader()
{
   return *(Shader**)(((Byte*)this)+Renderer._frst_light_offset);
}
/******************************************************************************/
Int Matrixes, FurVels;
#if DX11
static Int MatrixesPart, FurVelPart;
static Byte BoneNumToPart[256+1];
static ShaderBuffer *SBObjMatrix, *SBObjVel, *SBFurVel;
#endif
void SetMatrixCount(Int num)
{
   if(Matrixes!=num)
   {
      Matrixes=num;
   #if DX11
   #if ALLOW_PARTIAL_BUFFERS
      if(D3DC1)
      {
         SBObjMatrix->buffer.size=SIZE(GpuMatrix)*Matrixes;
         SBObjVel   ->buffer.size=SIZE(Vec4     )*Matrixes; // #VelAngVel
         Int m16=Ceil16(Matrixes*3);
      #if DEBUG
         // check if, when not updating the bindings below, the vel buffer needs more constants than what was set last time; currently keep *1 but replace with *2 when merging with angular velocities #VelAngVel
         static Int old_vel_count; Int vel_count=Matrixes*1; if(MatrixesPart!=m16)old_vel_count=Ceil16(vel_count);else if(vel_count>old_vel_count)Exit("Need to test vel count separately");
      #endif
         if(MatrixesPart!=m16)
         {
            MatrixesPart=m16;
            // Warning: the code below does not set the cached buffers the way 'bind' does; that's not needed because those buffers have a constant bind index
            ASSERT(SBI_OBJ_VEL==SBI_OBJ_MATRIX+1); // can do this only if they're next to each other
            UInt first[]={0, 0}, // must be provided or DX will fail
                 num  []={Ceil16(Matrixes*3), Ceil16(Matrixes*1)}; // #VelAngVel
            ID3D11Buffer *buf[]={SBObjMatrix->buffer.buffer, SBObjVel->buffer.buffer};
            D3DC1->VSSetConstantBuffers1(SBI_OBJ_MATRIX, 2, buf, first, num);
            D3DC1->HSSetConstantBuffers1(SBI_OBJ_MATRIX, 2, buf, first, num);
            D3DC1->DSSetConstantBuffers1(SBI_OBJ_MATRIX, 2, buf, first, num);
            D3DC1->PSSetConstantBuffers1(SBI_OBJ_MATRIX, 2, buf, first, num);
         }
      }else
   #endif
      {
         Int part=BoneNumToPart[num]; if(MatrixesPart!=part)
         {
            MatrixesPart=part;
            SBObjMatrix->setPart(part);
            SBObjVel   ->setPart(part);
         #if 0
            SBObjMatrix->bind(SBI_OBJ_MATRIX);
            SBObjVel   ->bind(SBI_OBJ_VEL   );
         #else // bind 2 at the same time
            // Warning: the code below does not set the cached buffers the way 'bind' does; that's not needed because those buffers have a constant bind index
            ASSERT(SBI_OBJ_VEL==SBI_OBJ_MATRIX+1); // can do this only if they're next to each other
            ID3D11Buffer *buf[]={SBObjMatrix->buffer.buffer, SBObjVel->buffer.buffer};
            D3DC1->VSSetConstantBuffers(SBI_OBJ_MATRIX, 2, buf);
            D3DC1->HSSetConstantBuffers(SBI_OBJ_MATRIX, 2, buf);
            D3DC1->DSSetConstantBuffers(SBI_OBJ_MATRIX, 2, buf);
            D3DC1->PSSetConstantBuffers(SBI_OBJ_MATRIX, 2, buf);
         #endif
         }
      }
   #else
      Sh.h_ViewMatrix->_constant_count=Min(Sh.h_ViewMatrix->fullConstantCount(), num*3); Sh.h_ViewMatrix->setChanged(); // the unit of '_constant_count' is the number of Vec4's (a Matrix is 3*Vec4); 'setChanged' is needed in case only the first few values were committed and later 'setConditional' was used, which would not detect a change against the software buffer, so the next commit would not flush the changes
      Sh.h_ObjVel    ->_constant_count=Min(Sh.h_ObjVel    ->fullConstantCount(), num  ); Sh.h_ObjVel    ->setChanged(); // the unit of '_constant_count' is the number of Vec4's (a Vel is a Vec4); 'setChanged' is needed for the same reason as above
   #endif
   }
}
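/* Partial-buffer binding sketch (general D3D11.1 rule, not engine-specific): with
   'VSSetConstantBuffers1' the 'pFirstConstant'/'pNumConstants' ranges are expressed in shader
   constants (one constant = 16 bytes = one Vec4), and every offset/count must be a multiple of
   16 constants - which is why 'Ceil16' is applied above. For example, binding room for 100 bone
   matrixes (100*3=300 Vec4's) has to be rounded up:
      UInt         first[]={0},             // start of the visible window, in Vec4's
                   num  []={Ceil16(100*3)}; // 300 -> 304 Vec4's (next multiple of 16)
      ID3D11Buffer *buf []={SBObjMatrix->buffer.buffer};
      D3DC1->VSSetConstantBuffers1(SBI_OBJ_MATRIX, 1, buf, first, num); */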
void SetFurVelCount(Int num) // !! unlike 'SetMatrixCount' this needs to be called before Shader start/begin, because it doesn't bind the new buffer !!
{
   if(FurVels!=num)
   {
      FurVels=num;
   #if DX11
      Int part=BoneNumToPart[num]; if(FurVelPart!=part)SBFurVel->setPart(FurVelPart=part);
   #else
      Sh.h_FurVel->_constant_count=Min(Sh.h_FurVel->fullConstantCount(), num); Sh.h_FurVel->setChanged(); // the unit of '_constant_count' is the number of Vec4's (a Vel is a Vec4); 'setChanged' is needed in case only the first few values were committed and later 'setConditional' was used, which would not detect a change against the software buffer, so the next commit would not flush the changes
   #endif
   }
}
/******************************************************************************/
void InitMatrix()
{
   ViewMatrix=Sh.h_ViewMatrix->asGpuMatrix();
   const Int matrixes=D.maxShaderMatrixes();
   // for GL 'ViewMatrix' and 'ObjVel' may be adjusted in "Bool ShaderFile::load(C Str &name)"
   DYNAMIC_ASSERT(Sh.h_ViewMatrix->_cpu_data_size==SIZE(GpuMatrix)*matrixes, "Unexpected size of ViewMatrix");
   DYNAMIC_ASSERT(Sh.h_ObjVel    ->_cpu_data_size==SIZE(Vec      )*matrixes, "Unexpected size of ObjVel"); // #VelAngVel
   DYNAMIC_ASSERT(Sh.h_FurVel    ->_cpu_data_size==SIZE(Vec      )*matrixes, "Unexpected size of FurVel");
   // !! if any other shader parameter can be resized, then we need to add it to "Bool ShaderGL::validate(ShaderFile &shader, Str *messages)" "c.final_count=((c.sp==" !!
#if DX11
   SBObjMatrix=ShaderBuffers(Str8Temp("ObjMatrix")); DYNAMIC_ASSERT(SBObjMatrix->size()==SIZE(GpuMatrix)*matrixes, "Unexpected size of ObjMatrix");
   SBObjVel   =ShaderBuffers(Str8Temp("ObjVel"   )); DYNAMIC_ASSERT(SBObjVel   ->size()==SIZE(Vec4     )*matrixes, "Unexpected size of ObjVel"   ); // #VelAngVel
   SBFurVel   =ShaderBuffers(Str8Temp("FurVel"   )); DYNAMIC_ASSERT(SBFurVel   ->size()==SIZE(Vec4     )*matrixes, "Unexpected size of FurVel"   );
   const Int parts[]={matrixes, 192, 160, 128, 96, 80, 64, 56, 48, 32, 16, 8, 1}; // start from the biggest, because 'ShaderBuffer.size' uses it as the total size
   if(!ALLOW_PARTIAL_BUFFERS || !D3DC1) // have to create parts only if we won't use partial buffers
   {
      SBObjMatrix->createParts(parts, Elms(parts));
      SBObjVel   ->createParts(parts, Elms(parts));
   }else
   {  // when we use ALLOW_PARTIAL_BUFFERS then for now we still have to create at least 1 part, because 'ShaderBuffer.size' needs it to know the full size, which we dynamically resize (needed when loading other shaders and comparing that the buffer total size matches)
      SBObjMatrix->createParts(parts, 1);
      SBObjVel   ->createParts(parts, 1);
   }
   SBFurVel->createParts(parts, Elms(parts));
   // map every possible bone count to the smallest part that can hold it, e.g. counts 9..16 map to the 16-element part
   Int end=Elms(BoneNumToPart); for(Int i=0; i<Elms(parts)-1; i++){Int start=parts[i+1]+1; SetMem(&BoneNumToPart[start], i, end-start); end=start;} REP(end)BoneNumToPart[i]=Elms(parts)-1;
#endif
}
/******************************************************************************/
}
/******************************************************************************/