gBufferConditionerHLSL.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. //-----------------------------------------------------------------------------
  2. // Copyright (c) 2012 GarageGames, LLC
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to
  6. // deal in the Software without restriction, including without limitation the
  7. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8. // sell copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20. // IN THE SOFTWARE.
  21. //-----------------------------------------------------------------------------
  22. #include "platform/platform.h"
  23. #include "lighting/advanced/hlsl/gBufferConditionerHLSL.h"
  24. #include "shaderGen/featureMgr.h"
  25. #include "gfx/gfxStringEnumTranslate.h"
  26. #include "materials/materialFeatureTypes.h"
  27. #include "materials/materialFeatureData.h"
  28. #include "shaderGen/hlsl/shaderFeatureHLSL.h"
  29. #include "gfx/gfxDevice.h"
  30. GBufferConditionerHLSL::GBufferConditionerHLSL( const GFXFormat bufferFormat, const NormalSpace nrmSpace ) :
  31. Parent( bufferFormat )
  32. {
  33. // Figure out how we should store the normal data. These are the defaults.
  34. mCanWriteNegativeValues = false;
  35. mNormalStorageType = CartesianXYZ;
  36. // Note: We clear to a depth 1 (the w component) so
  37. // that the unrendered parts of the scene end up
  38. // farthest to the camera.
  39. const NormalStorage &twoCmpNrmStorageType = ( nrmSpace == WorldSpace ? Spherical : LambertAzimuthal );
  40. switch(bufferFormat)
  41. {
  42. case GFXFormatR8G8B8A8:
  43. mNormalStorageType = twoCmpNrmStorageType;
  44. mBitsPerChannel = 8;
  45. break;
  46. case GFXFormatR16G16B16A16F:
  47. // Floating point buffers don't need to encode negative values
  48. mCanWriteNegativeValues = true;
  49. mNormalStorageType = twoCmpNrmStorageType;
  50. mBitsPerChannel = 16;
  51. break;
  52. // Store a 32bit depth with a sperical normal in the
  53. // integer 16 format. This gives us perfect depth
  54. // precision and high quality normals within a 64bit
  55. // buffer format.
  56. case GFXFormatR16G16B16A16:
  57. mNormalStorageType = twoCmpNrmStorageType;
  58. mBitsPerChannel = 16;
  59. break;
  60. case GFXFormatR32G32B32A32F:
  61. mCanWriteNegativeValues = true;
  62. mNormalStorageType = CartesianXYZ;
  63. mBitsPerChannel = 32;
  64. break;
  65. default:
  66. AssertFatal(false, "Unsupported G-Buffer format");
  67. }
  68. }
  69. GBufferConditionerHLSL::~GBufferConditionerHLSL()
  70. {
  71. }
  72. void GBufferConditionerHLSL::processVert( Vector<ShaderComponent*> &componentList,
  73. const MaterialFeatureData &fd )
  74. {
  75. // If we have a normal map then that feature will
  76. // take care of passing gbNormal to the pixel shader.
  77. if ( fd.features[MFT_NormalMap] )
  78. return;
  79. MultiLine *meta = new MultiLine;
  80. output = meta;
  81. // grab incoming vert normal
  82. Var *inNormal = (Var*) LangElement::find( "normal" );
  83. if (!inNormal)
  84. {
  85. inNormal = new Var("normal", "float3");
  86. meta->addStatement(new GenOp(" @ = float3( 0.0, 0.0, 1.0 );\r\n", new DecOp(inNormal)));
  87. Con::errorf("ShagerGen: Something went bad with ShaderGen. The normal should be already defined.");
  88. }
  89. AssertFatal( inNormal, "Something went bad with ShaderGen. The normal should be already defined." );
  90. // grab output for gbuffer normal
  91. ShaderConnector *connectComp = dynamic_cast<ShaderConnector *>( componentList[C_CONNECTOR] );
  92. Var *outNormal = connectComp->getElement( RT_TEXCOORD );
  93. outNormal->setName( "gbNormal" );
  94. outNormal->setStructName( "OUT" );
  95. outNormal->setType( "float3" );
  96. if( !fd.features[MFT_ParticleNormal] )
  97. {
  98. // Kick out the view-space normal
  99. // TODO: Total hack because Conditioner is directly derived
  100. // from ShaderFeature and not from ShaderFeatureHLSL.
  101. NamedFeatureHLSL dummy( String::EmptyString );
  102. dummy.setInstancingFormat( mInstancingFormat );
  103. Var *worldViewOnly = dummy.getWorldView( componentList, fd.features[MFT_UseInstancing], meta );
  104. meta->addStatement( new GenOp(" @ = mul(@, float4( normalize(@), 0.0 ) ).xyz;\r\n",
  105. outNormal, worldViewOnly, inNormal ) );
  106. }
  107. else
  108. {
  109. // Assume the particle normal generator has already put this in view space
  110. // and normalized it
  111. meta->addStatement( new GenOp( " @ = @;\r\n", outNormal, inNormal ) );
  112. }
  113. }
  114. void GBufferConditionerHLSL::processPix( Vector<ShaderComponent*> &componentList,
  115. const MaterialFeatureData &fd )
  116. {
  117. // sanity
  118. AssertFatal( fd.features[MFT_EyeSpaceDepthOut], "No depth-out feature enabled! Bad news!" );
  119. MultiLine *meta = new MultiLine;
  120. // grab connector normal
  121. ShaderConnector *connectComp = dynamic_cast<ShaderConnector *>( componentList[C_CONNECTOR] );
  122. Var *gbNormal = (Var*) LangElement::find( "gbNormal" );
  123. if( !gbNormal )
  124. {
  125. gbNormal = connectComp->getElement( RT_TEXCOORD );
  126. gbNormal->setName( "gbNormal" );
  127. gbNormal->setStructName( "IN" );
  128. gbNormal->setType( "float3" );
  129. gbNormal->mapsToSampler = false;
  130. gbNormal->uniform = false;
  131. }
  132. // find depth
  133. ShaderFeature *depthFeat = FEATUREMGR->getByType( MFT_EyeSpaceDepthOut );
  134. AssertFatal( depthFeat != NULL, "No eye space depth feature found!" );
  135. Var *depth = (Var*) LangElement::find(depthFeat->getOutputVarName());
  136. AssertFatal( depth, "Something went bad with ShaderGen. The depth should be already generated by the EyeSpaceDepthOut feature." );
  137. Var *unconditionedOut = new Var;
  138. unconditionedOut->setType("float4");
  139. unconditionedOut->setName("normal_depth");
  140. LangElement *outputDecl = new DecOp( unconditionedOut );
  141. // If we're doing prepass blending then we need
  142. // to steal away the alpha channel before the
  143. // conditioner stomps on it.
  144. Var *alphaVal = NULL;
  145. if ( fd.features[ MFT_IsTranslucentZWrite ] )
  146. {
  147. alphaVal = new Var( "outAlpha", "float" );
  148. meta->addStatement( new GenOp( " @ = OUT.col1.a; // MFT_IsTranslucentZWrite\r\n", new DecOp( alphaVal ) ) );
  149. }
  150. // If using interlaced normals, invert the normal
  151. if(fd.features[MFT_InterlacedPrePass])
  152. {
  153. // NOTE: Its safe to not call ShaderFeatureHLSL::addOutVpos() in the vertex
  154. // shader as for SM 3.0 nothing is needed there.
  155. Var *Vpos = ShaderFeatureHLSL::getInVpos( meta, componentList );
  156. Var *iGBNormal = new Var( "interlacedGBNormal", "float3" );
  157. meta->addStatement(new GenOp(" @ = (frac(@.y * 0.5) < 0.1 ? reflect(@, float3(0.0, -1.0, 0.0)) : @);\r\n", new DecOp(iGBNormal), Vpos, gbNormal, gbNormal));
  158. gbNormal = iGBNormal;
  159. }
  160. // NOTE: We renormalize the normal here as they
  161. // will not stay normalized during interpolation.
  162. meta->addStatement( new GenOp(" @ = @;", outputDecl, new GenOp( "float4(normalize(@), @)", gbNormal, depth ) ) );
  163. meta->addStatement( assignOutput( unconditionedOut ) );
  164. // If we have an alpha var then we're doing prepass lerp blending.
  165. if ( alphaVal )
  166. {
  167. Var *outColor = (Var*)LangElement::find( getOutputTargetVarName( DefaultTarget ) );
  168. meta->addStatement( new GenOp( " @.ba = float2( 0, @ ); // MFT_IsTranslucentZWrite\r\n", outColor, alphaVal ) );
  169. }
  170. output = meta;
  171. }
  172. ShaderFeature::Resources GBufferConditionerHLSL::getResources( const MaterialFeatureData &fd )
  173. {
  174. Resources res;
  175. // Passing from VS->PS:
  176. // - world space normal (gbNormal)
  177. res.numTexReg = 1;
  178. return res;
  179. }
  180. Var* GBufferConditionerHLSL::printMethodHeader( MethodType methodType, const String &methodName, Stream &stream, MultiLine *meta )
  181. {
  182. const bool isCondition = ( methodType == ConditionerFeature::ConditionMethod );
  183. Var *retVal = NULL;
  184. // The uncondition method inputs are changed
  185. if( isCondition )
  186. retVal = Parent::printMethodHeader( methodType, methodName, stream, meta );
  187. else
  188. {
  189. const bool isDirect3D11 = GFX->getAdapterType() == Direct3D11;
  190. Var *methodVar = new Var;
  191. methodVar->setName(methodName);
  192. methodVar->setType("inline float4");
  193. DecOp *methodDecl = new DecOp(methodVar);
  194. Var *prepassSampler = new Var;
  195. prepassSampler->setName("prepassSamplerVar");
  196. prepassSampler->setType("sampler2D");
  197. DecOp *prepassSamplerDecl = new DecOp(prepassSampler);
  198. Var *screenUV = new Var;
  199. screenUV->setName("screenUVVar");
  200. screenUV->setType("float2");
  201. DecOp *screenUVDecl = new DecOp(screenUV);
  202. Var *prepassTex = NULL;
  203. DecOp *prepassTexDecl = NULL;
  204. if (isDirect3D11)
  205. {
  206. prepassSampler->setType("SamplerState");
  207. prepassTex = new Var;
  208. prepassTex->setName("prepassTexVar");
  209. prepassTex->setType("Texture2D");
  210. prepassTex->texture = true;
  211. prepassTex->constNum = prepassSampler->constNum;
  212. prepassTexDecl = new DecOp(prepassTex);
  213. }
  214. Var *bufferSample = new Var;
  215. bufferSample->setName("bufferSample");
  216. bufferSample->setType("float4");
  217. DecOp *bufferSampleDecl = new DecOp(bufferSample);
  218. if (isDirect3D11)
  219. meta->addStatement(new GenOp("@(@, @, @)\r\n", methodDecl, prepassSamplerDecl, prepassTexDecl, screenUVDecl));
  220. else
  221. meta->addStatement( new GenOp( "@(@, @)\r\n", methodDecl, prepassSamplerDecl, screenUVDecl ) );
  222. meta->addStatement( new GenOp( "{\r\n" ) );
  223. meta->addStatement( new GenOp( " // Sampler g-buffer\r\n" ) );
  224. #ifdef TORQUE_OS_XENON
  225. meta->addStatement( new GenOp( " @;\r\n", bufferSampleDecl ) );
  226. meta->addStatement( new GenOp( " asm { tfetch2D @, @, @, MagFilter = point, MinFilter = point, MipFilter = point };\r\n", bufferSample, screenUV, prepassSampler ) );
  227. #else
  228. // The gbuffer has no mipmaps, so use tex2dlod when
  229. // possible so that the shader compiler can optimize.
  230. meta->addStatement( new GenOp( " #if TORQUE_SM >= 30\r\n" ) );
  231. if (isDirect3D11)
  232. meta->addStatement(new GenOp(" @ = @.SampleLevel(@, @,0);\r\n", bufferSampleDecl, prepassTex, prepassSampler, screenUV));
  233. else
  234. meta->addStatement(new GenOp(" @ = tex2Dlod(@, float4(@,0,0));\r\n", bufferSampleDecl, prepassSampler, screenUV));
  235. meta->addStatement(new GenOp(" #else\r\n"));
  236. meta->addStatement(new GenOp(" @ = tex2D(@, @);\r\n", bufferSampleDecl, prepassSampler, screenUV));
  237. meta->addStatement(new GenOp(" #endif\r\n\r\n"));
  238. #endif
  239. // We don't use this way of passing var's around, so this should cause a crash
  240. // if something uses this improperly
  241. retVal = bufferSample;
  242. }
  243. return retVal;
  244. }
  245. GenOp* GBufferConditionerHLSL::_posnegEncode( GenOp *val )
  246. {
  247. if(mNormalStorageType == LambertAzimuthal)
  248. return mCanWriteNegativeValues ? val : new GenOp(avar("(%f * (@ + %f))", 1.0f/(M_SQRT2_F * 2.0f), M_SQRT2_F), val);
  249. else
  250. return mCanWriteNegativeValues ? val : new GenOp("(0.5 * (@ + 1.0))", val);
  251. }
  252. GenOp* GBufferConditionerHLSL::_posnegDecode( GenOp *val )
  253. {
  254. if(mNormalStorageType == LambertAzimuthal)
  255. return mCanWriteNegativeValues ? val : new GenOp(avar("(@ * %f - %f)", M_SQRT2_F * 2.0f, M_SQRT2_F), val);
  256. else
  257. return mCanWriteNegativeValues ? val : new GenOp("(@ * 2.0 - 1.0)", val);
  258. }
  259. Var* GBufferConditionerHLSL::_conditionOutput( Var *unconditionedOutput, MultiLine *meta )
  260. {
  261. Var *retVar = new Var;
  262. retVar->setType("float4");
  263. retVar->setName("_gbConditionedOutput");
  264. LangElement *outputDecl = new DecOp( retVar );
  265. switch(mNormalStorageType)
  266. {
  267. case CartesianXYZ:
  268. meta->addStatement( new GenOp( " // g-buffer conditioner: float4(normal.xyz, depth)\r\n" ) );
  269. meta->addStatement( new GenOp( " @ = float4(@, @.a);\r\n", outputDecl,
  270. _posnegEncode(new GenOp("@.xyz", unconditionedOutput)), unconditionedOutput ) );
  271. break;
  272. case CartesianXY:
  273. meta->addStatement( new GenOp( " // g-buffer conditioner: float4(normal.xy, depth Hi + z-sign, depth Lo)\r\n" ) );
  274. meta->addStatement( new GenOp( " @ = float4(@, @.a);", outputDecl,
  275. _posnegEncode(new GenOp("float3(@.xy, sign(@.z))", unconditionedOutput, unconditionedOutput)), unconditionedOutput ) );
  276. break;
  277. case Spherical:
  278. meta->addStatement( new GenOp( " // g-buffer conditioner: float4(normal.theta, normal.phi, depth Hi, depth Lo)\r\n" ) );
  279. meta->addStatement( new GenOp( " @ = float4(@, 0.0, @.a);\r\n", outputDecl,
  280. _posnegEncode(new GenOp("float2(atan2(@.y, @.x) / 3.14159265358979323846f, @.z)", unconditionedOutput, unconditionedOutput, unconditionedOutput ) ),
  281. unconditionedOutput ) );
  282. // HACK: This fixes the noise present when using a floating point
  283. // gbuffer on Geforce cards and the "flat areas unlit" issues.
  284. //
  285. // We need work around atan2() above to fix this issue correctly
  286. // without the extra overhead of this test.
  287. //
  288. meta->addStatement( new GenOp( " if ( abs( dot( @.xyz, float3( 0.0, 0.0, 1.0 ) ) ) > 0.999f ) @ = float4( 0, 1 * sign( @.z ), 0, @.a );\r\n",
  289. unconditionedOutput, retVar, unconditionedOutput, unconditionedOutput ) );
  290. break;
  291. case LambertAzimuthal:
  292. //http://en.wikipedia.org/wiki/Lambert_azimuthal_equal-area_projection
  293. //
  294. // Note we're casting to half to use partial precision
  295. // sqrt which is much faster on older Geforces while
  296. // still being acceptable for normals.
  297. //
  298. meta->addStatement( new GenOp( " // g-buffer conditioner: float4(normal.X, normal.Y, depth Hi, depth Lo)\r\n" ) );
  299. meta->addStatement( new GenOp( " @ = float4(@, 0.0, @.a);\r\n", outputDecl,
  300. _posnegEncode(new GenOp("sqrt(half(2.0/(1.0 - @.y))) * half2(@.xz)", unconditionedOutput, unconditionedOutput)),
  301. unconditionedOutput ) );
  302. break;
  303. }
  304. // Encode depth into two channels
  305. if(mNormalStorageType != CartesianXYZ)
  306. {
  307. const U64 maxValPerChannel = (U64)1 << mBitsPerChannel;
  308. meta->addStatement( new GenOp( " \r\n // Encode depth into hi/lo\r\n" ) );
  309. meta->addStatement( new GenOp( avar( " float2 _tempDepth = frac(@.a * float2(1.0, %llu.0));\r\n", maxValPerChannel - 1 ),
  310. unconditionedOutput ) );
  311. meta->addStatement( new GenOp( avar( " @.zw = _tempDepth.xy - _tempDepth.yy * float2(1.0/%llu.0, 0.0);\r\n\r\n", maxValPerChannel - 1 ),
  312. retVar ) );
  313. }
  314. AssertFatal( retVar != NULL, avar( "Cannot condition output to buffer format: %s", GFXStringTextureFormat[getBufferFormat()] ) );
  315. return retVar;
  316. }
  317. Var* GBufferConditionerHLSL::_unconditionInput( Var *conditionedInput, MultiLine *meta )
  318. {
  319. Var *retVar = new Var;
  320. retVar->setType("float4");
  321. retVar->setName("_gbUnconditionedInput");
  322. LangElement *outputDecl = new DecOp( retVar );
  323. switch(mNormalStorageType)
  324. {
  325. case CartesianXYZ:
  326. meta->addStatement( new GenOp( " // g-buffer unconditioner: float4(normal.xyz, depth)\r\n" ) );
  327. meta->addStatement( new GenOp( " @ = float4(@, @.a);\r\n", outputDecl,
  328. _posnegDecode(new GenOp("@.xyz", conditionedInput)), conditionedInput ) );
  329. break;
  330. case CartesianXY:
  331. meta->addStatement( new GenOp( " // g-buffer unconditioner: float4(normal.xy, depth Hi + z-sign, depth Lo)\r\n" ) );
  332. meta->addStatement( new GenOp( " @ = float4(@, @.a);\r\n", outputDecl,
  333. _posnegDecode(new GenOp("@.xyz", conditionedInput)), conditionedInput ) );
  334. meta->addStatement( new GenOp( " @.z *= sqrt(1.0 - dot(@.xy, @.xy));\r\n", retVar, retVar, retVar ) );
  335. break;
  336. case Spherical:
  337. meta->addStatement( new GenOp( " // g-buffer unconditioner: float4(normal.theta, normal.phi, depth Hi, depth Lo)\r\n" ) );
  338. meta->addStatement( new GenOp( " float2 spGPUAngles = @;\r\n", _posnegDecode(new GenOp("@.xy", conditionedInput)) ) );
  339. meta->addStatement( new GenOp( " float2 sincosTheta;\r\n" ) );
  340. meta->addStatement( new GenOp( " sincos(spGPUAngles.x * 3.14159265358979323846f, sincosTheta.x, sincosTheta.y);\r\n" ) );
  341. meta->addStatement( new GenOp( " float2 sincosPhi = float2(sqrt(1.0 - spGPUAngles.y * spGPUAngles.y), spGPUAngles.y);\r\n" ) );
  342. meta->addStatement( new GenOp( " @ = float4(sincosTheta.y * sincosPhi.x, sincosTheta.x * sincosPhi.x, sincosPhi.y, @.a);\r\n", outputDecl, conditionedInput ) );
  343. break;
  344. case LambertAzimuthal:
  345. // Note we're casting to half to use partial precision
  346. // sqrt which is much faster on older Geforces while
  347. // still being acceptable for normals.
  348. //
  349. meta->addStatement( new GenOp( " // g-buffer unconditioner: float4(normal.X, normal.Y, depth Hi, depth Lo)\r\n" ) );
  350. meta->addStatement( new GenOp( " float2 _inpXY = @;\r\n", _posnegDecode(new GenOp("@.xy", conditionedInput)) ) );
  351. meta->addStatement( new GenOp( " float _xySQ = dot(_inpXY, _inpXY);\r\n" ) );
  352. meta->addStatement( new GenOp( " @ = float4( sqrt(half(1.0 - (_xySQ / 4.0))) * _inpXY, -1.0 + (_xySQ / 2.0), @.a).xzyw;\r\n", outputDecl, conditionedInput ) );
  353. break;
  354. }
  355. // Recover depth from encoding
  356. if(mNormalStorageType != CartesianXYZ)
  357. {
  358. const U64 maxValPerChannel = (U64)1 << mBitsPerChannel;
  359. meta->addStatement( new GenOp( " \r\n // Decode depth\r\n" ) );
  360. meta->addStatement( new GenOp( avar( " @.w = dot( @.zw, float2(1.0, 1.0/%llu.0));\r\n", maxValPerChannel - 1 ),
  361. retVar, conditionedInput ) );
  362. }
  363. AssertFatal( retVar != NULL, avar( "Cannot uncondition input from buffer format: %s", GFXStringTextureFormat[getBufferFormat()] ) );
  364. return retVar;
  365. }