PostFX_Bloom.fx 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. //////////////////////////////////////////////////////////////////////////////
  2. // ©2008 Electronic Arts Inc
  3. //
  4. // Image post processing effect performing a glow by selective color blurring
  5. //////////////////////////////////////////////////////////////////////////////
  6. #include "Common.fxh"
  7. #include "CommonPostFX.fxh"
  8. #include "Gamma.fxh"
  // Sampler for the frame buffer texture bound at "PostEffect.FrameBufferTexture".
  // Linear min/mag filtering, point mip filtering, clamped addressing on both axes.
  SAMPLER_2D_BEGIN(FrameBufferSampler,
      string SasBindAddress = "PostEffect.FrameBufferTexture";
      int WW3DDynamicSet = DS_CUSTOM_FIRST;
      )
      AddressU = Clamp;
      AddressV = Clamp;
      MinFilter = Linear;
      MagFilter = Linear;
      MipFilter = Point;
  SAMPLER_2D_END
  // Sampler for the input of the current bloom pass, bound at
  // "PostEffect.Bloom.SourceTexture".
  // Linear min/mag filtering is what lets the filter kernels in this file fetch
  // in-between texel positions; addressing is clamped on both axes.
  SAMPLER_2D_BEGIN(SourceTexture,
      string SasBindAddress = "PostEffect.Bloom.SourceTexture";
      int WW3DDynamicSet = DS_CUSTOM_FIRST;
      )
      AddressU = Clamp;
      AddressV = Clamp;
      MinFilter = Linear;
      MagFilter = Linear;
      MipFilter = Point;
  SAMPLER_2D_END
  // Size of the texture read through SourceTexture. The filter kernels divide their
  // texel offsets by this value to obtain texture-coordinate offsets.
  // NOTE(review): presumably measured in pixels - confirm against the host code.
  float2 SourceTextureSize
  <
      string SasBindAddress = "PostEffect.Bloom.SourceTextureSize";
  > = float2(1024, 768);

  // Size of the render target being written. The DownsampleInitial techniques use the
  // SourceTextureSize.x / TargetTextureSize.x ratio to choose the downsample kernel.
  float2 TargetTextureSize
  <
      string SasBindAddress = "PostEffect.Bloom.TargetTextureSize";
  > = float2(1024, 768);

  // Scale applied to the bloom texture when it is added in the AccumulateFinal passes.
  float BloomIntensity
  <
      string SasBindAddress = "PostEffect.Bloom.Intensity";
  > = 0.5;

  // Per-channel exposure factor multiplied into the bloom color by DownsampleInitialPS.
  float3 ExposureLevel
  <
      string SasBindAddress = "PostEffect.LookupTable.ExposureLevel";
  > = float3(1.0, 1.0, 1.0);
  // Note: None of the following variables are read by the shader code in this file.
  // Declaring them "primes" the scope parameters used by the scrape script:
  // Scrape's ResolveConstant function does not know the type/dimensions of a parameter,
  // so each one has to be declared somewhere before the script is loaded.
  float2 TextureSizeHigh
  <
      string SasBindAddress = "PostEffect.Bloom.TextureSizeHigh";
  > = float2(0, 0);

  float2 TextureSizeMedium
  <
      string SasBindAddress = "PostEffect.Bloom.TextureSizeMedium";
  > = float2(0, 0);

  float2 TextureSizeLow
  <
      string SasBindAddress = "PostEffect.Bloom.TextureSizeLow";
  > = float2(0, 0);

  int SurfaceFormat
  <
      string SasBindAddress = "PostEffect.Bloom.SurfaceFormat";
  > = 1; // SURFACE_FORMAT_A8R8G8B8

  int TextureFormat
  <
      string SasBindAddress = "PostEffect.Bloom.TextureFormat";
  > = 1; // SURFACE_FORMAT_A8R8G8B8
  67. // ----------------------------------------------------------------------------
  // Vertex shader output used by every pass in this file:
  // the output position plus a single texture coordinate set.
  struct VSOutput
  {
      float4 Position : POSITION;
      float2 TexCoord : TEXCOORD0;
  };
  73. // ----------------------------------------------------------------------------
  // Pass-through vertex shader.
  // Position: 2D input position, emitted unchanged with z = 0 and w = 1.
  // TexCoord: forwarded unchanged to the pixel shader.
  VSOutput DefaultVS(float2 Position : POSITION, float2 TexCoord : TEXCOORD0)
  {
      VSOutput result;
      result.TexCoord = TexCoord;
      result.Position = float4(Position.x, Position.y, 0, 1);
      return result;
  }
  81. // ----------------------------------------------------------------------------
  // Copies one frame buffer texel: samples FrameBufferSampler at TexCoord and runs the
  // color channels through UncompressRenderTargetColor. Alpha is returned as sampled.
  float4 DefaultPS(float2 TexCoord : TEXCOORD0) : COLOR
  {
      float4 frame = tex2D(SAMPLER(FrameBufferSampler), TexCoord);
      frame.rgb = UncompressRenderTargetColor(frame.rgb);
      return frame;
  }
  88. // ----------------------------------------------------------------------------
  // Averages a square grid of taps from SourceTexture centered on texCoord.
  //
  // texCoord:       center of the filter footprint, in texture coordinates.
  // halfKernelSize: number of taps per axis. Taps are spaced 2 texels apart and run from
  //                 -(halfKernelSize - 1) to +(halfKernelSize - 1) texels, so
  //                 halfKernelSize * halfKernelSize taps are taken in total.
  //                 Must be at least 1 (0 would divide by zero below).
  //
  // Returns the unweighted mean of all taps.
  float4 DoBoxFilter(float2 texCoord, int halfKernelSize)
  {
      float4 color = 0;

      // Outermost tap offset in texels; shared by both axes.
      const int lastOffset = halfKernelSize - 1;

      for (int j = -lastOffset; j <= lastOffset; j += 2)
      {
          for (int i = -lastOffset; i <= lastOffset; i += 2)
          {
              color += tex2D(SAMPLER(SourceTexture), texCoord + float2(i, j) / SourceTextureSize);
          }
      }

      // Normalize by the tap count. A plain integer multiply is exact, whereas pow()
      // operates on floats and is not meant for squaring an integer.
      color /= float(halfKernelSize * halfKernelSize);
      return color;
  }
  103. // ----------------------------------------------------------------------------
  // Maps an input intensity to the intensity that should contribute to the bloom.
  //
  // With SQUARED_BLOOM_CURVE defined (the default, it is defined right below):
  //   returns 0 unless intensity is greater than the threshold 0.11,
  //   otherwise the square of (intensity - threshold).
  // Without it: a two-segment linear curve with slope 1/8 below the 0.66 threshold and
  //   slope 2.75 from the threshold on, continuous at the threshold.
  float ApplyBloomCurve(float intensity)
  {
  #define SQUARED_BLOOM_CURVE
  #if defined(SQUARED_BLOOM_CURVE)
      const float cutoff = .11;

      // Written as a negated '>' so a NaN input takes this early exit too.
      if (!(intensity > cutoff))
      {
          return 0;
      }

      const float excess = intensity - cutoff;
      return excess * excess;
  #else
      const float kneePoint = .66;
      const float slopeBelow = 1.0 / 8.0;
      const float slopeAbove = 2.75;

      if (!(intensity < kneePoint))
      {
          // The constant term makes both segments meet at kneePoint.
          return intensity * slopeAbove + kneePoint * (slopeBelow - slopeAbove);
      }

      return intensity * slopeBelow;
  #endif
  }
  124. // ----------------------------------------------------------------------------
  // Downsampling pixel shader: box filter with two taps per axis.
  //
  // Why is this called 4x4 when it only takes 2x2 taps?
  // Each tap is fetched with bilinear filtering at the in-between point of a 2x2 pixel
  // area, so the four taps together cover four 2x2 pixel areas.
  float4 Downsample4x4PS(VSOutput In) : COLOR
  {
      return DoBoxFilter(In.TexCoord, 2);
  }
  132. // ----------------------------------------------------------------------------
  // First bloom pass: downsamples the source and keeps only the part selected by the
  // bloom curve.
  //
  // halfKernelSize: compile-time box filter size (taps per axis), see DoBoxFilter.
  //                 This works like the 4x4 downsampler, except that on the PC the screen
  //                 resolution varies, so the kernel size has to adapt to it.
  float4 DownsampleInitialPS(VSOutput In, uniform int halfKernelSize) : COLOR
  {
      float4 result = DoBoxFilter(In.TexCoord, halfKernelSize);
      result.rgb = UncompressRenderTargetColor(result.rgb);

      // Scalar brightness of the filtered color (equal weight of 0.3 per channel).
      const float brightness = dot(result.rgb, float3(0.3f, 0.3f, 0.3f));

      // Rescale the color so its brightness follows the bloom curve.
      // The small bias avoids a division by 0, which caused issues esp on Geforce7800 cards.
      const float curveScale = ApplyBloomCurve(brightness) / (brightness + 0.0001);
      result.rgb *= curveScale;

      // HDR exposure doesn't get applied until the end in the lookup table post effect,
      // but we want it to affect the bloom as well, because the bloom is basically
      // happening inside the "film camera" depending on the amount of light coming in.
      result.rgb *= ExposureLevel;

      return result;
  }
  147. // ----------------------------------------------------------------------------
  // One pass of a separable blur along 'direction'.
  //
  // direction: compile-time blur axis; the techniques in this file pass float2(1, 0)
  //            for the U pass and float2(0, 1) for the V pass.
  //
  // With OPTIMIZE_SAMPLES defined (the default, it is defined right below), five
  // bilinearly filtered taps are used; otherwise the 11 tap reference version is compiled.
  float4 BlurGaussian11x11PS(VSOutput In, uniform float2 direction) : COLOR
  {
  #define OPTIMIZE_SAMPLES
  #if defined(OPTIMIZE_SAMPLES)
      // Wow, there are lots of magic numbers in the following lines. How do you compute those?
      // The short answer is that they are taken directly from the presentation
      // "HDR The Bungie Way".
      // They are supposed to approximate an 11 tap Gaussian distribution by sampling just
      // 5 (bi-)linearly interpolated taps.
      // This works by sampling at varying in-between points between the pixels, so that
      // eg at the 90% mark the contribution is 90% from one pixel and 10% from its neighbor.
      // The combined weight of such a pixel pair is then scaled with the ratio that the
      // pair contributes to the whole distribution.
      const float outerOffset = 4.0 - 9.0 / (9.0 + 1.0) * 0.5;
      const float innerOffset = 2.0 - 84.0 / (84.0 + 36.0) * 0.5;
      const int outerWeight = 1 + 9;
      const int innerWeight = 36 + 84;
      const int centerWeight = 126 + 126;

      In.TexCoord += (0.5 - direction) / SourceTextureSize;

      float4 sum = 0;
      sum += tex2D(SAMPLER(SourceTexture), In.TexCoord - outerOffset * direction / SourceTextureSize) * outerWeight;
      sum += tex2D(SAMPLER(SourceTexture), In.TexCoord - innerOffset * direction / SourceTextureSize) * innerWeight;
      sum += tex2D(SAMPLER(SourceTexture), In.TexCoord + 0 * direction / SourceTextureSize) * centerWeight;
      sum += tex2D(SAMPLER(SourceTexture), In.TexCoord + innerOffset * direction / SourceTextureSize) * innerWeight;
      sum += tex2D(SAMPLER(SourceTexture), In.TexCoord + outerOffset * direction / SourceTextureSize) * outerWeight;

      // The five weights above add up to 2 * (1 + 9 + 36 + 84 + 126).
      return sum / float(1 + 9 + 36 + 84 + 126) / 2.0;
  #else
      // The original 11 tap Gaussian sampling.
      // Useful as a reference, if we need to derive other filter kernels in the future.
      In.TexCoord.y += 0.5 / SourceTextureSize.y;

      // Weights of the taps at distance 1..5 on either side of the center tap.
      float sideWeights[5] = { 210, 120, 45, 10, 1 };

      float4 sum = 0;
      float totalWeight = 252;
      sum += tex2D(SAMPLER(SourceTexture), In.TexCoord + 0 * direction / SourceTextureSize) * 252;

      for (int tap = 0; tap < 5; tap++)
      {
          sum += tex2D(SAMPLER(SourceTexture), In.TexCoord + (tap + 1) * direction / SourceTextureSize) * sideWeights[tap];
          sum += tex2D(SAMPLER(SourceTexture), In.TexCoord - (tap + 1) * direction / SourceTextureSize) * sideWeights[tap];
          totalWeight += 2 * sideWeights[tap];
      }

      return sum / totalWeight;
  #endif
  }
  186. // ----------------------------------------------------------------------------
  // Adds the bloom texture on top of the frame buffer.
  // All four channels of both samples are summed; no scaling or color conversion is done.
  float4 AccumulatePS(float2 TexCoord : TEXCOORD0) : COLOR
  {
      float4 frame = tex2D(SAMPLER(FrameBufferSampler), TexCoord);
      float4 bloom = tex2D(SAMPLER(SourceTexture), TexCoord);
      return frame + bloom;
  }
  194. // ----------------------------------------------------------------------------
  // Variant of AccumulatePS that does the addition on GammaToLinear-converted colors:
  // the color channels of both inputs go through GammaToLinear, the samples are summed
  // (alpha included), and the color channels of the sum go through LinearToGamma.
  float4 AccumulatePS_M(float2 TexCoord : TEXCOORD0) : COLOR
  {
      float4 frame = tex2D(SAMPLER(FrameBufferSampler), TexCoord);
      frame.rgb = GammaToLinear(frame.rgb);

      float4 bloom = tex2D(SAMPLER(SourceTexture), TexCoord);
      bloom.rgb = GammaToLinear(bloom.rgb);

      float4 sum = frame + bloom;
      sum.rgb = LinearToGamma(sum.rgb);
      return sum;
  }
  205. // ----------------------------------------------------------------------------
  // Final composite: the frame buffer color (run through UncompressRenderTargetColor)
  // plus the bloom texture scaled by BloomIntensity. The scaled bloom is added to all
  // four channels.
  float4 AccumulateFinalPS(float2 TexCoord : TEXCOORD0) : COLOR
  {
      float4 frame = tex2D(SAMPLER(FrameBufferSampler), TexCoord);
      frame.rgb = UncompressRenderTargetColor(frame.rgb);

      float4 bloom = tex2D(SAMPLER(SourceTexture), TexCoord);
      return frame + bloom * BloomIntensity;
  }
  214. // ----------------------------------------------------------------------------
  // Variant of AccumulateFinalPS that composites on GammaToLinear-converted colors:
  // the frame buffer color is uncompressed and passed through GammaToLinear, the bloom
  // color is passed through GammaToLinear, the bloom is scaled by BloomIntensity and
  // added (alpha included), and the color channels of the sum go through LinearToGamma.
  float4 AccumulateFinalPS_M(float2 TexCoord : TEXCOORD0) : COLOR
  {
      float4 frame = tex2D(SAMPLER(FrameBufferSampler), TexCoord);
      frame.rgb = UncompressRenderTargetColor(frame.rgb);
      frame.rgb = GammaToLinear(frame.rgb);

      float4 bloom = tex2D(SAMPLER(SourceTexture), TexCoord);
      bloom.rgb = GammaToLinear(bloom.rgb);

      float4 sum = frame + bloom * BloomIntensity;
      sum.rgb = LinearToGamma(sum.rgb);
      return sum;
  }
  226. // ----------------------------------------------------------------------------
  // PS 3.0 variants of DownsampleInitialPS, one per halfKernelSize from 1 to 5.
  // The array entry at index k is compiled with halfKernelSize = k + 1.
  DEFINE_ARRAY_MULTIPLIER(DownsampleInitialPS_Multiplier_HalfKernelSize = 1);

  #define DownsampleInitialPS_HalfKernelSize \
      compile PS_3_0 DownsampleInitialPS(1), \
      compile PS_3_0 DownsampleInitialPS(2), \
      compile PS_3_0 DownsampleInitialPS(3), \
      compile PS_3_0 DownsampleInitialPS(4), \
      compile PS_3_0 DownsampleInitialPS(5)

  // Total entry count: multiplier times the 5 kernel size variants listed above.
  DEFINE_ARRAY_MULTIPLIER(DownsampleInitialPS_Multiplier_Final = DownsampleInitialPS_Multiplier_HalfKernelSize * 5);

  #if SUPPORTS_SHADER_ARRAYS
  pixelshader DownsampleInitialPS_Array[DownsampleInitialPS_Multiplier_Final] = { DownsampleInitialPS_HalfKernelSize };
  #endif
  // Initial downsample plus bloom curve, shader model 3.0.
  technique DownsampleInitial
  {
      pass p0
      {
          VertexShader = compile VS_3_0 DefaultVS();

          // The second macro argument is the magic formula that determines how big the
          // filter kernel needs to be based on the screen resolution of the user.
          // The idea is to not ignore any pixels when downsampling, as this leads to
          // flickering of thin lines.
          // eg when going from 1024->256 you want 4x4 kernels, for 1920->256 8x8 is
          // necessary, max supported right now would be 2560->256 ie a 10x10 kernel.
          // The formula computes the array index, which is 1 less than the halfKernelSize
          // (since 0 does not make sense).
          //
          // The third macro argument is the non-array alternative:
          // Xenon always has a fixed screen size, so the downsample ratio can be hardcoded
          // there to be 4x4 (= half kernel size 2).
          PixelShader = ARRAY_EXPRESSION_PS(DownsampleInitialPS_Array,
              clamp((int)(SourceTextureSize.x / TargetTextureSize.x / 2 - 0.5), 0, 4) * DownsampleInitialPS_Multiplier_HalfKernelSize,
              compile PS_3_0 DownsampleInitialPS(2)
          );

          // Plain full-surface pass: no depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  260. // ----------------------------------------------------------------------------
  // PS2.0 fallback version (marked Low LOD, even though bloom is off by default on Low).
  // It does the same as the PS3.0 functions, but under PS2.0 those run out of pixel
  // shader registers when the kernel size reaches 10x10, so the kernel size is limited
  // to 8x8 here (halfKernelSize 1 to 4).
  // In the very unlikely case that someone runs this shader on a PS2.0-only card with a
  // 2560x1600 monitor, the bloom will be not as smooth as it ought to be...
  DEFINE_ARRAY_MULTIPLIER(DownsampleInitialPS_L_Multiplier_HalfKernelSize = 1);

  #define DownsampleInitialPS_L_HalfKernelSize \
      compile PS_2_0 DownsampleInitialPS(1), \
      compile PS_2_0 DownsampleInitialPS(2), \
      compile PS_2_0 DownsampleInitialPS(3), \
      compile PS_2_0 DownsampleInitialPS(4)

  // Total entry count: multiplier times the 4 kernel size variants listed above.
  DEFINE_ARRAY_MULTIPLIER(DownsampleInitialPS_L_Multiplier_Final = DownsampleInitialPS_L_Multiplier_HalfKernelSize * 4);

  #if SUPPORTS_SHADER_ARRAYS
  pixelshader DownsampleInitialPS_L_Array[DownsampleInitialPS_L_Multiplier_Final] = { DownsampleInitialPS_L_HalfKernelSize };
  #endif
  // Initial downsample plus bloom curve, shader model 2.0 fallback.
  technique DownsampleInitial_L
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();

          // The second macro argument is the magic formula that determines how big the
          // filter kernel needs to be based on the screen resolution of the user.
          // The idea is to not ignore any pixels when downsampling, as this leads to
          // flickering of thin lines.
          // eg when going from 1024->256 you want 4x4 kernels, for 1920->256 8x8 is
          // necessary. The index is clamped to 3 here, so 8x8 (half kernel size 4) is
          // the largest kernel this technique selects.
          // The formula computes the array index, which is 1 less than the halfKernelSize
          // (since 0 does not make sense).
          //
          // The third macro argument is the non-array alternative:
          // Xenon always has a fixed screen size, so the downsample ratio can be hardcoded
          // there to be 4x4 (= half kernel size 2).
          PixelShader = ARRAY_EXPRESSION_PS(DownsampleInitialPS_L_Array,
              clamp((int)(SourceTextureSize.x / TargetTextureSize.x / 2 - 0.5), 0, 3) * DownsampleInitialPS_L_Multiplier_HalfKernelSize,
              compile PS_2_0 DownsampleInitialPS(2)
          );

          // Plain full-surface pass: no depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  298. // ----------------------------------------------------------------------------
  // Fixed-size downsample using Downsample4x4PS.
  technique Downsample4x4
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();
          PixelShader = compile PS_2_0 Downsample4x4PS();

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  312. // ----------------------------------------------------------------------------
  // Blur pass along the U axis: BlurGaussian11x11PS compiled with direction (1, 0).
  technique BlurGaussian11x11U
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();
          PixelShader = compile PS_2_0 BlurGaussian11x11PS(float2(1, 0));

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  326. // ----------------------------------------------------------------------------
  // Blur pass along the V axis: BlurGaussian11x11PS compiled with direction (0, 1).
  technique BlurGaussian11x11V
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();
          PixelShader = compile PS_2_0 BlurGaussian11x11PS(float2(0, 1));

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  340. // ----------------------------------------------------------------------------
  // Adds the bloom texture to the frame buffer using AccumulatePS.
  technique Accumulate
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();
          PixelShader = compile PS_2_0 AccumulatePS();

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  354. // ----------------------------------------------------------------------------
  // Same as Accumulate, but using AccumulatePS_M, which wraps the addition in
  // GammaToLinear / LinearToGamma conversions.
  technique Accumulate_M
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();
          PixelShader = compile PS_2_0 AccumulatePS_M();

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  368. // ----------------------------------------------------------------------------
  // Final composite of frame buffer and BloomIntensity-scaled bloom using AccumulateFinalPS.
  technique AccumulateFinal
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();
          PixelShader = compile PS_2_0 AccumulateFinalPS();

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  382. // ----------------------------------------------------------------------------
  // Same as AccumulateFinal, but using AccumulateFinalPS_M, which wraps the composite in
  // GammaToLinear / LinearToGamma conversions.
  technique AccumulateFinal_M
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();
          PixelShader = compile PS_2_0 AccumulateFinalPS_M();

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  // Copies the frame buffer through DefaultPS (sample + UncompressRenderTargetColor).
  technique Copy
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DefaultVS();
          PixelShader = compile PS_2_0 DefaultPS();

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  409. // ----------------------------------------------------------------------------
  410. // Debug display
  // Sampler used by the debug display. It binds to the same address as
  // FrameBufferSampler ("PostEffect.FrameBufferTexture") but uses point filtering
  // throughout, so texels are shown unfiltered. Addressing is clamped on both axes.
  SAMPLER_2D_BEGIN(DebugTextureSampler,
      string SasBindAddress = "PostEffect.FrameBufferTexture";
      int WW3DDynamicSet = DS_CUSTOM_FIRST;
      )
      AddressU = Clamp;
      AddressV = Clamp;
      MinFilter = Point;
      MagFilter = Point;
      MipFilter = Point;
  SAMPLER_2D_END
  421. // ----------------------------------------------------------------------------
  // Vertex shader for the debug display.
  // Halves the x extent of the input position and shifts it by -0.5, and halves the u
  // texture coordinate; y and v are left as they are.
  // NOTE(review): with a quad spanning -1..1 this would cover the left half of the
  // target and show the left half of the texture - confirm the input quad.
  VSOutput DebugDisplayVS(float2 Position : POSITION, float2 TexCoord : TEXCOORD0)
  {
      const float2 scale = float2(0.5, 1);
      const float2 shift = float2(-0.5, 0);

      VSOutput result;
      result.Position = float4(Position * scale + shift, 0, 1);
      result.TexCoord = TexCoord * scale;
      return result;
  }
  429. // ----------------------------------------------------------------------------
  // Pixel shader for the debug display: returns the DebugTextureSampler texel unmodified.
  float4 DebugDisplayPS(float2 TexCoord : TEXCOORD0) : COLOR
  {
      // Tip: To debug for NAN values, return the following instead:
      //   abs(color) + 10
      return tex2D(SAMPLER(DebugTextureSampler), TexCoord);
  }
  // Debug view of the texture bound to DebugTextureSampler.
  technique DebugDisplay
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DebugDisplayVS();
          PixelShader = compile PS_2_0 DebugDisplayPS();

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }
  450. // ----------------------------------------------------------------------------
  451. // Debug pattern
  // Vertex shader for the debug pattern.
  // Scales the y extent of the input position to a quarter and shifts it by +0.75, and
  // scales the v texture coordinate to a quarter; x and u are left as they are.
  // NOTE(review): with a quad spanning -1..1 this would be a strip along one edge of
  // the target - confirm the input quad.
  VSOutput DebugPatternVS(float2 Position : POSITION, float2 TexCoord : TEXCOORD0)
  {
      const float2 scale = float2(1, 0.25);
      const float2 shift = float2(0, 0.75);

      VSOutput result;
      result.Position = float4(Position * scale + shift, 0, 1);
      result.TexCoord = TexCoord * scale;
      return result;
  }
  459. // ----------------------------------------------------------------------------
  // Pixel shader generating a test pattern for checking the bloom filters.
  //
  // With DEBUG_PATTERN_COLOR_RAMP defined (the default, it is defined right below):
  //   a horizontal ramp - GammaToLinear applied to TexCoord.x, replicated to all four
  //   channels.
  // Without it: a pattern where every n-th pixel is white or black, computed from
  //   TexCoord and SourceTextureSize.
  //
  // The second pattern used to sit behind an unconditional return and was never
  // compiled in; the switch keeps it available without leaving unreachable code.
  float4 DebugPatternPS(float2 TexCoord : TEXCOORD0) : COLOR
  {
  #define DEBUG_PATTERN_COLOR_RAMP
  #if defined(DEBUG_PATTERN_COLOR_RAMP)
      // Write out a color ramp from black to white
      return GammaToLinear(TexCoord.x).xxxx;
  #else
      // This formula generates a pattern where every n-th pixel is white or black.
      // Useful for testing that filter kernels show the expected behavior.
      // The comparison result (true/false) is converted to 1/0 in every channel.
      float4 color = frac((TexCoord.x * SourceTextureSize.x + 0.5) / 2) + frac((TexCoord.y * SourceTextureSize.y + 0.5) / 2) > .75;
      return color;
  #endif
  }
  // Draws the debug test pattern produced by DebugPatternPS.
  technique DebugPattern
  {
      pass p0
      {
          VertexShader = compile VS_2_0 DebugPatternVS();
          PixelShader = compile PS_2_0 DebugPatternPS();

          // No depth, no culling, no alpha blend/test.
          CullMode = None;
          ZEnable = false;
          ZWriteEnable = false;
          AlphaTestEnable = false;
          AlphaBlendEnable = false;
      }
  }