| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289 |
- //
- // Copyright (c) Microsoft. All rights reserved.
- // This code is licensed under the MIT License (MIT).
- // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
- // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
- // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
- // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
- //
- // Developed by Minigraph
- //
- // Author: James Stanard
- //
// RGBM is a good way to pack HDR values into R8G8B8A8_UNORM: three 8-bit color channels that
// share one 8-bit multiplier.
//   rgb       - Color to encode.  It is divided by PeakValue and saturated, so anything outside
//               [0, PeakValue] is clamped.
//   PeakValue - Largest channel value that can be represented (16.0 by default).
// Returns R in bits 31:24, G in bits 23:16, B in bits 15:8, and the multiplier in bits 7:0.
uint PackRGBM( float3 rgb, float PeakValue = 16.0 )
{
    // Normalize to [0, 1] relative to the peak
    rgb = saturate(rgb / PeakValue);

    // The multiplier is the brightest channel rounded UP to the next 8-bit step, so no channel
    // ends up larger than the multiplier.  The 1e-6 floor keeps it at 1 or more, which avoids
    // dividing by zero for black.
    float Brightest = max(max(1e-6, rgb.x), max(rgb.y, rgb.z));
    float Multiplier = ceil(Brightest * 255.0);

    // Maps each channel to [0, 255] relative to the multiplier
    float Scale = (255 * 255.0) / Multiplier;

#if _XBOX_ONE
    // NOTE(review): __XB_PackF32ToU8 is a platform intrinsic not defined here; presumably it
    // converts the float to 8 bits and inserts it as byte N of the running word -- confirm.
    uint Packed = (uint)Multiplier;
    Packed = __XB_PackF32ToU8(rgb.r * Scale + 0.5, 3, Packed);
    Packed = __XB_PackF32ToU8(rgb.g * Scale + 0.5, 2, Packed);
    Packed = __XB_PackF32ToU8(rgb.b * Scale + 0.5, 1, Packed);
    return Packed;
#else
    // Adding 0.5 before the truncating cast rounds to nearest
    uint3 Channels = (uint3)(rgb * Scale + 0.5);
    return Channels.r << 24 | Channels.g << 16 | Channels.b << 8 | (uint)Multiplier;
#endif
}
// Decodes a color packed as RGBM.
//   p         - Packed value: R in bits 31:24, G in bits 23:16, B in bits 15:8, and the shared
//               multiplier M in bits 7:0.
//   PeakValue - Scale applied to the result (16.0 by default); use the same value that was
//               used for packing.
// Returns channel * M * PeakValue / 255^2 for each of R, G, and B.
float3 UnpackRGBM( uint p, float PeakValue = 16.0 )
{
#if _XBOX_ONE
    // NOTE(review): __XB_UnpackByteN are platform intrinsics not defined here; presumably each
    // returns byte N of the word -- confirm.
    float4 Bytes = float4(__XB_UnpackByte3(p), __XB_UnpackByte2(p), __XB_UnpackByte1(p), __XB_UnpackByte0(p));
#else
    // Isolate the four bytes, most significant first
    float4 Bytes = float4(uint4(p >> 24, p >> 16, p >> 8, p) & 0xFF);
#endif
    return Bytes.rgb * Bytes.a * PeakValue / (255.0 * 255.0);
}
// RGBE packs 9 bits per color channel while encoding the multiplier as a perfect power of 2 (just
// the exponent).  What's nice about this is that it gives you a lot more range than RGBM.  This
// isn't proven to be bitwise compatible with DXGI_FORMAT_R9G9B9E5_SHAREDEXP, but if it's not, it
// could be made so.
//   rgb - Color to encode.  Channels are clamped to [0, 32704], the range this encoding can
//         represent.
// Returns R in bits 8:0, G in bits 17:9, B in bits 26:18, and the shared 5-bit exponent in
// bits 31:27.
// NOTE(review): when the largest channel is below roughly 2^-15, the shared power of two no
// longer fits a normalized half; such colors appear to decode to (approximately) zero -- confirm
// whether that matters for your data.
uint PackRGBE(float3 rgb)
{
    // The shared exponent is stored as the exponent field of a half, so the shared power of two
    // must not exceed 2^15; anything larger converts to infinity and decodes as NaN/Inf.  The
    // largest value expressible under that limit is 2^15 * 511/512 = 32704.  Negative channels
    // would not share the common exponent after the bias below, so they are clamped to zero.
    rgb = clamp(rgb, 0.0, 32704.0);

    float MaxChannel = max(rgb.r, max(rgb.g, rgb.b));

    // NextPow2 has to have the biggest exponent plus 1 (and nothing in the mantissa)
    float NextPow2 = asfloat((asuint(MaxChannel) + 0x800000) & 0x7F800000);

    // By adding NextPow2, all channels have the same exponent, shifting their mantissa bits
    // to the right to accommodate it.  This also shifts in the implicit '1' bit of all channels.
    // The largest channel will always have the high bit set.
    rgb += NextPow2;

#if _XBOX_ONE
    // NOTE(review): __XB_UBFE is a platform intrinsic not defined here; presumably an unsigned
    // bit-field extract taking (width, offset, value) -- confirm.
    uint R = __XB_UBFE(9, 14, asuint(rgb.r));
    uint G = __XB_UBFE(9, 14, asuint(rgb.g));
    uint B = __XB_UBFE(9, 14, asuint(rgb.b));
#else
    // Keep the 9 most significant mantissa bits (bits 22:14) of each channel
    uint R = (asuint(rgb.r) << 9) >> 23;
    uint G = (asuint(rgb.g) << 9) >> 23;
    uint B = (asuint(rgb.b) << 9) >> 23;
#endif

    // As a half, the power of two keeps its exponent in bits 14:10; shifting left by 17 moves
    // those five bits to 31:27.
    uint E = f32tof16(NextPow2) << 17;
    return R | G << 9 | B << 18 | E;
}
// Decodes a color packed as RGBE.
//   p - Packed value: R in bits 8:0, G in bits 17:9, B in bits 26:18, and the shared exponent
//       (the exponent field of a half-precision power of two) in bits 31:27.
// Returns the decoded RGB color.
float3 UnpackRGBE(uint p)
{
#if _XBOX_ONE
    // NOTE(review): __XB_UBFE is a platform intrinsic not defined here; presumably an unsigned
    // bit-field extract taking (width, offset, value) -- confirm.
    float Pow2 = f16tof32(__XB_UBFE(5, 27, p) << 10);
    uint3 Mantissa = uint3(__XB_UBFE(9, 0, p), __XB_UBFE(9, 9, p), __XB_UBFE(9, 18, p)) << 14;
#else
    // Rebuild the shared power of two from its half-precision exponent field
    float Pow2 = f16tof32((p >> 27) << 10);

    // Move each 9-bit channel up to float mantissa bits 22:14
    uint3 Mantissa = uint3(p << 14, p << 5, p >> 4) & 0x7FC000;
#endif

    // Each channel was stored as the mantissa of (value + Pow2), so splice the mantissa onto
    // Pow2's exponent and subtract Pow2 again.
    float3 Biased = asfloat(asuint(Pow2) | Mantissa);
    return Biased - Pow2;
}
// Like LogLuv "minus the log".  The intention is to store Y in 16-bit float, and UV as 8-bit
// unorm values.  This is corrected from the widely publicized LogLuv encoding which used
// the wrong color primaries for sRGB and Rec.709.  Note the correct coefficients for computing
// luminance ("Y") in the 2nd row of RGBtoXYZ.
//   RGB - Linear color to encode.
// Returns (Y, u", v"), with both chroma values saturated to [0, 1].
float3 EncodeYUV(float3 RGB)
{
    // Derivation of EncodeMatrix (kept for reference):
    //
    // Start with the right RGBtoXYZ matrix for your color space (this one is sRGB D65)
    // http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
    //
    //   static const float3x3 RGBtoXYZ =
    //   {
    //       0.4124564, 0.3575761, 0.1804375,
    //       0.2126729, 0.7151522, 0.0721750,   <-- The color primaries determining luminance
    //       0.0193339, 0.1191920, 0.9503041
    //   };
    //
    // Compute u' and v'.  These pack chrominance into two normalized channels.
    //   u' = 4X / (X + 15Y + 3Z)
    //   v' = 9Y / (X + 15Y + 3Z)
    //
    // Expand visible spectrum from (0, 0.62) to (0, 1)
    //   u" = u' / 0.62
    //   v" = v' / 0.62
    //
    // If we compute these two values...
    //   X' = 4 / 9 * X
    //   XYZ' = (X + 15 * Y + 3 * Z) * 0.62 / 9
    //
    // ...we can derive our final Yu"v" from X', Y, and XYZ'
    //   u" = X' / XYZ'
    //   v" = Y / XYZ'
    //
    // We can compute (X', Y, XYZ') by multiplying XYZ by this matrix
    //
    //   static const float3x3 FixupMatrix =
    //   {
    //       4.0 / 9.0,  0.0,               0.0,
    //       0.0,        1.0,               0.0,
    //       0.62 / 9.0, 15.0 * 0.62 / 9.0, 3.0 * 0.62 / 9.0
    //   };
    //
    // But we should just concatenate the two matrices
    //
    //   static const float3x3 EncodeMatrix = mul(FixupMatrix, RGBtoXYZ);
    static const float3x3 EncodeMatrix =
    {
        0.1833140, 0.1589227, 0.0801944,
        0.2126729, 0.7151522, 0.0721750,
        0.2521713, 0.7882566, 0.2834072
    };

    // Components are (X', Y, XYZ')
    float3 Transformed = mul(EncodeMatrix, RGB);

    // Floor the denominator so that black does not divide by zero
    float Denominator = max(Transformed.z, 1e-6);
    float2 Chroma = saturate(Transformed.xy / Denominator);

    return float3(Transformed.y, Chroma);
}
// Converts a (Y, u", v") triple back to RGB by undoing the chroma division and then applying the
// inverse of the encoding matrix.
//   YUV - Luminance in x, chroma in y and z.
// Returns the decoded RGB color.
float3 DecodeYUV(float3 YUV)
{
    // Inverse of EncodeMatrix
    static const float3x3 DecodeMatrix =
    {
         7.6649220,  0.9555189, -2.4122494,
        -2.2120164,  1.6682305,  0.2010778,
        -0.6677212, -5.4901517,  5.1156056
    };

    float Luminance = YUV.x;

    // v" = Y / XYZ'  =>  XYZ' = Y / v"  (v" is floored to avoid dividing by zero)
    float XYZPrime = YUV.x / max(YUV.z, 1e-6);

    // u" = X' / XYZ'  =>  X' = u" * XYZ'
    float XPrime = YUV.y * XYZPrime;

    return mul(DecodeMatrix, float3(XPrime, Luminance, XYZPrime));
}
// If you can't write Y and UV to separate buffers (R16_FLOAT, R8G8_UNORM), then
// you can pack them into R32_UINT.
//   YUV - Luminance in x; chroma in y and z.  NOTE(review): the non-Xbox path does not clamp
//         chroma, so it presumably expects y and z in [0, 1] -- confirm callers guarantee this.
// Returns Y as a half in bits 15:0, V as 8 bits in bits 23:16, and U as 8 bits in bits 31:24.
uint PackYUV(float3 YUV)
{
    // Luminance becomes a half-precision float in the low 16 bits
    uint Packed = f32tof16(YUV.x);

#if _XBOX_ONE
    // NOTE(review): __XB_PackF32ToU8 is a platform intrinsic not defined here; presumably it
    // converts the float to 8 bits and inserts it as byte N of the running word -- confirm.
    Packed = __XB_PackF32ToU8(YUV.y * 255.0 + 0.5, 3, Packed);
    Packed = __XB_PackF32ToU8(YUV.z * 255.0 + 0.5, 2, Packed);
    return Packed;
#else
    // Scale chroma to 8 bits; adding 0.5 before the truncating cast rounds to nearest
    uint2 Chroma = (uint2)(YUV.yz * 255.0 + 0.5);
    return Packed | Chroma.x << 24 | Chroma.y << 16;
#endif
}
// Splits a packed 32-bit YUV value back into its three components.
//   YUV - Packed value: Y as a half in bits 15:0, V in bits 23:16, U in bits 31:24.
// Returns (Y, U, V), with U and V rescaled from 8-bit to [0, 1].
float3 UnpackYUV(uint YUV)
{
#if _XBOX_ONE
    // NOTE(review): __XB_UnpackByteN are platform intrinsics not defined here; presumably each
    // returns byte N of the word -- confirm.
    float2 Chroma = float2(__XB_UnpackByte3(YUV), __XB_UnpackByte2(YUV)) / 255.0;
#else
    float2 Chroma = uint2(YUV >> 24, (YUV >> 16) & 0xFF) / 255.0;
#endif

    // f16tof32 is handed the whole word; the half-precision luminance sits in its low 16 bits
    return float3(f16tof32(YUV), Chroma);
}
// To understand this, know that all math on YUV should really be done on
// Y, Y*U, Y*V.  You can add and blend all three of those values the same
// as RGB, but for compact encoding, you only want to store Y, U, and V.
//   YUV1, YUV2 - Colors to add, each as (Y, U, V).
// Returns the sum as (Y, U, V).  If the summed luminance is exactly zero (for example when both
// inputs are black), the division is skipped so that the chroma is finite instead of NaN.
float3 AddYUV( float3 YUV1, float3 YUV2 )
{
    // Luminance is simply added; chrominance becomes a weighted average
    float Y = YUV1.x + YUV2.x;
    float2 WeightedUV = YUV1.yz * YUV1.x + YUV2.yz * YUV2.x;

    // Dividing by a zero sum would produce 0/0 = NaN, which then spreads through every later
    // blend.  Any non-zero sum divides exactly as before.
    float Divisor = (Y != 0.0) ? Y : 1.0;
    float2 UV = WeightedUV / Divisor;

    return float3(Y, UV);
}
// Blends two YUV colors: t = 0 weights YUV1 fully, t = 1 weights YUV2 fully.
//
// To rescale a YUV value, you just have to rescale Y.  Chroma remains the same.  After scaling
// luminance, you can add the two colors together.  This version of the math is more efficient
// than the possibly more readable form:
//
//   float Y = lerp(YUV1.x, YUV2.x, t);
//   float2 UV = lerp(YUV1.yz * YUV1.x, YUV2.yz * YUV2.x, t) / Y;
//   return float3(Y, UV);
//
// But it's interesting to note that if you kept values as Y, Y*U, Y*V, you could simply add or
// lerp them.
float3 LerpYUV( float3 YUV1, float3 YUV2, float t )
{
    // Weight only the luminance of each input; chroma passes through untouched
    float3 Scaled1 = float3(YUV1.x * (1 - t), YUV1.yz);
    float3 Scaled2 = float3(YUV2.x * t, YUV2.yz);

    return AddYUV(Scaled1, Scaled2);
}
// The standard 32-bit HDR color format.  Each float has a 5-bit exponent and no sign bit.
//   rgb - Color to encode.  Each channel is converted to half precision, then its sign bit and
//         lowest mantissa bits are masked away (truncated, not rounded).
// Returns red (11 bits) in bits 10:0, green (11 bits) in bits 21:11, blue (10 bits) in bits 31:22.
uint Pack_R11G11B10_FLOAT( float3 rgb )
{
    // Convert all three channels to half precision at once
    uint3 Half = f32tof16(rgb);

    uint Packed = (Half.x >> 4) & 0x000007FF;   // red:   half bits 14:4 -> 10:0
    Packed |= (Half.y << 7) & 0x003FF800;       // green: half bits 14:4 -> 21:11
    Packed |= (Half.z << 17) & 0xFFC00000;      // blue:  half bits 14:5 -> 31:22
    return Packed;
}
// Expands a packed R11G11B10_FLOAT value into three floats.
//   rgb - Packed value: red in bits 10:0, green in bits 21:11, blue in bits 31:22.
// Returns the decoded color.
float3 Unpack_R11G11B10_FLOAT( uint rgb )
{
    // Move each field back to half-precision bit positions; the sign bit and the mantissa bits
    // dropped during packing stay zero.
    uint3 Half = uint3(rgb << 4, rgb >> 7, rgb >> 17) & uint3(0x7FF0, 0x7FF0, 0x7FE0);
    return f16tof32(Half);
}
// These next two encodings are great for LDR data.  By knowing that our values are [0.0, 1.0]
// (or [0.0, 2.0), incidentally), we can reduce how many bits we need in the exponent.  We can
// immediately eliminate all positive exponents.  By giving more bits to the mantissa, we can
// improve precision at the expense of range.  The 8E3 format goes one bit further, quadrupling
// mantissa precision but increasing the smallest exponent from -14 to -6.  The smallest value of
// 8E3 is 2^-14, while the smallest value of 7E4 is 2^-21.  Both are smaller than the smallest
// 8-bit sRGB value, which is close to 2^-12.
// This is like R11G11B10_FLOAT except that it moves one bit from each exponent to each mantissa.
//   rgb - Color to encode; each channel is clamped to [0.0, 2.0).
// Returns red (11 bits) in bits 10:0, green (11 bits) in bits 21:11, blue (10 bits) in bits 31:22.
uint Pack_R11G11B10_E4_FLOAT( float3 rgb )
{
    // Clamp to [0.0, 2.0).  The magic number is 1.FFFFF x 2^0.  (We can't represent hex floats in HLSL.)
    // This trick works because clamping your exponent to 0 reduces the number of bits needed by 1.
    float3 Clamped = clamp( rgb, 0.0, asfloat(0x3FFFFFFF) );
    uint3 Half = f32tof16(Clamped);

    uint Packed = (Half.r >> 3) & 0x000007FF;   // red:   half bits 13:3 -> 10:0
    Packed |= (Half.g << 8) & 0x003FF800;       // green: half bits 13:3 -> 21:11
    Packed |= (Half.b << 18) & 0xFFC00000;      // blue:  half bits 13:4 -> 31:22
    return Packed;
}
// Expands a value packed in the 4-bit-exponent R11G11B10 layout.
//   rgb - Packed value: red in bits 10:0, green in bits 21:11, blue in bits 31:22.
// Returns the decoded color.
float3 Unpack_R11G11B10_E4_FLOAT( uint rgb )
{
    // Move each field back to half-precision bits 13:3 (13:4 for blue); the top exponent bit
    // and the sign bit stay zero.
    uint3 Half = uint3(rgb << 3, rgb >> 8, rgb >> 18) & uint3(0x3FF8, 0x3FF8, 0x3FF0);
    return f16tof32(Half);
}
// This is like R11G11B10_FLOAT except that it moves two bits from each exponent to each mantissa.
//   rgb - Color to encode; each channel is clamped to [0.0, 2.0).
// Returns red (11 bits) in bits 10:0, green (11 bits) in bits 21:11, blue (10 bits) in bits 31:22.
uint Pack_R11G11B10_E3_FLOAT( float3 rgb )
{
    // Clamp to [0.0, 2.0).  Divide by 256 to bias the exponent by -8.  This shifts it down to use one
    // fewer bit while still taking advantage of the denormalization hardware.  In half precision,
    // the exponent of 0 is 0xF.  Dividing by 256 makes the max exponent 0x7--one fewer bit.
    float3 Biased = clamp( rgb, 0.0, asfloat(0x3FFFFFFF) ) / 256.0;
    uint3 Half = f32tof16(Biased);

    uint Packed = (Half.r >> 2) & 0x000007FF;   // red:   half bits 12:2 -> 10:0
    Packed |= (Half.g << 9) & 0x003FF800;       // green: half bits 12:2 -> 21:11
    Packed |= (Half.b << 19) & 0xFFC00000;      // blue:  half bits 12:3 -> 31:22
    return Packed;
}
// Expands a value packed in the 3-bit-exponent R11G11B10 layout.
//   rgb - Packed value: red in bits 10:0, green in bits 21:11, blue in bits 31:22.
// Returns the decoded color.
float3 Unpack_R11G11B10_E3_FLOAT( uint rgb )
{
    // Move each field back to half-precision bits 12:2 (12:3 for blue)
    uint3 Half = uint3(rgb << 2, rgb >> 9, rgb >> 19) & uint3(0x1FFC, 0x1FFC, 0x1FF8);

    // Multiplying by 256 undoes the divide by 256 applied when packing
    return f16tof32(Half) * 256.0;
}
|