// lightAPI.cpp
  1. #include "lightAPI.h"
  2. #include "../../../DFPSR/base/simd3D.h"
  3. #include "../../../DFPSR/base/threading.h" // TODO: Make an official "dangerous" API for multi-threading
  4. namespace dsr {
  5. // Precondition: The packed color must be in the standard RGBA order, meaning no native packing
  6. inline F32x4x3 unpackRgb_U32x4_to_F32x4x3(const U32x4& color) {
  7. return F32x4x3(floatFromU32(getRed(color)), floatFromU32(getGreen(color)), floatFromU32(getBlue(color)));
  8. }
  9. static inline void setLight(SafePointer<uint8_t> lightPixel, U8x16 newlight) {
  10. newlight.writeAligned(lightPixel, "setLight: writing light");
  11. }
  12. static inline void addLight(SafePointer<uint8_t> lightPixel, U8x16 addedlight) {
  13. U8x16 oldLight = U8x16::readAligned(lightPixel, "addLight: reading light");
  14. U8x16 newlight = saturatedAddition(oldLight, addedlight);
  15. newlight.writeAligned(lightPixel, "addLight: writing light");
  16. }
  17. template <bool ADD_LIGHT>
  18. void directedLight(const FMatrix3x3& normalToWorldSpace, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const FVector3D& lightDirection, float lightIntensity, const ColorRgbI32& lightColor) {
  19. // Normals in range 0..255 - 128 have lengths of 127 and 128, so if we double the reverse light direction we'll end up near 0..255 again for colors
  20. F32x4x3 reverseLightDirection = F32x4x3(-normalize(normalToWorldSpace.transformTransposed(lightDirection)) * lightIntensity * 2.0f);
  21. IRect rectangleBound = image_getBound(lightBuffer);
  22. float colorR = std::max(0.0f, (float)lightColor.red / 255.0f);
  23. float colorG = std::max(0.0f, (float)lightColor.green / 255.0f);
  24. float colorB = std::max(0.0f, (float)lightColor.blue / 255.0f);
  25. threadedSplit(rectangleBound, [
  26. lightBuffer, normalBuffer, reverseLightDirection, colorR, colorG, colorB](const IRect& bound) mutable {
  27. SafePointer<uint8_t> lightRow = image_getSafePointer_channels(lightBuffer, bound.top());
  28. SafePointer<uint32_t> normalRow = image_getSafePointer(normalBuffer, bound.top());
  29. int lightStride = image_getStride(lightBuffer);
  30. int normalStride = image_getStride(normalBuffer);
  31. for (int y = bound.top(); y < bound.bottom(); y++) {
  32. SafePointer<uint8_t> lightPixel = lightRow;
  33. SafePointer<uint32_t> normalPixel = normalRow;
  34. for (int x4 = bound.left(); x4 < bound.right(); x4+=4) {
  35. // Read surface normals
  36. U32x4 normalColor = U32x4::readAligned(normalPixel, "directedLight: reading normal");
  37. F32x4x3 negativeSurfaceNormal = unpackRgb_U32x4_to_F32x4x3(normalColor) - 128.0f;
  38. // Calculate light intensity
  39. // Normalization and negation is already pre-multiplied into reverseLightDirection
  40. F32x4 intensity = dotProduct(negativeSurfaceNormal, reverseLightDirection).clampLower(0.0f);
  41. F32x4 red = intensity * colorR;
  42. F32x4 green = intensity * colorG;
  43. F32x4 blue = intensity * colorB;
  44. red = red.clampUpper(255.1f);
  45. green = green.clampUpper(255.1f);
  46. blue = blue.clampUpper(255.1f);
  47. U8x16 light = reinterpret_U8FromU32(packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue)));
  48. if (ADD_LIGHT) {
  49. addLight(lightPixel, light);
  50. } else {
  51. setLight(lightPixel, light);
  52. }
  53. lightPixel += 16;
  54. normalPixel += 4;
  55. }
  56. lightRow.increaseBytes(lightStride);
  57. normalRow.increaseBytes(normalStride);
  58. }
  59. });
  60. }
  61. void setDirectedLight(const OrthoView& camera, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const FVector3D& lightDirection, float lightIntensity, const ColorRgbI32& lightColor) {
  62. directedLight<false>(camera.normalToWorldSpace, lightBuffer, normalBuffer, lightDirection, lightIntensity, ColorRgbI32(255, 255, 255));
  63. }
  64. void addDirectedLight(const OrthoView& camera, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const FVector3D& lightDirection, float lightIntensity, const ColorRgbI32& lightColor) {
  65. directedLight<true>(camera.normalToWorldSpace, lightBuffer, normalBuffer, lightDirection, lightIntensity, ColorRgbI32(255, 255, 255));
  66. }
  67. static IRect calculateBound(const OrthoView& camera, const IVector2D& worldCenter, OrderedImageRgbaU8& lightBuffer, const FVector3D& lightSpacePosition, float lightRadius, int alignmentPixels) {
  68. // Get the light's 2D position in pixels
  69. FVector3D rotatedPosition = camera.lightSpaceToScreenDepth.transform(lightSpacePosition);
  70. IVector2D pixelCenter = IVector2D(rotatedPosition.x, rotatedPosition.y) + worldCenter;
  71. // Use the light-space X axis to convert the sphere's radius into pixels
  72. int pixelRadius = lightRadius * camera.lightSpaceToScreenDepth.xAxis.x;
  73. // Check if the location can be seen
  74. IRect imageBound = image_getBound(lightBuffer);
  75. if (pixelCenter.x < -pixelRadius
  76. || pixelCenter.x > imageBound.right() + pixelRadius
  77. || pixelCenter.y < -pixelRadius
  78. || pixelCenter.y > imageBound.bottom() + pixelRadius) {
  79. // The light source cannot be seen at all
  80. return IRect();
  81. }
  82. // Calculate the bound
  83. IRect result = IRect::cut(imageBound, IRect(pixelCenter.x - pixelRadius, pixelCenter.y - pixelRadius, pixelRadius * 2.0f, pixelRadius * 2.0f));
  84. // Round out to multiples of SIMD vectors
  85. if (result.hasArea() && alignmentPixels > 1) {
  86. int left = roundDown(result.left(), alignmentPixels);
  87. int right = roundUp(result.right(), alignmentPixels);
  88. result = IRect(left, result.top(), right - left, result.height());
  89. }
  90. return result;
  91. }
  92. // Returns:
  93. // 0.0 for blocked
  94. // 1.0 for passing
  95. // Values between 0.0 and 1.0 for fuzzy thresholding
  96. // Precondition: pixelData Does not contain any padding by using widths in multiples of 4 pixels
  97. static float getShadowTransparency(SafePointer<float> pixelData, int32_t width, float halfWidth, const FVector3D& lightOffset) {
  98. // Get lengths
  99. float absX = lightOffset.x; if (absX < 0.0f) { absX = -absX; }
  100. float absY = lightOffset.y; if (absY < 0.0f) { absY = -absY; }
  101. float absZ = lightOffset.z; if (absZ < 0.0f) { absZ = -absZ; }
  102. // Compare dimensions
  103. bool xIsLongest = absX > absY && absX > absZ;
  104. bool yIsLongerThanZ = absY > absZ;
  105. // Transform
  106. float depth = xIsLongest ? lightOffset.x : (yIsLongerThanZ ? lightOffset.y : lightOffset.z);
  107. float slopeUp = (yIsLongerThanZ && !xIsLongest) ? lightOffset.z : lightOffset.y;
  108. float slopeSide = xIsLongest ? -lightOffset.z : (yIsLongerThanZ ? -lightOffset.x : lightOffset.x);
  109. int32_t viewOffset = width * (xIsLongest ? 0 : (yIsLongerThanZ ? 2 : 4));
  110. bool negativeSide = depth < 0.0f;
  111. if (negativeSide) { depth = -depth; }
  112. if (negativeSide) { slopeSide = -slopeSide; }
  113. if (negativeSide) { viewOffset = viewOffset + width; }
  114. // Project and round to pixels
  115. float reciDepth = 1.0f / depth;
  116. float scale = halfWidth * reciDepth;
  117. int32_t sampleX = (int)(halfWidth + (slopeSide * scale));
  118. int32_t sampleY = (int)(halfWidth - (slopeUp * scale));
  119. // Clamp to local view coordinates
  120. int32_t maxPixel = width - 1;
  121. if (sampleX < 0) { sampleX = 0; }
  122. if (sampleX > maxPixel) { sampleX = maxPixel; }
  123. if (sampleY < 0) { sampleY = 0; }
  124. if (sampleY > maxPixel) { sampleY = maxPixel; }
  125. // Read the depth pixel
  126. float shadowReciDepth = pixelData[((sampleY + viewOffset) * width) + sampleX];
  127. // Apply biased thresholding
  128. return reciDepth * 1.02f > shadowReciDepth ? 1.0f : 0.0f;
  129. }
  130. static inline F32x4 getShadowTransparency(SafePointer<float> pixelData, int32_t width, float halfWidth, const F32x4x3& lightOffset) {
  131. FVector4D offsetX = lightOffset.v1.get();
  132. FVector4D offsetY = lightOffset.v2.get();
  133. FVector4D offsetZ = lightOffset.v3.get();
  134. return F32x4(
  135. getShadowTransparency(pixelData, width, halfWidth, FVector3D(offsetX.x, offsetY.x, offsetZ.x)),
  136. getShadowTransparency(pixelData, width, halfWidth, FVector3D(offsetX.y, offsetY.y, offsetZ.y)),
  137. getShadowTransparency(pixelData, width, halfWidth, FVector3D(offsetX.z, offsetY.z, offsetZ.z)),
  138. getShadowTransparency(pixelData, width, halfWidth, FVector3D(offsetX.w, offsetY.w, offsetZ.w))
  139. );
  140. }
  141. template <bool SHADOW_CASTING>
  142. static void addPointLightSuper(const OrthoView& camera, const IVector2D& worldCenter, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const AlignedImageF32& heightBuffer, const FVector3D& lightPosition, float lightRadius, float lightIntensity, const ColorRgbI32& lightColor, const AlignedImageF32& shadowCubeMap) {
  143. // Rotate the light position from relative space to light space
  144. // Normal-space defines the rotation for light-space
  145. FVector3D lightSpaceSourcePosition = camera.normalToWorldSpace.transformTransposed(lightPosition);
  146. // Align the rectangle with 8 pixels, because that's the widest read to align in the 16-bit height buffer
  147. IRect rectangleBound = calculateBound(camera, worldCenter, lightBuffer, lightSpaceSourcePosition, lightRadius, 4);
  148. if (rectangleBound.hasArea()) {
  149. // Uniform values
  150. // How much closer to your face in light-space does the pixel go per depth unit
  151. F32x4x3 inYourFaceAxis = F32x4x3(camera.screenDepthToLightSpace.zAxis);
  152. // Light color
  153. float colorR = std::max(0.0f, (float)lightColor.red * lightIntensity);
  154. float colorG = std::max(0.0f, (float)lightColor.green * lightIntensity);
  155. float colorB = std::max(0.0f, (float)lightColor.blue * lightIntensity);
  156. float reciprocalRadius = 1.0f / lightRadius;
  157. threadedSplit(rectangleBound, [
  158. lightBuffer, normalBuffer, heightBuffer, camera, worldCenter, inYourFaceAxis, lightSpaceSourcePosition,
  159. reciprocalRadius, colorR, colorG, colorB, shadowCubeMap](const IRect& bound) mutable {
  160. // Initiate the local light-space sweep along base height
  161. // The local light space is rotated like normal-space but has the origin at the light source
  162. FVector3D lightBaseRow = camera.screenDepthToLightSpace.transform(FVector3D(0.5f - (float)worldCenter.x + bound.left(), 0.5f - (float)worldCenter.y + bound.top(), 0.0f)) - lightSpaceSourcePosition;
  163. FVector3D dx = camera.screenDepthToLightSpace.xAxis;
  164. FVector3D dy = camera.screenDepthToLightSpace.yAxis;
  165. // Pack the offset for each of the 4 first pixels into a transposing constructor
  166. F32x4x3 lightBaseRowX4 = F32x4x3(lightBaseRow, lightBaseRow + dx, lightBaseRow + dx * 2.0f, lightBaseRow + dx * 3.0f);
  167. // Derivatives for moving four pixels to the right in parallel
  168. // (n+0, y0), (n+1, y0), (n+2, y0), (n+3, y0) -> (n+4, y0), (n+5, y0), (n+6, y0), (n+7, y0)
  169. F32x4x3 dx4 = F32x4x3(dx * 4.0f);
  170. // Derivatives for moving one pixel down in parallel
  171. // (x0, n+0), (x1, n+0), (x2, n+0), (x3, n+0)
  172. // -> (x0, n+1), (x1, n+1), (x2, n+1), (x3, n+1)
  173. F32x4x3 dy1 = F32x4x3(dy);
  174. // Get strides
  175. int lightStride = image_getStride(lightBuffer);
  176. int normalStride = image_getStride(normalBuffer);
  177. int heightStride = image_getStride(heightBuffer);
  178. // Get pointers
  179. SafePointer<uint8_t> lightRow = image_getSafePointer_channels(lightBuffer, bound.top()) + bound.left() * 4;
  180. SafePointer<uint32_t> normalRow = image_getSafePointer(normalBuffer, bound.top()) + bound.left();
  181. SafePointer<float> heightRow = image_getSafePointer(heightBuffer, bound.top()) + bound.left();
  182. // Get cube map for casting shadows
  183. int32_t shadowCubeWidth;
  184. SafePointer<float> shadowCubeData;
  185. float shadowCubeCenter;
  186. if (SHADOW_CASTING) {
  187. shadowCubeWidth = image_getWidth(shadowCubeMap); assert(shadowCubeWidth % 4 == 0);
  188. shadowCubeData = image_getSafePointer(shadowCubeMap);
  189. shadowCubeCenter = (float)shadowCubeWidth * 0.5f;
  190. }
  191. // Loop over the pixels to add light
  192. for (int y = bound.top(); y < bound.bottom(); y++) {
  193. // Initiate the leftmost pixels before iterating to the right
  194. F32x4x3 lightBasePixelx4 = lightBaseRowX4;
  195. SafePointer<uint8_t> lightPixel = lightRow;
  196. SafePointer<uint32_t> normalPixel = normalRow;
  197. SafePointer<float> heightPixel = heightRow;
  198. // Iterate over 16-bit pixels 8 at a time
  199. for (int x4 = bound.left(); x4 < bound.right(); x4+=4) {
  200. // Read pixel height
  201. F32x4 depthOffset = F32x4::readAligned(heightPixel, "addPointLight: reading height");
  202. // Extrude the pixel using positive values towards the camera to represent another height
  203. // This will solve X and Z positions based on the height Y
  204. F32x4x3 lightOffset = lightBasePixelx4 + (inYourFaceAxis * depthOffset);
  205. // Get the linear distance, divide by sphere radius and limit to length 1 at intensity 0
  206. F32x4 lightRatio = min(F32x4(1.0f), length(lightOffset) * reciprocalRadius);
  207. // Read surface normal
  208. U32x4 normalColor = U32x4::readAligned(normalPixel, "addPointLight: reading normal");
  209. // normalScale is used to negate the normals in advance so that opposing directions get positive values
  210. F32x4x3 negativeSurfaceNormal = (unpackRgb_U32x4_to_F32x4x3(normalColor) - 128.0f) * (-1.0f / 128.0f);
  211. // Fade from 0 to 1 using 1 - 2x + x²
  212. F32x4 distanceIntensity = 1.0f - 2.0f * lightRatio + lightRatio * lightRatio;
  213. F32x4 angleIntensity = max(F32x4(0.0f), dotProduct(normalize(lightOffset), negativeSurfaceNormal));
  214. F32x4 intensity = angleIntensity * distanceIntensity;
  215. if (SHADOW_CASTING) {
  216. intensity = intensity * getShadowTransparency(shadowCubeData, shadowCubeWidth, shadowCubeCenter, lightOffset);
  217. }
  218. // TODO: Make an optimized version for white light replacing red, green and blue with a single LUMA
  219. F32x4 red = intensity * colorR;
  220. F32x4 green = intensity * colorG;
  221. F32x4 blue = intensity * colorB;
  222. red = red.clampUpper(255.1f);
  223. green = green.clampUpper(255.1f);
  224. blue = blue.clampUpper(255.1f);
  225. // Add light to the image
  226. U8x16 morelight = reinterpret_U8FromU32(packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue)));
  227. addLight(lightPixel, morelight);
  228. // Go to the next four pixels in light-space
  229. lightBasePixelx4 += dx4;
  230. // Go to the next 4 pixels of image data
  231. lightPixel += 16;
  232. normalPixel += 4;
  233. heightPixel += 4;
  234. }
  235. // Go to the next row in light-space
  236. lightBaseRowX4 += dy1;
  237. // Go to the next row of image data
  238. lightRow.increaseBytes(lightStride);
  239. normalRow.increaseBytes(normalStride);
  240. heightRow.increaseBytes(heightStride);
  241. }
  242. });
  243. }
  244. }
  245. void addPointLight(const OrthoView& camera, const IVector2D& worldCenter, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const AlignedImageF32& heightBuffer, const FVector3D& lightPosition, float lightRadius, float lightIntensity, const ColorRgbI32& lightColor, const AlignedImageF32& shadowCubeMap) {
  246. if (image_exists(shadowCubeMap)) {
  247. addPointLightSuper<true>(camera, worldCenter, lightBuffer, normalBuffer, heightBuffer, lightPosition, lightRadius, lightIntensity, lightColor, shadowCubeMap);
  248. } else {
  249. addPointLightSuper<false>(camera, worldCenter, lightBuffer, normalBuffer, heightBuffer, lightPosition, lightRadius, lightIntensity, lightColor, AlignedImageF32());
  250. }
  251. }
  252. void addPointLight(const OrthoView& camera, const IVector2D& worldCenter, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const AlignedImageF32& heightBuffer, const FVector3D& lightPosition, float lightRadius, float lightIntensity, const ColorRgbI32& lightColor) {
  253. addPointLightSuper<false>(camera, worldCenter, lightBuffer, normalBuffer, heightBuffer, lightPosition, lightRadius, lightIntensity, lightColor, AlignedImageF32());
  254. }
  255. void blendLight(AlignedImageRgbaU8& colorBuffer, const OrderedImageRgbaU8& diffuseBuffer, const OrderedImageRgbaU8& lightBuffer) {
  256. PackOrder targetOrder = PackOrder::getPackOrder(image_getPackOrderIndex(colorBuffer));
  257. int width = image_getWidth(colorBuffer);
  258. int height = image_getHeight(colorBuffer);
  259. threadedSplit(0, height, [colorBuffer, diffuseBuffer, lightBuffer, targetOrder, width](int startIndex, int stopIndex) mutable {
  260. SafePointer<uint32_t> targetRow = image_getSafePointer(colorBuffer, startIndex);
  261. SafePointer<uint32_t> diffuseRow = image_getSafePointer(diffuseBuffer, startIndex);
  262. SafePointer<uint32_t> lightRow = image_getSafePointer(lightBuffer, startIndex);
  263. int targetStride = image_getStride(colorBuffer);
  264. int diffuseStride = image_getStride(diffuseBuffer);
  265. int lightStride = image_getStride(lightBuffer);
  266. F32x4 scale = F32x4(1.0 / 128.0f);
  267. for (int y = startIndex; y < stopIndex; y++) {
  268. SafePointer<uint32_t> targetPixel = targetRow;
  269. SafePointer<uint32_t> diffusePixel = diffuseRow;
  270. SafePointer<uint32_t> lightPixel = lightRow;
  271. for (int x4 = 0; x4 < width; x4 += 4) {
  272. U32x4 diffuse = U32x4::readAligned(diffusePixel, "blendLight: reading diffuse");
  273. U32x4 light = U32x4::readAligned(lightPixel, "blendLight: reading light");
  274. F32x4 red = (floatFromU32(getRed(diffuse)) * floatFromU32(getRed(light))) * scale;
  275. F32x4 green = (floatFromU32(getGreen(diffuse)) * floatFromU32(getGreen(light))) * scale;
  276. F32x4 blue = (floatFromU32(getBlue(diffuse)) * floatFromU32(getBlue(light))) * scale;
  277. red = red.clampUpper(255.1f);
  278. green = green.clampUpper(255.1f);
  279. blue = blue.clampUpper(255.1f);
  280. U32x4 color = packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue), targetOrder);
  281. color.writeAligned(targetPixel, "blendLight: writing color");
  282. targetPixel += 4;
  283. diffusePixel += 4;
  284. lightPixel += 4;
  285. }
  286. targetRow.increaseBytes(targetStride);
  287. diffuseRow.increaseBytes(diffuseStride);
  288. lightRow.increaseBytes(lightStride);
  289. }
  290. });
  291. }
  292. }