lightAPI.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. 
  2. #include "lightAPI.h"
  3. #include "../../DFPSR/base/simd3D.h"
  4. #include "../../DFPSR/base/threading.h" // TODO: Make an official "dangerous" API for multi-threading
  5. namespace dsr {
  6. // Precondition: The packed color must be in the standard RGBA order, meaning no native packing
  7. inline F32x4x3 unpackRgb_U32x4_to_F32x4x3(const U32x4& color) {
  8. return F32x4x3(floatFromU32(getRed(color)), floatFromU32(getGreen(color)), floatFromU32(getBlue(color)));
  9. }
  10. static inline void setLight(SafePointer<uint8_t> lightPixel, U8x16 newlight) {
  11. newlight.writeAligned(lightPixel, "setLight: writing light");
  12. }
  13. static inline void addLight(SafePointer<uint8_t> lightPixel, U8x16 addedlight) {
  14. U8x16 oldLight = U8x16::readAligned(lightPixel, "addLight: reading light");
  15. U8x16 newlight = saturatedAddition(oldLight, addedlight);
  16. newlight.writeAligned(lightPixel, "addLight: writing light");
  17. }
  18. template <bool ADD_LIGHT>
  19. void directedLight(const FMatrix3x3& normalToWorldSpace, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const FVector3D& lightDirection, float lightIntensity, const ColorRgbI32& lightColor) {
  20. // Normals in range 0..255 - 128 have lengths of 127 and 128, so if we double the reverse light direction we'll end up near 0..255 again for colors
  21. F32x4x3 reverseLightDirection = F32x4x3(-normalize(normalToWorldSpace.transformTransposed(lightDirection)) * lightIntensity * 2.0f);
  22. IRect rectangleBound = image_getBound(lightBuffer);
  23. float colorR = std::max(0.0f, (float)lightColor.red / 255.0f);
  24. float colorG = std::max(0.0f, (float)lightColor.green / 255.0f);
  25. float colorB = std::max(0.0f, (float)lightColor.blue / 255.0f);
  26. threadedSplit(rectangleBound, [
  27. lightBuffer, normalBuffer, reverseLightDirection, colorR, colorG, colorB](const IRect& bound) mutable {
  28. SafePointer<uint8_t> lightRow = image_getSafePointer_channels(lightBuffer, bound.top());
  29. SafePointer<uint32_t> normalRow = image_getSafePointer(normalBuffer, bound.top());
  30. int lightStride = image_getStride(lightBuffer);
  31. int normalStride = image_getStride(normalBuffer);
  32. for (int y = bound.top(); y < bound.bottom(); y++) {
  33. SafePointer<uint8_t> lightPixel = lightRow;
  34. SafePointer<uint32_t> normalPixel = normalRow;
  35. for (int x4 = bound.left(); x4 < bound.right(); x4+=4) {
  36. // Read surface normals
  37. U32x4 normalColor = U32x4::readAligned(normalPixel, "directedLight: reading normal");
  38. F32x4x3 negativeSurfaceNormal = unpackRgb_U32x4_to_F32x4x3(normalColor) - 128.0f;
  39. // Calculate light intensity
  40. // Normalization and negation is already pre-multiplied into reverseLightDirection
  41. F32x4 intensity = dotProduct(negativeSurfaceNormal, reverseLightDirection).clampLower(0.0f);
  42. F32x4 red = intensity * colorR;
  43. F32x4 green = intensity * colorG;
  44. F32x4 blue = intensity * colorB;
  45. red = red.clampUpper(255.1f);
  46. green = green.clampUpper(255.1f);
  47. blue = blue.clampUpper(255.1f);
  48. U8x16 light = reinterpret_U8FromU32(packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue)));
  49. if (ADD_LIGHT) {
  50. addLight(lightPixel, light);
  51. } else {
  52. setLight(lightPixel, light);
  53. }
  54. lightPixel += 16;
  55. normalPixel += 4;
  56. }
  57. lightRow.increaseBytes(lightStride);
  58. normalRow.increaseBytes(normalStride);
  59. }
  60. });
  61. }
  62. void setDirectedLight(const OrthoView& camera, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const FVector3D& lightDirection, float lightIntensity, const ColorRgbI32& lightColor) {
  63. directedLight<false>(camera.normalToWorldSpace, lightBuffer, normalBuffer, lightDirection, lightIntensity, lightColor);
  64. }
  65. void addDirectedLight(const OrthoView& camera, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const FVector3D& lightDirection, float lightIntensity, const ColorRgbI32& lightColor) {
  66. directedLight<true>(camera.normalToWorldSpace, lightBuffer, normalBuffer, lightDirection, lightIntensity, lightColor);
  67. }
  68. static IRect calculateBound(const OrthoView& camera, const IVector2D& worldCenter, OrderedImageRgbaU8& lightBuffer, const FVector3D& lightSpacePosition, float lightRadius, int alignmentPixels) {
  69. // Get the light's 2D position in pixels
  70. FVector3D rotatedPosition = camera.lightSpaceToScreenDepth.transform(lightSpacePosition);
  71. IVector2D pixelCenter = IVector2D(rotatedPosition.x, rotatedPosition.y) + worldCenter;
  72. // Use the light-space X axis to convert the sphere's radius into pixels
  73. int pixelRadius = lightRadius * camera.lightSpaceToScreenDepth.xAxis.x;
  74. // Check if the location can be seen
  75. IRect imageBound = image_getBound(lightBuffer);
  76. if (pixelCenter.x < -pixelRadius
  77. || pixelCenter.x > imageBound.right() + pixelRadius
  78. || pixelCenter.y < -pixelRadius
  79. || pixelCenter.y > imageBound.bottom() + pixelRadius) {
  80. // The light source cannot be seen at all
  81. return IRect();
  82. }
  83. // Calculate the bound
  84. IRect result = IRect::cut(imageBound, IRect(pixelCenter.x - pixelRadius, pixelCenter.y - pixelRadius, pixelRadius * 2.0f, pixelRadius * 2.0f));
  85. // Round out to multiples of SIMD vectors
  86. if (result.hasArea() && alignmentPixels > 1) {
  87. int left = roundDown(result.left(), alignmentPixels);
  88. int right = roundUp(result.right(), alignmentPixels);
  89. result = IRect(left, result.top(), right - left, result.height());
  90. }
  91. return result;
  92. }
  93. // Returns:
  94. // 0.0 for blocked
  95. // 1.0 for passing
  96. // Values between 0.0 and 1.0 for fuzzy thresholding
  97. // Precondition: pixelData Does not contain any padding by using widths in multiples of 4 pixels
  98. static float getShadowTransparency(SafePointer<float> pixelData, int32_t width, float halfWidth, const FVector3D& lightOffset) {
  99. // Get lengths
  100. float absX = lightOffset.x; if (absX < 0.0f) { absX = -absX; }
  101. float absY = lightOffset.y; if (absY < 0.0f) { absY = -absY; }
  102. float absZ = lightOffset.z; if (absZ < 0.0f) { absZ = -absZ; }
  103. // Compare dimensions
  104. bool xIsLongest = absX > absY && absX > absZ;
  105. bool yIsLongerThanZ = absY > absZ;
  106. // Transform
  107. float depth = xIsLongest ? lightOffset.x : (yIsLongerThanZ ? lightOffset.y : lightOffset.z);
  108. float slopeUp = (yIsLongerThanZ && !xIsLongest) ? lightOffset.z : lightOffset.y;
  109. float slopeSide = xIsLongest ? -lightOffset.z : (yIsLongerThanZ ? -lightOffset.x : lightOffset.x);
  110. int32_t viewOffset = width * (xIsLongest ? 0 : (yIsLongerThanZ ? 2 : 4));
  111. bool negativeSide = depth < 0.0f;
  112. if (negativeSide) { depth = -depth; }
  113. if (negativeSide) { slopeSide = -slopeSide; }
  114. if (negativeSide) { viewOffset = viewOffset + width; }
  115. // Project and round to pixels
  116. float reciDepth = 1.0f / depth;
  117. float scale = halfWidth * reciDepth;
  118. int32_t sampleX = (int)(halfWidth + (slopeSide * scale));
  119. int32_t sampleY = (int)(halfWidth - (slopeUp * scale));
  120. // Clamp to local view coordinates
  121. int32_t maxPixel = width - 1;
  122. if (sampleX < 0) { sampleX = 0; }
  123. if (sampleX > maxPixel) { sampleX = maxPixel; }
  124. if (sampleY < 0) { sampleY = 0; }
  125. if (sampleY > maxPixel) { sampleY = maxPixel; }
  126. // Read the depth pixel
  127. float shadowReciDepth = pixelData[((sampleY + viewOffset) * width) + sampleX];
  128. // Apply biased thresholding
  129. return reciDepth * 1.02f > shadowReciDepth ? 1.0f : 0.0f;
  130. }
  131. static inline F32x4 getShadowTransparency(SafePointer<float> pixelData, int32_t width, float halfWidth, const F32x4x3& lightOffset) {
  132. FVector4D offsetX = lightOffset.v1.get();
  133. FVector4D offsetY = lightOffset.v2.get();
  134. FVector4D offsetZ = lightOffset.v3.get();
  135. return F32x4(
  136. getShadowTransparency(pixelData, width, halfWidth, FVector3D(offsetX.x, offsetY.x, offsetZ.x)),
  137. getShadowTransparency(pixelData, width, halfWidth, FVector3D(offsetX.y, offsetY.y, offsetZ.y)),
  138. getShadowTransparency(pixelData, width, halfWidth, FVector3D(offsetX.z, offsetY.z, offsetZ.z)),
  139. getShadowTransparency(pixelData, width, halfWidth, FVector3D(offsetX.w, offsetY.w, offsetZ.w))
  140. );
  141. }
  142. template <bool SHADOW_CASTING>
  143. static void addPointLightSuper(const OrthoView& camera, const IVector2D& worldCenter, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const AlignedImageF32& heightBuffer, const FVector3D& lightPosition, float lightRadius, float lightIntensity, const ColorRgbI32& lightColor, const AlignedImageF32& shadowCubeMap) {
  144. // Rotate the light position from relative space to light space
  145. // Normal-space defines the rotation for light-space
  146. FVector3D lightSpaceSourcePosition = camera.normalToWorldSpace.transformTransposed(lightPosition);
  147. // Align the rectangle with 8 pixels, because that's the widest read to align in the 16-bit height buffer
  148. IRect rectangleBound = calculateBound(camera, worldCenter, lightBuffer, lightSpaceSourcePosition, lightRadius, 4);
  149. if (rectangleBound.hasArea()) {
  150. // Uniform values
  151. // How much closer to your face in light-space does the pixel go per depth unit
  152. F32x4x3 inYourFaceAxis = F32x4x3(camera.screenDepthToLightSpace.zAxis);
  153. // Light color
  154. float colorR = std::max(0.0f, (float)lightColor.red * lightIntensity);
  155. float colorG = std::max(0.0f, (float)lightColor.green * lightIntensity);
  156. float colorB = std::max(0.0f, (float)lightColor.blue * lightIntensity);
  157. float reciprocalRadius = 1.0f / lightRadius;
  158. threadedSplit(rectangleBound, [
  159. lightBuffer, normalBuffer, heightBuffer, camera, worldCenter, inYourFaceAxis, lightSpaceSourcePosition,
  160. reciprocalRadius, colorR, colorG, colorB, shadowCubeMap](const IRect& bound) mutable {
  161. // Initiate the local light-space sweep along base height
  162. // The local light space is rotated like normal-space but has the origin at the light source
  163. FVector3D lightBaseRow = camera.screenDepthToLightSpace.transform(FVector3D(0.5f - (float)worldCenter.x + bound.left(), 0.5f - (float)worldCenter.y + bound.top(), 0.0f)) - lightSpaceSourcePosition;
  164. FVector3D dx = camera.screenDepthToLightSpace.xAxis;
  165. FVector3D dy = camera.screenDepthToLightSpace.yAxis;
  166. // Pack the offset for each of the 4 first pixels into a transposing constructor
  167. F32x4x3 lightBaseRowX4 = F32x4x3(lightBaseRow, lightBaseRow + dx, lightBaseRow + dx * 2.0f, lightBaseRow + dx * 3.0f);
  168. // Derivatives for moving four pixels to the right in parallel
  169. // (n+0, y0), (n+1, y0), (n+2, y0), (n+3, y0) -> (n+4, y0), (n+5, y0), (n+6, y0), (n+7, y0)
  170. F32x4x3 dx4 = F32x4x3(dx * 4.0f);
  171. // Derivatives for moving one pixel down in parallel
  172. // (x0, n+0), (x1, n+0), (x2, n+0), (x3, n+0)
  173. // -> (x0, n+1), (x1, n+1), (x2, n+1), (x3, n+1)
  174. F32x4x3 dy1 = F32x4x3(dy);
  175. // Get strides
  176. int lightStride = image_getStride(lightBuffer);
  177. int normalStride = image_getStride(normalBuffer);
  178. int heightStride = image_getStride(heightBuffer);
  179. // Get pointers
  180. SafePointer<uint8_t> lightRow = image_getSafePointer_channels(lightBuffer, bound.top()) + bound.left() * 4;
  181. SafePointer<uint32_t> normalRow = image_getSafePointer(normalBuffer, bound.top()) + bound.left();
  182. SafePointer<float> heightRow = image_getSafePointer(heightBuffer, bound.top()) + bound.left();
  183. // Get cube map for casting shadows
  184. int32_t shadowCubeWidth;
  185. SafePointer<float> shadowCubeData;
  186. float shadowCubeCenter;
  187. if (SHADOW_CASTING) {
  188. shadowCubeWidth = image_getWidth(shadowCubeMap); assert(shadowCubeWidth % 4 == 0);
  189. shadowCubeData = image_getSafePointer(shadowCubeMap);
  190. shadowCubeCenter = (float)shadowCubeWidth * 0.5f;
  191. }
  192. // Loop over the pixels to add light
  193. for (int y = bound.top(); y < bound.bottom(); y++) {
  194. // Initiate the leftmost pixels before iterating to the right
  195. F32x4x3 lightBasePixelx4 = lightBaseRowX4;
  196. SafePointer<uint8_t> lightPixel = lightRow;
  197. SafePointer<uint32_t> normalPixel = normalRow;
  198. SafePointer<float> heightPixel = heightRow;
  199. // Iterate over 16-bit pixels 8 at a time
  200. for (int x4 = bound.left(); x4 < bound.right(); x4+=4) {
  201. // Read pixel height
  202. F32x4 depthOffset = F32x4::readAligned(heightPixel, "addPointLight: reading height");
  203. // Extrude the pixel using positive values towards the camera to represent another height
  204. // This will solve X and Z positions based on the height Y
  205. F32x4x3 lightOffset = lightBasePixelx4 + (inYourFaceAxis * depthOffset);
  206. // Get the linear distance, divide by sphere radius and limit to length 1 at intensity 0
  207. F32x4 lightRatio = min(F32x4(1.0f), length(lightOffset) * reciprocalRadius);
  208. // Read surface normal
  209. U32x4 normalColor = U32x4::readAligned(normalPixel, "addPointLight: reading normal");
  210. // normalScale is used to negate the normals in advance so that opposing directions get positive values
  211. F32x4x3 negativeSurfaceNormal = (unpackRgb_U32x4_to_F32x4x3(normalColor) - 128.0f) * (-1.0f / 128.0f);
  212. // Fade from 0 to 1 using 1 - 2x + x²
  213. F32x4 distanceIntensity = 1.0f - 2.0f * lightRatio + lightRatio * lightRatio;
  214. F32x4 angleIntensity = max(F32x4(0.0f), dotProduct(normalize(lightOffset), negativeSurfaceNormal));
  215. F32x4 intensity = angleIntensity * distanceIntensity;
  216. if (SHADOW_CASTING) {
  217. intensity = intensity * getShadowTransparency(shadowCubeData, shadowCubeWidth, shadowCubeCenter, lightOffset);
  218. }
  219. // TODO: Make an optimized version for white light replacing red, green and blue with a single LUMA
  220. F32x4 red = intensity * colorR;
  221. F32x4 green = intensity * colorG;
  222. F32x4 blue = intensity * colorB;
  223. red = red.clampUpper(255.1f);
  224. green = green.clampUpper(255.1f);
  225. blue = blue.clampUpper(255.1f);
  226. // Add light to the image
  227. U8x16 morelight = reinterpret_U8FromU32(packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue)));
  228. addLight(lightPixel, morelight);
  229. // Go to the next four pixels in light-space
  230. lightBasePixelx4 += dx4;
  231. // Go to the next 4 pixels of image data
  232. lightPixel += 16;
  233. normalPixel += 4;
  234. heightPixel += 4;
  235. }
  236. // Go to the next row in light-space
  237. lightBaseRowX4 += dy1;
  238. // Go to the next row of image data
  239. lightRow.increaseBytes(lightStride);
  240. normalRow.increaseBytes(normalStride);
  241. heightRow.increaseBytes(heightStride);
  242. }
  243. });
  244. }
  245. }
  246. void addPointLight(const OrthoView& camera, const IVector2D& worldCenter, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const AlignedImageF32& heightBuffer, const FVector3D& lightPosition, float lightRadius, float lightIntensity, const ColorRgbI32& lightColor, const AlignedImageF32& shadowCubeMap) {
  247. if (image_exists(shadowCubeMap)) {
  248. addPointLightSuper<true>(camera, worldCenter, lightBuffer, normalBuffer, heightBuffer, lightPosition, lightRadius, lightIntensity, lightColor, shadowCubeMap);
  249. } else {
  250. addPointLightSuper<false>(camera, worldCenter, lightBuffer, normalBuffer, heightBuffer, lightPosition, lightRadius, lightIntensity, lightColor, AlignedImageF32());
  251. }
  252. }
  253. void addPointLight(const OrthoView& camera, const IVector2D& worldCenter, OrderedImageRgbaU8& lightBuffer, const OrderedImageRgbaU8& normalBuffer, const AlignedImageF32& heightBuffer, const FVector3D& lightPosition, float lightRadius, float lightIntensity, const ColorRgbI32& lightColor) {
  254. addPointLightSuper<false>(camera, worldCenter, lightBuffer, normalBuffer, heightBuffer, lightPosition, lightRadius, lightIntensity, lightColor, AlignedImageF32());
  255. }
  256. void blendLight(AlignedImageRgbaU8& colorBuffer, const OrderedImageRgbaU8& diffuseBuffer, const OrderedImageRgbaU8& lightBuffer) {
  257. PackOrder targetOrder = PackOrder::getPackOrder(image_getPackOrderIndex(colorBuffer));
  258. int width = image_getWidth(colorBuffer);
  259. int height = image_getHeight(colorBuffer);
  260. threadedSplit(0, height, [colorBuffer, diffuseBuffer, lightBuffer, targetOrder, width](int startIndex, int stopIndex) mutable {
  261. SafePointer<uint32_t> targetRow = image_getSafePointer(colorBuffer, startIndex);
  262. SafePointer<uint32_t> diffuseRow = image_getSafePointer(diffuseBuffer, startIndex);
  263. SafePointer<uint32_t> lightRow = image_getSafePointer(lightBuffer, startIndex);
  264. int targetStride = image_getStride(colorBuffer);
  265. int diffuseStride = image_getStride(diffuseBuffer);
  266. int lightStride = image_getStride(lightBuffer);
  267. F32x4 scale = F32x4(1.0 / 128.0f);
  268. for (int y = startIndex; y < stopIndex; y++) {
  269. SafePointer<uint32_t> targetPixel = targetRow;
  270. SafePointer<uint32_t> diffusePixel = diffuseRow;
  271. SafePointer<uint32_t> lightPixel = lightRow;
  272. for (int x4 = 0; x4 < width; x4 += 4) {
  273. U32x4 diffuse = U32x4::readAligned(diffusePixel, "blendLight: reading diffuse");
  274. U32x4 light = U32x4::readAligned(lightPixel, "blendLight: reading light");
  275. F32x4 red = (floatFromU32(getRed(diffuse)) * floatFromU32(getRed(light))) * scale;
  276. F32x4 green = (floatFromU32(getGreen(diffuse)) * floatFromU32(getGreen(light))) * scale;
  277. F32x4 blue = (floatFromU32(getBlue(diffuse)) * floatFromU32(getBlue(light))) * scale;
  278. red = red.clampUpper(255.1f);
  279. green = green.clampUpper(255.1f);
  280. blue = blue.clampUpper(255.1f);
  281. U32x4 color = packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue), targetOrder);
  282. color.writeAligned(targetPixel, "blendLight: writing color");
  283. targetPixel += 4;
  284. diffusePixel += 4;
  285. lightPixel += 4;
  286. }
  287. targetRow.increaseBytes(targetStride);
  288. diffuseRow.increaseBytes(diffuseStride);
  289. lightRow.increaseBytes(lightStride);
  290. }
  291. });
  292. }
  293. }