TriangleCodecIndexed8BitPackSOA4Flags.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  2. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  3. // SPDX-License-Identifier: MIT
  4. #pragma once
  5. #include <Jolt/Geometry/RayTriangle.h>
  6. JPH_NAMESPACE_BEGIN
  7. /// Store vertices in 64 bits and indices in 8 bits + 8 bit of flags per triangle like this:
  8. ///
  9. /// TriangleBlockHeader,
  10. /// TriangleBlock (4 triangles and their flags in 16 bytes),
  11. /// TriangleBlock...
  12. ///
  13. /// Vertices are stored:
  14. ///
  15. /// VertexData (1 vertex in 64 bits),
  16. /// VertexData...
  17. ///
  18. /// They're compressed relative to the bounding box as provided by the node codec.
  19. class TriangleCodecIndexed8BitPackSOA4Flags
  20. {
  21. public:
  22. class TriangleHeader
  23. {
  24. public:
  25. Float3 mOffset; ///< Offset of all vertices
  26. Float3 mScale; ///< Scale of all vertices, vertex_position = mOffset + mScale * compressed_vertex_position
  27. };
  28. /// Size of the header (an empty struct is always > 0 bytes so this needs a separate variable)
  29. static constexpr int TriangleHeaderSize = sizeof(TriangleHeader);
  30. /// If this codec could return a different offset than the current buffer size when calling Pack()
  31. static constexpr bool ChangesOffsetOnPack = false;
  32. /// Amount of bits per component
  33. enum EComponentData : uint32
  34. {
  35. COMPONENT_BITS = 21,
  36. COMPONENT_MASK = (1 << COMPONENT_BITS) - 1,
  37. };
  38. /// Packed X and Y coordinate
  39. enum EVertexXY : uint32
  40. {
  41. COMPONENT_X = 0,
  42. COMPONENT_Y1 = COMPONENT_BITS,
  43. COMPONENT_Y1_BITS = 32 - COMPONENT_BITS,
  44. };
  45. /// Packed Z and Y coordinate
  46. enum EVertexZY : uint32
  47. {
  48. COMPONENT_Z = 0,
  49. COMPONENT_Y2 = COMPONENT_BITS,
  50. COMPONENT_Y2_BITS = 31 - COMPONENT_BITS,
  51. };
  52. /// A single packed vertex
  53. struct VertexData
  54. {
  55. uint32 mVertexXY;
  56. uint32 mVertexZY;
  57. };
  58. static_assert(sizeof(VertexData) == 8, "Compiler added padding");
  59. /// A block of 4 triangles
  60. struct TriangleBlock
  61. {
  62. uint8 mIndices[3][4]; ///< 8 bit indices to triangle vertices for 4 triangles in the form mIndices[vertex][triangle] where vertex in [0, 2] and triangle in [0, 3]
  63. uint8 mFlags[4]; ///< Triangle flags (could contain material and active edges)
  64. };
  65. static_assert(sizeof(TriangleBlock) == 16, "Compiler added padding");
  66. /// A triangle header, will be followed by one or more TriangleBlocks
  67. struct TriangleBlockHeader
  68. {
  69. const VertexData * GetVertexData() const { return reinterpret_cast<const VertexData *>(reinterpret_cast<const uint8 *>(this) + mOffsetToVertices); }
  70. const TriangleBlock * GetTriangleBlock() const { return reinterpret_cast<const TriangleBlock *>(reinterpret_cast<const uint8 *>(this) + sizeof(TriangleBlockHeader)); }
  71. uint32 mOffsetToVertices; ///< Offset from current block to start of vertices in bytes
  72. };
  73. static_assert(sizeof(TriangleBlockHeader) == 4, "Compiler added padding");
  74. /// This class is used to validate that the triangle data will not be degenerate after compression
  75. class ValidationContext
  76. {
  77. public:
  78. /// Constructor
  79. ValidationContext(const IndexedTriangleList &inTriangles, const VertexList &inVertices) :
  80. mVertices(inVertices)
  81. {
  82. // Only used the referenced triangles, just like EncodingContext::Finalize does
  83. for (const IndexedTriangle &i : inTriangles)
  84. for (uint32 idx : i.mIdx)
  85. mBounds.Encapsulate(Vec3(inVertices[idx]));
  86. }
  87. /// Test if a triangle will be degenerate after quantization
  88. bool IsDegenerate(const IndexedTriangle &inTriangle) const
  89. {
  90. // Quantize the triangle in the same way as EncodingContext::Finalize does
  91. UVec4 quantized_vertex[3];
  92. Vec3 compress_scale = Vec3::sReplicate(COMPONENT_MASK) / Vec3::sMax(mBounds.GetSize(), Vec3::sReplicate(1.0e-20f));
  93. for (int i = 0; i < 3; ++i)
  94. quantized_vertex[i] = ((Vec3(mVertices[inTriangle.mIdx[i]]) - mBounds.mMin) * compress_scale + Vec3::sReplicate(0.5f)).ToInt();
  95. return quantized_vertex[0] == quantized_vertex[1] || quantized_vertex[1] == quantized_vertex[2] || quantized_vertex[0] == quantized_vertex[2];
  96. }
  97. private:
  98. const VertexList & mVertices;
  99. AABox mBounds;
  100. };
  101. /// This class is used to encode and compress triangle data into a byte buffer
  102. class EncodingContext
  103. {
  104. public:
  105. /// Construct the encoding context
  106. explicit EncodingContext(const VertexList &inVertices) :
  107. mVertexMap(inVertices.size(), 0xffffffff) // Fill vertex map with 'not found'
  108. {
  109. // Reserve for worst case to avoid allocating in the inner loop
  110. mVertices.reserve(inVertices.size());
  111. }
  112. /// Get an upper bound on the amount of bytes needed to store inTriangleCount triangles
  113. uint GetPessimisticMemoryEstimate(uint inTriangleCount) const
  114. {
  115. // Worst case each triangle is alone in a block, none of the vertices are shared and we need to add 3 bytes to align the vertices
  116. return inTriangleCount * (sizeof(TriangleBlockHeader) + sizeof(TriangleBlock) + 3 * sizeof(VertexData)) + 3;
  117. }
  118. /// Pack the triangles in inContainer to ioBuffer. This stores the mMaterialIndex of a triangle in the 8 bit flags.
  119. /// Returns uint(-1) on error.
  120. uint Pack(const IndexedTriangleList &inTriangles, ByteBuffer &ioBuffer, const char *&outError)
  121. {
  122. // Determine position of triangles start
  123. uint offset = (uint)ioBuffer.size();
  124. // Update stats
  125. uint tri_count = (uint)inTriangles.size();
  126. mNumTriangles += tri_count;
  127. // Allocate triangle block header
  128. TriangleBlockHeader *header = ioBuffer.Allocate<TriangleBlockHeader>();
  129. // Compute first vertex that this batch will use (ensuring there's enough room if none of the vertices are shared)
  130. uint start_vertex = Clamp((int)mVertices.size() - 256 + (int)tri_count * 3, 0, (int)mVertices.size());
  131. // Store the start vertex offset, this will later be patched to give the delta offset relative to the triangle block
  132. mOffsetsToPatch.push_back(uint((uint8 *)&header->mOffsetToVertices - &ioBuffer[0]));
  133. header->mOffsetToVertices = start_vertex * sizeof(VertexData);
  134. // Pack vertices
  135. uint padded_triangle_count = AlignUp(tri_count, 4);
  136. for (uint t = 0; t < padded_triangle_count; t += 4)
  137. {
  138. TriangleBlock *block = ioBuffer.Allocate<TriangleBlock>();
  139. for (uint vertex_nr = 0; vertex_nr < 3; ++vertex_nr)
  140. for (uint block_tri_idx = 0; block_tri_idx < 4; ++block_tri_idx)
  141. {
  142. // Fetch vertex index. Create degenerate triangles for padding triangles.
  143. bool triangle_available = t + block_tri_idx < tri_count;
  144. uint32 src_vertex_index = triangle_available? inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[tri_count - 1].mIdx[0];
  145. // Check if we've seen this vertex before and if it is in the range that we can encode
  146. uint32 &vertex_index = mVertexMap[src_vertex_index];
  147. if (vertex_index == 0xffffffff || vertex_index < start_vertex)
  148. {
  149. // Add vertex
  150. vertex_index = (uint32)mVertices.size();
  151. mVertices.push_back(src_vertex_index);
  152. }
  153. // Store vertex index
  154. uint32 vertex_offset = vertex_index - start_vertex;
  155. if (vertex_offset > 0xff)
  156. {
  157. outError = "TriangleCodecIndexed8BitPackSOA4Flags: Offset doesn't fit in 8 bit";
  158. return uint(-1);
  159. }
  160. block->mIndices[vertex_nr][block_tri_idx] = (uint8)vertex_offset;
  161. // Store flags
  162. uint32 flags = triangle_available? inTriangles[t + block_tri_idx].mMaterialIndex : 0;
  163. if (flags > 0xff)
  164. {
  165. outError = "TriangleCodecIndexed8BitPackSOA4Flags: Material index doesn't fit in 8 bit";
  166. return uint(-1);
  167. }
  168. block->mFlags[block_tri_idx] = (uint8)flags;
  169. }
  170. }
  171. return offset;
  172. }
  173. /// After all triangles have been packed, this finalizes the header and triangle buffer
  174. void Finalize(const VertexList &inVertices, TriangleHeader *ioHeader, ByteBuffer &ioBuffer) const
  175. {
  176. // Check if anything to do
  177. if (mVertices.empty())
  178. return;
  179. // Align buffer to 4 bytes
  180. uint vertices_idx = (uint)ioBuffer.Align(4);
  181. // Patch the offsets
  182. for (uint o : mOffsetsToPatch)
  183. *ioBuffer.Get<uint32>(o) += vertices_idx - o;
  184. // Calculate bounding box
  185. AABox bounds;
  186. for (uint32 v : mVertices)
  187. bounds.Encapsulate(Vec3(inVertices[v]));
  188. // Compress vertices
  189. VertexData *vertices = ioBuffer.Allocate<VertexData>(mVertices.size());
  190. Vec3 compress_scale = Vec3::sReplicate(COMPONENT_MASK) / Vec3::sMax(bounds.GetSize(), Vec3::sReplicate(1.0e-20f));
  191. for (uint32 v : mVertices)
  192. {
  193. UVec4 c = ((Vec3(inVertices[v]) - bounds.mMin) * compress_scale + Vec3::sReplicate(0.5f)).ToInt();
  194. JPH_ASSERT(c.GetX() <= COMPONENT_MASK);
  195. JPH_ASSERT(c.GetY() <= COMPONENT_MASK);
  196. JPH_ASSERT(c.GetZ() <= COMPONENT_MASK);
  197. vertices->mVertexXY = c.GetX() + (c.GetY() << COMPONENT_Y1);
  198. vertices->mVertexZY = c.GetZ() + ((c.GetY() >> COMPONENT_Y1_BITS) << COMPONENT_Y2);
  199. ++vertices;
  200. }
  201. // Store decompression information
  202. bounds.mMin.StoreFloat3(&ioHeader->mOffset);
  203. (bounds.GetSize() / Vec3::sReplicate(COMPONENT_MASK)).StoreFloat3(&ioHeader->mScale);
  204. }
  205. private:
  206. using VertexMap = Array<uint32>;
  207. uint mNumTriangles = 0;
  208. Array<uint32> mVertices; ///< Output vertices as an index into the original vertex list (inVertices), sorted according to occurrence
  209. VertexMap mVertexMap; ///< Maps from the original mesh vertex index (inVertices) to the index in our output vertices (mVertices)
  210. Array<uint> mOffsetsToPatch; ///< Offsets to the vertex buffer that need to be patched in once all nodes have been packed
  211. };
  212. /// This class is used to decode and decompress triangle data packed by the EncodingContext
  213. class DecodingContext
  214. {
  215. private:
  216. /// Private helper functions to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
  217. JPH_INLINE void Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const
  218. {
  219. // Get compressed data
  220. UVec4 c1 = UVec4::sGatherInt4<8>(&inVertices->mVertexXY, inIndex);
  221. UVec4 c2 = UVec4::sGatherInt4<8>(&inVertices->mVertexZY, inIndex);
  222. // Unpack the x y and z component
  223. UVec4 xc = UVec4::sAnd(c1, UVec4::sReplicate(COMPONENT_MASK));
  224. UVec4 yc = UVec4::sOr(c1.LogicalShiftRight<COMPONENT_Y1>(), c2.LogicalShiftRight<COMPONENT_Y2>().LogicalShiftLeft<COMPONENT_Y1_BITS>());
  225. UVec4 zc = UVec4::sAnd(c2, UVec4::sReplicate(COMPONENT_MASK));
  226. // Convert to float
  227. outX = Vec4::sFusedMultiplyAdd(xc.ToFloat(), mScaleX, mOffsetX);
  228. outY = Vec4::sFusedMultiplyAdd(yc.ToFloat(), mScaleY, mOffsetY);
  229. outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
  230. }
  231. public:
  232. JPH_INLINE explicit DecodingContext(const TriangleHeader *inHeader) :
  233. mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)),
  234. mOffsetY(Vec4::sReplicate(inHeader->mOffset.y)),
  235. mOffsetZ(Vec4::sReplicate(inHeader->mOffset.z)),
  236. mScaleX(Vec4::sReplicate(inHeader->mScale.x)),
  237. mScaleY(Vec4::sReplicate(inHeader->mScale.y)),
  238. mScaleZ(Vec4::sReplicate(inHeader->mScale.z))
  239. {
  240. }
  241. /// Unpacks triangles in the format t1v1,t1v2,t1v3, t2v1,t2v2,t2v3, ...
  242. JPH_INLINE void Unpack(const void *inTriangleStart, uint32 inNumTriangles, Vec3 *outTriangles) const
  243. {
  244. JPH_ASSERT(inNumTriangles > 0);
  245. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  246. const VertexData *vertices = header->GetVertexData();
  247. const TriangleBlock *t = header->GetTriangleBlock();
  248. const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2);
  249. int triangles_left = inNumTriangles;
  250. do
  251. {
  252. // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
  253. UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
  254. UVec4 iv1 = indices.Expand4Byte0();
  255. UVec4 iv2 = indices.Expand4Byte4();
  256. UVec4 iv3 = indices.Expand4Byte8();
  257. // Decompress the triangle data
  258. Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
  259. Unpack(vertices, iv1, v1x, v1y, v1z);
  260. Unpack(vertices, iv2, v2x, v2y, v2z);
  261. Unpack(vertices, iv3, v3x, v3y, v3z);
  262. // Transpose it so we get normal vectors
  263. Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed();
  264. Mat44 v2 = Mat44(v2x, v2y, v2z, Vec4::sZero()).Transposed();
  265. Mat44 v3 = Mat44(v3x, v3y, v3z, Vec4::sZero()).Transposed();
  266. // Store triangle data
  267. for (int i = 0; i < 4 && triangles_left > 0; ++i, --triangles_left)
  268. {
  269. *outTriangles++ = v1.GetColumn3(i);
  270. *outTriangles++ = v2.GetColumn3(i);
  271. *outTriangles++ = v3.GetColumn3(i);
  272. }
  273. ++t;
  274. }
  275. while (t < end);
  276. }
  277. /// Tests a ray against the packed triangles
  278. JPH_INLINE float TestRay(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, const void *inTriangleStart, uint32 inNumTriangles, float inClosest, uint32 &outClosestTriangleIndex) const
  279. {
  280. JPH_ASSERT(inNumTriangles > 0);
  281. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  282. const VertexData *vertices = header->GetVertexData();
  283. const TriangleBlock *t = header->GetTriangleBlock();
  284. const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2);
  285. Vec4 closest = Vec4::sReplicate(inClosest);
  286. UVec4 closest_triangle_idx = UVec4::sZero();
  287. UVec4 start_triangle_idx = UVec4::sZero();
  288. do
  289. {
  290. // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
  291. UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
  292. UVec4 iv1 = indices.Expand4Byte0();
  293. UVec4 iv2 = indices.Expand4Byte4();
  294. UVec4 iv3 = indices.Expand4Byte8();
  295. // Decompress the triangle data
  296. Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
  297. Unpack(vertices, iv1, v1x, v1y, v1z);
  298. Unpack(vertices, iv2, v2x, v2y, v2z);
  299. Unpack(vertices, iv3, v3x, v3y, v3z);
  300. // Perform ray vs triangle test
  301. Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
  302. // Update closest with the smaller values
  303. UVec4 smaller = Vec4::sLess(distance, closest);
  304. closest = Vec4::sSelect(closest, distance, smaller);
  305. // Update triangle index with the smallest values
  306. UVec4 triangle_idx = start_triangle_idx + UVec4(0, 1, 2, 3);
  307. closest_triangle_idx = UVec4::sSelect(closest_triangle_idx, triangle_idx, smaller);
  308. // Next block
  309. ++t;
  310. start_triangle_idx += UVec4::sReplicate(4);
  311. }
  312. while (t < end);
  313. // Get the smallest component
  314. Vec4::sSort4(closest, closest_triangle_idx);
  315. outClosestTriangleIndex = closest_triangle_idx.GetX();
  316. return closest.GetX();
  317. }
  318. /// Decode a single triangle
  319. inline void GetTriangle(const void *inTriangleStart, uint32 inTriangleIdx, Vec3 &outV1, Vec3 &outV2, Vec3 &outV3) const
  320. {
  321. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  322. const VertexData *vertices = header->GetVertexData();
  323. const TriangleBlock *block = header->GetTriangleBlock() + (inTriangleIdx >> 2);
  324. uint32 block_triangle_idx = inTriangleIdx & 0b11;
  325. // Get the 3 vertices
  326. const VertexData &v1 = vertices[block->mIndices[0][block_triangle_idx]];
  327. const VertexData &v2 = vertices[block->mIndices[1][block_triangle_idx]];
  328. const VertexData &v3 = vertices[block->mIndices[2][block_triangle_idx]];
  329. // Pack the vertices
  330. UVec4 c1(v1.mVertexXY, v2.mVertexXY, v3.mVertexXY, 0);
  331. UVec4 c2(v1.mVertexZY, v2.mVertexZY, v3.mVertexZY, 0);
  332. // Unpack the x y and z component
  333. UVec4 xc = UVec4::sAnd(c1, UVec4::sReplicate(COMPONENT_MASK));
  334. UVec4 yc = UVec4::sOr(c1.LogicalShiftRight<COMPONENT_Y1>(), c2.LogicalShiftRight<COMPONENT_Y2>().LogicalShiftLeft<COMPONENT_Y1_BITS>());
  335. UVec4 zc = UVec4::sAnd(c2, UVec4::sReplicate(COMPONENT_MASK));
  336. // Convert to float
  337. Vec4 vx = Vec4::sFusedMultiplyAdd(xc.ToFloat(), mScaleX, mOffsetX);
  338. Vec4 vy = Vec4::sFusedMultiplyAdd(yc.ToFloat(), mScaleY, mOffsetY);
  339. Vec4 vz = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
  340. // Transpose it so we get normal vectors
  341. Mat44 trans = Mat44(vx, vy, vz, Vec4::sZero()).Transposed();
  342. outV1 = trans.GetAxisX();
  343. outV2 = trans.GetAxisY();
  344. outV3 = trans.GetAxisZ();
  345. }
  346. /// Get flags for entire triangle block
  347. JPH_INLINE static void sGetFlags(const void *inTriangleStart, uint32 inNumTriangles, uint8 *outTriangleFlags)
  348. {
  349. JPH_ASSERT(inNumTriangles > 0);
  350. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  351. const TriangleBlock *t = header->GetTriangleBlock();
  352. const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2);
  353. int triangles_left = inNumTriangles;
  354. do
  355. {
  356. for (int i = 0; i < 4 && triangles_left > 0; ++i, --triangles_left)
  357. *outTriangleFlags++ = t->mFlags[i];
  358. ++t;
  359. }
  360. while (t < end);
  361. }
  362. /// Get flags for a particular triangle
  363. JPH_INLINE static uint8 sGetFlags(const void *inTriangleStart, int inTriangleIndex)
  364. {
  365. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  366. const TriangleBlock *first_block = header->GetTriangleBlock();
  367. return first_block[inTriangleIndex >> 2].mFlags[inTriangleIndex & 0b11];
  368. }
  369. /// Unpacks triangles and flags, convenience function
  370. JPH_INLINE void Unpack(const void *inTriangleStart, uint32 inNumTriangles, Vec3 *outTriangles, uint8 *outTriangleFlags) const
  371. {
  372. Unpack(inTriangleStart, inNumTriangles, outTriangles);
  373. sGetFlags(inTriangleStart, inNumTriangles, outTriangleFlags);
  374. }
  375. private:
  376. Vec4 mOffsetX;
  377. Vec4 mOffsetY;
  378. Vec4 mOffsetZ;
  379. Vec4 mScaleX;
  380. Vec4 mScaleY;
  381. Vec4 mScaleZ;
  382. };
  383. };
  384. JPH_NAMESPACE_END