TriangleCodecIndexed8BitPackSOA4Flags.h 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  2. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  3. // SPDX-License-Identifier: MIT
  4. #pragma once
  5. #include <Jolt/Geometry/RayTriangle.h>
  6. JPH_NAMESPACE_BEGIN
  7. /// Store vertices in 64 bits and indices in 8 bits + 8 bits of flags per triangle like this:
  8. ///
  9. /// TriangleBlockHeader,
  10. /// TriangleBlock (4 triangles and their flags in 16 bytes),
  11. /// TriangleBlock...
  12. /// [Optional] UserData (4 bytes per triangle)
  13. ///
  14. /// Vertices are stored:
  15. ///
  16. /// VertexData (1 vertex in 64 bits),
  17. /// VertexData...
  18. ///
  19. /// They're compressed relative to the bounding box as provided by the node codec.
  20. class TriangleCodecIndexed8BitPackSOA4Flags
  21. {
  22. public:
  23. class TriangleHeader
  24. {
  25. public:
  26. Float3 mOffset; ///< Offset of all vertices
  27. Float3 mScale; ///< Scale of all vertices, vertex_position = mOffset + mScale * compressed_vertex_position
  28. };
  29. /// Size of the header (an empty struct is always > 0 bytes so this needs a separate variable)
  30. static constexpr int TriangleHeaderSize = sizeof(TriangleHeader);
  31. /// If this codec could return a different offset than the current buffer size when calling Pack()
  32. static constexpr bool ChangesOffsetOnPack = false;
  33. /// Amount of bits per component
  34. enum EComponentData : uint32
  35. {
  36. COMPONENT_BITS = 21,
  37. COMPONENT_MASK = (1 << COMPONENT_BITS) - 1,
  38. };
  39. /// Packed X and Y coordinate
  40. enum EVertexXY : uint32
  41. {
  42. COMPONENT_X = 0,
  43. COMPONENT_Y1 = COMPONENT_BITS,
  44. COMPONENT_Y1_BITS = 32 - COMPONENT_BITS,
  45. };
  46. /// Packed Z and Y coordinate
  47. enum EVertexZY : uint32
  48. {
  49. COMPONENT_Z = 0,
  50. COMPONENT_Y2 = COMPONENT_BITS,
  51. COMPONENT_Y2_BITS = 31 - COMPONENT_BITS,
  52. };
  53. /// A single packed vertex
  54. struct VertexData
  55. {
  56. uint32 mVertexXY;
  57. uint32 mVertexZY;
  58. };
  59. static_assert(sizeof(VertexData) == 8, "Compiler added padding");
  60. /// A block of 4 triangles
  61. struct TriangleBlock
  62. {
  63. uint8 mIndices[3][4]; ///< 8 bit indices to triangle vertices for 4 triangles in the form mIndices[vertex][triangle] where vertex in [0, 2] and triangle in [0, 3]
  64. uint8 mFlags[4]; ///< Triangle flags (could contain material and active edges)
  65. };
  66. static_assert(sizeof(TriangleBlock) == 16, "Compiler added padding");
  67. enum ETriangleBlockHeaderFlags : uint32
  68. {
  69. OFFSET_TO_VERTICES_BITS = 29, ///< Offset from current block to start of vertices in bytes
  70. OFFSET_TO_VERTICES_MASK = (1 << OFFSET_TO_VERTICES_BITS) - 1,
  71. OFFSET_NON_SIGNIFICANT_BITS = 2, ///< The offset from the current block to the start of the vertices must be a multiple of 4 bytes
  72. OFFSET_NON_SIGNIFICANT_MASK = (1 << OFFSET_NON_SIGNIFICANT_BITS) - 1,
  73. OFFSET_TO_USERDATA_BITS = 3, ///< When user data is stored, this is the number of blocks to skip to get to the user data (0 = no user data)
  74. OFFSET_TO_USERDATA_MASK = (1 << OFFSET_TO_USERDATA_BITS) - 1,
  75. };
  76. /// A triangle header, will be followed by one or more TriangleBlocks
  77. struct TriangleBlockHeader
  78. {
  79. const VertexData * GetVertexData() const { return reinterpret_cast<const VertexData *>(reinterpret_cast<const uint8 *>(this) + ((mFlags & OFFSET_TO_VERTICES_MASK) << OFFSET_NON_SIGNIFICANT_BITS)); }
  80. const TriangleBlock * GetTriangleBlock() const { return reinterpret_cast<const TriangleBlock *>(reinterpret_cast<const uint8 *>(this) + sizeof(TriangleBlockHeader)); }
  81. const uint32 * GetUserData() const { uint32 offset = mFlags >> OFFSET_TO_VERTICES_BITS; return offset == 0? nullptr : reinterpret_cast<const uint32 *>(GetTriangleBlock() + offset); }
  82. uint32 mFlags;
  83. };
  84. static_assert(sizeof(TriangleBlockHeader) == 4, "Compiler added padding");
  85. /// This class is used to validate that the triangle data will not be degenerate after compression
  86. class ValidationContext
  87. {
  88. public:
  89. /// Constructor
  90. ValidationContext(const IndexedTriangleList &inTriangles, const VertexList &inVertices) :
  91. mVertices(inVertices)
  92. {
  93. // Only used the referenced triangles, just like EncodingContext::Finalize does
  94. for (const IndexedTriangle &i : inTriangles)
  95. for (uint32 idx : i.mIdx)
  96. mBounds.Encapsulate(Vec3(inVertices[idx]));
  97. }
  98. /// Test if a triangle will be degenerate after quantization
  99. bool IsDegenerate(const IndexedTriangle &inTriangle) const
  100. {
  101. // Quantize the triangle in the same way as EncodingContext::Finalize does
  102. UVec4 quantized_vertex[3];
  103. Vec3 compress_scale = Vec3::sReplicate(COMPONENT_MASK) / Vec3::sMax(mBounds.GetSize(), Vec3::sReplicate(1.0e-20f));
  104. for (int i = 0; i < 3; ++i)
  105. quantized_vertex[i] = ((Vec3(mVertices[inTriangle.mIdx[i]]) - mBounds.mMin) * compress_scale + Vec3::sReplicate(0.5f)).ToInt();
  106. return quantized_vertex[0] == quantized_vertex[1] || quantized_vertex[1] == quantized_vertex[2] || quantized_vertex[0] == quantized_vertex[2];
  107. }
  108. private:
  109. const VertexList & mVertices;
  110. AABox mBounds;
  111. };
  112. /// This class is used to encode and compress triangle data into a byte buffer
  113. class EncodingContext
  114. {
  115. public:
  116. /// Indicates a vertex hasn't been seen yet in the triangle list
  117. static constexpr uint32 cNotFound = 0xffffffff;
  118. /// Construct the encoding context
  119. explicit EncodingContext(const VertexList &inVertices) :
  120. mVertexMap(inVertices.size(), cNotFound)
  121. {
  122. }
  123. /// Mimics the size a call to Pack() would add to the buffer
  124. void PreparePack(const IndexedTriangle *inTriangles, uint inNumTriangles, bool inStoreUserData, uint64 &ioBufferSize)
  125. {
  126. // Add triangle block header
  127. ioBufferSize += sizeof(TriangleBlockHeader);
  128. // Compute first vertex that this batch will use (ensuring there's enough room if none of the vertices are shared)
  129. uint start_vertex = Clamp((int)mVertexCount - 256 + (int)inNumTriangles * 3, 0, (int)mVertexCount);
  130. // Pack vertices
  131. uint padded_triangle_count = AlignUp(inNumTriangles, 4);
  132. for (uint t = 0; t < padded_triangle_count; t += 4)
  133. {
  134. // Add triangle block header
  135. ioBufferSize += sizeof(TriangleBlock);
  136. for (uint vertex_nr = 0; vertex_nr < 3; ++vertex_nr)
  137. for (uint block_tri_idx = 0; block_tri_idx < 4; ++block_tri_idx)
  138. {
  139. // Fetch vertex index. Create degenerate triangles for padding triangles.
  140. bool triangle_available = t + block_tri_idx < inNumTriangles;
  141. uint32 src_vertex_index = triangle_available? inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[inNumTriangles - 1].mIdx[0];
  142. // Check if we've seen this vertex before and if it is in the range that we can encode
  143. uint32 &vertex_index = mVertexMap[src_vertex_index];
  144. if (vertex_index == cNotFound || vertex_index < start_vertex)
  145. {
  146. // Add vertex
  147. vertex_index = mVertexCount;
  148. mVertexCount++;
  149. }
  150. }
  151. }
  152. // Add user data
  153. if (inStoreUserData)
  154. ioBufferSize += inNumTriangles * sizeof(uint32);
  155. }
  156. /// Mimics the size the Finalize() call would add to ioBufferSize
  157. void FinalizePreparePack(uint64 &ioBufferSize)
  158. {
  159. // Remember where the vertices are going to start in the output buffer
  160. JPH_ASSERT(IsAligned(ioBufferSize, 4));
  161. mVerticesStartIdx = size_t(ioBufferSize);
  162. // Add vertices to buffer
  163. ioBufferSize += uint64(mVertexCount) * sizeof(VertexData);
  164. // Reserve the amount of memory we need for the vertices
  165. mVertices.reserve(mVertexCount);
  166. // Set vertex map back to 'not found'
  167. for (uint32 &v : mVertexMap)
  168. v = cNotFound;
  169. }
  170. /// Pack the triangles in inContainer to ioBuffer. This stores the mMaterialIndex of a triangle in the 8 bit flags.
  171. /// Returns size_t(-1) on error.
  172. size_t Pack(const IndexedTriangle *inTriangles, uint inNumTriangles, bool inStoreUserData, ByteBuffer &ioBuffer, const char *&outError)
  173. {
  174. JPH_ASSERT(inNumTriangles > 0);
  175. // Determine position of triangles start
  176. size_t triangle_block_start = ioBuffer.size();
  177. // Allocate triangle block header
  178. TriangleBlockHeader *header = ioBuffer.Allocate<TriangleBlockHeader>();
  179. // Compute first vertex that this batch will use (ensuring there's enough room if none of the vertices are shared)
  180. uint start_vertex = Clamp((int)mVertices.size() - 256 + (int)inNumTriangles * 3, 0, (int)mVertices.size());
  181. // Store the start vertex offset relative to TriangleBlockHeader
  182. size_t offset_to_vertices = mVerticesStartIdx - triangle_block_start + size_t(start_vertex) * sizeof(VertexData);
  183. if (offset_to_vertices & OFFSET_NON_SIGNIFICANT_MASK)
  184. {
  185. outError = "TriangleCodecIndexed8BitPackSOA4Flags: Internal Error: Offset has non-significant bits set";
  186. return size_t(-1);
  187. }
  188. offset_to_vertices >>= OFFSET_NON_SIGNIFICANT_BITS;
  189. if (offset_to_vertices > OFFSET_TO_VERTICES_MASK)
  190. {
  191. outError = "TriangleCodecIndexed8BitPackSOA4Flags: Offset to vertices doesn't fit. Too much data.";
  192. return size_t(-1);
  193. }
  194. header->mFlags = uint32(offset_to_vertices);
  195. // When we store user data we need to store the offset to the user data in TriangleBlocks
  196. uint padded_triangle_count = AlignUp(inNumTriangles, 4);
  197. if (inStoreUserData)
  198. {
  199. uint32 num_blocks = padded_triangle_count >> 2;
  200. JPH_ASSERT(num_blocks <= OFFSET_TO_USERDATA_MASK);
  201. header->mFlags |= num_blocks << OFFSET_TO_VERTICES_BITS;
  202. }
  203. // Pack vertices
  204. for (uint t = 0; t < padded_triangle_count; t += 4)
  205. {
  206. TriangleBlock *block = ioBuffer.Allocate<TriangleBlock>();
  207. for (uint vertex_nr = 0; vertex_nr < 3; ++vertex_nr)
  208. for (uint block_tri_idx = 0; block_tri_idx < 4; ++block_tri_idx)
  209. {
  210. // Fetch vertex index. Create degenerate triangles for padding triangles.
  211. bool triangle_available = t + block_tri_idx < inNumTriangles;
  212. uint32 src_vertex_index = triangle_available? inTriangles[t + block_tri_idx].mIdx[vertex_nr] : inTriangles[inNumTriangles - 1].mIdx[0];
  213. // Check if we've seen this vertex before and if it is in the range that we can encode
  214. uint32 &vertex_index = mVertexMap[src_vertex_index];
  215. if (vertex_index == cNotFound || vertex_index < start_vertex)
  216. {
  217. // Add vertex
  218. vertex_index = (uint32)mVertices.size();
  219. mVertices.push_back(src_vertex_index);
  220. }
  221. // Store vertex index
  222. uint32 vertex_offset = vertex_index - start_vertex;
  223. if (vertex_offset > 0xff)
  224. {
  225. outError = "TriangleCodecIndexed8BitPackSOA4Flags: Offset doesn't fit in 8 bit";
  226. return size_t(-1);
  227. }
  228. block->mIndices[vertex_nr][block_tri_idx] = (uint8)vertex_offset;
  229. // Store flags
  230. uint32 flags = triangle_available? inTriangles[t + block_tri_idx].mMaterialIndex : 0;
  231. if (flags > 0xff)
  232. {
  233. outError = "TriangleCodecIndexed8BitPackSOA4Flags: Material index doesn't fit in 8 bit";
  234. return size_t(-1);
  235. }
  236. block->mFlags[block_tri_idx] = (uint8)flags;
  237. }
  238. }
  239. // Store user data
  240. if (inStoreUserData)
  241. {
  242. uint32 *user_data = ioBuffer.Allocate<uint32>(inNumTriangles);
  243. for (uint t = 0; t < inNumTriangles; ++t)
  244. user_data[t] = inTriangles[t].mUserData;
  245. }
  246. return triangle_block_start;
  247. }
  248. /// After all triangles have been packed, this finalizes the header and triangle buffer
  249. void Finalize(const VertexList &inVertices, TriangleHeader *ioHeader, ByteBuffer &ioBuffer) const
  250. {
  251. // Assert that our reservations were correct
  252. JPH_ASSERT(mVertices.size() == mVertexCount);
  253. JPH_ASSERT(ioBuffer.size() == mVerticesStartIdx);
  254. // Check if anything to do
  255. if (mVertices.empty())
  256. return;
  257. // Calculate bounding box
  258. AABox bounds;
  259. for (uint32 v : mVertices)
  260. bounds.Encapsulate(Vec3(inVertices[v]));
  261. // Compress vertices
  262. VertexData *vertices = ioBuffer.Allocate<VertexData>(mVertices.size());
  263. Vec3 compress_scale = Vec3::sReplicate(COMPONENT_MASK) / Vec3::sMax(bounds.GetSize(), Vec3::sReplicate(1.0e-20f));
  264. for (uint32 v : mVertices)
  265. {
  266. UVec4 c = ((Vec3(inVertices[v]) - bounds.mMin) * compress_scale + Vec3::sReplicate(0.5f)).ToInt();
  267. JPH_ASSERT(c.GetX() <= COMPONENT_MASK);
  268. JPH_ASSERT(c.GetY() <= COMPONENT_MASK);
  269. JPH_ASSERT(c.GetZ() <= COMPONENT_MASK);
  270. vertices->mVertexXY = c.GetX() + (c.GetY() << COMPONENT_Y1);
  271. vertices->mVertexZY = c.GetZ() + ((c.GetY() >> COMPONENT_Y1_BITS) << COMPONENT_Y2);
  272. ++vertices;
  273. }
  274. // Store decompression information
  275. bounds.mMin.StoreFloat3(&ioHeader->mOffset);
  276. (bounds.GetSize() / Vec3::sReplicate(COMPONENT_MASK)).StoreFloat3(&ioHeader->mScale);
  277. }
  278. private:
  279. using VertexMap = Array<uint32>;
  280. uint32 mVertexCount = 0; ///< Number of vertices calculated during PreparePack
  281. size_t mVerticesStartIdx = 0; ///< Start of the vertices in the output buffer, calculated during PreparePack
  282. Array<uint32> mVertices; ///< Output vertices as an index into the original vertex list (inVertices), sorted according to occurrence
  283. VertexMap mVertexMap; ///< Maps from the original mesh vertex index (inVertices) to the index in our output vertices (mVertices)
  284. };
  285. /// This class is used to decode and decompress triangle data packed by the EncodingContext
  286. class DecodingContext
  287. {
  288. private:
  289. /// Private helper function to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
  290. JPH_INLINE void Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const
  291. {
  292. // Get compressed data
  293. UVec4 c1 = UVec4::sGatherInt4<8>(&inVertices->mVertexXY, inIndex);
  294. UVec4 c2 = UVec4::sGatherInt4<8>(&inVertices->mVertexZY, inIndex);
  295. // Unpack the x y and z component
  296. UVec4 xc = UVec4::sAnd(c1, UVec4::sReplicate(COMPONENT_MASK));
  297. UVec4 yc = UVec4::sOr(c1.LogicalShiftRight<COMPONENT_Y1>(), c2.LogicalShiftRight<COMPONENT_Y2>().LogicalShiftLeft<COMPONENT_Y1_BITS>());
  298. UVec4 zc = UVec4::sAnd(c2, UVec4::sReplicate(COMPONENT_MASK));
  299. // Convert to float
  300. outX = Vec4::sFusedMultiplyAdd(xc.ToFloat(), mScaleX, mOffsetX);
  301. outY = Vec4::sFusedMultiplyAdd(yc.ToFloat(), mScaleY, mOffsetY);
  302. outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
  303. }
  304. /// Private helper function to unpack 4 triangles from a triangle block
  305. JPH_INLINE void Unpack(const TriangleBlock *inBlock, const VertexData *inVertices, Vec4 &outX1, Vec4 &outY1, Vec4 &outZ1, Vec4 &outX2, Vec4 &outY2, Vec4 &outZ2, Vec4 &outX3, Vec4 &outY3, Vec4 &outZ3) const
  306. {
  307. // Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
  308. UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&inBlock->mIndices[0]));
  309. UVec4 iv1 = indices.Expand4Byte0();
  310. UVec4 iv2 = indices.Expand4Byte4();
  311. UVec4 iv3 = indices.Expand4Byte8();
  312. #ifdef JPH_CPU_BIG_ENDIAN
  313. // On big endian systems we need to reverse the bytes
  314. iv1 = iv1.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
  315. iv2 = iv2.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
  316. iv3 = iv3.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
  317. #endif
  318. // Decompress the triangle data
  319. Unpack(inVertices, iv1, outX1, outY1, outZ1);
  320. Unpack(inVertices, iv2, outX2, outY2, outZ2);
  321. Unpack(inVertices, iv3, outX3, outY3, outZ3);
  322. }
  323. public:
  324. JPH_INLINE explicit DecodingContext(const TriangleHeader *inHeader) :
  325. mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)),
  326. mOffsetY(Vec4::sReplicate(inHeader->mOffset.y)),
  327. mOffsetZ(Vec4::sReplicate(inHeader->mOffset.z)),
  328. mScaleX(Vec4::sReplicate(inHeader->mScale.x)),
  329. mScaleY(Vec4::sReplicate(inHeader->mScale.y)),
  330. mScaleZ(Vec4::sReplicate(inHeader->mScale.z))
  331. {
  332. }
  333. /// Unpacks triangles in the format t1v1,t1v2,t1v3, t2v1,t2v2,t2v3, ...
  334. JPH_INLINE void Unpack(const void *inTriangleStart, uint32 inNumTriangles, Vec3 *outTriangles) const
  335. {
  336. JPH_ASSERT(inNumTriangles > 0);
  337. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  338. const VertexData *vertices = header->GetVertexData();
  339. const TriangleBlock *t = header->GetTriangleBlock();
  340. const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2);
  341. int triangles_left = inNumTriangles;
  342. do
  343. {
  344. // Unpack the vertices for 4 triangles
  345. Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
  346. Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
  347. // Transpose it so we get normal vectors
  348. Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed();
  349. Mat44 v2 = Mat44(v2x, v2y, v2z, Vec4::sZero()).Transposed();
  350. Mat44 v3 = Mat44(v3x, v3y, v3z, Vec4::sZero()).Transposed();
  351. // Store triangle data
  352. for (int i = 0; i < 4 && triangles_left > 0; ++i, --triangles_left)
  353. {
  354. *outTriangles++ = v1.GetColumn3(i);
  355. *outTriangles++ = v2.GetColumn3(i);
  356. *outTriangles++ = v3.GetColumn3(i);
  357. }
  358. ++t;
  359. }
  360. while (t < end);
  361. }
  362. /// Tests a ray against the packed triangles
  363. JPH_INLINE float TestRay(Vec3Arg inRayOrigin, Vec3Arg inRayDirection, const void *inTriangleStart, uint32 inNumTriangles, float inClosest, uint32 &outClosestTriangleIndex) const
  364. {
  365. JPH_ASSERT(inNumTriangles > 0);
  366. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  367. const VertexData *vertices = header->GetVertexData();
  368. const TriangleBlock *t = header->GetTriangleBlock();
  369. const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2);
  370. Vec4 closest = Vec4::sReplicate(inClosest);
  371. UVec4 closest_triangle_idx = UVec4::sZero();
  372. UVec4 start_triangle_idx = UVec4::sZero();
  373. do
  374. {
  375. // Unpack the vertices for 4 triangles
  376. Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
  377. Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
  378. // Perform ray vs triangle test
  379. Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
  380. // Update closest with the smaller values
  381. UVec4 smaller = Vec4::sLess(distance, closest);
  382. closest = Vec4::sSelect(closest, distance, smaller);
  383. // Update triangle index with the smallest values
  384. UVec4 triangle_idx = start_triangle_idx + UVec4(0, 1, 2, 3);
  385. closest_triangle_idx = UVec4::sSelect(closest_triangle_idx, triangle_idx, smaller);
  386. // Next block
  387. ++t;
  388. start_triangle_idx += UVec4::sReplicate(4);
  389. }
  390. while (t < end);
  391. // Get the smallest component
  392. Vec4::sSort4(closest, closest_triangle_idx);
  393. outClosestTriangleIndex = closest_triangle_idx.GetX();
  394. return closest.GetX();
  395. }
  396. /// Decode a single triangle
  397. inline void GetTriangle(const void *inTriangleStart, uint32 inTriangleIdx, Vec3 &outV1, Vec3 &outV2, Vec3 &outV3) const
  398. {
  399. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  400. const VertexData *vertices = header->GetVertexData();
  401. const TriangleBlock *block = header->GetTriangleBlock() + (inTriangleIdx >> 2);
  402. uint32 block_triangle_idx = inTriangleIdx & 0b11;
  403. // Get the 3 vertices
  404. const VertexData &v1 = vertices[block->mIndices[0][block_triangle_idx]];
  405. const VertexData &v2 = vertices[block->mIndices[1][block_triangle_idx]];
  406. const VertexData &v3 = vertices[block->mIndices[2][block_triangle_idx]];
  407. // Pack the vertices
  408. UVec4 c1(v1.mVertexXY, v2.mVertexXY, v3.mVertexXY, 0);
  409. UVec4 c2(v1.mVertexZY, v2.mVertexZY, v3.mVertexZY, 0);
  410. // Unpack the x y and z component
  411. UVec4 xc = UVec4::sAnd(c1, UVec4::sReplicate(COMPONENT_MASK));
  412. UVec4 yc = UVec4::sOr(c1.LogicalShiftRight<COMPONENT_Y1>(), c2.LogicalShiftRight<COMPONENT_Y2>().LogicalShiftLeft<COMPONENT_Y1_BITS>());
  413. UVec4 zc = UVec4::sAnd(c2, UVec4::sReplicate(COMPONENT_MASK));
  414. // Convert to float
  415. Vec4 vx = Vec4::sFusedMultiplyAdd(xc.ToFloat(), mScaleX, mOffsetX);
  416. Vec4 vy = Vec4::sFusedMultiplyAdd(yc.ToFloat(), mScaleY, mOffsetY);
  417. Vec4 vz = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
  418. // Transpose it so we get normal vectors
  419. Mat44 trans = Mat44(vx, vy, vz, Vec4::sZero()).Transposed();
  420. outV1 = trans.GetAxisX();
  421. outV2 = trans.GetAxisY();
  422. outV3 = trans.GetAxisZ();
  423. }
  424. /// Get user data for a triangle
  425. JPH_INLINE uint32 GetUserData(const void *inTriangleStart, uint32 inTriangleIdx) const
  426. {
  427. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  428. const uint32 *user_data = header->GetUserData();
  429. return user_data != nullptr? user_data[inTriangleIdx] : 0;
  430. }
  431. /// Get flags for entire triangle block
  432. JPH_INLINE static void sGetFlags(const void *inTriangleStart, uint32 inNumTriangles, uint8 *outTriangleFlags)
  433. {
  434. JPH_ASSERT(inNumTriangles > 0);
  435. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  436. const TriangleBlock *t = header->GetTriangleBlock();
  437. const TriangleBlock *end = t + ((inNumTriangles + 3) >> 2);
  438. int triangles_left = inNumTriangles;
  439. do
  440. {
  441. for (int i = 0; i < 4 && triangles_left > 0; ++i, --triangles_left)
  442. *outTriangleFlags++ = t->mFlags[i];
  443. ++t;
  444. }
  445. while (t < end);
  446. }
  447. /// Get flags for a particular triangle
  448. JPH_INLINE static uint8 sGetFlags(const void *inTriangleStart, int inTriangleIndex)
  449. {
  450. const TriangleBlockHeader *header = reinterpret_cast<const TriangleBlockHeader *>(inTriangleStart);
  451. const TriangleBlock *first_block = header->GetTriangleBlock();
  452. return first_block[inTriangleIndex >> 2].mFlags[inTriangleIndex & 0b11];
  453. }
  454. /// Unpacks triangles and flags, convenience function
  455. JPH_INLINE void Unpack(const void *inTriangleStart, uint32 inNumTriangles, Vec3 *outTriangles, uint8 *outTriangleFlags) const
  456. {
  457. Unpack(inTriangleStart, inNumTriangles, outTriangles);
  458. sGetFlags(inTriangleStart, inNumTriangles, outTriangleFlags);
  459. }
  460. private:
  461. Vec4 mOffsetX;
  462. Vec4 mOffsetY;
  463. Vec4 mOffsetZ;
  464. Vec4 mScaleX;
  465. Vec4 mScaleY;
  466. Vec4 mScaleZ;
  467. };
  468. };
  469. JPH_NAMESPACE_END