// grid_soa.h (12 KB)
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../common/ray.h"
  5. #include "../common/scene_subdiv_mesh.h"
  6. #include "../bvh/bvh.h"
  7. #include "../subdiv/tessellation.h"
  8. #include "../subdiv/tessellation_cache.h"
  9. #include "subdivpatch1.h"
  10. namespace embree
  11. {
  12. namespace isa
  13. {
  14. class GridSOA
  15. {
  16. public:
  17. /*! GridSOA constructor */
  18. GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps,
  19. const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
  20. const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);
  21. /*! Subgrid creation */
  22. template<typename Allocator>
  23. static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,
  24. unsigned x0, unsigned x1, unsigned y0, unsigned y1,
  25. const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
  26. {
  27. const unsigned width = x1-x0+1;
  28. const unsigned height = y1-y0+1;
  29. const GridRange range(0,width-1,0,height-1);
  30. size_t bvhBytes = 0;
  31. if (time_steps == 1)
  32. bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0);
  33. else {
  34. bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0);
  35. bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D));
  36. }
  37. const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float);
  38. size_t rootBytes = time_steps*sizeof(BVH4::NodeRef);
  39. #if !defined(__64BIT__)
  40. rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
  41. #endif
  42. void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes);
  43. assert(data);
  44. return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o);
  45. }
  46. /*! Grid creation */
  47. template<typename Allocator>
  48. static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,
  49. const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
  50. {
  51. return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o);
  52. }
  53. /*! returns reference to root */
  54. __forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
  55. __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
  56. /*! returns pointer to BVH array */
  57. __forceinline char* bvhData() { return &data[0]; }
  58. __forceinline const char* bvhData() const { return &data[0]; }
  59. /*! returns pointer to Grid array */
  60. __forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; }
  61. __forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; }
  62. __forceinline void* encodeLeaf(size_t u, size_t v) {
  63. return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf
  64. }
  65. __forceinline float* decodeLeaf(size_t t, const void* ptr) {
  66. return gridData(t) + (((size_t) (ptr) >> 4) - 1);
  67. }
  68. /*! returns the size of the BVH over the grid in bytes */
  69. static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);
  70. /*! returns the size of the temporal BVH over the time range BVHs */
  71. static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes);
  72. /*! calculates bounding box of grid range */
  73. __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
  74. {
  75. const float* const grid_array = gridData(time);
  76. const float* const grid_x_array = grid_array + 0 * dim_offset;
  77. const float* const grid_y_array = grid_array + 1 * dim_offset;
  78. const float* const grid_z_array = grid_array + 2 * dim_offset;
  79. /* compute the bounds just for the range! */
  80. BBox3fa bounds( empty );
  81. for (unsigned v = range.v_start; v<=range.v_end; v++)
  82. {
  83. for (unsigned u = range.u_start; u<=range.u_end; u++)
  84. {
  85. const float x = grid_x_array[ v * width + u];
  86. const float y = grid_y_array[ v * width + u];
  87. const float z = grid_z_array[ v * width + u];
  88. bounds.extend( Vec3fa(x,y,z) );
  89. }
  90. }
  91. assert(is_finite(bounds));
  92. return bounds;
  93. }
  94. /*! Evaluates grid over patch and builds BVH4 tree over the grid. */
  95. std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o);
  96. /*! Create BVH4 tree over grid. */
  97. std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator);
  98. /*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */
  99. std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o);
  100. /*! Create MBlur BVH4 tree over grid. */
  101. std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator);
  102. /*! Create MSMBlur BVH4 tree over grid. */
  103. std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o);
  104. template<typename Loader>
  105. struct MapUV
  106. {
  107. typedef typename Loader::vfloat vfloat;
  108. const float* const grid_uv;
  109. size_t line_offset;
  110. size_t lines;
  111. __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
  112. : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}
  113. __forceinline void operator() (vfloat& u, vfloat& v, Vec3<vfloat>& Ng) const {
  114. const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);
  115. const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]);
  116. const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]);
  117. const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]);
  118. const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0;
  119. u = uv[0];v = uv[1];
  120. }
  121. };
  122. struct Gather2x3
  123. {
  124. enum { M = 4 };
  125. typedef vbool4 vbool;
  126. typedef vint4 vint;
  127. typedef vfloat4 vfloat;
  128. static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines)
  129. {
  130. vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset);
  131. vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
  132. if (unlikely(line_offset == 2))
  133. {
  134. r0 = shuffle<0,1,1,1>(r0);
  135. r1 = shuffle<0,1,1,1>(r1);
  136. }
  137. return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11
  138. shuffle<1,1,2,2>(r0), // r01, r01, r02, r02
  139. shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12
  140. }
  141. static __forceinline void gather(const float* const grid_x,
  142. const float* const grid_y,
  143. const float* const grid_z,
  144. const size_t line_offset,
  145. const size_t lines,
  146. Vec3vf4& v0_o,
  147. Vec3vf4& v1_o,
  148. Vec3vf4& v2_o)
  149. {
  150. const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines);
  151. const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines);
  152. const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines);
  153. v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
  154. v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
  155. v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
  156. }
  157. };
  158. #if defined (__AVX__)
  159. struct Gather3x3
  160. {
  161. enum { M = 8 };
  162. typedef vbool8 vbool;
  163. typedef vint8 vint;
  164. typedef vfloat8 vfloat;
  165. static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)
  166. {
  167. vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);
  168. vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
  169. vfloat4 rc;
  170. if (likely(lines > 2))
  171. rc = vfloat4::loadu(grid + 2*line_offset);
  172. else
  173. rc = rb;
  174. if (unlikely(line_offset == 2))
  175. {
  176. ra = shuffle<0,1,1,1>(ra);
  177. rb = shuffle<0,1,1,1>(rb);
  178. rc = shuffle<0,1,1,1>(rc);
  179. }
  180. const vfloat8 r0 = vfloat8(ra,rb);
  181. const vfloat8 r1 = vfloat8(rb,rc);
  182. return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21
  183. shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12
  184. shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22
  185. }
  186. static __forceinline void gather(const float* const grid_x,
  187. const float* const grid_y,
  188. const float* const grid_z,
  189. const size_t line_offset,
  190. const size_t lines,
  191. Vec3vf8& v0_o,
  192. Vec3vf8& v1_o,
  193. Vec3vf8& v2_o)
  194. {
  195. const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines);
  196. const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines);
  197. const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines);
  198. v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
  199. v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
  200. v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
  201. }
  202. };
  203. #endif
  204. template<typename vfloat>
  205. static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
  206. {
  207. typedef typename vfloat::Int vint;
  208. const vint iu = asInt(uv) & 0xffff;
  209. const vint iv = srl(asInt(uv),16);
  210. const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000);
  211. const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000);
  212. return Vec2<vfloat>(u,v);
  213. }
  214. __forceinline unsigned int geomID() const {
  215. return _geomID;
  216. }
  217. __forceinline unsigned int primID() const {
  218. return _primID;
  219. }
  220. public:
  221. BVH4::NodeRef troot;
  222. #if !defined(__64BIT__)
  223. unsigned align1;
  224. #endif
  225. unsigned time_steps;
  226. unsigned width;
  227. unsigned height;
  228. unsigned dim_offset;
  229. unsigned _geomID;
  230. unsigned _primID;
  231. unsigned align2;
  232. unsigned gridOffset;
  233. unsigned gridBytes;
  234. unsigned rootOffset;
  235. char data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots
  236. };
  237. }
  238. }