grid_soa.h

// ======================================================================== //
// Copyright 2009-2017 Intel Corporation                                    //
//                                                                          //
// Licensed under the Apache License, Version 2.0 (the "License");          //
// you may not use this file except in compliance with the License.         //
// You may obtain a copy of the License at                                  //
//                                                                          //
//     http://www.apache.org/licenses/LICENSE-2.0                           //
//                                                                          //
// Unless required by applicable law or agreed to in writing, software      //
// distributed under the License is distributed on an "AS IS" BASIS,        //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and      //
// limitations under the License.                                           //
// ======================================================================== //

#pragma once

#include "../common/ray.h"
#include "../common/scene_subdiv_mesh.h"
#include "filter.h"
#include "../bvh/bvh.h"
#include "../subdiv/tessellation.h"
#include "../subdiv/tessellation_cache.h"
#include "subdivpatch1cached.h"

namespace embree
{
  namespace isa
  {
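    /*! GridSOA stores the tessellated grid of a subdivision patch in
     *  struct-of-arrays layout (x, y, z and a packed-uv plane, dim_offset
     *  floats apart) together with a BVH4 built over the grid for each time
     *  step. The whole object lives in a single allocation: the struct
     *  header is followed by the BVH node array(s), the grid array(s), and
     *  finally the per-time-step root references (see the data member). */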
    class GridSOA
    {
    public:

      /*! GridSOA constructor */
      GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps, const unsigned time_steps_global,
              const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
              const SubdivMesh* const geom, const size_t bvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);

      /*! Subgrid creation */
      template<typename Allocator>
      static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps, const unsigned time_steps_global,
                             unsigned x0, unsigned x1, unsigned y0, unsigned y1,
                             const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
      {
        const unsigned width  = x1-x0+1;
        const unsigned height = y1-y0+1;
        const GridRange range(0,width-1,0,height-1);
        const size_t nodeBytes = time_steps_global == 1 ? sizeof(BVH4::AlignedNode) : sizeof(BVH4::AlignedNodeMB);
        const size_t bvhBytes  = getBVHBytes(range,nodeBytes,0);
        const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float);
        size_t rootBytes = time_steps_global*sizeof(BVH4::NodeRef);
#if !defined(__X86_64__)
        rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
#endif
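        /* single allocation: struct header, then the BVH array(s), then the grid array(s), then the per-time-step roots */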
        void* data = alloc(offsetof(GridSOA,data)+max(1u,time_steps_global-1)*bvhBytes+time_steps*gridBytes+rootBytes);
        assert(data);
        return new (data) GridSOA(patches,time_steps,time_steps_global,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geom),bvhBytes,gridBytes,bounds_o);
      }

      /*! Grid creation */
      template<typename Allocator>
      static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps, const unsigned time_steps_global,
                             const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
      {
        return create(patches,time_steps,time_steps_global,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o);
      }

      /*! returns reference to root */
      __forceinline       BVH4::NodeRef& root(size_t t = 0)       { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
      __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }

      /*! returns pointer to BVH array */
      __forceinline       char* bvhData(size_t t = 0)       { return &data[t*bvhBytes]; }
      __forceinline const char* bvhData(size_t t = 0) const { return &data[t*bvhBytes]; }

      /*! returns pointer to Grid array */
      __forceinline       float* gridData(size_t t = 0)       { return (float*) &data[gridOffset + t*gridBytes]; }
      __forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; }
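
      /*! leaf "pointers" encode the (u,v) position inside the grid as 16*(v*width+u+1)
       *  rather than an actual address; decodeLeaf() reverses the encoding and returns
       *  a pointer into the grid data of the given time step */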
      __forceinline void* encodeLeaf(size_t u, size_t v) {
        return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf
      }
      __forceinline float* decodeLeaf(size_t t, const void* ptr) {
        return gridData(t) + (((size_t) (ptr) >> 4) - 1);
      }

      /*! returns the size of the BVH over the grid in bytes */
      static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);

      /*! calculates bounding box of grid range */
      __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
      {
        const float* const grid_array   = gridData(time);
        const float* const grid_x_array = grid_array + 0 * dim_offset;
        const float* const grid_y_array = grid_array + 1 * dim_offset;
        const float* const grid_z_array = grid_array + 2 * dim_offset;

        /* compute the bounds just for the range! */
        BBox3fa bounds( empty );
        for (unsigned v = range.v_start; v<=range.v_end; v++)
        {
          for (unsigned u = range.u_start; u<=range.u_end; u++)
          {
            const float x = grid_x_array[ v * width + u];
            const float y = grid_y_array[ v * width + u];
            const float z = grid_z_array[ v * width + u];
            bounds.extend( Vec3fa(x,y,z) );
          }
        }
        assert(is_finite(bounds));
        return bounds;
      }

      /*! Evaluates grid over patch and builds BVH4 tree over the grid. */
      BVH4::NodeRef buildBVH(size_t time, BBox3fa* bounds_o);

      /*! Create BVH4 tree over grid. */
      BBox3fa buildBVH(BVH4::NodeRef& curNode, size_t time, const GridRange& range, size_t& allocator);

      /*! Evaluates grid over patch and builds MBlur BVH4 tree over the grid. */
      BVH4::NodeRef buildMBlurBVH(size_t time, LBBox3fa* bounds_o);

      /*! Create MBlur BVH4 tree over grid. */
      LBBox3fa buildMBlurBVH(BVH4::NodeRef& curNode, size_t time, const GridRange& range, size_t& allocator);
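
      /*! MapUV remaps the barycentric hit coordinates (u,v) of a grid triangle to the
       *  patch UVs stored in the packed-uv plane of the grid: the three per-vertex UVs
       *  are gathered with the same Loader used for the vertex gather, decoded with
       *  decodeUV(), and interpolated with the barycentric weights */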
      template<typename Loader>
      struct MapUV
      {
        typedef typename Loader::vfloat vfloat;

        const float* const grid_uv;
        size_t line_offset;
        size_t lines;

        __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
          : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}

        __forceinline void operator() (vfloat& u, vfloat& v) const {
          const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);
          const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]);
          const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]);
          const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]);
          const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0;
          u = uv[0]; v = uv[1];
        }
      };
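
      /*! Gather2x3 loads a 2x3 block of grid values and transposes it into the SSE
       *  vertex layout of the 4 triangles spanned by that block; the unaligned 4-wide
       *  loads may read past the end of a 2x2 grid, which the padding allocated behind
       *  the grid covers */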
      struct Gather2x3
      {
        enum { M = 4 };
        typedef vbool4 vbool;
        typedef vint4 vint;
        typedef vfloat4 vfloat;

        static __forceinline const Vec3<vfloat4> gather(const float* const grid, const size_t line_offset, const size_t lines)
        {
          vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset);
          vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
          if (unlikely(line_offset == 2))
          {
            r0 = shuffle<0,1,1,1>(r0);
            r1 = shuffle<0,1,1,1>(r1);
          }
          return Vec3<vfloat4>(unpacklo(r0,r1),       // r00, r10, r01, r11
                               shuffle<1,1,2,2>(r0),  // r01, r01, r02, r02
                               shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12
        }
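
        /*! gathers the x, y and z planes and rearranges them into the three
         *  triangle-vertex vectors v0, v1 and v2 */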
        static __forceinline void gather(const float* const grid_x,
                                         const float* const grid_y,
                                         const float* const grid_z,
                                         const size_t line_offset,
                                         const size_t lines,
                                         Vec3<vfloat4>& v0_o,
                                         Vec3<vfloat4>& v1_o,
                                         Vec3<vfloat4>& v2_o)
        {
          const Vec3<vfloat4> tri_v012_x = gather(grid_x,line_offset,lines);
          const Vec3<vfloat4> tri_v012_y = gather(grid_y,line_offset,lines);
          const Vec3<vfloat4> tri_v012_z = gather(grid_z,line_offset,lines);
          v0_o = Vec3<vfloat4>(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
          v1_o = Vec3<vfloat4>(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
          v2_o = Vec3<vfloat4>(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
        }
      };

#if defined (__AVX__)
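      /*! Gather3x3 is the AVX variant: it loads a 3x3 block of grid values and
       *  transposes it into the vertex layout of the 8 triangles spanned by that
       *  block; when fewer than three grid lines are available the second line is
       *  reused for the third */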
      struct Gather3x3
      {
        enum { M = 8 };
        typedef vbool8 vbool;
        typedef vint8 vint;
        typedef vfloat8 vfloat;

        static __forceinline const Vec3<vfloat8> gather(const float* const grid, const size_t line_offset, const size_t lines)
        {
          vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);
          vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
          vfloat4 rc;
          if (likely(lines > 2))
            rc = vfloat4::loadu(grid + 2*line_offset);
          else
            rc = rb;

          if (unlikely(line_offset == 2))
          {
            ra = shuffle<0,1,1,1>(ra);
            rb = shuffle<0,1,1,1>(rb);
            rc = shuffle<0,1,1,1>(rc);
          }

          const vfloat8 r0 = vfloat8(ra,rb);
          const vfloat8 r1 = vfloat8(rb,rc);
          return Vec3<vfloat8>(unpacklo(r0,r1),       // r00, r10, r01, r11, r10, r20, r11, r21
                               shuffle<1,1,2,2>(r0),  // r01, r01, r02, r02, r11, r11, r12, r12
                               shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22
        }

        static __forceinline void gather(const float* const grid_x,
                                         const float* const grid_y,
                                         const float* const grid_z,
                                         const size_t line_offset,
                                         const size_t lines,
                                         Vec3<vfloat8>& v0_o,
                                         Vec3<vfloat8>& v1_o,
                                         Vec3<vfloat8>& v2_o)
        {
          const Vec3<vfloat8> tri_v012_x = gather(grid_x,line_offset,lines);
          const Vec3<vfloat8> tri_v012_y = gather(grid_y,line_offset,lines);
          const Vec3<vfloat8> tri_v012_z = gather(grid_z,line_offset,lines);
          v0_o = Vec3<vfloat8>(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
          v1_o = Vec3<vfloat8>(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
          v2_o = Vec3<vfloat8>(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
        }
      };
#endif
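
      /*! each float of the packed-uv plane stores u in its lower 16 bits and v in its
       *  upper 16 bits as fixed-point values; decodeUV() converts them back to floats
       *  in [0,1] by scaling with 1/0xFFFF */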
      template<typename vfloat>
      static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
      {
        typedef typename vfloat::Int vint;
        const vint iu  = asInt(uv) & 0xffff;
        const vint iv  = srl(asInt(uv),16);
        const vfloat u = (vfloat)iu * vfloat(1.0f/0xFFFF);
        const vfloat v = (vfloat)iv * vfloat(1.0f/0xFFFF);
        return Vec2<vfloat>(u,v);
      }

    public:
      unsigned align0;
      unsigned time_steps_global;
      unsigned time_steps;
      unsigned width;
      unsigned height;
      unsigned dim_offset;
      unsigned geomID;
      unsigned primID;
      unsigned bvhBytes;
      unsigned gridOffset;
      unsigned gridBytes;
      unsigned rootOffset;
      char data[1];        //!< after the struct we first store the BVH, then the grid, and finally the roots
    };
  }
}