grid_soa.cpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #include "grid_soa.h"
  17. namespace embree
  18. {
  19. namespace isa
  20. {
  21. GridSOA::GridSOA(const SubdivPatch1Base* patches, unsigned time_steps, unsigned time_steps_global,
  22. const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
  23. const SubdivMesh* const geom, const size_t bvhBytes, const size_t gridBytes, BBox3fa* bounds_o)
  24. : align0(0),
  25. time_steps_global(time_steps_global),time_steps(time_steps), width(x1-x0+1), height(y1-y0+1), dim_offset(width*height),
  26. geomID(patches->geom), primID(patches->prim),
  27. bvhBytes(unsigned(bvhBytes)), gridOffset(max(1u,time_steps_global-1)*unsigned(bvhBytes)), gridBytes(unsigned(gridBytes)), rootOffset(unsigned(gridOffset+time_steps*gridBytes))
  28. {
  29. /* the generate loops need padded arrays, thus first store into these temporary arrays */
  30. unsigned temp_size = width*height+VSIZEX;
  31. dynamic_large_stack_array(float,local_grid_u,temp_size,64*64*sizeof(float));
  32. dynamic_large_stack_array(float,local_grid_v,temp_size,64*64*sizeof(float));
  33. dynamic_large_stack_array(float,local_grid_x,temp_size,64*64*sizeof(float));
  34. dynamic_large_stack_array(float,local_grid_y,temp_size,64*64*sizeof(float));
  35. dynamic_large_stack_array(float,local_grid_z,temp_size,64*64*sizeof(float));
  36. dynamic_large_stack_array(float,local_grid_uv,temp_size,64*64*sizeof(float));
  37. /* first create the grids for each time step */
  38. for (size_t t=0; t<time_steps; t++)
  39. {
  40. /* compute vertex grid (+displacement) */
  41. evalGrid(patches[t],x0,x1,y0,y1,swidth,sheight,
  42. local_grid_x,local_grid_y,local_grid_z,local_grid_u,local_grid_v,geom);
  43. /* encode UVs */
  44. for (unsigned i=0; i<dim_offset; i+=VSIZEX) {
  45. const vintx iu = (vintx) clamp(vfloatx::load(&local_grid_u[i])*0xFFFF, vfloatx(0.0f), vfloatx(0xFFFF));
  46. const vintx iv = (vintx) clamp(vfloatx::load(&local_grid_v[i])*0xFFFF, vfloatx(0.0f), vfloatx(0xFFFF));
  47. vintx::storeu(&local_grid_uv[i], (iv << 16) | iu);
  48. }
  49. /* copy temporary data to compact grid */
  50. float* const grid_x = (float*)(gridData(t) + 0*dim_offset);
  51. float* const grid_y = (float*)(gridData(t) + 1*dim_offset);
  52. float* const grid_z = (float*)(gridData(t) + 2*dim_offset);
  53. int * const grid_uv = (int* )(gridData(t) + 3*dim_offset);
  54. memcpy(grid_x, local_grid_x, dim_offset*sizeof(float));
  55. memcpy(grid_y, local_grid_y, dim_offset*sizeof(float));
  56. memcpy(grid_z, local_grid_z, dim_offset*sizeof(float));
  57. memcpy(grid_uv,local_grid_uv,dim_offset*sizeof(int));
  58. }
  59. /* create normal BVH when no motion blur is active */
  60. if (time_steps == 1) {
  61. root(0) = buildBVH(0,bounds_o);
  62. }
  63. /* otherwise build MBlur BVH */
  64. else
  65. {
  66. for (size_t t=0; t<time_steps_global-1; t++)
  67. {
  68. LBBox3fa bounds;
  69. root(t) = buildMBlurBVH(t,&bounds);
  70. if (bounds_o && time_steps == time_steps_global) {
  71. bounds_o[t+0] = bounds.bounds0;
  72. bounds_o[t+1] = bounds.bounds1;
  73. }
  74. }
  75. if (bounds_o && time_steps != time_steps_global)
  76. {
  77. GridRange range(0,width-1,0,height-1);
  78. for (size_t t=0; t<time_steps; t++)
  79. bounds_o[t] = calculateBounds(t,range);
  80. }
  81. }
  82. }
  83. size_t GridSOA::getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes)
  84. {
  85. if (range.hasLeafSize())
  86. return leafBytes;
  87. __aligned(64) GridRange r[4];
  88. const size_t children = range.splitIntoSubRanges(r);
  89. size_t bytes = nodeBytes;
  90. for (size_t i=0; i<children; i++)
  91. bytes += getBVHBytes(r[i],nodeBytes,leafBytes);
  92. return bytes;
  93. }
  94. BVH4::NodeRef GridSOA::buildBVH(size_t time, BBox3fa* bounds_o)
  95. {
  96. BVH4::NodeRef root = 0; size_t allocator = 0;
  97. GridRange range(0,width-1,0,height-1);
  98. BBox3fa bounds = buildBVH(root,time,range,allocator);
  99. if (bounds_o) *bounds_o = bounds;
  100. assert(allocator == bvhBytes);
  101. return root;
  102. }
  103. BBox3fa GridSOA::buildBVH(BVH4::NodeRef& curNode, size_t time, const GridRange& range, size_t& allocator)
  104. {
  105. /*! create leaf node */
  106. if (unlikely(range.hasLeafSize()))
  107. {
  108. /* we store index of first subgrid vertex as leaf node */
  109. curNode = BVH4::encodeTypedLeaf(encodeLeaf(range.u_start,range.v_start),0);
  110. /* return bounding box */
  111. return calculateBounds(time,range);
  112. }
  113. /* create internal node */
  114. else
  115. {
  116. /* allocate new bvh4 node */
  117. BVH4::AlignedNode* node = (BVH4::AlignedNode *)&bvhData(time)[allocator];
  118. allocator += sizeof(BVH4::AlignedNode);
  119. node->clear();
  120. /* split range */
  121. GridRange r[4];
  122. const unsigned children = range.splitIntoSubRanges(r);
  123. /* recurse into subtrees */
  124. BBox3fa bounds( empty );
  125. for (unsigned i=0; i<children; i++)
  126. {
  127. BBox3fa box = buildBVH( node->child(i), time, r[i], allocator);
  128. node->set(i,box);
  129. bounds.extend(box);
  130. }
  131. curNode = BVH4::encodeNode(node);
  132. assert(is_finite(bounds));
  133. return bounds;
  134. }
  135. }
  136. BVH4::NodeRef GridSOA::buildMBlurBVH(size_t time, LBBox3fa* bounds_o)
  137. {
  138. BVH4::NodeRef root = 0; size_t allocator = 0;
  139. GridRange range(0,width-1,0,height-1);
  140. LBBox3fa bounds = buildMBlurBVH(root,time,range,allocator);
  141. if (bounds_o) *bounds_o = bounds;
  142. assert(allocator == bvhBytes);
  143. return root;
  144. }
  145. LBBox3fa GridSOA::buildMBlurBVH(BVH4::NodeRef& curNode, size_t time, const GridRange& range, size_t& allocator)
  146. {
  147. /*! create leaf node */
  148. if (unlikely(range.hasLeafSize()))
  149. {
  150. /* we store index of first subgrid vertex as leaf node */
  151. curNode = BVH4::encodeTypedLeaf(encodeLeaf(range.u_start,range.v_start),0);
  152. /* return bounding box */
  153. return Geometry::linearBounds([&] (size_t itime) { return calculateBounds(itime,range); },
  154. time, time_steps_global, time_steps);
  155. }
  156. /* create internal node */
  157. else
  158. {
  159. /* allocate new bvh4 node */
  160. BVH4::AlignedNodeMB* node = (BVH4::AlignedNodeMB *)&bvhData(time)[allocator];
  161. allocator += sizeof(BVH4::AlignedNodeMB);
  162. node->clear();
  163. /* split range */
  164. GridRange r[4];
  165. const unsigned children = range.splitIntoSubRanges(r);
  166. /* recurse into subtrees */
  167. LBBox3fa bounds(empty);
  168. for (unsigned i=0; i<children; i++)
  169. {
  170. LBBox3fa box = buildMBlurBVH(node->child(i), time, r[i], allocator);
  171. node->set(i, box);
  172. bounds.extend(box);
  173. }
  174. curNode = BVH4::encodeNode(node);
  175. assert(is_finite(bounds.bounds0));
  176. assert(is_finite(bounds.bounds1));
  177. return bounds;
  178. }
  179. }
  180. }
  181. }