bvh_rotate.cpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #include "bvh_rotate.h"
  17. namespace embree
  18. {
  19. namespace isa
  20. {
  21. /*! Computes half surface area of box. */
  22. __forceinline float halfArea3f(const BBox<vfloat4>& box) {
  23. const vfloat4 d = box.size();
  24. const vfloat4 a = d*shuffle<1,2,0,3>(d);
  25. return a[0]+a[1]+a[2];
  26. }
  27. size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
  28. {
  29. /*! nothing to rotate if we reached a leaf node. */
  30. if (parentRef.isBarrier()) return 0;
  31. if (parentRef.isLeaf()) return 0;
  32. AlignedNode* parent = parentRef.alignedNode();
  33. /*! rotate all children first */
  34. vint4 cdepth;
  35. for (size_t c=0; c<4; c++)
  36. cdepth[c] = (int)rotate(parent->child(c),depth+1);
  37. /* compute current areas of all children */
  38. vfloat4 sizeX = parent->upper_x-parent->lower_x;
  39. vfloat4 sizeY = parent->upper_y-parent->lower_y;
  40. vfloat4 sizeZ = parent->upper_z-parent->lower_z;
  41. vfloat4 childArea = madd(sizeX,(sizeY + sizeZ),sizeY*sizeZ);
  42. /*! get node bounds */
  43. BBox<vfloat4> child1_0,child1_1,child1_2,child1_3;
  44. parent->bounds(child1_0,child1_1,child1_2,child1_3);
  45. /*! Find best rotation. We pick a first child (child1) and a sub-child
  46. (child2child) of a different second child (child2), and swap child1
  47. and child2child. We perform the best such swap. */
  48. float bestArea = 0;
  49. size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1;
  50. for (size_t c2=0; c2<4; c2++)
  51. {
  52. /*! ignore leaf nodes as we cannot descent into them */
  53. if (parent->child(c2).isBarrier()) continue;
  54. if (parent->child(c2).isLeaf()) continue;
  55. AlignedNode* child2 = parent->child(c2).alignedNode();
  56. /*! transpose child bounds */
  57. BBox<vfloat4> child2c0,child2c1,child2c2,child2c3;
  58. child2->bounds(child2c0,child2c1,child2c2,child2c3);
  59. /*! put child1_0 at each child2 position */
  60. float cost00 = halfArea3f(merge(child1_0,child2c1,child2c2,child2c3));
  61. float cost01 = halfArea3f(merge(child2c0,child1_0,child2c2,child2c3));
  62. float cost02 = halfArea3f(merge(child2c0,child2c1,child1_0,child2c3));
  63. float cost03 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_0));
  64. vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03);
  65. vfloat4 min0 = vreduce_min(cost0);
  66. int pos0 = (int)__bsf(movemask(min0 == cost0));
  67. /*! put child1_1 at each child2 position */
  68. float cost10 = halfArea3f(merge(child1_1,child2c1,child2c2,child2c3));
  69. float cost11 = halfArea3f(merge(child2c0,child1_1,child2c2,child2c3));
  70. float cost12 = halfArea3f(merge(child2c0,child2c1,child1_1,child2c3));
  71. float cost13 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_1));
  72. vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13);
  73. vfloat4 min1 = vreduce_min(cost1);
  74. int pos1 = (int)__bsf(movemask(min1 == cost1));
  75. /*! put child1_2 at each child2 position */
  76. float cost20 = halfArea3f(merge(child1_2,child2c1,child2c2,child2c3));
  77. float cost21 = halfArea3f(merge(child2c0,child1_2,child2c2,child2c3));
  78. float cost22 = halfArea3f(merge(child2c0,child2c1,child1_2,child2c3));
  79. float cost23 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_2));
  80. vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23);
  81. vfloat4 min2 = vreduce_min(cost2);
  82. int pos2 = (int)__bsf(movemask(min2 == cost2));
  83. /*! put child1_3 at each child2 position */
  84. float cost30 = halfArea3f(merge(child1_3,child2c1,child2c2,child2c3));
  85. float cost31 = halfArea3f(merge(child2c0,child1_3,child2c2,child2c3));
  86. float cost32 = halfArea3f(merge(child2c0,child2c1,child1_3,child2c3));
  87. float cost33 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_3));
  88. vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33);
  89. vfloat4 min3 = vreduce_min(cost3);
  90. int pos3 = (int)__bsf(movemask(min3 == cost3));
  91. /*! find best other child */
  92. vfloat4 area0123 = vfloat4(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3)) - vfloat4(childArea[c2]);
  93. int pos[4] = { pos0,pos1,pos2,pos3 };
  94. const size_t mbd = BVH4::maxBuildDepth;
  95. vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints
  96. valid &= vint4(c2) != vint4(step);
  97. if (none(valid)) continue;
  98. size_t c1 = select_min(valid,area0123);
  99. float area = area0123[c1];
  100. if (c1 == c2) continue; // can happen if bounds are NANs
  101. /*! accept a swap when it reduces cost and is not swapping a node with itself */
  102. if (area < bestArea) {
  103. bestArea = area;
  104. bestChild1 = c1;
  105. bestChild2 = c2;
  106. bestChild2Child = pos[c1];
  107. }
  108. }
  109. /*! if we did not find a swap that improves the SAH then do nothing */
  110. if (bestChild1 == size_t(-1)) return 1+reduce_max(cdepth);
  111. /*! perform the best found tree rotation */
  112. AlignedNode* child2 = parent->child(bestChild2).alignedNode();
  113. BVH4::swap(parent,bestChild1,child2,bestChild2Child);
  114. parent->set(bestChild2,child2->bounds());
  115. BVH4::compact(parent);
  116. BVH4::compact(child2);
  117. /*! This returned depth is conservative as the child that was
  118. * pulled up in the tree could have been on the critical path. */
  119. cdepth[bestChild1]++; // bestChild1 was pushed down one level
  120. return 1+reduce_max(cdepth);
  121. }
  122. }
  123. }