quadi.h 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "primitive.h"
  18. #include "../common/scene.h"
  19. namespace embree
  20. {
  21. /* Stores M quads from an indexed face set */
  22. template <int M>
  23. struct QuadMi
  24. {
  25. typedef Vec3<vfloat<M>> Vec3vfM;
  26. /* Virtual interface to query information about the quad type */
  27. struct Type : public PrimitiveType
  28. {
  29. Type();
  30. size_t size(const char* This) const;
  31. };
  32. static Type type;
  33. public:
  34. /* Returns maximal number of stored quads */
  35. static __forceinline size_t max_size() { return M; }
  36. /* Returns required number of primitive blocks for N primitives */
  37. static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); }
  38. public:
  39. /* Default constructor */
  40. __forceinline QuadMi() { }
  41. /* Construction from vertices and IDs */
  42. __forceinline QuadMi(const vint<M>& v0, const vint<M>& v1, const vint<M>& v2, const vint<M>& v3, const vint<M>& geomIDs, const vint<M>& primIDs)
  43. : v0(v0),v1(v1), v2(v2), v3(v3), geomIDs(geomIDs), primIDs(primIDs)
  44. {
  45. }
  46. /* Returns a mask that tells which quads are valid */
  47. __forceinline vbool<M> valid() const { return primIDs != vint<M>(-1); }
  48. /* Returns if the specified quad is valid */
  49. __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; }
  50. /* Returns the number of stored quads */
  51. __forceinline size_t size() const { return __bsf(~movemask(valid())); }
  52. /* Returns the geometry IDs */
  53. __forceinline vint<M> geomID() const { return geomIDs; }
  54. __forceinline int geomID(const size_t i) const { assert(i<M); assert(geomIDs[i] != -1); return geomIDs[i]; }
  55. /* Returns the primitive IDs */
  56. __forceinline vint<M> primID() const { return primIDs; }
  57. __forceinline int primID(const size_t i) const { assert(i<M); return primIDs[i]; }
  58. __forceinline Vec3fa& getVertex(const vint<M> &v, const size_t index, const Scene *const scene) const
  59. {
  60. const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index));
  61. return *(Vec3fa*)mesh->vertexPtr(v[index]); // FIMXE: should use loadu
  62. }
  63. /* gather the quads */
  64. __forceinline void gather(Vec3<vfloat<M>>& p0,
  65. Vec3<vfloat<M>>& p1,
  66. Vec3<vfloat<M>>& p2,
  67. Vec3<vfloat<M>>& p3,
  68. const Scene *const scene) const;
  69. #if defined(__AVX512F__)
  70. __forceinline void gather(Vec3vf16& p0,
  71. Vec3vf16& p1,
  72. Vec3vf16& p2,
  73. Vec3vf16& p3,
  74. const Scene *const scene) const;
  75. #endif
  76. /* Fill quad from quad list */
  77. __forceinline void fill(const PrimRef* prims, size_t& begin, size_t end, Scene* scene)
  78. {
  79. vint<M> geomID = -1, primID = -1;
  80. vint<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero;
  81. const PrimRef* prim = &prims[begin];
  82. for (size_t i=0; i<M; i++)
  83. {
  84. const QuadMesh* mesh = scene->get<QuadMesh>(prim->geomID());
  85. const QuadMesh::Quad& q = mesh->quad(prim->primID());
  86. if (begin<end) {
  87. geomID[i] = prim->geomID();
  88. primID[i] = prim->primID();
  89. v0[i] = q.v[0];
  90. v1[i] = q.v[1];
  91. v2[i] = q.v[2];
  92. v3[i] = q.v[3];
  93. begin++;
  94. } else {
  95. assert(i);
  96. geomID[i] = geomID[0]; // always valid geomIDs
  97. primID[i] = -1; // indicates invalid data
  98. v0[i] = 0;
  99. v1[i] = 0;
  100. v2[i] = 0;
  101. v3[i] = 0;
  102. }
  103. if (begin<end) prim = &prims[begin];
  104. }
  105. new (this) QuadMi(v0,v1,v2,v3,geomID,primID); // FIXME: use non temporal store
  106. }
  107. /* Updates the primitive */
  108. __forceinline BBox3fa update(QuadMesh* mesh)
  109. {
  110. BBox3fa bounds = empty;
  111. for (size_t i=0; i<M; i++)
  112. {
  113. if (!valid(i)) break;
  114. const unsigned primId = primID(i);
  115. const QuadMesh::Quad& q = mesh->quad(primId);
  116. const Vec3fa p0 = mesh->vertex(q.v[0]);
  117. const Vec3fa p1 = mesh->vertex(q.v[1]);
  118. const Vec3fa p2 = mesh->vertex(q.v[2]);
  119. const Vec3fa p3 = mesh->vertex(q.v[3]);
  120. bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3)));
  121. }
  122. return bounds;
  123. }
  124. public:
  125. vint<M> v0; // index of 1st vertex
  126. vint<M> v1; // index of 2nd vertex
  127. vint<M> v2; // index of 3rd vertex
  128. vint<M> v3; // index of 4rd vertex
  129. vint<M> geomIDs; // geometry ID of mesh
  130. vint<M> primIDs; // primitive ID of primitive inside mesh
  131. };
  132. template<>
  133. __forceinline void QuadMi<4>::gather(Vec3vf4& p0,
  134. Vec3vf4& p1,
  135. Vec3vf4& p2,
  136. Vec3vf4& p3,
  137. const Scene *const scene) const
  138. {
  139. prefetchL1(((char*)this)+0*64);
  140. prefetchL1(((char*)this)+1*64);
  141. const QuadMesh* mesh0 = scene->get<QuadMesh>(geomIDs[0]);
  142. const QuadMesh* mesh1 = scene->get<QuadMesh>(geomIDs[1]);
  143. const QuadMesh* mesh2 = scene->get<QuadMesh>(geomIDs[2]);
  144. const QuadMesh* mesh3 = scene->get<QuadMesh>(geomIDs[3]);
  145. const vfloat4 a0 = vfloat4::loadu(mesh0->vertexPtr(v0[0]));
  146. const vfloat4 a1 = vfloat4::loadu(mesh1->vertexPtr(v0[1]));
  147. const vfloat4 a2 = vfloat4::loadu(mesh2->vertexPtr(v0[2]));
  148. const vfloat4 a3 = vfloat4::loadu(mesh3->vertexPtr(v0[3]));
  149. transpose(a0,a1,a2,a3,p0.x,p0.y,p0.z);
  150. const vfloat4 b0 = vfloat4::loadu(mesh0->vertexPtr(v1[0]));
  151. const vfloat4 b1 = vfloat4::loadu(mesh1->vertexPtr(v1[1]));
  152. const vfloat4 b2 = vfloat4::loadu(mesh2->vertexPtr(v1[2]));
  153. const vfloat4 b3 = vfloat4::loadu(mesh3->vertexPtr(v1[3]));
  154. transpose(b0,b1,b2,b3,p1.x,p1.y,p1.z);
  155. const vfloat4 c0 = vfloat4::loadu(mesh0->vertexPtr(v2[0]));
  156. const vfloat4 c1 = vfloat4::loadu(mesh1->vertexPtr(v2[1]));
  157. const vfloat4 c2 = vfloat4::loadu(mesh2->vertexPtr(v2[2]));
  158. const vfloat4 c3 = vfloat4::loadu(mesh3->vertexPtr(v2[3]));
  159. transpose(c0,c1,c2,c3,p2.x,p2.y,p2.z);
  160. const vfloat4 d0 = vfloat4::loadu(mesh0->vertexPtr(v3[0]));
  161. const vfloat4 d1 = vfloat4::loadu(mesh1->vertexPtr(v3[1]));
  162. const vfloat4 d2 = vfloat4::loadu(mesh2->vertexPtr(v3[2]));
  163. const vfloat4 d3 = vfloat4::loadu(mesh3->vertexPtr(v3[3]));
  164. transpose(d0,d1,d2,d3,p3.x,p3.y,p3.z);
  165. }
  166. #if defined(__AVX512F__)
  167. template<>
  168. __forceinline void QuadMi<4>::gather(Vec3vf16& p0,
  169. Vec3vf16& p1,
  170. Vec3vf16& p2,
  171. Vec3vf16& p3,
  172. const Scene *const scene) const
  173. {
  174. const QuadMesh* mesh0 = scene->get<QuadMesh>(geomIDs[0]);
  175. const QuadMesh* mesh1 = scene->get<QuadMesh>(geomIDs[1]);
  176. const QuadMesh* mesh2 = scene->get<QuadMesh>(geomIDs[2]);
  177. const QuadMesh* mesh3 = scene->get<QuadMesh>(geomIDs[3]);
  178. const vint16 perm(0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);
  179. const vfloat4 &a0 = *(vfloat4*)(mesh0->vertexPtr(v0[0]));
  180. const vfloat4 &a1 = *(vfloat4*)(mesh1->vertexPtr(v0[1]));
  181. const vfloat4 &a2 = *(vfloat4*)(mesh2->vertexPtr(v0[2]));
  182. const vfloat4 &a3 = *(vfloat4*)(mesh3->vertexPtr(v0[3]));
  183. const vfloat16 _p0(permute(vfloat16(a0,a1,a2,a3),perm));
  184. const vfloat4 &b0 = *(vfloat4*)(mesh0->vertexPtr(v1[0]));
  185. const vfloat4 &b1 = *(vfloat4*)(mesh1->vertexPtr(v1[1]));
  186. const vfloat4 &b2 = *(vfloat4*)(mesh2->vertexPtr(v1[2]));
  187. const vfloat4 &b3 = *(vfloat4*)(mesh3->vertexPtr(v1[3]));
  188. const vfloat16 _p1(permute(vfloat16(b0,b1,b2,b3),perm));
  189. const vfloat4 &c0 = *(vfloat4*)(mesh0->vertexPtr(v2[0]));
  190. const vfloat4 &c1 = *(vfloat4*)(mesh1->vertexPtr(v2[1]));
  191. const vfloat4 &c2 = *(vfloat4*)(mesh2->vertexPtr(v2[2]));
  192. const vfloat4 &c3 = *(vfloat4*)(mesh3->vertexPtr(v2[3]));
  193. const vfloat16 _p2(permute(vfloat16(c0,c1,c2,c3),perm));
  194. const vfloat4 &d0 = *(vfloat4*)(mesh0->vertexPtr(v3[0]));
  195. const vfloat4 &d1 = *(vfloat4*)(mesh1->vertexPtr(v3[1]));
  196. const vfloat4 &d2 = *(vfloat4*)(mesh2->vertexPtr(v3[2]));
  197. const vfloat4 &d3 = *(vfloat4*)(mesh3->vertexPtr(v3[3]));
  198. const vfloat16 _p3(permute(vfloat16(d0,d1,d2,d3),perm));
  199. p0.x = shuffle4<0>(_p0);
  200. p0.y = shuffle4<1>(_p0);
  201. p0.z = shuffle4<2>(_p0);
  202. p1.x = shuffle4<0>(_p1);
  203. p1.y = shuffle4<1>(_p1);
  204. p1.z = shuffle4<2>(_p1);
  205. p2.x = shuffle4<0>(_p2);
  206. p2.y = shuffle4<1>(_p2);
  207. p2.z = shuffle4<2>(_p2);
  208. p3.x = shuffle4<0>(_p3);
  209. p3.y = shuffle4<1>(_p3);
  210. p3.z = shuffle4<2>(_p3);
  211. }
  212. #endif
  213. template<int M>
  214. typename QuadMi<M>::Type QuadMi<M>::type;
  215. typedef QuadMi<4> Quad4i;
  216. }