scene_subdiv_mesh_avx.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #include "scene_subdiv_mesh.h"
  17. #include "scene.h"
  18. #include "../subdiv/patch_eval.h"
  19. #include "../subdiv/patch_eval_simd.h"
  20. namespace embree
  21. {
  22. SubdivMeshAVX::SubdivMeshAVX(Scene* parent, RTCGeometryFlags flags, size_t numFaces, size_t numEdges, size_t numVertices,
  23. size_t numCreases, size_t numCorners, size_t numHoles, size_t numTimeSteps)
  24. : SubdivMesh(parent,flags,numFaces,numEdges,numVertices,numCreases,numCorners,numHoles,numTimeSteps) {}
  25. void SubdivMeshAVX::interpolate(unsigned primID, float u, float v, RTCBufferType buffer, float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, size_t numFloats)
  26. {
  27. #if defined(DEBUG)
  28. if ((parent->aflags & RTC_INTERPOLATE) == 0)
  29. throw_RTCError(RTC_INVALID_OPERATION,"rtcInterpolate can only get called when RTC_INTERPOLATE is enabled for the scene");
  30. #endif
  31. /* calculate base pointer and stride */
  32. assert((buffer >= RTC_VERTEX_BUFFER0 && buffer < RTCBufferType(RTC_VERTEX_BUFFER0 + RTC_MAX_TIME_STEPS)) ||
  33. (buffer >= RTC_USER_VERTEX_BUFFER0 && RTCBufferType(RTC_USER_VERTEX_BUFFER0 + RTC_MAX_USER_VERTEX_BUFFERS)));
  34. const char* src = nullptr;
  35. size_t stride = 0;
  36. size_t bufID = buffer&0xFFFF;
  37. std::vector<SharedLazyTessellationCache::CacheEntry>* baseEntry = nullptr;
  38. Topology* topo = nullptr;
  39. if (buffer >= RTC_USER_VERTEX_BUFFER0) {
  40. assert(bufID < userbuffers.size());
  41. src = userbuffers[bufID].getPtr();
  42. stride = userbuffers[bufID].getStride();
  43. baseEntry = &user_buffer_tags[bufID];
  44. int topologyID = userbuffers[bufID].userdata;
  45. topo = &topology[topologyID];
  46. } else {
  47. assert(bufID < numTimeSteps);
  48. src = vertices[bufID].getPtr();
  49. stride = vertices[bufID].getStride();
  50. baseEntry = &vertex_buffer_tags[bufID];
  51. topo = &topology[0];
  52. }
  53. bool has_P = P;
  54. bool has_dP = dPdu; assert(!has_dP || dPdv);
  55. bool has_ddP = ddPdudu; assert(!has_ddP || (ddPdvdv && ddPdudu));
  56. for (size_t i=0,slot=0; i<numFloats; slot++)
  57. {
  58. if (i+4 >= numFloats)
  59. {
  60. vfloat4 Pt, dPdut, dPdvt, ddPdudut, ddPdvdvt, ddPdudvt;;
  61. isa::PatchEval<vfloat4>(baseEntry->at(interpolationSlot(primID,slot,stride)),parent->commitCounterSubdiv,
  62. topo->getHalfEdge(primID),src+i*sizeof(float),stride,u,v,
  63. has_P ? &Pt : nullptr,
  64. has_dP ? &dPdut : nullptr,
  65. has_dP ? &dPdvt : nullptr,
  66. has_ddP ? &ddPdudut : nullptr,
  67. has_ddP ? &ddPdvdvt : nullptr,
  68. has_ddP ? &ddPdudvt : nullptr);
  69. if (has_P) {
  70. for (size_t j=i; j<min(i+4,numFloats); j++)
  71. P[j] = Pt[j-i];
  72. }
  73. if (has_dP)
  74. {
  75. for (size_t j=i; j<min(i+4,numFloats); j++) {
  76. dPdu[j] = dPdut[j-i];
  77. dPdv[j] = dPdvt[j-i];
  78. }
  79. }
  80. if (has_ddP)
  81. {
  82. for (size_t j=i; j<min(i+4,numFloats); j++) {
  83. ddPdudu[j] = ddPdudut[j-i];
  84. ddPdvdv[j] = ddPdvdvt[j-i];
  85. ddPdudv[j] = ddPdudvt[j-i];
  86. }
  87. }
  88. i+=4;
  89. }
  90. else
  91. {
  92. vfloat8 Pt, dPdut, dPdvt, ddPdudut, ddPdvdvt, ddPdudvt;
  93. isa::PatchEval<vfloat8>(baseEntry->at(interpolationSlot(primID,slot,stride)),parent->commitCounterSubdiv,
  94. topo->getHalfEdge(primID),src+i*sizeof(float),stride,u,v,
  95. has_P ? &Pt : nullptr,
  96. has_dP ? &dPdut : nullptr,
  97. has_dP ? &dPdvt : nullptr,
  98. has_ddP ? &ddPdudut : nullptr,
  99. has_ddP ? &ddPdvdvt : nullptr,
  100. has_ddP ? &ddPdudvt : nullptr);
  101. if (has_P) {
  102. for (size_t j=i; j<i+8; j++)
  103. P[j] = Pt[j-i];
  104. }
  105. if (has_dP)
  106. {
  107. for (size_t j=i; j<i+8; j++) {
  108. dPdu[j] = dPdut[j-i];
  109. dPdv[j] = dPdvt[j-i];
  110. }
  111. }
  112. if (has_ddP)
  113. {
  114. for (size_t j=i; j<i+8; j++) {
  115. ddPdudu[j] = ddPdudut[j-i];
  116. ddPdvdv[j] = ddPdvdvt[j-i];
  117. ddPdudv[j] = ddPdudvt[j-i];
  118. }
  119. }
  120. i+=8;
  121. }
  122. }
  123. AVX_ZERO_UPPER();
  124. }
  125. template<typename vbool, typename vint, typename vfloat>
  126. void SubdivMeshAVX::interpolateHelper(const vbool& valid1, const vint& primID, const vfloat& uu, const vfloat& vv, size_t numUVs,
  127. RTCBufferType buffer, float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, size_t numFloats)
  128. {
  129. /* calculate base pointer and stride */
  130. assert((buffer >= RTC_VERTEX_BUFFER0 && buffer < RTCBufferType(RTC_VERTEX_BUFFER0 + RTC_MAX_TIME_STEPS)) ||
  131. (buffer >= RTC_USER_VERTEX_BUFFER0 && RTCBufferType(RTC_USER_VERTEX_BUFFER0 + RTC_MAX_USER_VERTEX_BUFFERS)));
  132. const char* src = nullptr;
  133. size_t stride = 0;
  134. size_t bufID = buffer&0xFFFF;
  135. std::vector<SharedLazyTessellationCache::CacheEntry>* baseEntry = nullptr;
  136. Topology* topo = nullptr;
  137. if (buffer >= RTC_USER_VERTEX_BUFFER0) {
  138. assert(bufID < userbuffers.size());
  139. src = userbuffers[bufID].getPtr();
  140. stride = userbuffers[bufID].getStride();
  141. baseEntry = &user_buffer_tags[bufID];
  142. int topologyID = userbuffers[bufID].userdata;
  143. topo = &topology[topologyID];
  144. } else {
  145. assert(bufID < numTimeSteps);
  146. src = vertices[bufID].getPtr();
  147. stride = vertices[bufID].getStride();
  148. baseEntry = &vertex_buffer_tags[bufID];
  149. topo = &topology[0];
  150. }
  151. foreach_unique(valid1,primID,[&](const vbool& valid1, const int primID)
  152. {
  153. for (size_t j=0,slot=0; j<numFloats; slot++)
  154. {
  155. if (j+4 >= numFloats)
  156. {
  157. const size_t M = min(size_t(4),numFloats-j);
  158. isa::PatchEvalSimd<vbool,vint,vfloat,vfloat4>(baseEntry->at(interpolationSlot(primID,slot,stride)),parent->commitCounterSubdiv,
  159. topo->getHalfEdge(primID),src+j*sizeof(float),stride,valid1,uu,vv,
  160. P ? P+j*numUVs : nullptr,
  161. dPdu ? dPdu+j*numUVs : nullptr,
  162. dPdv ? dPdv+j*numUVs : nullptr,
  163. ddPdudu ? ddPdudu+j*numUVs : nullptr,
  164. ddPdvdv ? ddPdvdv+j*numUVs : nullptr,
  165. ddPdudv ? ddPdudv+j*numUVs : nullptr,
  166. numUVs,M);
  167. j+=4;
  168. }
  169. else
  170. {
  171. const size_t M = min(size_t(8),numFloats-j);
  172. isa::PatchEvalSimd<vbool,vint,vfloat,vfloat8>(baseEntry->at(interpolationSlot(primID,slot,stride)),parent->commitCounterSubdiv,
  173. topo->getHalfEdge(primID),src+j*sizeof(float),stride,valid1,uu,vv,
  174. P ? P+j*numUVs : nullptr,
  175. dPdu ? dPdu+j*numUVs : nullptr,
  176. dPdv ? dPdv+j*numUVs : nullptr,
  177. ddPdudu ? ddPdudu+j*numUVs : nullptr,
  178. ddPdvdv ? ddPdvdv+j*numUVs : nullptr,
  179. ddPdudv ? ddPdudv+j*numUVs : nullptr,
  180. numUVs,M);
  181. j+=8;
  182. }
  183. }
  184. });
  185. }
  186. void SubdivMeshAVX::interpolateN(const void* valid_i, const unsigned* primIDs, const float* u, const float* v, size_t numUVs,
  187. RTCBufferType buffer, float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, size_t numFloats)
  188. {
  189. #if defined(DEBUG)
  190. if ((parent->aflags & RTC_INTERPOLATE) == 0)
  191. throw_RTCError(RTC_INVALID_OPERATION,"rtcInterpolate can only get called when RTC_INTERPOLATE is enabled for the scene");
  192. #endif
  193. const int* valid = (const int*) valid_i;
  194. for (size_t i=0; i<numUVs;)
  195. {
  196. if (i+4 >= numUVs)
  197. {
  198. vbool4 valid1 = vint4(int(i))+vint4(step) < vint4(numUVs);
  199. if (valid) valid1 &= vint4::loadu(&valid[i]) == vint4(-1);
  200. if (none(valid1)) { i+=4; continue; }
  201. interpolateHelper(valid1,vint4::loadu(&primIDs[i]),vfloat4::loadu(&u[i]),vfloat4::loadu(&v[i]),numUVs,buffer,
  202. P ? P+i : nullptr,
  203. dPdu ? dPdu+i : nullptr,
  204. dPdv ? dPdv+i : nullptr,
  205. ddPdudu ? ddPdudu+i : nullptr,
  206. ddPdvdv ? ddPdvdv+i : nullptr,
  207. ddPdudv ? ddPdudv+i : nullptr,
  208. numFloats);
  209. i+=4;
  210. }
  211. else
  212. {
  213. vbool8 valid1 = vint8(int(i))+vint8(step) < vint8(int(numUVs));
  214. if (valid) valid1 &= vint8::loadu(&valid[i]) == vint8(-1);
  215. if (none(valid1)) { i+=8; continue; }
  216. interpolateHelper(valid1,vint8::loadu(&primIDs[i]),vfloat8::loadu(&u[i]),vfloat8::loadu(&v[i]),numUVs,buffer,
  217. P ? P+i : nullptr,
  218. dPdu ? dPdu+i : nullptr,
  219. dPdv ? dPdv+i : nullptr,
  220. ddPdudu ? ddPdudu+i : nullptr,
  221. ddPdvdv ? ddPdvdv+i : nullptr,
  222. ddPdudv ? ddPdudv+i : nullptr,
  223. numFloats);
  224. i+=8;
  225. }
  226. }
  227. AVX_ZERO_UPPER();
  228. }
  229. }