gregory_patch.h 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "catmullclark_patch.h"
  18. #include "bezier_patch.h"
  19. #include "bezier_curve.h"
  20. #include "catmullclark_coefficients.h"
  21. namespace embree
  22. {
  23. template<typename Vertex, typename Vertex_t = Vertex>
  24. class __aligned(64) GregoryPatchT
  25. {
  26. typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
  27. typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
  28. typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
  29. typedef BezierCurveT<Vertex> BezierCurve;
  30. public:
  31. Vertex v[4][4];
  32. Vertex f[2][2];
  33. __forceinline GregoryPatchT() {}
  34. __forceinline GregoryPatchT(const CatmullClarkPatch& patch) {
  35. init(patch);
  36. }
  37. __forceinline GregoryPatchT(const CatmullClarkPatch& patch,
  38. const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
  39. {
  40. init_crackfix(patch,border0,border1,border2,border3);
  41. }
  42. __forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
  43. init(CatmullClarkPatch(edge,vertices,stride));
  44. }
  45. __forceinline Vertex& p0() { return v[0][0]; }
  46. __forceinline Vertex& p1() { return v[0][3]; }
  47. __forceinline Vertex& p2() { return v[3][3]; }
  48. __forceinline Vertex& p3() { return v[3][0]; }
  49. __forceinline Vertex& e0_p() { return v[0][1]; }
  50. __forceinline Vertex& e0_m() { return v[1][0]; }
  51. __forceinline Vertex& e1_p() { return v[1][3]; }
  52. __forceinline Vertex& e1_m() { return v[0][2]; }
  53. __forceinline Vertex& e2_p() { return v[3][2]; }
  54. __forceinline Vertex& e2_m() { return v[2][3]; }
  55. __forceinline Vertex& e3_p() { return v[2][0]; }
  56. __forceinline Vertex& e3_m() { return v[3][1]; }
  57. __forceinline Vertex& f0_p() { return v[1][1]; }
  58. __forceinline Vertex& f1_p() { return v[1][2]; }
  59. __forceinline Vertex& f2_p() { return v[2][2]; }
  60. __forceinline Vertex& f3_p() { return v[2][1]; }
  61. __forceinline Vertex& f0_m() { return f[0][0]; }
  62. __forceinline Vertex& f1_m() { return f[0][1]; }
  63. __forceinline Vertex& f2_m() { return f[1][1]; }
  64. __forceinline Vertex& f3_m() { return f[1][0]; }
  65. __forceinline const Vertex& p0() const { return v[0][0]; }
  66. __forceinline const Vertex& p1() const { return v[0][3]; }
  67. __forceinline const Vertex& p2() const { return v[3][3]; }
  68. __forceinline const Vertex& p3() const { return v[3][0]; }
  69. __forceinline const Vertex& e0_p() const { return v[0][1]; }
  70. __forceinline const Vertex& e0_m() const { return v[1][0]; }
  71. __forceinline const Vertex& e1_p() const { return v[1][3]; }
  72. __forceinline const Vertex& e1_m() const { return v[0][2]; }
  73. __forceinline const Vertex& e2_p() const { return v[3][2]; }
  74. __forceinline const Vertex& e2_m() const { return v[2][3]; }
  75. __forceinline const Vertex& e3_p() const { return v[2][0]; }
  76. __forceinline const Vertex& e3_m() const { return v[3][1]; }
  77. __forceinline const Vertex& f0_p() const { return v[1][1]; }
  78. __forceinline const Vertex& f1_p() const { return v[1][2]; }
  79. __forceinline const Vertex& f2_p() const { return v[2][2]; }
  80. __forceinline const Vertex& f3_p() const { return v[2][1]; }
  81. __forceinline const Vertex& f0_m() const { return f[0][0]; }
  82. __forceinline const Vertex& f1_m() const { return f[0][1]; }
  83. __forceinline const Vertex& f2_m() const { return f[1][1]; }
  84. __forceinline const Vertex& f3_m() const { return f[1][0]; }
  85. __forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index) {
  86. return irreg_patch.ring[index].getLimitVertex();
  87. }
  88. __forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
  89. return madd(1.0f/3.0f,irreg_patch.ring[index].getLimitTangent(),p_vtx);
  90. }
  91. __forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
  92. return madd(1.0f/3.0f,irreg_patch.ring[index].getSecondLimitTangent(),p_vtx);
  93. }
  94. __forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
  95. {
  96. CatmullClark1Ring3fa r0,r1,r2;
  97. irreg_patch.ring[index].subdivide(r0);
  98. r0.subdivide(r1);
  99. r1.subdivide(r2);
  100. return madd(8.0f/3.0f,r2.getLimitTangent(),p_vtx);
  101. }
  102. __forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
  103. {
  104. CatmullClark1Ring3fa r0,r1,r2;
  105. irreg_patch.ring[index].subdivide(r0);
  106. r0.subdivide(r1);
  107. r1.subdivide(r2);
  108. return madd(8.0f/3.0f,r2.getSecondLimitTangent(),p_vtx);
  109. }
  110. void initFaceVertex(const CatmullClarkPatch& irreg_patch,
  111. const size_t index,
  112. const Vertex& p_vtx,
  113. const Vertex& e0_p_vtx,
  114. const Vertex& e1_m_vtx,
  115. const unsigned int face_valence_p1,
  116. const Vertex& e0_m_vtx,
  117. const Vertex& e3_p_vtx,
  118. const unsigned int face_valence_p3,
  119. Vertex& f_p_vtx,
  120. Vertex& f_m_vtx)
  121. {
  122. const unsigned int face_valence = irreg_patch.ring[index].face_valence;
  123. const unsigned int edge_valence = irreg_patch.ring[index].edge_valence;
  124. const unsigned int border_index = irreg_patch.ring[index].border_index;
  125. const Vertex& vtx = irreg_patch.ring[index].vtx;
  126. const Vertex e_i = irreg_patch.ring[index].getEdgeCenter(0);
  127. const Vertex c_i_m_1 = irreg_patch.ring[index].getQuadCenter(0);
  128. const Vertex e_i_m_1 = irreg_patch.ring[index].getEdgeCenter(1);
  129. Vertex c_i, e_i_p_1;
  130. const bool hasHardEdge0 =
  131. std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
  132. std::isinf(irreg_patch.ring[index].crease_weight[0]);
  133. if (unlikely((border_index == edge_valence-2) || hasHardEdge0))
  134. {
  135. /* mirror quad center and edge mid-point */
  136. c_i = madd(2.0f, e_i - c_i_m_1, c_i_m_1);
  137. e_i_p_1 = madd(2.0f, vtx - e_i_m_1, e_i_m_1);
  138. }
  139. else
  140. {
  141. c_i = irreg_patch.ring[index].getQuadCenter( face_valence-1 );
  142. e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-1 );
  143. }
  144. Vertex c_i_m_2, e_i_m_2;
  145. const bool hasHardEdge1 =
  146. std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
  147. std::isinf(irreg_patch.ring[index].crease_weight[1]);
  148. if (unlikely(border_index == 2 || hasHardEdge1))
  149. {
  150. /* mirror quad center and edge mid-point */
  151. c_i_m_2 = madd(2.0f, e_i_m_1 - c_i_m_1, c_i_m_1);
  152. e_i_m_2 = madd(2.0f, vtx - e_i, + e_i);
  153. }
  154. else
  155. {
  156. c_i_m_2 = irreg_patch.ring[index].getQuadCenter( 1 );
  157. e_i_m_2 = irreg_patch.ring[index].getEdgeCenter( 2 );
  158. }
  159. const float d = 3.0f;
  160. //const float c = cosf(2.0f*M_PI/(float)face_valence);
  161. //const float c_e_p = cosf(2.0f*M_PI/(float)face_valence_p1);
  162. //const float c_e_m = cosf(2.0f*M_PI/(float)face_valence_p3);
  163. const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
  164. const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
  165. const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
  166. const Vertex r_e_p = 1.0f/3.0f * (e_i_m_1 - e_i_p_1) + 2.0f/3.0f * (c_i_m_1 - c_i);
  167. const Vertex r_e_m = 1.0f/3.0f * (e_i - e_i_m_2) + 2.0f/3.0f * (c_i_m_1 - c_i_m_2);
  168. f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
  169. f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
  170. }
  171. __noinline void init(const CatmullClarkPatch& patch)
  172. {
  173. assert( patch.ring[0].hasValidPositions() );
  174. assert( patch.ring[1].hasValidPositions() );
  175. assert( patch.ring[2].hasValidPositions() );
  176. assert( patch.ring[3].hasValidPositions() );
  177. p0() = initCornerVertex(patch,0);
  178. p1() = initCornerVertex(patch,1);
  179. p2() = initCornerVertex(patch,2);
  180. p3() = initCornerVertex(patch,3);
  181. e0_p() = initPositiveEdgeVertex(patch,0, p0());
  182. e1_p() = initPositiveEdgeVertex(patch,1, p1());
  183. e2_p() = initPositiveEdgeVertex(patch,2, p2());
  184. e3_p() = initPositiveEdgeVertex(patch,3, p3());
  185. e0_m() = initNegativeEdgeVertex(patch,0, p0());
  186. e1_m() = initNegativeEdgeVertex(patch,1, p1());
  187. e2_m() = initNegativeEdgeVertex(patch,2, p2());
  188. e3_m() = initNegativeEdgeVertex(patch,3, p3());
  189. const unsigned int face_valence_p0 = patch.ring[0].face_valence;
  190. const unsigned int face_valence_p1 = patch.ring[1].face_valence;
  191. const unsigned int face_valence_p2 = patch.ring[2].face_valence;
  192. const unsigned int face_valence_p3 = patch.ring[3].face_valence;
  193. initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
  194. initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
  195. initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
  196. initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
  197. }
  198. __noinline void init_crackfix(const CatmullClarkPatch& patch,
  199. const BezierCurve* border0,
  200. const BezierCurve* border1,
  201. const BezierCurve* border2,
  202. const BezierCurve* border3)
  203. {
  204. assert( patch.ring[0].hasValidPositions() );
  205. assert( patch.ring[1].hasValidPositions() );
  206. assert( patch.ring[2].hasValidPositions() );
  207. assert( patch.ring[3].hasValidPositions() );
  208. p0() = initCornerVertex(patch,0);
  209. p1() = initCornerVertex(patch,1);
  210. p2() = initCornerVertex(patch,2);
  211. p3() = initCornerVertex(patch,3);
  212. e0_p() = initPositiveEdgeVertex(patch,0, p0());
  213. e1_p() = initPositiveEdgeVertex(patch,1, p1());
  214. e2_p() = initPositiveEdgeVertex(patch,2, p2());
  215. e3_p() = initPositiveEdgeVertex(patch,3, p3());
  216. e0_m() = initNegativeEdgeVertex(patch,0, p0());
  217. e1_m() = initNegativeEdgeVertex(patch,1, p1());
  218. e2_m() = initNegativeEdgeVertex(patch,2, p2());
  219. e3_m() = initNegativeEdgeVertex(patch,3, p3());
  220. if (unlikely(border0 != nullptr))
  221. {
  222. p0() = border0->v0;
  223. e0_p() = border0->v1;
  224. e1_m() = border0->v2;
  225. p1() = border0->v3;
  226. }
  227. if (unlikely(border1 != nullptr))
  228. {
  229. p1() = border1->v0;
  230. e1_p() = border1->v1;
  231. e2_m() = border1->v2;
  232. p2() = border1->v3;
  233. }
  234. if (unlikely(border2 != nullptr))
  235. {
  236. p2() = border2->v0;
  237. e2_p() = border2->v1;
  238. e3_m() = border2->v2;
  239. p3() = border2->v3;
  240. }
  241. if (unlikely(border3 != nullptr))
  242. {
  243. p3() = border3->v0;
  244. e3_p() = border3->v1;
  245. e0_m() = border3->v2;
  246. p0() = border3->v3;
  247. }
  248. const unsigned int face_valence_p0 = patch.ring[0].face_valence;
  249. const unsigned int face_valence_p1 = patch.ring[1].face_valence;
  250. const unsigned int face_valence_p2 = patch.ring[2].face_valence;
  251. const unsigned int face_valence_p3 = patch.ring[3].face_valence;
  252. initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
  253. initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
  254. initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
  255. initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
  256. }
  257. void computeGregoryPatchFacePoints(const unsigned int face_valence,
  258. const Vertex& r_e_p,
  259. const Vertex& r_e_m,
  260. const Vertex& p_vtx,
  261. const Vertex& e0_p_vtx,
  262. const Vertex& e1_m_vtx,
  263. const unsigned int face_valence_p1,
  264. const Vertex& e0_m_vtx,
  265. const Vertex& e3_p_vtx,
  266. const unsigned int face_valence_p3,
  267. Vertex& f_p_vtx,
  268. Vertex& f_m_vtx,
  269. const float d = 3.0f)
  270. {
  271. //const float c = cosf(2.0*M_PI/(float)face_valence);
  272. //const float c_e_p = cosf(2.0*M_PI/(float)face_valence_p1);
  273. //const float c_e_m = cosf(2.0*M_PI/(float)face_valence_p3);
  274. const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
  275. const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
  276. const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
  277. f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
  278. f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
  279. f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
  280. f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
  281. }
  282. __noinline void init(const GeneralCatmullClarkPatch& patch)
  283. {
  284. assert(patch.size() == 4);
  285. #if 0
  286. CatmullClarkPatch qpatch; patch.init(qpatch);
  287. init(qpatch);
  288. #else
  289. const float face_valence_p0 = patch.ring[0].face_valence;
  290. const float face_valence_p1 = patch.ring[1].face_valence;
  291. const float face_valence_p2 = patch.ring[2].face_valence;
  292. const float face_valence_p3 = patch.ring[3].face_valence;
  293. Vertex p0_r_p, p0_r_m;
  294. patch.ring[0].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m );
  295. Vertex p1_r_p, p1_r_m;
  296. patch.ring[1].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m );
  297. Vertex p2_r_p, p2_r_m;
  298. patch.ring[2].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m );
  299. Vertex p3_r_p, p3_r_m;
  300. patch.ring[3].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m );
  301. computeGregoryPatchFacePoints(face_valence_p0, p0_r_p, p0_r_m, p0(), e0_p(), e1_m(), face_valence_p1, e0_m(), e3_p(), face_valence_p3, f0_p(), f0_m() );
  302. computeGregoryPatchFacePoints(face_valence_p1, p1_r_p, p1_r_m, p1(), e1_p(), e2_m(), face_valence_p2, e1_m(), e0_p(), face_valence_p0, f1_p(), f1_m() );
  303. computeGregoryPatchFacePoints(face_valence_p2, p2_r_p, p2_r_m, p2(), e2_p(), e3_m(), face_valence_p3, e2_m(), e1_p(), face_valence_p1, f2_p(), f2_m() );
  304. computeGregoryPatchFacePoints(face_valence_p3, p3_r_p, p3_r_m, p3(), e3_p(), e0_m(), face_valence_p0, e3_m(), e2_p(), face_valence_p3, f3_p(), f3_m() );
  305. #endif
  306. }
  307. __forceinline void convert_to_bezier()
  308. {
  309. f0_p() = (f0_p() + f0_m()) * 0.5f;
  310. f1_p() = (f1_p() + f1_m()) * 0.5f;
  311. f2_p() = (f2_p() + f2_m()) * 0.5f;
  312. f3_p() = (f3_p() + f3_m()) * 0.5f;
  313. f0_m() = Vertex( zero );
  314. f1_m() = Vertex( zero );
  315. f2_m() = Vertex( zero );
  316. f3_m() = Vertex( zero );
  317. }
  318. static __forceinline void computeInnerVertices(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv,
  319. Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21)
  320. {
  321. if (unlikely(uu == 0.0f || uu == 1.0f || vv == 0.0f || vv == 1.0f))
  322. {
  323. matrix_11 = matrix[1][1];
  324. matrix_12 = matrix[1][2];
  325. matrix_22 = matrix[2][2];
  326. matrix_21 = matrix[2][1];
  327. }
  328. else
  329. {
  330. const Vertex_t f0_p = matrix[1][1];
  331. const Vertex_t f1_p = matrix[1][2];
  332. const Vertex_t f2_p = matrix[2][2];
  333. const Vertex_t f3_p = matrix[2][1];
  334. const Vertex_t f0_m = f_m[0][0];
  335. const Vertex_t f1_m = f_m[0][1];
  336. const Vertex_t f2_m = f_m[1][1];
  337. const Vertex_t f3_m = f_m[1][0];
  338. matrix_11 = ( uu * f0_p + vv * f0_m)*rcp(uu+vv);
  339. matrix_12 = ((1.0f-uu) * f1_m + vv * f1_p)*rcp(1.0f-uu+vv);
  340. matrix_22 = ((1.0f-uu) * f2_p + (1.0f-vv) * f2_m)*rcp(2.0f-uu-vv);
  341. matrix_21 = ( uu * f3_m + (1.0f-vv) * f3_p)*rcp(1.0f+uu-vv);
  342. }
  343. }
  344. template<typename vfloat>
  345. static __forceinline void computeInnerVertices(const Vertex v[4][4], const Vertex f[2][2],
  346. size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
  347. {
  348. const auto m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
  349. const vfloat f0_p = v[1][1][i];
  350. const vfloat f1_p = v[1][2][i];
  351. const vfloat f2_p = v[2][2][i];
  352. const vfloat f3_p = v[2][1][i];
  353. const vfloat f0_m = f[0][0][i];
  354. const vfloat f1_m = f[0][1][i];
  355. const vfloat f2_m = f[1][1][i];
  356. const vfloat f3_m = f[1][0][i];
  357. const vfloat one_minus_uu = vfloat(1.0f) - uu;
  358. const vfloat one_minus_vv = vfloat(1.0f) - vv;
  359. const vfloat f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
  360. const vfloat f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
  361. const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
  362. const vfloat f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
  363. matrix_11 = select(m_border,f0_p,f0_i);
  364. matrix_12 = select(m_border,f1_p,f1_i);
  365. matrix_22 = select(m_border,f2_p,f2_i);
  366. matrix_21 = select(m_border,f3_p,f3_i);
  367. }
  368. static __forceinline Vertex eval(const Vertex matrix[4][4], const Vertex f[2][2], const float& uu, const float& vv)
  369. {
  370. Vertex_t v_11, v_12, v_22, v_21;
  371. computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
  372. const Vec4<float> Bu = BezierBasis::eval(uu);
  373. const Vec4<float> Bv = BezierBasis::eval(vv);
  374. return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
  375. madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
  376. madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
  377. Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
  378. }
  379. static __forceinline Vertex eval_du(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
  380. {
  381. Vertex_t v_11, v_12, v_22, v_21;
  382. computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
  383. const Vec4<float> Bu = BezierBasis::derivative(uu);
  384. const Vec4<float> Bv = BezierBasis::eval(vv);
  385. return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
  386. madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
  387. madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
  388. Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
  389. }
  390. static __forceinline Vertex eval_dv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
  391. {
  392. Vertex_t v_11, v_12, v_22, v_21;
  393. computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
  394. const Vec4<float> Bu = BezierBasis::eval(uu);
  395. const Vec4<float> Bv = BezierBasis::derivative(vv);
  396. return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
  397. madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
  398. madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
  399. Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
  400. }
  401. static __forceinline Vertex eval_dudu(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
  402. {
  403. Vertex_t v_11, v_12, v_22, v_21;
  404. computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
  405. const Vec4<float> Bu = BezierBasis::derivative2(uu);
  406. const Vec4<float> Bv = BezierBasis::eval(vv);
  407. return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
  408. madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
  409. madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
  410. Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
  411. }
  412. static __forceinline Vertex eval_dvdv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
  413. {
  414. Vertex_t v_11, v_12, v_22, v_21;
  415. computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
  416. const Vec4<float> Bu = BezierBasis::eval(uu);
  417. const Vec4<float> Bv = BezierBasis::derivative2(vv);
  418. return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
  419. madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
  420. madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
  421. Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
  422. }
  423. static __forceinline Vertex eval_dudv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
  424. {
  425. Vertex_t v_11, v_12, v_22, v_21;
  426. computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
  427. const Vec4<float> Bu = BezierBasis::derivative(uu);
  428. const Vec4<float> Bv = BezierBasis::derivative(vv);
  429. return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
  430. madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))),
  431. madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))),
  432. Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
  433. }
  434. __forceinline Vertex eval(const float uu, const float vv) const {
  435. return eval(v,f,uu,vv);
  436. }
  437. __forceinline Vertex eval_du( const float uu, const float vv) const {
  438. return eval_du(v,f,uu,vv);
  439. }
  440. __forceinline Vertex eval_dv( const float uu, const float vv) const {
  441. return eval_dv(v,f,uu,vv);
  442. }
  443. __forceinline Vertex eval_dudu( const float uu, const float vv) const {
  444. return eval_dudu(v,f,uu,vv);
  445. }
  446. __forceinline Vertex eval_dvdv( const float uu, const float vv) const {
  447. return eval_dvdv(v,f,uu,vv);
  448. }
  449. __forceinline Vertex eval_dudv( const float uu, const float vv) const {
  450. return eval_dudv(v,f,uu,vv);
  451. }
  452. static __forceinline Vertex normal(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv) // FIXME: why not using basis functions
  453. {
  454. /* interpolate inner vertices */
  455. Vertex_t matrix_11, matrix_12, matrix_22, matrix_21;
  456. computeInnerVertices(matrix,f_m,uu,vv,matrix_11, matrix_12, matrix_22, matrix_21);
  457. /* tangentU */
  458. const Vertex_t col0 = deCasteljau(vv, (Vertex_t)matrix[0][0], (Vertex_t)matrix[1][0], (Vertex_t)matrix[2][0], (Vertex_t)matrix[3][0]);
  459. const Vertex_t col1 = deCasteljau(vv, (Vertex_t)matrix[0][1], (Vertex_t)matrix_11 , (Vertex_t)matrix_21 , (Vertex_t)matrix[3][1]);
  460. const Vertex_t col2 = deCasteljau(vv, (Vertex_t)matrix[0][2], (Vertex_t)matrix_12 , (Vertex_t)matrix_22 , (Vertex_t)matrix[3][2]);
  461. const Vertex_t col3 = deCasteljau(vv, (Vertex_t)matrix[0][3], (Vertex_t)matrix[1][3], (Vertex_t)matrix[2][3], (Vertex_t)matrix[3][3]);
  462. const Vertex_t tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
  463. /* tangentV */
  464. const Vertex_t row0 = deCasteljau(uu, (Vertex_t)matrix[0][0], (Vertex_t)matrix[0][1], (Vertex_t)matrix[0][2], (Vertex_t)matrix[0][3]);
  465. const Vertex_t row1 = deCasteljau(uu, (Vertex_t)matrix[1][0], (Vertex_t)matrix_11 , (Vertex_t)matrix_12 , (Vertex_t)matrix[1][3]);
  466. const Vertex_t row2 = deCasteljau(uu, (Vertex_t)matrix[2][0], (Vertex_t)matrix_21 , (Vertex_t)matrix_22 , (Vertex_t)matrix[2][3]);
  467. const Vertex_t row3 = deCasteljau(uu, (Vertex_t)matrix[3][0], (Vertex_t)matrix[3][1], (Vertex_t)matrix[3][2], (Vertex_t)matrix[3][3]);
  468. const Vertex_t tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
  469. /* normal = tangentU x tangentV */
  470. const Vertex_t n = cross(tangentV,tangentU);
  471. return n;
  472. }
  473. __forceinline Vertex normal( const float uu, const float vv) const {
  474. return normal(v,f,uu,vv);
  475. }
  476. __forceinline void eval(const float u, const float v,
  477. Vertex* P, Vertex* dPdu, Vertex* dPdv,
  478. Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
  479. const float dscale = 1.0f) const
  480. {
  481. if (P) {
  482. *P = eval(u,v);
  483. }
  484. if (dPdu) {
  485. assert(dPdu); *dPdu = eval_du(u,v)*dscale;
  486. assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
  487. }
  488. if (ddPdudu) {
  489. assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
  490. assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
  491. assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
  492. }
  493. }
  494. template<class vfloat>
  495. static __forceinline vfloat eval(const Vertex v[4][4], const Vertex f[2][2],
  496. const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n,
  497. vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
  498. {
  499. const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i]))));
  500. const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(matrix_11 ),madd(v_n[2],vfloat(matrix_21 ),v_n[3] * vfloat(v[3][1][i]))));
  501. const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(matrix_12 ),madd(v_n[2],vfloat(matrix_22 ),v_n[3] * vfloat(v[3][2][i]))));
  502. const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i]))));
  503. return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
  504. }
  505. template<typename vbool, typename vfloat>
  506. static __forceinline void eval(const Vertex v[4][4], const Vertex f[2][2],
  507. const vbool& valid, const vfloat& uu, const vfloat& vv,
  508. float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
  509. const float dscale, const size_t dstride, const size_t N)
  510. {
  511. if (P) {
  512. const Vec4<vfloat> u_n = BezierBasis::eval(uu);
  513. const Vec4<vfloat> v_n = BezierBasis::eval(vv);
  514. for (size_t i=0; i<N; i++) {
  515. vfloat matrix_11, matrix_12, matrix_22, matrix_21;
  516. computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
  517. vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21));
  518. }
  519. }
  520. if (dPdu)
  521. {
  522. {
  523. assert(dPdu);
  524. const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
  525. const Vec4<vfloat> v_n = BezierBasis::eval(vv);
  526. for (size_t i=0; i<N; i++) {
  527. vfloat matrix_11, matrix_12, matrix_22, matrix_21;
  528. computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
  529. vfloat::store(valid,dPdu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
  530. }
  531. }
  532. {
  533. assert(dPdv);
  534. const Vec4<vfloat> u_n = BezierBasis::eval(uu);
  535. const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
  536. for (size_t i=0; i<N; i++) {
  537. vfloat matrix_11, matrix_12, matrix_22, matrix_21;
  538. computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
  539. vfloat::store(valid,dPdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
  540. }
  541. }
  542. }
  543. if (ddPdudu)
  544. {
  545. {
  546. assert(ddPdudu);
  547. const Vec4<vfloat> u_n = BezierBasis::derivative2(uu);
  548. const Vec4<vfloat> v_n = BezierBasis::eval(vv);
  549. for (size_t i=0; i<N; i++) {
  550. vfloat matrix_11, matrix_12, matrix_22, matrix_21;
  551. computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
  552. vfloat::store(valid,ddPdudu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
  553. }
  554. }
  555. {
  556. assert(ddPdvdv);
  557. const Vec4<vfloat> u_n = BezierBasis::eval(uu);
  558. const Vec4<vfloat> v_n = BezierBasis::derivative2(vv);
  559. for (size_t i=0; i<N; i++) {
  560. vfloat matrix_11, matrix_12, matrix_22, matrix_21;
  561. computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
  562. vfloat::store(valid,ddPdvdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
  563. }
  564. }
  565. {
  566. assert(ddPdudv);
  567. const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
  568. const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
  569. for (size_t i=0; i<N; i++) {
  570. vfloat matrix_11, matrix_12, matrix_22, matrix_21;
  571. computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
  572. vfloat::store(valid,ddPdudv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
  573. }
  574. }
  575. }
  576. }
  577. template<typename vbool, typename vfloat>
  578. __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
  579. float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
  580. const float dscale, const size_t dstride, const size_t N) const {
  581. eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
  582. }
  583. template<class T>
  584. static __forceinline Vec3<T> eval_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv)
  585. {
  586. typedef typename T::Bool M;
  587. const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
  588. const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
  589. const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
  590. const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
  591. const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
  592. const Vec3<T> f0_m = f[0][0];
  593. const Vec3<T> f1_m = f[0][1];
  594. const Vec3<T> f2_m = f[1][1];
  595. const Vec3<T> f3_m = f[1][0];
  596. const T one_minus_uu = T(1.0f) - uu;
  597. const T one_minus_vv = T(1.0f) - vv;
  598. const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
  599. const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
  600. const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
  601. const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
  602. const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
  603. const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
  604. const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
  605. const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
  606. const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu;
  607. const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv;
  608. const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu);
  609. const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv);
  610. const T B2_u = 3.0f * (uu * one_minus_uu * uu);
  611. const T B2_v = 3.0f * (vv * one_minus_vv * vv);
  612. const T B3_u = uu * uu * uu;
  613. const T B3_v = vv * vv * vv;
  614. const T x = madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u * matrix[0][3].x))),
  615. madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,F0.x ,madd(B2_u,F1.x ,B3_u * matrix[1][3].x))),
  616. madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,F3.x ,madd(B2_u,F2.x ,B3_u * matrix[2][3].x))),
  617. B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u * matrix[3][3].x))))));
  618. const T y = madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u * matrix[0][3].y))),
  619. madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,F0.y ,madd(B2_u,F1.y ,B3_u * matrix[1][3].y))),
  620. madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,F3.y ,madd(B2_u,F2.y ,B3_u * matrix[2][3].y))),
  621. B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u * matrix[3][3].y))))));
  622. const T z = madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u * matrix[0][3].z))),
  623. madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,F0.z ,madd(B2_u,F1.z ,B3_u * matrix[1][3].z))),
  624. madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,F3.z ,madd(B2_u,F2.z ,B3_u * matrix[2][3].z))),
  625. B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u * matrix[3][3].z))))));
  626. return Vec3<T>(x,y,z);
  627. }
  628. template<class T>
  629. __forceinline Vec3<T> eval(const T& uu, const T& vv) const
  630. {
  631. Vec3<T> ff[2][2];
  632. ff[0][0] = Vec3<T>(f[0][0]);
  633. ff[0][1] = Vec3<T>(f[0][1]);
  634. ff[1][1] = Vec3<T>(f[1][1]);
  635. ff[1][0] = Vec3<T>(f[1][0]);
  636. return eval_t(v,ff,uu,vv);
  637. }
  638. template<class T>
  639. static __forceinline Vec3<T> normal_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv)
  640. {
  641. typedef typename T::Bool M;
  642. const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
  643. const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
  644. const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
  645. const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
  646. const Vec3<T> f0_m = f[0][0];
  647. const Vec3<T> f1_m = f[0][1];
  648. const Vec3<T> f2_m = f[1][1];
  649. const Vec3<T> f3_m = f[1][0];
  650. const T one_minus_uu = T(1.0f) - uu;
  651. const T one_minus_vv = T(1.0f) - vv;
  652. const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
  653. const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
  654. const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
  655. const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
  656. #if 1
  657. const M m_corner0 = (uu == 0.0f) & (vv == 0.0f);
  658. const M m_corner1 = (uu == 1.0f) & (vv == 0.0f);
  659. const M m_corner2 = (uu == 1.0f) & (vv == 1.0f);
  660. const M m_corner3 = (uu == 0.0f) & (vv == 1.0f);
  661. const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) );
  662. const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) );
  663. const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) );
  664. const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) );
  665. #else
  666. const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
  667. const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
  668. const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
  669. const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
  670. const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
  671. #endif
  672. const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z);
  673. const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z);
  674. const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z);
  675. const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z);
  676. const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z);
  677. const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z);
  678. const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z);
  679. const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z);
  680. const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z);
  681. const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z);
  682. const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z);
  683. const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z);
  684. /* tangentU */
  685. const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30);
  686. const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31);
  687. const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32);
  688. const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33);
  689. const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
  690. /* tangentV */
  691. const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03);
  692. const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13);
  693. const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23);
  694. const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33);
  695. const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
  696. /* normal = tangentU x tangentV */
  697. const Vec3<T> n = cross(tangentV,tangentU);
  698. return n;
  699. }
  700. template<class T>
  701. __forceinline Vec3<T> normal(const T& uu, const T& vv) const
  702. {
  703. Vec3<T> ff[2][2];
  704. ff[0][0] = Vec3<T>(f[0][0]);
  705. ff[0][1] = Vec3<T>(f[0][1]);
  706. ff[1][1] = Vec3<T>(f[1][1]);
  707. ff[1][0] = Vec3<T>(f[1][0]);
  708. return normal_t(v,ff,uu,vv);
  709. }
  710. __forceinline BBox<Vertex> bounds() const
  711. {
  712. const Vertex *const cv = &v[0][0];
  713. BBox<Vertex> bounds (cv[0]);
  714. for (size_t i=1; i<16; i++)
  715. bounds.extend( cv[i] );
  716. bounds.extend(f[0][0]);
  717. bounds.extend(f[1][0]);
  718. bounds.extend(f[1][1]);
  719. bounds.extend(f[1][1]);
  720. return bounds;
  721. }
  722. friend std::ostream& operator<<(std::ostream& o, const GregoryPatchT& p)
  723. {
  724. for (size_t y=0; y<4; y++)
  725. for (size_t x=0; x<4; x++)
  726. o << "v[" << y << "][" << x << "] " << p.v[y][x] << std::endl;
  727. for (size_t y=0; y<2; y++)
  728. for (size_t x=0; x<2; x++)
  729. o << "f[" << y << "][" << x << "] " << p.f[y][x] << std::endl;
  730. return o;
  731. }
  732. };
  733. typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa;
  734. template<typename Vertex, typename Vertex_t>
  735. __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride)
  736. {
  737. CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride);
  738. GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
  739. gpatch.convert_to_bezier();
  740. for (size_t y=0; y<4; y++)
  741. for (size_t x=0; x<4; x++)
  742. matrix[y][x] = (Vertex_t)gpatch.v[y][x];
  743. }
  744. template<typename Vertex, typename Vertex_t>
  745. __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch)
  746. {
  747. GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
  748. gpatch.convert_to_bezier();
  749. for (size_t y=0; y<4; y++)
  750. for (size_t x=0; x<4; x++)
  751. matrix[y][x] = (Vertex_t)gpatch.v[y][x];
  752. }
  753. template<typename Vertex, typename Vertex_t>
  754. __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch,
  755. const BezierCurveT<Vertex>* border0,
  756. const BezierCurveT<Vertex>* border1,
  757. const BezierCurveT<Vertex>* border2,
  758. const BezierCurveT<Vertex>* border3)
  759. {
  760. GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3);
  761. gpatch.convert_to_bezier();
  762. for (size_t y=0; y<4; y++)
  763. for (size_t x=0; x<4; x++)
  764. matrix[y][x] = (Vertex_t)gpatch.v[y][x];
  765. }
  766. }