DirectXMathMisc.inl 75 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515
  1. //-------------------------------------------------------------------------------------
  2. // DirectXMathMisc.inl -- SIMD C++ Math library
  3. //
  4. // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  5. // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  6. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  7. // PARTICULAR PURPOSE.
  8. //
  9. // Copyright (c) Microsoft Corporation. All rights reserved.
  10. //
  11. // http://go.microsoft.com/fwlink/?LinkID=615560
  12. //-------------------------------------------------------------------------------------
  13. #pragma once
  14. /****************************************************************************
  15. *
  16. * Quaternion
  17. *
  18. ****************************************************************************/
  19. //------------------------------------------------------------------------------
  20. // Comparison operations
  21. //------------------------------------------------------------------------------
  22. //------------------------------------------------------------------------------
  23. inline bool XM_CALLCONV XMQuaternionEqual
  24. (
  25. FXMVECTOR Q1,
  26. FXMVECTOR Q2
  27. )
  28. {
  29. return XMVector4Equal(Q1, Q2);
  30. }
  31. //------------------------------------------------------------------------------
  32. inline bool XM_CALLCONV XMQuaternionNotEqual
  33. (
  34. FXMVECTOR Q1,
  35. FXMVECTOR Q2
  36. )
  37. {
  38. return XMVector4NotEqual(Q1, Q2);
  39. }
  40. //------------------------------------------------------------------------------
  41. inline bool XM_CALLCONV XMQuaternionIsNaN
  42. (
  43. FXMVECTOR Q
  44. )
  45. {
  46. return XMVector4IsNaN(Q);
  47. }
  48. //------------------------------------------------------------------------------
  49. inline bool XM_CALLCONV XMQuaternionIsInfinite
  50. (
  51. FXMVECTOR Q
  52. )
  53. {
  54. return XMVector4IsInfinite(Q);
  55. }
  56. //------------------------------------------------------------------------------
  57. inline bool XM_CALLCONV XMQuaternionIsIdentity
  58. (
  59. FXMVECTOR Q
  60. )
  61. {
  62. return XMVector4Equal(Q, g_XMIdentityR3.v);
  63. }
  64. //------------------------------------------------------------------------------
  65. // Computation operations
  66. //------------------------------------------------------------------------------
  67. //------------------------------------------------------------------------------
  68. inline XMVECTOR XM_CALLCONV XMQuaternionDot
  69. (
  70. FXMVECTOR Q1,
  71. FXMVECTOR Q2
  72. )
  73. {
  74. return XMVector4Dot(Q1, Q2);
  75. }
  76. //------------------------------------------------------------------------------
  77. inline XMVECTOR XM_CALLCONV XMQuaternionMultiply
  78. (
  79. FXMVECTOR Q1,
  80. FXMVECTOR Q2
  81. )
  82. {
  83. // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2)
  84. // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y),
  85. // (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x),
  86. // (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w),
  87. // (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ]
  88. #if defined(_XM_NO_INTRINSICS_)
  89. XMVECTORF32 Result = { { {
  90. (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]),
  91. (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]),
  92. (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]),
  93. (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2])
  94. } } };
  95. return Result.v;
  96. #elif defined(_XM_ARM_NEON_INTRINSICS_)
  97. static const XMVECTORF32 ControlWZYX = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
  98. static const XMVECTORF32 ControlZWXY = { { { 1.0f, 1.0f, -1.0f, -1.0f } } };
  99. static const XMVECTORF32 ControlYXWZ = { { { -1.0f, 1.0f, 1.0f, -1.0f } } };
  100. float32x2_t Q2L = vget_low_f32(Q2);
  101. float32x2_t Q2H = vget_high_f32(Q2);
  102. float32x4_t Q2X = vdupq_lane_f32( Q2L, 0 );
  103. float32x4_t Q2Y = vdupq_lane_f32( Q2L, 1 );
  104. float32x4_t Q2Z = vdupq_lane_f32( Q2H, 0 );
  105. XMVECTOR vResult = vmulq_lane_f32(Q1, Q2H, 1);
  106. // Mul by Q1WZYX
  107. float32x4_t vTemp = vrev64q_f32(Q1);
  108. vTemp = vcombine_f32( vget_high_f32(vTemp), vget_low_f32(vTemp) );
  109. Q2X = vmulq_f32(Q2X,vTemp);
  110. vResult = vmlaq_f32( vResult, Q2X, ControlWZYX );
  111. // Mul by Q1ZWXY
  112. vTemp = vrev64q_u32(vTemp);
  113. Q2Y = vmulq_f32(Q2Y,vTemp);
  114. vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY);
  115. // Mul by Q1YXWZ
  116. vTemp = vrev64q_u32(vTemp);
  117. vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp));
  118. Q2Z = vmulq_f32(Q2Z,vTemp);
  119. vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ);
  120. return vResult;
  121. #elif defined(_XM_SSE_INTRINSICS_)
  122. static const XMVECTORF32 ControlWZYX = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
  123. static const XMVECTORF32 ControlZWXY = { { { 1.0f, 1.0f, -1.0f, -1.0f } } };
  124. static const XMVECTORF32 ControlYXWZ = { { { -1.0f, 1.0f, 1.0f, -1.0f } } };
  125. // Copy to SSE registers and use as few as possible for x86
  126. XMVECTOR Q2X = Q2;
  127. XMVECTOR Q2Y = Q2;
  128. XMVECTOR Q2Z = Q2;
  129. XMVECTOR vResult = Q2;
  130. // Splat with one instruction
  131. vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,3,3,3));
  132. Q2X = XM_PERMUTE_PS(Q2X,_MM_SHUFFLE(0,0,0,0));
  133. Q2Y = XM_PERMUTE_PS(Q2Y,_MM_SHUFFLE(1,1,1,1));
  134. Q2Z = XM_PERMUTE_PS(Q2Z,_MM_SHUFFLE(2,2,2,2));
  135. // Retire Q1 and perform Q1*Q2W
  136. vResult = _mm_mul_ps(vResult,Q1);
  137. XMVECTOR Q1Shuffle = Q1;
  138. // Shuffle the copies of Q1
  139. Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
  140. // Mul by Q1WZYX
  141. Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
  142. Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
  143. // Flip the signs on y and z
  144. Q2X = _mm_mul_ps(Q2X,ControlWZYX);
  145. // Mul by Q1ZWXY
  146. Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
  147. Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
  148. // Flip the signs on z and w
  149. Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
  150. // Mul by Q1YXWZ
  151. Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
  152. vResult = _mm_add_ps(vResult,Q2X);
  153. // Flip the signs on x and w
  154. Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
  155. Q2Y = _mm_add_ps(Q2Y,Q2Z);
  156. vResult = _mm_add_ps(vResult,Q2Y);
  157. return vResult;
  158. #endif
  159. }
  160. //------------------------------------------------------------------------------
  161. inline XMVECTOR XM_CALLCONV XMQuaternionLengthSq
  162. (
  163. FXMVECTOR Q
  164. )
  165. {
  166. return XMVector4LengthSq(Q);
  167. }
  168. //------------------------------------------------------------------------------
  169. inline XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength
  170. (
  171. FXMVECTOR Q
  172. )
  173. {
  174. return XMVector4ReciprocalLength(Q);
  175. }
  176. //------------------------------------------------------------------------------
  177. inline XMVECTOR XM_CALLCONV XMQuaternionLength
  178. (
  179. FXMVECTOR Q
  180. )
  181. {
  182. return XMVector4Length(Q);
  183. }
  184. //------------------------------------------------------------------------------
  185. inline XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst
  186. (
  187. FXMVECTOR Q
  188. )
  189. {
  190. return XMVector4NormalizeEst(Q);
  191. }
  192. //------------------------------------------------------------------------------
  193. inline XMVECTOR XM_CALLCONV XMQuaternionNormalize
  194. (
  195. FXMVECTOR Q
  196. )
  197. {
  198. return XMVector4Normalize(Q);
  199. }
  200. //------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionConjugate
(
    FXMVECTOR Q
)
{
    // Returns the conjugate of Q: the vector part (x, y, z) is negated while
    // the scalar part w is preserved.
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 Result = { { {
            -Q.vector4_f32[0],
            -Q.vector4_f32[1],
            -Q.vector4_f32[2],
            Q.vector4_f32[3]
        } } };
    return Result.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Multiply by (-1, -1, -1, 1) to flip only the xyz signs in one op.
    static const XMVECTORF32 NegativeOne3 = { { { -1.0f, -1.0f, -1.0f, 1.0f } } };
    return vmulq_f32(Q, NegativeOne3.v);
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 NegativeOne3 = { { { -1.0f, -1.0f, -1.0f, 1.0f } } };
    return _mm_mul_ps(Q, NegativeOne3);
#endif
}
  222. //------------------------------------------------------------------------------
  223. inline XMVECTOR XM_CALLCONV XMQuaternionInverse
  224. (
  225. FXMVECTOR Q
  226. )
  227. {
  228. const XMVECTOR Zero = XMVectorZero();
  229. XMVECTOR L = XMVector4LengthSq(Q);
  230. XMVECTOR Conjugate = XMQuaternionConjugate(Q);
  231. XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
  232. XMVECTOR Result = XMVectorDivide(Conjugate, L);
  233. Result = XMVectorSelect(Result, Zero, Control);
  234. return Result;
  235. }
  236. //------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionLn
(
    FXMVECTOR Q
)
{
    // Natural log of a unit quaternion:
    //   ln(q) = (theta / sin(theta)) * (x, y, z), with w set to 0,
    // where theta = acos(w).  When |w| >= 1 - epsilon (theta near 0, where
    // sin(theta) underflows) the raw vector part is returned unscaled.
    static const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } };

    XMVECTOR QW = XMVectorSplatW(Q);
    // Q0 = (x, y, z, 0): select xyz from Q, w from the mask vector (0).
    XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);

    // Per-lane true when w is within (-(1-eps), 1-eps), i.e. the scale is safe.
    XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v);

    XMVECTOR Theta = XMVectorACos(QW);
    XMVECTOR SinTheta = XMVectorSin(Theta);

    XMVECTOR S = XMVectorDivide(Theta, SinTheta);

    XMVECTOR Result = XMVectorMultiply(Q0, S);
    // Fall back to the unscaled vector part where the division was unsafe.
    Result = XMVectorSelect(Q0, Result, ControlW);

    return Result;
}
  253. //------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionExp
(
    FXMVECTOR Q
)
{
    // Exponential of a pure quaternion (w is ignored):
    //   exp(q) = ( sin(theta)/theta * (x, y, z), cos(theta) ),
    // where theta = |(x, y, z)|.  Near theta == 0 the sin(theta)/theta factor
    // (whose limit is 1) is replaced by passing Q through unscaled, avoiding 0/0.
    XMVECTOR Theta = XMVector3Length(Q);

    XMVECTOR SinTheta, CosTheta;
    XMVectorSinCos(&SinTheta, &CosTheta, Theta);

    XMVECTOR S = XMVectorDivide(SinTheta, Theta);

    XMVECTOR Result = XMVectorMultiply(Q, S);

    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
    Result = XMVectorSelect(Result, Q, Control);

    // Write cos(theta) into the w lane of the result.
    Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);

    return Result;
}
  270. //------------------------------------------------------------------------------
  271. inline XMVECTOR XM_CALLCONV XMQuaternionSlerp
  272. (
  273. FXMVECTOR Q0,
  274. FXMVECTOR Q1,
  275. float t
  276. )
  277. {
  278. XMVECTOR T = XMVectorReplicate(t);
  279. return XMQuaternionSlerpV(Q0, Q1, T);
  280. }
  281. //------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR T
)
{
    // Spherical linear interpolation between Q0 and Q1.  All four lanes of T
    // must contain the same interpolation factor t.
    assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));

    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } };

    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);

    // If the dot product is negative, flip the sign applied to Q1's weight so
    // the interpolation follows the shorter arc on the 4D unit sphere.
    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
    XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);

    CosOmega = XMVectorMultiply(CosOmega, Sign);

    // Control is now true when the inputs are far enough apart to use the
    // full slerp weights; otherwise the linear weights in V01 are used.
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

    // sin^2 = 1 - cos^2
    XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
    SinOmega = XMVectorSqrt(SinOmega);

    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);

    // Build V01 = (1 - t, t, 0, 0):
    //   shift T left 2 -> (t, t, 0, 0); xor x-lane sign -> (-t, t, 0, 0);
    //   add (1, 0, 0, 0) -> (1 - t, t, 0, 0).
    XMVECTOR SignMask = XMVectorSplatSignMask();
    XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2);
    SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
    V01 = XMVectorXorInt(V01, SignMask);
    V01 = XMVectorAdd(g_XMIdentityR0.v, V01);

    XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega);

    // S0 = (sin((1-t)*Omega), sin(t*Omega), ...) / sin(Omega)
    XMVECTOR S0 = XMVectorMultiply(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = XMVectorMultiply(S0, InvSinOmega);

    // Where the quaternions are nearly parallel, use the plain lerp weights.
    S0 = XMVectorSelect(V01, S0, Control);

    XMVECTOR S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);

    // Re-apply the shortest-arc sign flip to Q1's weight.
    S1 = XMVectorMultiply(S1, Sign);

    XMVECTOR Result = XMVectorMultiply(Q0, S0);
    Result = XMVectorMultiplyAdd(Q1, S1, Result);

    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } };
    static const XMVECTORU32 SignMask2 = { { { 0x80000000, 0x00000000, 0x00000000, 0x00000000 } } };

    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);

    // Shortest-arc sign flip (see the portable path above).
    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
    XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);

    CosOmega = _mm_mul_ps(CosOmega, Sign);

    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

    XMVECTOR SinOmega = _mm_mul_ps(CosOmega, CosOmega);
    SinOmega = _mm_sub_ps(g_XMOne, SinOmega);
    SinOmega = _mm_sqrt_ps(SinOmega);

    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);

    // Build V01 = (1 - t, t, 0, 0).
    XMVECTOR V01 = XM_PERMUTE_PS(T, _MM_SHUFFLE(2, 3, 0, 1));
    V01 = _mm_and_ps(V01, g_XMMaskXY);
    V01 = _mm_xor_ps(V01, SignMask2);
    V01 = _mm_add_ps(g_XMIdentityR0, V01);

    XMVECTOR S0 = _mm_mul_ps(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = _mm_div_ps(S0, SinOmega);

    // Fall back to the lerp weights for nearly-parallel inputs.
    S0 = XMVectorSelect(V01, S0, Control);

    XMVECTOR S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);

    S1 = _mm_mul_ps(S1, Sign);
    XMVECTOR Result = _mm_mul_ps(Q0, S0);
    S1 = _mm_mul_ps(S1, Q1);
    Result = _mm_add_ps(Result, S1);
    return Result;
#endif
}
  348. //------------------------------------------------------------------------------
  349. inline XMVECTOR XM_CALLCONV XMQuaternionSquad
  350. (
  351. FXMVECTOR Q0,
  352. FXMVECTOR Q1,
  353. FXMVECTOR Q2,
  354. GXMVECTOR Q3,
  355. float t
  356. )
  357. {
  358. XMVECTOR T = XMVectorReplicate(t);
  359. return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
  360. }
  361. //------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionSquadV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR Q3,
    HXMVECTOR T
)
{
    // Spherical quadrangle (squad) interpolation:
    //   Result = Slerp( Slerp(Q0, Q3, t), Slerp(Q1, Q2, t), 2t(1 - t) )
    // All four lanes of T must contain the same interpolation factor t.
    assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));

    XMVECTOR TP = T;
    const XMVECTOR Two = XMVectorSplatConstant(2, 0);

    XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T);
    XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T);

    // TP = t - t*t, then doubled -> 2t(1 - t), the outer interpolation factor.
    TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
    TP = XMVectorMultiply(TP, Two);

    XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP);

    return Result;
}
  381. //------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMQuaternionSquadSetup
(
    XMVECTOR* pA,
    XMVECTOR* pB,
    XMVECTOR* pC,
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR Q3
)
{
    // Computes the control points (*pA, *pB) and the possibly sign-flipped
    // third quaternion (*pC) used by XMQuaternionSquad for the segment
    // Q1 -> Q2 of the key sequence Q0, Q1, Q2, Q3.
    //
    // Because q and -q represent the same rotation, each neighbor is first
    // flipped if that gives a shorter path: a neighbor is negated when
    // |Qa + Qb|^2 < |Qa - Qb|^2.  The inner control points are then
    //   A = Q1 * exp(-(ln(Q1^-1 * SQ0) + ln(Q1^-1 * SQ2)) / 4)
    //   B = SQ2 * exp(-(ln(SQ2^-1 * Q1) + ln(SQ2^-1 * SQ3)) / 4)
    assert(pA);
    assert(pB);
    assert(pC);

    // Decide whether Q2 should be negated relative to Q1.
    XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
    XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
    XMVECTOR SQ2 = XMVectorNegate(Q2);

    XMVECTOR Control1 = XMVectorLess(LS12, LD12);
    SQ2 = XMVectorSelect(Q2, SQ2, Control1);

    // Same shortest-path test for Q0 against Q1 and Q3 against (signed) Q2.
    XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
    XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
    XMVECTOR SQ0 = XMVectorNegate(Q0);

    XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
    XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
    XMVECTOR SQ3 = XMVectorNegate(Q3);

    XMVECTOR Control0 = XMVectorLess(LS01, LD01);
    XMVECTOR Control2 = XMVectorLess(LS23, LD23);

    SQ0 = XMVectorSelect(Q0, SQ0, Control0);
    SQ3 = XMVectorSelect(Q3, SQ3, Control2);

    // Relative rotations of the (sign-corrected) neighbors.
    XMVECTOR InvQ1 = XMQuaternionInverse(Q1);
    XMVECTOR InvQ2 = XMQuaternionInverse(SQ2);

    XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
    XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
    XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
    XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));

    const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2);

    XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
    XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);

    ExpQ02 = XMQuaternionExp(ExpQ02);
    ExpQ13 = XMQuaternionExp(ExpQ13);

    *pA = XMQuaternionMultiply(Q1, ExpQ02);
    *pB = XMQuaternionMultiply(SQ2, ExpQ13);
    *pC = SQ2;
}
  427. //------------------------------------------------------------------------------
  428. inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentric
  429. (
  430. FXMVECTOR Q0,
  431. FXMVECTOR Q1,
  432. FXMVECTOR Q2,
  433. float f,
  434. float g
  435. )
  436. {
  437. float s = f + g;
  438. XMVECTOR Result;
  439. if ((s < 0.00001f) && (s > -0.00001f))
  440. {
  441. Result = Q0;
  442. }
  443. else
  444. {
  445. XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s);
  446. XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s);
  447. Result = XMQuaternionSlerp(Q01, Q02, g / s);
  448. }
  449. return Result;
  450. }
  451. //------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR F,
    HXMVECTOR G
)
{
    // Vectorized barycentric interpolation over (Q0, Q1, Q2); all lanes of F
    // and of G must hold the same weight.  When |f + g| is within the epsilon
    // bound the result is Q0 itself (avoids dividing by ~0 below).
    assert((XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)));
    assert((XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)));

    const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16);

    XMVECTOR S = XMVectorAdd(F, G);

    XMVECTOR Result;
    if (XMVector4InBounds(S, Epsilon))
    {
        Result = Q0;
    }
    else
    {
        XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S);
        XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S);
        // GS = G / S, computed via reciprocal-multiply.
        XMVECTOR GS = XMVectorReciprocal(S);
        GS = XMVectorMultiply(G, GS);

        Result = XMQuaternionSlerpV(Q01, Q02, GS);
    }

    return Result;
}
  480. //------------------------------------------------------------------------------
  481. // Transformation operations
  482. //------------------------------------------------------------------------------
  483. //------------------------------------------------------------------------------
  484. inline XMVECTOR XM_CALLCONV XMQuaternionIdentity()
  485. {
  486. return g_XMIdentityR3.v;
  487. }
  488. //------------------------------------------------------------------------------
  489. inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYaw
  490. (
  491. float Pitch,
  492. float Yaw,
  493. float Roll
  494. )
  495. {
  496. XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
  497. XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
  498. return Q;
  499. }
  500. //------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector
(
    FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
)
{
    // Builds a rotation quaternion from Euler angles given as
    // <Pitch (about x), Yaw (about y), Roll (about z), 0> in radians.
    // The half-angle sines/cosines of all three axes are computed at once,
    // then combined via permutes into the product of the three axis
    // quaternions (sign pattern per component supplied by Sign).
    static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, -1.0f, 1.0f } } };

    XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);

    XMVECTOR SinAngles, CosAngles;
    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);

    // P0 = (sinP, cosP, cosP, cosP), Y0 = (cosY, sinY, cosY, cosY), etc.
    XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles);
    XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles);
    XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles);
    // P1/Y1/R1 are the same patterns with sin and cos swapped.
    XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles);
    XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles);
    XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles);

    XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v);
    XMVECTOR Q0 = XMVectorMultiply(P0, Y0);
    Q1 = XMVectorMultiply(Q1, Y1);
    Q0 = XMVectorMultiply(Q0, R0);
    XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0);

    return Q;
}
  523. //------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionRotationNormal
(
    FXMVECTOR NormalAxis,
    float Angle
)
{
    // Builds the quaternion for a rotation of Angle radians about the given
    // axis, which must already be normalized:
    //   q = ( axis * sin(Angle/2), cos(Angle/2) )
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // N = (axis.x, axis.y, axis.z, 1)
    XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);

    float SinV, CosV;
    XMScalarSinCos(&SinV, &CosV, 0.5f * Angle);

    // Scale xyz by sin(Angle/2) and w by cos(Angle/2).
    XMVECTOR Scale = XMVectorSet(SinV, SinV, SinV, CosV);
    return XMVectorMultiply(N, Scale);
#elif defined(_XM_SSE_INTRINSICS_)
    // N = (axis.x, axis.y, axis.z, 1)
    XMVECTOR N = _mm_and_ps(NormalAxis, g_XMMask3);
    N = _mm_or_ps(N, g_XMIdentityR3);
    XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
    XMVECTOR vSine;
    XMVECTOR vCosine;
    XMVectorSinCos(&vSine, &vCosine, Scale);
    // Merge sin into xyz and cos into w, then apply to N.
    Scale = _mm_and_ps(vSine, g_XMMask3);
    vCosine = _mm_and_ps(vCosine, g_XMMaskW);
    Scale = _mm_or_ps(Scale, vCosine);
    N = _mm_mul_ps(N, Scale);
    return N;
#endif
}
  550. //------------------------------------------------------------------------------
  551. inline XMVECTOR XM_CALLCONV XMQuaternionRotationAxis
  552. (
  553. FXMVECTOR Axis,
  554. float Angle
  555. )
  556. {
  557. assert(!XMVector3Equal(Axis, XMVectorZero()));
  558. assert(!XMVector3IsInfinite(Axis));
  559. XMVECTOR Normal = XMVector3Normalize(Axis);
  560. XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle);
  561. return Q;
  562. }
  563. //------------------------------------------------------------------------------
// Computes a unit rotation quaternion from the upper 3x3 of a rotation matrix.
// All three implementations pick whichever of x, y, z, w has the largest
// squared magnitude (via the diagonal terms) and derive the remaining
// components from sums/differences of off-diagonal entries, which avoids
// dividing by a near-zero component.
// M: rotation matrix; only rows 0..2, columns 0..2 are read.
inline XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix
(
FXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 q;
float r22 = M.m[2][2];
if (r22 <= 0.f) // x^2 + y^2 >= z^2 + w^2
{
float dif10 = M.m[1][1] - M.m[0][0];
float omr22 = 1.f - r22;
if (dif10 <= 0.f) // x^2 >= y^2
{
// x is the largest component: recover it directly, derive the rest.
float fourXSqr = omr22 - dif10;
float inv4x = 0.5f / sqrtf(fourXSqr);
q.f[0] = fourXSqr*inv4x;
q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x;
q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x;
q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x;
}
else // y^2 >= x^2
{
float fourYSqr = omr22 + dif10;
float inv4y = 0.5f / sqrtf(fourYSqr);
q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y;
q.f[1] = fourYSqr*inv4y;
q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y;
q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y;
}
}
else // z^2 + w^2 >= x^2 + y^2
{
float sum10 = M.m[1][1] + M.m[0][0];
float opr22 = 1.f + r22;
if (sum10 <= 0.f) // z^2 >= w^2
{
float fourZSqr = opr22 - sum10;
float inv4z = 0.5f / sqrtf(fourZSqr);
q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z;
q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z;
q.f[2] = fourZSqr*inv4z;
q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z;
}
else // w^2 >= z^2
{
float fourWSqr = opr22 + sum10;
float inv4w = 0.5f / sqrtf(fourWSqr);
q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w;
q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w;
q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w;
q.f[3] = fourWSqr*inv4w;
}
}
return q.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
// SIMD variant: build all four candidate rows of the quaternion
// tensor-product matrix, then branchlessly select and normalize the row
// with the largest magnitude.
static const XMVECTORF32 XMPMMP = { { { +1.0f, -1.0f, -1.0f, +1.0f } } };
static const XMVECTORF32 XMMPMP = { { { -1.0f, +1.0f, -1.0f, +1.0f } } };
static const XMVECTORF32 XMMMPP = { { { -1.0f, -1.0f, +1.0f, +1.0f } } };
static const XMVECTORU32 Select0110 = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 } } };
static const XMVECTORU32 Select0010 = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 } } };
XMVECTOR r0 = M.r[0];
XMVECTOR r1 = M.r[1];
XMVECTOR r2 = M.r[2];
// Splat the diagonal entries m00, m11, m22.
XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0);
XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1);
XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0);
// x^2 >= y^2 equivalent to r11 - r00 <= 0
XMVECTOR r11mr00 = vsubq_f32(r11, r00);
XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero);
// z^2 >= w^2 equivalent to r11 + r00 <= 0
XMVECTOR r11pr00 = vaddq_f32(r11, r00);
XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero);
// x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero);
// (4*x^2, 4*y^2, 4*z^2, 4*w^2)
XMVECTOR t0 = vmulq_f32( XMPMMP, r00 );
XMVECTOR x2y2z2w2 = vmlaq_f32( t0, XMMPMP, r11 );
x2y2z2w2 = vmlaq_f32( x2y2z2w2, XMMMPP, r22 );
x2y2z2w2 = vaddq_f32( x2y2z2w2, g_XMOne );
// (r01, r02, r12, r11)
t0 = vextq_f32(r0, r0, 1);
XMVECTOR t1 = vextq_f32(r1, r1, 1);
t0 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_low_f32( t1 ) ) );
// (r10, r20, r21, r10)
t1 = vextq_f32(r2, r2, 3);
XMVECTOR r10 = vdupq_lane_f32( vget_low_f32(r1), 0 );
t1 = vbslq_f32( Select0110, t1, r10 );
// (4*x*y, 4*x*z, 4*y*z, unused)
XMVECTOR xyxzyz = vaddq_f32(t0, t1);
// (r21, r20, r10, r10)
t0 = vcombine_f32( vrev64_f32( vget_low_f32(r2) ), vget_low_f32(r10) );
// (r12, r02, r01, r12)
XMVECTOR t2 = vcombine_f32( vrev64_f32( vget_high_f32(r0) ), vrev64_f32( vget_low_f32(r0) ) );
XMVECTOR t3 = vdupq_lane_f32( vget_high_f32(r1), 0 );
t1 = vbslq_f32( Select0110, t2, t3 );
// (4*x*w, 4*y*w, 4*z*w, unused)
XMVECTOR xwywzw = vsubq_f32(t0, t1);
xwywzw = vmulq_f32(XMMPMP, xwywzw);
// (4*x*x, 4*x*y, 4*x*z, 4*x*w)
t0 = vextq_f32( xyxzyz, xyxzyz, 3 );
t1 = vbslq_f32( Select0110, t0, x2y2z2w2 );
t2 = vdupq_lane_f32( vget_low_f32(xwywzw), 0 );
XMVECTOR tensor0 = vbslq_f32( g_XMSelect1110, t1, t2 );
// (4*y*x, 4*y*y, 4*y*z, 4*y*w)
t0 = vbslq_f32( g_XMSelect1011, xyxzyz, x2y2z2w2 );
t1 = vdupq_lane_f32( vget_low_f32(xwywzw), 1 );
XMVECTOR tensor1 = vbslq_f32( g_XMSelect1110, t0, t1 );
// (4*z*x, 4*z*y, 4*z*z, 4*z*w)
t0 = vextq_f32(xyxzyz, xyxzyz, 1);
t1 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_high_f32(xwywzw) ) );
XMVECTOR tensor2 = vbslq_f32( Select0010, x2y2z2w2, t1 );
// (4*w*x, 4*w*y, 4*w*z, 4*w*w)
XMVECTOR tensor3 = vbslq_f32( g_XMSelect1110, xwywzw, x2y2z2w2 );
// Select the row of the tensor-product matrix that has the largest
// magnitude.
t0 = vbslq_f32( x2gey2, tensor0, tensor1 );
t1 = vbslq_f32( z2gew2, tensor2, tensor3 );
t2 = vbslq_f32( x2py2gez2pw2, t0, t1 );
// Normalize the row. No division by zero is possible because the
// quaternion is unit-length (and the row is a nonzero multiple of
// the quaternion).
t0 = XMVector4Length(t2);
return XMVectorDivide(t2, t0);
#elif defined(_XM_SSE_INTRINSICS_)
// SSE variant of the same branchless row-selection scheme as the NEON path.
static const XMVECTORF32 XMPMMP = { { { +1.0f, -1.0f, -1.0f, +1.0f } } };
static const XMVECTORF32 XMMPMP = { { { -1.0f, +1.0f, -1.0f, +1.0f } } };
static const XMVECTORF32 XMMMPP = { { { -1.0f, -1.0f, +1.0f, +1.0f } } };
XMVECTOR r0 = M.r[0]; // (r00, r01, r02, 0)
XMVECTOR r1 = M.r[1]; // (r10, r11, r12, 0)
XMVECTOR r2 = M.r[2]; // (r20, r21, r22, 0)
// (r00, r00, r00, r00)
XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0,0,0,0));
// (r11, r11, r11, r11)
XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1,1,1,1));
// (r22, r22, r22, r22)
XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2,2,2,2));
// x^2 >= y^2 equivalent to r11 - r00 <= 0
// (r11 - r00, r11 - r00, r11 - r00, r11 - r00)
XMVECTOR r11mr00 = _mm_sub_ps(r11, r00);
XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero);
// z^2 >= w^2 equivalent to r11 + r00 <= 0
// (r11 + r00, r11 + r00, r11 + r00, r11 + r00)
XMVECTOR r11pr00 = _mm_add_ps(r11, r00);
XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero);
// x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero);
// (+r00, -r00, -r00, +r00)
XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00);
// (-r11, +r11, -r11, +r11)
XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11);
// (-r22, -r22, +r22, +r22)
XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22);
// (4*x^2, 4*y^2, 4*z^2, 4*w^2)
XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1);
x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2);
x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne);
// (r01, r02, r12, r11)
t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1,2,2,1));
// (r10, r10, r20, r21)
t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1,0,0,0));
// (r10, r20, r21, r10)
t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
// (4*x*y, 4*x*z, 4*y*z, unused)
XMVECTOR xyxzyz = _mm_add_ps(t0, t1);
// (r21, r20, r10, r10)
t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0,0,0,1));
// (r12, r12, r02, r01)
t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1,2,2,2));
// (r12, r02, r01, r12)
t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
// (4*x*w, 4*y*w, 4*z*w, unused)
XMVECTOR xwywzw = _mm_sub_ps(t0, t1);
xwywzw = _mm_mul_ps(XMMPMP, xwywzw);
// (4*x^2, 4*y^2, 4*x*y, unused)
t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0,0,1,0));
// (4*z^2, 4*w^2, 4*z*w, unused)
t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0,2,3,2));
// (4*x*z, 4*y*z, 4*x*w, 4*y*w)
t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1,0,2,1));
// (4*x*x, 4*x*y, 4*x*z, 4*x*w)
XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,0,2,0));
// (4*y*x, 4*y*y, 4*y*z, 4*y*w)
XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,1,1,2));
// (4*z*x, 4*z*y, 4*z*z, 4*z*w)
XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,0,1,0));
// (4*w*x, 4*w*y, 4*w*z, 4*w*w)
XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1,2,3,2));
// Select the row of the tensor-product matrix that has the largest
// magnitude.
t0 = _mm_and_ps(x2gey2, tensor0);
t1 = _mm_andnot_ps(x2gey2, tensor1);
t0 = _mm_or_ps(t0, t1);
t1 = _mm_and_ps(z2gew2, tensor2);
t2 = _mm_andnot_ps(z2gew2, tensor3);
t1 = _mm_or_ps(t1, t2);
t0 = _mm_and_ps(x2py2gez2pw2, t0);
t1 = _mm_andnot_ps(x2py2gez2pw2, t1);
t2 = _mm_or_ps(t0, t1);
// Normalize the row. No division by zero is possible because the
// quaternion is unit-length (and the row is a nonzero multiple of
// the quaternion).
t0 = XMVector4Length(t2);
return _mm_div_ps(t2, t0);
#endif
}
  770. //------------------------------------------------------------------------------
  771. // Conversion operations
  772. //------------------------------------------------------------------------------
  773. //------------------------------------------------------------------------------
  774. _Use_decl_annotations_
  775. inline void XM_CALLCONV XMQuaternionToAxisAngle
  776. (
  777. XMVECTOR* pAxis,
  778. float* pAngle,
  779. FXMVECTOR Q
  780. )
  781. {
  782. assert(pAxis);
  783. assert(pAngle);
  784. *pAxis = Q;
  785. *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
  786. }
  787. /****************************************************************************
  788. *
  789. * Plane
  790. *
  791. ****************************************************************************/
  792. //------------------------------------------------------------------------------
  793. // Comparison operations
  794. //------------------------------------------------------------------------------
  795. //------------------------------------------------------------------------------
  796. inline bool XM_CALLCONV XMPlaneEqual
  797. (
  798. FXMVECTOR P1,
  799. FXMVECTOR P2
  800. )
  801. {
  802. return XMVector4Equal(P1, P2);
  803. }
  804. //------------------------------------------------------------------------------
  805. inline bool XM_CALLCONV XMPlaneNearEqual
  806. (
  807. FXMVECTOR P1,
  808. FXMVECTOR P2,
  809. FXMVECTOR Epsilon
  810. )
  811. {
  812. XMVECTOR NP1 = XMPlaneNormalize(P1);
  813. XMVECTOR NP2 = XMPlaneNormalize(P2);
  814. return XMVector4NearEqual(NP1, NP2, Epsilon);
  815. }
  816. //------------------------------------------------------------------------------
  817. inline bool XM_CALLCONV XMPlaneNotEqual
  818. (
  819. FXMVECTOR P1,
  820. FXMVECTOR P2
  821. )
  822. {
  823. return XMVector4NotEqual(P1, P2);
  824. }
  825. //------------------------------------------------------------------------------
  826. inline bool XM_CALLCONV XMPlaneIsNaN
  827. (
  828. FXMVECTOR P
  829. )
  830. {
  831. return XMVector4IsNaN(P);
  832. }
  833. //------------------------------------------------------------------------------
  834. inline bool XM_CALLCONV XMPlaneIsInfinite
  835. (
  836. FXMVECTOR P
  837. )
  838. {
  839. return XMVector4IsInfinite(P);
  840. }
  841. //------------------------------------------------------------------------------
  842. // Computation operations
  843. //------------------------------------------------------------------------------
  844. //------------------------------------------------------------------------------
  845. inline XMVECTOR XM_CALLCONV XMPlaneDot
  846. (
  847. FXMVECTOR P,
  848. FXMVECTOR V
  849. )
  850. {
  851. return XMVector4Dot(P, V);
  852. }
  853. //------------------------------------------------------------------------------
  854. inline XMVECTOR XM_CALLCONV XMPlaneDotCoord
  855. (
  856. FXMVECTOR P,
  857. FXMVECTOR V
  858. )
  859. {
  860. // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
  861. XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
  862. XMVECTOR Result = XMVector4Dot(P, V3);
  863. return Result;
  864. }
  865. //------------------------------------------------------------------------------
  866. inline XMVECTOR XM_CALLCONV XMPlaneDotNormal
  867. (
  868. FXMVECTOR P,
  869. FXMVECTOR V
  870. )
  871. {
  872. return XMVector3Dot(P, V);
  873. }
  874. //------------------------------------------------------------------------------
  875. // XMPlaneNormalizeEst uses a reciprocal estimate and
  876. // returns QNaN on zero and infinite vectors.
// Normalizes the plane coefficients so the normal (xyz) has unit length,
// using a fast reciprocal-square-root estimate. All four components
// (including d) are scaled by 1/|normal|.
inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst
(
FXMVECTOR P
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR Result = XMVector3ReciprocalLengthEst(P);
return XMVectorMultiply(P, Result);
#elif defined(_XM_SSE4_INTRINSICS_)
// Dot product of xyz only (mask 0x7f), broadcast to all lanes.
XMVECTOR vTemp = _mm_dp_ps( P, P, 0x7f );
XMVECTOR vResult = _mm_rsqrt_ps( vTemp );
return _mm_mul_ps(vResult, P);
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product
XMVECTOR vDot = _mm_mul_ps(P,P);
// x=Dot.y, y=Dot.z
XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1));
// Result.x = x+y
vDot = _mm_add_ss(vDot,vTemp);
// x=Dot.z
vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
// Result.x = (x+y)+z
vDot = _mm_add_ss(vDot,vTemp);
// Splat x
vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0));
// Get the reciprocal
vDot = _mm_rsqrt_ps(vDot);
// Scale the plane by the estimated reciprocal length
vDot = _mm_mul_ps(vDot,P);
return vDot;
#endif
}
  909. //------------------------------------------------------------------------------
// Normalizes the plane coefficients so the normal (xyz) has unit length.
// All four components (including d) are scaled by 1/|normal|.
// A zero-length normal yields a zero result rather than dividing by zero.
inline XMVECTOR XM_CALLCONV XMPlaneNormalize
(
FXMVECTOR P
)
{
#if defined(_XM_NO_INTRINSICS_)
// NOTE: despite the name, after the sqrtf this holds the *length* of the
// xyz normal, and below it is replaced by its reciprocal.
float fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
// Prevent divide by zero
if (fLengthSq)
{
fLengthSq = 1.0f/fLengthSq;
}
XMVECTORF32 vResult = { { {
P.vector4_f32[0] * fLengthSq,
P.vector4_f32[1] * fLengthSq,
P.vector4_f32[2] * fLengthSq,
P.vector4_f32[3] * fLengthSq
} } };
return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR vLength = XMVector3ReciprocalLength(P);
return XMVectorMultiply( P, vLength );
#elif defined(_XM_SSE4_INTRINSICS_)
// Dot product of xyz only (mask 0x7f), broadcast to all lanes.
XMVECTOR vLengthSq = _mm_dp_ps( P, P, 0x7f );
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Failsafe on zero (Or epsilon) length planes
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Reciprocal mul to perform the normalization
vResult = _mm_div_ps(P,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vLengthSq);
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x,y and z only
XMVECTOR vLengthSq = _mm_mul_ps(P,P);
XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,1,2,1));
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
vLengthSq = _mm_add_ss(vLengthSq,vTemp);
vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Failsafe on zero (Or epsilon) length planes
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
// Reciprocal mul to perform the normalization
vResult = _mm_div_ps(P,vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult,vLengthSq);
return vResult;
#endif
}
  964. //------------------------------------------------------------------------------
  965. inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine
  966. (
  967. FXMVECTOR P,
  968. FXMVECTOR LinePoint1,
  969. FXMVECTOR LinePoint2
  970. )
  971. {
  972. XMVECTOR V1 = XMVector3Dot(P, LinePoint1);
  973. XMVECTOR V2 = XMVector3Dot(P, LinePoint2);
  974. XMVECTOR D = XMVectorSubtract(V1, V2);
  975. XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1);
  976. VT = XMVectorDivide(VT, D);
  977. XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1);
  978. Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
  979. const XMVECTOR Zero = XMVectorZero();
  980. XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
  981. return XMVectorSelect(Point, g_XMQNaN.v, Control);
  982. }
  983. //------------------------------------------------------------------------------
  984. _Use_decl_annotations_
  985. inline void XM_CALLCONV XMPlaneIntersectPlane
  986. (
  987. XMVECTOR* pLinePoint1,
  988. XMVECTOR* pLinePoint2,
  989. FXMVECTOR P1,
  990. FXMVECTOR P2
  991. )
  992. {
  993. assert(pLinePoint1);
  994. assert(pLinePoint2);
  995. XMVECTOR V1 = XMVector3Cross(P2, P1);
  996. XMVECTOR LengthSq = XMVector3LengthSq(V1);
  997. XMVECTOR V2 = XMVector3Cross(P2, V1);
  998. XMVECTOR P1W = XMVectorSplatW(P1);
  999. XMVECTOR Point = XMVectorMultiply(V2, P1W);
  1000. XMVECTOR V3 = XMVector3Cross(V1, P1);
  1001. XMVECTOR P2W = XMVectorSplatW(P2);
  1002. Point = XMVectorMultiplyAdd(V3, P2W, Point);
  1003. XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq);
  1004. XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1);
  1005. XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
  1006. *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
  1007. *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
  1008. }
  1009. //------------------------------------------------------------------------------
  1010. inline XMVECTOR XM_CALLCONV XMPlaneTransform
  1011. (
  1012. FXMVECTOR P,
  1013. FXMMATRIX M
  1014. )
  1015. {
  1016. XMVECTOR W = XMVectorSplatW(P);
  1017. XMVECTOR Z = XMVectorSplatZ(P);
  1018. XMVECTOR Y = XMVectorSplatY(P);
  1019. XMVECTOR X = XMVectorSplatX(P);
  1020. XMVECTOR Result = XMVectorMultiply(W, M.r[3]);
  1021. Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
  1022. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  1023. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  1024. return Result;
  1025. }
  1026. //------------------------------------------------------------------------------
  1027. _Use_decl_annotations_
  1028. inline XMFLOAT4* XM_CALLCONV XMPlaneTransformStream
  1029. (
  1030. XMFLOAT4* pOutputStream,
  1031. size_t OutputStride,
  1032. const XMFLOAT4* pInputStream,
  1033. size_t InputStride,
  1034. size_t PlaneCount,
  1035. FXMMATRIX M
  1036. )
  1037. {
  1038. return XMVector4TransformStream(pOutputStream,
  1039. OutputStride,
  1040. pInputStream,
  1041. InputStride,
  1042. PlaneCount,
  1043. M);
  1044. }
  1045. //------------------------------------------------------------------------------
  1046. // Conversion operations
  1047. //------------------------------------------------------------------------------
  1048. //------------------------------------------------------------------------------
  1049. inline XMVECTOR XM_CALLCONV XMPlaneFromPointNormal
  1050. (
  1051. FXMVECTOR Point,
  1052. FXMVECTOR Normal
  1053. )
  1054. {
  1055. XMVECTOR W = XMVector3Dot(Point, Normal);
  1056. W = XMVectorNegate(W);
  1057. return XMVectorSelect(W, Normal, g_XMSelect1110.v);
  1058. }
  1059. //------------------------------------------------------------------------------
  1060. inline XMVECTOR XM_CALLCONV XMPlaneFromPoints
  1061. (
  1062. FXMVECTOR Point1,
  1063. FXMVECTOR Point2,
  1064. FXMVECTOR Point3
  1065. )
  1066. {
  1067. XMVECTOR V21 = XMVectorSubtract(Point1, Point2);
  1068. XMVECTOR V31 = XMVectorSubtract(Point1, Point3);
  1069. XMVECTOR N = XMVector3Cross(V21, V31);
  1070. N = XMVector3Normalize(N);
  1071. XMVECTOR D = XMPlaneDotNormal(N, Point1);
  1072. D = XMVectorNegate(D);
  1073. XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v);
  1074. return Result;
  1075. }
  1076. /****************************************************************************
  1077. *
  1078. * Color
  1079. *
  1080. ****************************************************************************/
  1081. //------------------------------------------------------------------------------
  1082. // Comparison operations
  1083. //------------------------------------------------------------------------------
  1084. //------------------------------------------------------------------------------
  1085. inline bool XM_CALLCONV XMColorEqual
  1086. (
  1087. FXMVECTOR C1,
  1088. FXMVECTOR C2
  1089. )
  1090. {
  1091. return XMVector4Equal(C1, C2);
  1092. }
  1093. //------------------------------------------------------------------------------
  1094. inline bool XM_CALLCONV XMColorNotEqual
  1095. (
  1096. FXMVECTOR C1,
  1097. FXMVECTOR C2
  1098. )
  1099. {
  1100. return XMVector4NotEqual(C1, C2);
  1101. }
  1102. //------------------------------------------------------------------------------
  1103. inline bool XM_CALLCONV XMColorGreater
  1104. (
  1105. FXMVECTOR C1,
  1106. FXMVECTOR C2
  1107. )
  1108. {
  1109. return XMVector4Greater(C1, C2);
  1110. }
  1111. //------------------------------------------------------------------------------
  1112. inline bool XM_CALLCONV XMColorGreaterOrEqual
  1113. (
  1114. FXMVECTOR C1,
  1115. FXMVECTOR C2
  1116. )
  1117. {
  1118. return XMVector4GreaterOrEqual(C1, C2);
  1119. }
  1120. //------------------------------------------------------------------------------
  1121. inline bool XM_CALLCONV XMColorLess
  1122. (
  1123. FXMVECTOR C1,
  1124. FXMVECTOR C2
  1125. )
  1126. {
  1127. return XMVector4Less(C1, C2);
  1128. }
  1129. //------------------------------------------------------------------------------
  1130. inline bool XM_CALLCONV XMColorLessOrEqual
  1131. (
  1132. FXMVECTOR C1,
  1133. FXMVECTOR C2
  1134. )
  1135. {
  1136. return XMVector4LessOrEqual(C1, C2);
  1137. }
  1138. //------------------------------------------------------------------------------
  1139. inline bool XM_CALLCONV XMColorIsNaN
  1140. (
  1141. FXMVECTOR C
  1142. )
  1143. {
  1144. return XMVector4IsNaN(C);
  1145. }
  1146. //------------------------------------------------------------------------------
  1147. inline bool XM_CALLCONV XMColorIsInfinite
  1148. (
  1149. FXMVECTOR C
  1150. )
  1151. {
  1152. return XMVector4IsInfinite(C);
  1153. }
  1154. //------------------------------------------------------------------------------
  1155. // Computation operations
  1156. //------------------------------------------------------------------------------
  1157. //------------------------------------------------------------------------------
// Computes the complement of a color: (1-r, 1-g, 1-b) with alpha unchanged.
inline XMVECTOR XM_CALLCONV XMColorNegative
(
FXMVECTOR vColor
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 vResult = { { {
1.0f - vColor.vector4_f32[0],
1.0f - vColor.vector4_f32[1],
1.0f - vColor.vector4_f32[2],
vColor.vector4_f32[3]
} } };
return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
// Sign-flip x,y,z via XOR, then add (1,1,1,0): yields 1-c for rgb, a for w.
// NOTE(review): vColor is a float vector passed to veorq_u32 without a
// vreinterpret; this relies on permissive vector typing — confirm it
// compiles cleanly on the targeted ARM toolchains.
XMVECTOR vTemp = veorq_u32(vColor,g_XMNegate3);
return vaddq_f32(vTemp,g_XMOne3);
#elif defined(_XM_SSE_INTRINSICS_)
// Negate only x,y and z.
XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
// Add 1,1,1,0 to -x,-y,-z,w
return _mm_add_ps(vTemp,g_XMOne3);
#endif
}
  1181. //------------------------------------------------------------------------------
  1182. inline XMVECTOR XM_CALLCONV XMColorModulate
  1183. (
  1184. FXMVECTOR C1,
  1185. FXMVECTOR C2
  1186. )
  1187. {
  1188. return XMVectorMultiply(C1, C2);
  1189. }
  1190. //------------------------------------------------------------------------------
// Adjusts the saturation of a color by interpolating between its luminance
// (grey) and the original color: 0 = greyscale, 1 = unchanged, >1 = boosted.
// Alpha is preserved from the source color.
inline XMVECTOR XM_CALLCONV XMColorAdjustSaturation
(
FXMVECTOR vColor,
float fSaturation
)
{
// Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
// Result = (C - Luminance) * Saturation + Luminance;
const XMVECTORF32 gvLuminance = { { { 0.2125f, 0.7154f, 0.0721f, 0.0f } } };
#if defined(_XM_NO_INTRINSICS_)
float fLuminance = (vColor.vector4_f32[0]*gvLuminance.f[0])+(vColor.vector4_f32[1]*gvLuminance.f[1])+(vColor.vector4_f32[2]*gvLuminance.f[2]);
XMVECTOR vResult;
vResult.vector4_f32[0] = ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance;
vResult.vector4_f32[1] = ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance;
vResult.vector4_f32[2] = ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance;
vResult.vector4_f32[3] = vColor.vector4_f32[3];
return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
XMVECTOR vResult = vsubq_f32(vColor, vLuminance);
vResult = vmlaq_n_f32( vLuminance, vResult, fSaturation );
// Keep the source alpha in w.
return vbslq_f32( g_XMSelect1110, vResult, vColor );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
// Splat fSaturation
XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
XMVECTOR vResult = _mm_sub_ps(vColor,vLuminance);
vResult = _mm_mul_ps(vResult,vSaturation);
vResult = _mm_add_ps(vResult,vLuminance);
// Retain w from the source color
vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
return vResult;
#endif
}
  1227. //------------------------------------------------------------------------------
// Adjusts the contrast of a color by scaling its distance from mid-grey:
// 0 = flat grey, 1 = unchanged, >1 = increased contrast.
// Alpha is preserved from the source color.
inline XMVECTOR XM_CALLCONV XMColorAdjustContrast
(
FXMVECTOR vColor,
float fContrast
)
{
// Result = (vColor - 0.5f) * fContrast + 0.5f;
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 vResult = { { {
((vColor.vector4_f32[0] - 0.5f) * fContrast) + 0.5f,
((vColor.vector4_f32[1] - 0.5f) * fContrast) + 0.5f,
((vColor.vector4_f32[2] - 0.5f) * fContrast) + 0.5f,
vColor.vector4_f32[3] // Leave W untouched
} } };
return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v);
vResult = vmlaq_n_f32( g_XMOneHalf.v, vResult, fContrast );
// Keep the source alpha in w.
return vbslq_f32( g_XMSelect1110, vResult, vColor );
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale
XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source)
vResult = _mm_mul_ps(vResult,vScale); // Mul by scale
vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f
// Retain w from the source color
vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
return vResult;
#endif
}
  1258. //------------------------------------------------------------------------------
// Converts an RGB color to HSL.
// rgb: (r, g, b, a) — channels presumably in [0, 1]; alpha passes through.
// Returns (hue, saturation, luminance, a) with hue wrapped into [0, 1).
inline XMVECTOR XM_CALLCONV XMColorRGBToHSL( FXMVECTOR rgb )
{
XMVECTOR r = XMVectorSplatX( rgb );
XMVECTOR g = XMVectorSplatY( rgb );
XMVECTOR b = XMVectorSplatZ( rgb );
// Luminance is the midpoint of the channel extremes.
XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) );
XMVECTOR max = XMVectorMax( r, XMVectorMax( g, b ) );
XMVECTOR l = XMVectorMultiply( XMVectorAdd( min, max ), g_XMOneHalf );
// Chroma: max - min. Zero chroma means the color is a shade of grey.
XMVECTOR d = XMVectorSubtract( max, min );
// la = (rgb.x, rgb.y, l, rgb.w): luminance staged in z, alpha kept in w.
XMVECTOR la = XMVectorSelect( rgb, l, g_XMSelect1110 );
if ( XMVector3Less( d, g_XMEpsilon ) )
{
// Achromatic, assume H and S of 0
return XMVectorSelect( la, g_XMZero, g_XMSelect1100 );
}
else
{
XMVECTOR s, h;
XMVECTOR d2 = XMVectorAdd( min, max );
if ( XMVector3Greater( l, g_XMOneHalf ) )
{
// d / (2-max-min)
s = XMVectorDivide( d, XMVectorSubtract( g_XMTwo, d2 ) );
}
else
{
// d / (max+min)
s = XMVectorDivide( d, d2 );
}
// Hue sector depends on which channel is the maximum.
if ( XMVector3Equal( r, max ) )
{
// Red is max
h = XMVectorDivide( XMVectorSubtract( g, b ), d );
}
else if ( XMVector3Equal( g, max ) )
{
// Green is max
h = XMVectorDivide( XMVectorSubtract( b, r ), d );
h = XMVectorAdd( h, g_XMTwo );
}
else
{
// Blue is max
h = XMVectorDivide( XMVectorSubtract( r, g ), d );
h = XMVectorAdd( h, g_XMFour );
}
// Scale hue from sectors (0..6) to [0, 1) and wrap negatives.
h = XMVectorDivide( h, g_XMSix );
if ( XMVector3Less( h, g_XMZero ) )
h = XMVectorAdd( h, g_XMOne );
// Assemble (h, s, l, a).
XMVECTOR lha = XMVectorSelect( la, h, g_XMSelect1100 );
return XMVectorSelect( s, lha, g_XMSelect1011 );
}
}
  1312. //------------------------------------------------------------------------------
  1313. namespace Internal
  1314. {
  1315. inline XMVECTOR XM_CALLCONV XMColorHue2Clr( FXMVECTOR p, FXMVECTOR q, FXMVECTOR h )
  1316. {
  1317. static const XMVECTORF32 oneSixth = { { { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f } } };
  1318. static const XMVECTORF32 twoThirds = { { { 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f } } };
  1319. XMVECTOR t = h;
  1320. if ( XMVector3Less( t, g_XMZero ) )
  1321. t = XMVectorAdd( t, g_XMOne );
  1322. if ( XMVector3Greater( t, g_XMOne ) )
  1323. t = XMVectorSubtract( t, g_XMOne );
  1324. if ( XMVector3Less( t, oneSixth ) )
  1325. {
  1326. // p + (q - p) * 6 * t
  1327. XMVECTOR t1 = XMVectorSubtract( q, p );
  1328. XMVECTOR t2 = XMVectorMultiply( g_XMSix, t );
  1329. return XMVectorMultiplyAdd( t1, t2, p );
  1330. }
  1331. if ( XMVector3Less( t, g_XMOneHalf ) )
  1332. return q;
  1333. if ( XMVector3Less( t, twoThirds ) )
  1334. {
  1335. // p + (q - p) * 6 * (2/3 - t)
  1336. XMVECTOR t1 = XMVectorSubtract( q, p );
  1337. XMVECTOR t2 = XMVectorMultiply( g_XMSix, XMVectorSubtract( twoThirds, t ) );
  1338. return XMVectorMultiplyAdd( t1, t2, p );
  1339. }
  1340. return p;
  1341. }
  1342. }; // namespace Internal
// Converts an HSL color (hue, saturation, luminance in x,y,z) to RGB.
// The w channel of the input is passed through unchanged.
inline XMVECTOR XM_CALLCONV XMColorHSLToRGB( FXMVECTOR hsl )
{
static const XMVECTORF32 oneThird = { { { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f } } };
XMVECTOR s = XMVectorSplatY( hsl );
XMVECTOR l = XMVectorSplatZ( hsl );
if ( XMVector3NearEqual( s, g_XMZero, g_XMEpsilon ) )
{
// Achromatic (saturation ~ 0): r = g = b = luminance; keep original w
return XMVectorSelect( hsl, l, g_XMSelect1110 );
}
else
{
XMVECTOR h = XMVectorSplatX( hsl );
XMVECTOR q;
// q is the "upper" intermediate: l*(1+s) for dark colors,
// l+s-l*s for light ones
if ( XMVector3Less( l, g_XMOneHalf ) )
{
q = XMVectorMultiply( l, XMVectorAdd ( g_XMOne, s ) );
}
else
{
q = XMVectorSubtract( XMVectorAdd( l, s ), XMVectorMultiply( l, s ) );
}
// p is the "lower" intermediate: 2*l - q
XMVECTOR p = XMVectorSubtract( XMVectorMultiply( g_XMTwo, l ), q );
// Each channel samples the same piecewise curve at a hue offset of +-1/3
XMVECTOR r = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorAdd( h, oneThird ) );
XMVECTOR g = DirectX::Internal::XMColorHue2Clr( p, q, h );
XMVECTOR b = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorSubtract( h, oneThird ) );
// Pack: x from r, y from g, z from b, w from the original hsl
XMVECTOR rg = XMVectorSelect( g, r, g_XMSelect1000 );
XMVECTOR ba = XMVectorSelect( hsl, b, g_XMSelect1110 );
return XMVectorSelect( ba, rg, g_XMSelect1100 );
}
}
  1374. //------------------------------------------------------------------------------
// Converts an RGB color to HSV (hue, saturation, value in x,y,z).
// The w channel of the input is passed through unchanged.
inline XMVECTOR XM_CALLCONV XMColorRGBToHSV( FXMVECTOR rgb )
{
XMVECTOR r = XMVectorSplatX( rgb );
XMVECTOR g = XMVectorSplatY( rgb );
XMVECTOR b = XMVectorSplatZ( rgb );
// v = max channel (the "value"); d = chroma (max - min)
XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) );
XMVECTOR v = XMVectorMax( r, XMVectorMax( g, b ) );
XMVECTOR d = XMVectorSubtract( v, min );
// Guard against divide-by-zero for black
XMVECTOR s = ( XMVector3NearEqual( v, g_XMZero, g_XMEpsilon ) ) ? g_XMZero : XMVectorDivide( d, v );
if ( XMVector3Less( d, g_XMEpsilon ) )
{
// Achromatic, assume H of 0
XMVECTOR hv = XMVectorSelect( v, g_XMZero, g_XMSelect1000 );
XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 );
return XMVectorSelect( s, hva, g_XMSelect1011 );
}
else
{
// Hue depends on which channel is the max; each case yields a value
// in [0,6) before normalization
XMVECTOR h;
if ( XMVector3Equal( r, v ) )
{
// Red is max
h = XMVectorDivide( XMVectorSubtract( g, b ), d );
if ( XMVector3Less( g, b ) )
h = XMVectorAdd( h, g_XMSix );
}
else if ( XMVector3Equal( g, v ) )
{
// Green is max
h = XMVectorDivide( XMVectorSubtract( b, r ), d );
h = XMVectorAdd( h, g_XMTwo );
}
else
{
// Blue is max
h = XMVectorDivide( XMVectorSubtract( r, g ), d );
h = XMVectorAdd( h, g_XMFour );
}
// Normalize hue to [0,1)
h = XMVectorDivide( h, g_XMSix );
// Pack: x = hue, y = saturation, z = value, w from the original rgb
XMVECTOR hv = XMVectorSelect( v, h, g_XMSelect1000 );
XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 );
return XMVectorSelect( s, hva, g_XMSelect1011 );
}
}
  1419. //------------------------------------------------------------------------------
// Converts an HSV color (hue, saturation, value in x,y,z) to RGB.
// The w channel of the input is passed through unchanged.
inline XMVECTOR XM_CALLCONV XMColorHSVToRGB( FXMVECTOR hsv )
{
XMVECTOR h = XMVectorSplatX( hsv );
XMVECTOR s = XMVectorSplatY( hsv );
XMVECTOR v = XMVectorSplatZ( hsv );
// Split hue*6 into integer sector i and fractional part f
XMVECTOR h6 = XMVectorMultiply( h, g_XMSix );
XMVECTOR i = XMVectorFloor( h6 );
XMVECTOR f = XMVectorSubtract( h6, i );
// p = v* (1-s)
XMVECTOR p = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, s ) );
// q = v*(1-f*s)
XMVECTOR q = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( f, s ) ) );
// t = v*(1 - (1-f)*s)
XMVECTOR t = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( XMVectorSubtract( g_XMOne, f ), s ) ) );
// Sector index 0..5 selects which of (v,p,q,t) lands in each channel
int ii = static_cast<int>( XMVectorGetX( XMVectorMod( i, g_XMSix ) ) );
XMVECTOR _rgb;
switch (ii)
{
case 0: // rgb = vtp
{
XMVECTOR vt = XMVectorSelect( t, v, g_XMSelect1000 );
_rgb = XMVectorSelect( p, vt, g_XMSelect1100 );
}
break;
case 1: // rgb = qvp
{
XMVECTOR qv = XMVectorSelect( v, q, g_XMSelect1000 );
_rgb = XMVectorSelect( p, qv, g_XMSelect1100 );
}
break;
case 2: // rgb = pvt
{
XMVECTOR pv = XMVectorSelect( v, p, g_XMSelect1000 );
_rgb = XMVectorSelect( t, pv, g_XMSelect1100 );
}
break;
case 3: // rgb = pqv
{
XMVECTOR pq = XMVectorSelect( q, p, g_XMSelect1000 );
_rgb = XMVectorSelect( v, pq, g_XMSelect1100 );
}
break;
case 4: // rgb = tpv
{
XMVECTOR tp = XMVectorSelect( p, t, g_XMSelect1000 );
_rgb = XMVectorSelect( v, tp, g_XMSelect1100 );
}
break;
default: // rgb = vpq
{
XMVECTOR vp = XMVectorSelect( p, v, g_XMSelect1000 );
_rgb = XMVectorSelect( q, vp, g_XMSelect1100 );
}
break;
}
// Keep the original w channel
return XMVectorSelect( hsv, _rgb, g_XMSelect1110 );
}
  1477. //------------------------------------------------------------------------------
  1478. inline XMVECTOR XM_CALLCONV XMColorRGBToYUV( FXMVECTOR rgb )
  1479. {
  1480. static const XMVECTORF32 Scale0 = { { { 0.299f, -0.147f, 0.615f, 0.0f } } };
  1481. static const XMVECTORF32 Scale1 = { { { 0.587f, -0.289f, -0.515f, 0.0f } } };
  1482. static const XMVECTORF32 Scale2 = { { { 0.114f, 0.436f, -0.100f, 0.0f } } };
  1483. XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
  1484. XMVECTOR clr = XMVector3Transform( rgb, M );
  1485. return XMVectorSelect( rgb, clr, g_XMSelect1110 );
  1486. }
  1487. //------------------------------------------------------------------------------
  1488. inline XMVECTOR XM_CALLCONV XMColorYUVToRGB( FXMVECTOR yuv )
  1489. {
  1490. static const XMVECTORF32 Scale1 = { { { 0.0f, -0.395f, 2.032f, 0.0f } } };
  1491. static const XMVECTORF32 Scale2 = { { { 1.140f, -0.581f, 0.0f, 0.0f } } };
  1492. XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero );
  1493. XMVECTOR clr = XMVector3Transform( yuv, M );
  1494. return XMVectorSelect( yuv, clr, g_XMSelect1110 );
  1495. }
  1496. //------------------------------------------------------------------------------
  1497. inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD( FXMVECTOR rgb )
  1498. {
  1499. static const XMVECTORF32 Scale0 = { { { 0.2126f, -0.0997f, 0.6150f, 0.0f } } };
  1500. static const XMVECTORF32 Scale1 = { { { 0.7152f, -0.3354f, -0.5586f, 0.0f } } };
  1501. static const XMVECTORF32 Scale2 = { { { 0.0722f, 0.4351f, -0.0564f, 0.0f } } };
  1502. XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
  1503. XMVECTOR clr = XMVector3Transform( rgb, M );
  1504. return XMVectorSelect( rgb, clr, g_XMSelect1110 );
  1505. }
  1506. //------------------------------------------------------------------------------
  1507. inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD( FXMVECTOR yuv )
  1508. {
  1509. static const XMVECTORF32 Scale1 = { { { 0.0f, -0.2153f, 2.1324f, 0.0f } } };
  1510. static const XMVECTORF32 Scale2 = { { { 1.2803f, -0.3806f, 0.0f, 0.0f } } };
  1511. XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero );
  1512. XMVECTOR clr = XMVector3Transform( yuv, M );
  1513. return XMVectorSelect( yuv, clr, g_XMSelect1110 );
  1514. }
  1515. //------------------------------------------------------------------------------
  1516. inline XMVECTOR XM_CALLCONV XMColorRGBToXYZ( FXMVECTOR rgb )
  1517. {
  1518. static const XMVECTORF32 Scale0 = { { { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f } } };
  1519. static const XMVECTORF32 Scale1 = { { { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f } } };
  1520. static const XMVECTORF32 Scale2 = { { { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f } } };
  1521. static const XMVECTORF32 Scale = { { { 1.f / 0.17697f, 1.f / 0.17697f, 1.f / 0.17697f, 0.0f } } };
  1522. XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
  1523. XMVECTOR clr = XMVectorMultiply( XMVector3Transform( rgb, M ), Scale );
  1524. return XMVectorSelect( rgb, clr, g_XMSelect1110 );
  1525. }
  1526. inline XMVECTOR XM_CALLCONV XMColorXYZToRGB( FXMVECTOR xyz )
  1527. {
  1528. static const XMVECTORF32 Scale0 = { { { 2.3706743f, -0.5138850f, 0.0052982f, 0.0f } } };
  1529. static const XMVECTORF32 Scale1 = { { { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f } } };
  1530. static const XMVECTORF32 Scale2 = { { { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f } } };
  1531. static const XMVECTORF32 Scale = { { { 0.17697f, 0.17697f, 0.17697f, 0.0f } } };
  1532. XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
  1533. XMVECTOR clr = XMVector3Transform( XMVectorMultiply( xyz, Scale ), M );
  1534. return XMVectorSelect( xyz, clr, g_XMSelect1110 );
  1535. }
  1536. //------------------------------------------------------------------------------
  1537. inline XMVECTOR XM_CALLCONV XMColorXYZToSRGB( FXMVECTOR xyz )
  1538. {
  1539. static const XMVECTORF32 Scale0 = { { { 3.2406f, -0.9689f, 0.0557f, 0.0f } } };
  1540. static const XMVECTORF32 Scale1 = { { { -1.5372f, 1.8758f, -0.2040f, 0.0f } } };
  1541. static const XMVECTORF32 Scale2 = { { { -0.4986f, 0.0415f, 1.0570f, 0.0f } } };
  1542. static const XMVECTORF32 Cutoff = { { { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f } } };
  1543. static const XMVECTORF32 Exp = { { { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.0f } } };
  1544. XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
  1545. XMVECTOR lclr = XMVector3Transform( xyz, M );
  1546. XMVECTOR sel = XMVectorGreater( lclr, Cutoff );
  1547. // clr = 12.92 * lclr for lclr <= 0.0031308f
  1548. XMVECTOR smallC = XMVectorMultiply( lclr, g_XMsrgbScale );
  1549. // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055)
  1550. XMVECTOR largeC = XMVectorSubtract( XMVectorMultiply( g_XMsrgbA1, XMVectorPow( lclr, Exp ) ), g_XMsrgbA );
  1551. XMVECTOR clr = XMVectorSelect( smallC, largeC, sel );
  1552. return XMVectorSelect( xyz, clr, g_XMSelect1110 );
  1553. }
  1554. //------------------------------------------------------------------------------
  1555. inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ( FXMVECTOR srgb )
  1556. {
  1557. static const XMVECTORF32 Scale0 = { { { 0.4124f, 0.2126f, 0.0193f, 0.0f } } };
  1558. static const XMVECTORF32 Scale1 = { { { 0.3576f, 0.7152f, 0.1192f, 0.0f } } };
  1559. static const XMVECTORF32 Scale2 = { { { 0.1805f, 0.0722f, 0.9505f, 0.0f } } };
  1560. static const XMVECTORF32 Cutoff = { { { 0.04045f, 0.04045f, 0.04045f, 0.0f } } };
  1561. static const XMVECTORF32 Exp = { { { 2.4f, 2.4f, 2.4f, 1.0f } } };
  1562. XMVECTOR sel = XMVectorGreater( srgb, Cutoff );
  1563. // lclr = clr / 12.92
  1564. XMVECTOR smallC = XMVectorDivide( srgb, g_XMsrgbScale );
  1565. // lclr = pow( (clr + a) / (1+a), 2.4 )
  1566. XMVECTOR largeC = XMVectorPow( XMVectorDivide( XMVectorAdd( srgb, g_XMsrgbA ), g_XMsrgbA1 ), Exp );
  1567. XMVECTOR lclr = XMVectorSelect( smallC, largeC, sel );
  1568. XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
  1569. XMVECTOR clr = XMVector3Transform( lclr, M );
  1570. return XMVectorSelect( srgb, clr, g_XMSelect1110 );
  1571. }
  1572. //------------------------------------------------------------------------------
  1573. inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB( FXMVECTOR rgb )
  1574. {
  1575. static const XMVECTORF32 Cutoff = { { { 0.0031308f, 0.0031308f, 0.0031308f, 1.f } } };
  1576. static const XMVECTORF32 Linear = { { { 12.92f, 12.92f, 12.92f, 1.f } } };
  1577. static const XMVECTORF32 Scale = { { { 1.055f, 1.055f, 1.055f, 1.f } } };
  1578. static const XMVECTORF32 Bias = { { { 0.055f, 0.055f, 0.055f, 0.f } } };
  1579. static const XMVECTORF32 InvGamma = { { { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.f } } };
  1580. XMVECTOR V = XMVectorSaturate(rgb);
  1581. XMVECTOR V0 = XMVectorMultiply( V, Linear );
  1582. XMVECTOR V1 = XMVectorSubtract( XMVectorMultiply( Scale, XMVectorPow( V, InvGamma ) ), Bias );
  1583. XMVECTOR select = XMVectorLess( V, Cutoff );
  1584. V = XMVectorSelect( V1, V0, select );
  1585. return XMVectorSelect( rgb, V, g_XMSelect1110 );
  1586. }
  1587. //------------------------------------------------------------------------------
  1588. inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB( FXMVECTOR srgb )
  1589. {
  1590. static const XMVECTORF32 Cutoff = { { { 0.04045f, 0.04045f, 0.04045f, 1.f } } };
  1591. static const XMVECTORF32 ILinear = { { { 1.f / 12.92f, 1.f / 12.92f, 1.f / 12.92f, 1.f } } };
  1592. static const XMVECTORF32 Scale = { { { 1.f / 1.055f, 1.f / 1.055f, 1.f / 1.055f, 1.f } } };
  1593. static const XMVECTORF32 Bias = { { { 0.055f, 0.055f, 0.055f, 0.f } } };
  1594. static const XMVECTORF32 Gamma = { { { 2.4f, 2.4f, 2.4f, 1.f } } };
  1595. XMVECTOR V = XMVectorSaturate(srgb);
  1596. XMVECTOR V0 = XMVectorMultiply( V, ILinear );
  1597. XMVECTOR V1 = XMVectorPow( XMVectorMultiply( XMVectorAdd( V, Bias ), Scale ), Gamma );
  1598. XMVECTOR select = XMVectorGreater( V, Cutoff );
  1599. V = XMVectorSelect( V0, V1, select );
  1600. return XMVectorSelect( srgb, V, g_XMSelect1110 );
  1601. }
  1602. /****************************************************************************
  1603. *
  1604. * Miscellaneous
  1605. *
  1606. ****************************************************************************/
  1607. //------------------------------------------------------------------------------
// Runtime check that the host CPU supports every instruction-set
// extension the active intrinsics configuration of this library relies on.
// Returns true when the CPU is adequate (always true for the no-intrinsics
// and ARM-NEON builds).
inline bool XMVerifyCPUSupport()
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
int CPUInfo[4] = { -1 };
// Leaf 0: CPUInfo[0] reports the highest supported standard CPUID leaf
__cpuid(CPUInfo, 0);
#ifdef __AVX2__
if (CPUInfo[0] < 7)
return false;
#else
if (CPUInfo[0] < 1)
return false;
#endif
// Leaf 1: feature flags in ECX (CPUInfo[2]) and EDX (CPUInfo[3])
__cpuid(CPUInfo, 1);
#if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_)
// The compiler can emit FMA3 instructions even without explicit intrinsics use
if ((CPUInfo[2] & 0x38081001) != 0x38081001)
return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support
#elif defined(_XM_FMA3_INTRINSICS_) && defined(_XM_F16C_INTRINSICS_)
if ((CPUInfo[2] & 0x38081001) != 0x38081001)
return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support
#elif defined(_XM_FMA3_INTRINSICS_)
if ((CPUInfo[2] & 0x18081001) != 0x18081001)
return false; // No AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support
#elif defined(_XM_F16C_INTRINSICS_)
if ((CPUInfo[2] & 0x38080001) != 0x38080001)
return false; // No F16C/AVX/OSXSAVE/SSE4.1/SSE3 support
#elif defined(__AVX__) || defined(_XM_AVX_INTRINSICS_)
if ((CPUInfo[2] & 0x18080001) != 0x18080001)
return false; // No AVX/OSXSAVE/SSE4.1/SSE3 support
#elif defined(_XM_SSE4_INTRINSICS_)
if ((CPUInfo[2] & 0x80001) != 0x80001)
return false; // No SSE3/SSE4.1 support
#elif defined(_XM_SSE3_INTRINSICS_)
if (!(CPUInfo[2] & 0x1))
return false; // No SSE3 support
#endif
// The x64 processor model requires SSE2 support, but no harm in checking
if ((CPUInfo[3] & 0x6000000) != 0x6000000)
return false; // No SSE2/SSE support
#if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_)
// Leaf 7 subleaf 0: extended features; EBX bit 5 (CPUInfo[1] & 0x20) is AVX2
__cpuidex(CPUInfo, 7, 0);
if (!(CPUInfo[1] & 0x20))
return false; // No AVX2 support
#endif
return true;
#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
// ARM-NEON support is required for the Windows on ARM platform
return true;
#else
// No intrinsics path always supported
return true;
#endif
}
  1661. //------------------------------------------------------------------------------
// Computes the Fresnel reflection term for unpolarized light, per component,
// from the cosine of the incident angle and the index of refraction.
// The result is clamped to [0, 1].
inline XMVECTOR XM_CALLCONV XMFresnelTerm
(
FXMVECTOR CosIncidentAngle,
FXMVECTOR RefractionIndex
)
{
assert(!XMVector4IsInfinite(CosIncidentAngle));
// Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
// c = CosIncidentAngle
// g = sqrt(c^2 + RefractionIndex^2 - 1)
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
// g = sqrt(|c^2 + n^2 - 1|); abs guards the sqrt for degenerate inputs
XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
G = XMVectorAbs(G);
G = XMVectorSqrt(G);
// S = g + c, D = g - c
XMVECTOR S = XMVectorAdd(G, CosIncidentAngle);
XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle);
// V0 = 0.5 * (g - c)^2 / (g + c)^2
XMVECTOR V0 = XMVectorMultiply(D, D);
XMVECTOR V1 = XMVectorMultiply(S, S);
V1 = XMVectorReciprocal(V1);
V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
V0 = XMVectorMultiply(V0, V1);
// V2 = (c*(g + c) - 1)^2 / (c*(g - c) + 1)^2 + 1
XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
V2 = XMVectorMultiply(V2, V2);
V3 = XMVectorMultiply(V3, V3);
V3 = XMVectorReciprocal(V3);
V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
// Combine terms and clamp to [0, 1]
XMVECTOR Result = XMVectorMultiply(V0, V2);
Result = XMVectorSaturate(Result);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
G = _mm_sub_ps(G,g_XMOne);
vTemp = _mm_add_ps(vTemp,G);
// max((0-vTemp),vTemp) == abs(vTemp)
// The abs is needed to deal with refraction and cosine being zero
G = _mm_setzero_ps();
G = _mm_sub_ps(G,vTemp);
G = _mm_max_ps(G,vTemp);
// Last operation, the sqrt()
G = _mm_sqrt_ps(G);
// Calc G-C and G+C
XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
// Perform the term (0.5f *(g - c)^2) / (g + c)^2
XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
vTemp = _mm_mul_ps(GAddC,GAddC);
vResult = _mm_mul_ps(vResult,g_XMOneHalf);
vResult = _mm_div_ps(vResult,vTemp);
// Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
GAddC = _mm_sub_ps(GAddC,g_XMOne);
GSubC = _mm_add_ps(GSubC,g_XMOne);
GAddC = _mm_mul_ps(GAddC,GAddC);
GSubC = _mm_mul_ps(GSubC,GSubC);
GAddC = _mm_div_ps(GAddC,GSubC);
GAddC = _mm_add_ps(GAddC,g_XMOne);
// Multiply the two term parts
vResult = _mm_mul_ps(vResult,GAddC);
// Clamp to 0.0 - 1.0f
vResult = _mm_max_ps(vResult,g_XMZero);
vResult = _mm_min_ps(vResult,g_XMOne);
return vResult;
#endif
}
  1731. //------------------------------------------------------------------------------
  1732. inline bool XMScalarNearEqual
  1733. (
  1734. float S1,
  1735. float S2,
  1736. float Epsilon
  1737. )
  1738. {
  1739. float Delta = S1 - S2;
  1740. return (fabsf(Delta) <= Epsilon);
  1741. }
  1742. //------------------------------------------------------------------------------
  1743. // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
  1744. inline float XMScalarModAngle
  1745. (
  1746. float Angle
  1747. )
  1748. {
  1749. // Note: The modulo is performed with unsigned math only to work
  1750. // around a precision error on numbers that are close to PI
  1751. // Normalize the range from 0.0f to XM_2PI
  1752. Angle = Angle + XM_PI;
  1753. // Perform the modulo, unsigned
  1754. float fTemp = fabsf(Angle);
  1755. fTemp = fTemp - (XM_2PI * (float)((int32_t)(fTemp/XM_2PI)));
  1756. // Restore the number to the range of -XM_PI to XM_PI-epsilon
  1757. fTemp = fTemp - XM_PI;
  1758. // If the modulo'd value was negative, restore negation
  1759. if (Angle<0.0f) {
  1760. fTemp = -fTemp;
  1761. }
  1762. return fTemp;
  1763. }
  1764. //------------------------------------------------------------------------------
  1765. inline float XMScalarSin
  1766. (
  1767. float Value
  1768. )
  1769. {
  1770. // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
  1771. float quotient = XM_1DIV2PI*Value;
  1772. if (Value >= 0.0f)
  1773. {
  1774. quotient = (float)((int)(quotient + 0.5f));
  1775. }
  1776. else
  1777. {
  1778. quotient = (float)((int)(quotient - 0.5f));
  1779. }
  1780. float y = Value - XM_2PI*quotient;
  1781. // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
  1782. if (y > XM_PIDIV2)
  1783. {
  1784. y = XM_PI - y;
  1785. }
  1786. else if (y < -XM_PIDIV2)
  1787. {
  1788. y = -XM_PI - y;
  1789. }
  1790. // 11-degree minimax approximation
  1791. float y2 = y * y;
  1792. return ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y;
  1793. }
  1794. //------------------------------------------------------------------------------
  1795. inline float XMScalarSinEst
  1796. (
  1797. float Value
  1798. )
  1799. {
  1800. // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
  1801. float quotient = XM_1DIV2PI*Value;
  1802. if (Value >= 0.0f)
  1803. {
  1804. quotient = (float)((int)(quotient + 0.5f));
  1805. }
  1806. else
  1807. {
  1808. quotient = (float)((int)(quotient - 0.5f));
  1809. }
  1810. float y = Value - XM_2PI*quotient;
  1811. // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
  1812. if (y > XM_PIDIV2)
  1813. {
  1814. y = XM_PI - y;
  1815. }
  1816. else if (y < -XM_PIDIV2)
  1817. {
  1818. y = -XM_PI - y;
  1819. }
  1820. // 7-degree minimax approximation
  1821. float y2 = y * y;
  1822. return ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y;
  1823. }
  1824. //------------------------------------------------------------------------------
  1825. inline float XMScalarCos
  1826. (
  1827. float Value
  1828. )
  1829. {
  1830. // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
  1831. float quotient = XM_1DIV2PI*Value;
  1832. if (Value >= 0.0f)
  1833. {
  1834. quotient = (float)((int)(quotient + 0.5f));
  1835. }
  1836. else
  1837. {
  1838. quotient = (float)((int)(quotient - 0.5f));
  1839. }
  1840. float y = Value - XM_2PI*quotient;
  1841. // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
  1842. float sign;
  1843. if (y > XM_PIDIV2)
  1844. {
  1845. y = XM_PI - y;
  1846. sign = -1.0f;
  1847. }
  1848. else if (y < -XM_PIDIV2)
  1849. {
  1850. y = -XM_PI - y;
  1851. sign = -1.0f;
  1852. }
  1853. else
  1854. {
  1855. sign = +1.0f;
  1856. }
  1857. // 10-degree minimax approximation
  1858. float y2 = y*y;
  1859. float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f;
  1860. return sign*p;
  1861. }
  1862. //------------------------------------------------------------------------------
  1863. inline float XMScalarCosEst
  1864. (
  1865. float Value
  1866. )
  1867. {
  1868. // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
  1869. float quotient = XM_1DIV2PI*Value;
  1870. if (Value >= 0.0f)
  1871. {
  1872. quotient = (float)((int)(quotient + 0.5f));
  1873. }
  1874. else
  1875. {
  1876. quotient = (float)((int)(quotient - 0.5f));
  1877. }
  1878. float y = Value - XM_2PI*quotient;
  1879. // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
  1880. float sign;
  1881. if (y > XM_PIDIV2)
  1882. {
  1883. y = XM_PI - y;
  1884. sign = -1.0f;
  1885. }
  1886. else if (y < -XM_PIDIV2)
  1887. {
  1888. y = -XM_PI - y;
  1889. sign = -1.0f;
  1890. }
  1891. else
  1892. {
  1893. sign = +1.0f;
  1894. }
  1895. // 6-degree minimax approximation
  1896. float y2 = y * y;
  1897. float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f;
  1898. return sign*p;
  1899. }
  1900. //------------------------------------------------------------------------------
// Computes sin(Value) and cos(Value) in a single pass, sharing the range
// reduction between the two. Results are written through pSin and pCos
// (both pointers must be non-null).
_Use_decl_annotations_
inline void XMScalarSinCos
(
float* pSin,
float* pCos,
float Value
)
{
assert(pSin);
assert(pCos);
// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
float quotient = XM_1DIV2PI*Value;
if (Value >= 0.0f)
{
quotient = (float)((int)(quotient + 0.5f));
}
else
{
quotient = (float)((int)(quotient - 0.5f));
}
float y = Value - XM_2PI*quotient;
// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
// The reflection leaves sin unchanged but flips the sign of cos.
float sign;
if (y > XM_PIDIV2)
{
y = XM_PI - y;
sign = -1.0f;
}
else if (y < -XM_PIDIV2)
{
y = -XM_PI - y;
sign = -1.0f;
}
else
{
sign = +1.0f;
}
float y2 = y * y;
// 11-degree minimax approximation
*pSin = ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y;
// 10-degree minimax approximation
float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f;
*pCos = sign*p;
}
  1945. //------------------------------------------------------------------------------
// Estimates sin(Value) and cos(Value) in a single pass using the faster,
// lower-degree polynomials. Results are written through pSin and pCos
// (both pointers must be non-null).
_Use_decl_annotations_
inline void XMScalarSinCosEst
(
float* pSin,
float* pCos,
float Value
)
{
assert(pSin);
assert(pCos);
// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
float quotient = XM_1DIV2PI*Value;
if (Value >= 0.0f)
{
quotient = (float)((int)(quotient + 0.5f));
}
else
{
quotient = (float)((int)(quotient - 0.5f));
}
float y = Value - XM_2PI*quotient;
// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
// The reflection leaves sin unchanged but flips the sign of cos.
float sign;
if (y > XM_PIDIV2)
{
y = XM_PI - y;
sign = -1.0f;
}
else if (y < -XM_PIDIV2)
{
y = -XM_PI - y;
sign = -1.0f;
}
else
{
sign = +1.0f;
}
float y2 = y * y;
// 7-degree minimax approximation
*pSin = ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y;
// 6-degree minimax approximation
float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f;
*pCos = sign*p;
}
  1990. //------------------------------------------------------------------------------
  1991. inline float XMScalarASin
  1992. (
  1993. float Value
  1994. )
  1995. {
  1996. // Clamp input to [-1,1].
  1997. bool nonnegative = (Value >= 0.0f);
  1998. float x = fabsf(Value);
  1999. float omx = 1.0f - x;
  2000. if (omx < 0.0f)
  2001. {
  2002. omx = 0.0f;
  2003. }
  2004. float root = sqrtf(omx);
  2005. // 7-degree minimax approximation
  2006. float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f;
  2007. result *= root; // acos(|x|)
  2008. // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
  2009. return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
  2010. }
  2011. //------------------------------------------------------------------------------
  2012. inline float XMScalarASinEst
  2013. (
  2014. float Value
  2015. )
  2016. {
  2017. // Clamp input to [-1,1].
  2018. bool nonnegative = (Value >= 0.0f);
  2019. float x = fabsf(Value);
  2020. float omx = 1.0f - x;
  2021. if (omx < 0.0f)
  2022. {
  2023. omx = 0.0f;
  2024. }
  2025. float root = sqrtf(omx);
  2026. // 3-degree minimax approximation
  2027. float result = ((-0.0187293f*x+0.0742610f)*x-0.2121144f)*x+1.5707288f;
  2028. result *= root; // acos(|x|)
  2029. // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
  2030. return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
  2031. }
  2032. //------------------------------------------------------------------------------
  2033. inline float XMScalarACos
  2034. (
  2035. float Value
  2036. )
  2037. {
  2038. // Clamp input to [-1,1].
  2039. bool nonnegative = (Value >= 0.0f);
  2040. float x = fabsf(Value);
  2041. float omx = 1.0f - x;
  2042. if (omx < 0.0f)
  2043. {
  2044. omx = 0.0f;
  2045. }
  2046. float root = sqrtf(omx);
  2047. // 7-degree minimax approximation
  2048. float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f;
  2049. result *= root;
  2050. // acos(x) = pi - acos(-x) when x < 0
  2051. return (nonnegative ? result : XM_PI - result);
  2052. }
  2053. //------------------------------------------------------------------------------
  2054. inline float XMScalarACosEst
  2055. (
  2056. float Value
  2057. )
  2058. {
  2059. // Clamp input to [-1,1].
  2060. bool nonnegative = (Value >= 0.0f);
  2061. float x = fabsf(Value);
  2062. float omx = 1.0f - x;
  2063. if (omx < 0.0f)
  2064. {
  2065. omx = 0.0f;
  2066. }
  2067. float root = sqrtf(omx);
  2068. // 3-degree minimax approximation
  2069. float result = ( ( -0.0187293f * x + 0.0742610f ) * x - 0.2121144f ) * x + 1.5707288f;
  2070. result *= root;
  2071. // acos(x) = pi - acos(-x) when x < 0
  2072. return (nonnegative ? result : XM_PI - result);
  2073. }