DirectXMathMatrix.inl 106 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542
055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943
2944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382
  1. //-------------------------------------------------------------------------------------
  2. // DirectXMathMatrix.inl -- SIMD C++ Math library
  3. //
  4. // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  5. // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  6. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  7. // PARTICULAR PURPOSE.
  8. //
  9. // Copyright (c) Microsoft Corporation. All rights reserved.
  10. //
  11. // http://go.microsoft.com/fwlink/?LinkID=615560
  12. //-------------------------------------------------------------------------------------
  13. #pragma once
  14. /****************************************************************************
  15. *
  16. * Matrix
  17. *
  18. ****************************************************************************/
  19. //------------------------------------------------------------------------------
  20. // Comparison operations
  21. //------------------------------------------------------------------------------
  22. //------------------------------------------------------------------------------
  23. // Return true if any entry in the matrix is NaN
  24. inline bool XM_CALLCONV XMMatrixIsNaN
  25. (
  26. FXMMATRIX M
  27. )
  28. {
  29. #if defined(_XM_NO_INTRINSICS_)
  30. size_t i = 16;
  31. const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
  32. do {
  33. // Fetch value into integer unit
  34. uint32_t uTest = pWork[0];
  35. // Remove sign
  36. uTest &= 0x7FFFFFFFU;
  37. // NaN is 0x7F800001 through 0x7FFFFFFF inclusive
  38. uTest -= 0x7F800001U;
  39. if (uTest<0x007FFFFFU) {
  40. break; // NaN found
  41. }
  42. ++pWork; // Next entry
  43. } while (--i);
  44. return (i!=0); // i == 0 if nothing matched
  45. #elif defined(_XM_ARM_NEON_INTRINSICS_)
  46. // Load in registers
  47. XMVECTOR vX = M.r[0];
  48. XMVECTOR vY = M.r[1];
  49. XMVECTOR vZ = M.r[2];
  50. XMVECTOR vW = M.r[3];
  51. // Test themselves to check for NaN
  52. vX = vmvnq_u32(vceqq_f32(vX, vX));
  53. vY = vmvnq_u32(vceqq_f32(vY, vY));
  54. vZ = vmvnq_u32(vceqq_f32(vZ, vZ));
  55. vW = vmvnq_u32(vceqq_f32(vW, vW));
  56. // Or all the results
  57. vX = vorrq_u32(vX,vZ);
  58. vY = vorrq_u32(vY,vW);
  59. vX = vorrq_u32(vX,vY);
  60. // If any tested true, return true
  61. int8x8x2_t vTemp = vzip_u8(vget_low_u8(vX), vget_high_u8(vX));
  62. vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
  63. uint32_t r = vget_lane_u32(vTemp.val[1], 1);
  64. return (r != 0);
  65. #elif defined(_XM_SSE_INTRINSICS_)
  66. // Load in registers
  67. XMVECTOR vX = M.r[0];
  68. XMVECTOR vY = M.r[1];
  69. XMVECTOR vZ = M.r[2];
  70. XMVECTOR vW = M.r[3];
  71. // Test themselves to check for NaN
  72. vX = _mm_cmpneq_ps(vX,vX);
  73. vY = _mm_cmpneq_ps(vY,vY);
  74. vZ = _mm_cmpneq_ps(vZ,vZ);
  75. vW = _mm_cmpneq_ps(vW,vW);
  76. // Or all the results
  77. vX = _mm_or_ps(vX,vZ);
  78. vY = _mm_or_ps(vY,vW);
  79. vX = _mm_or_ps(vX,vY);
  80. // If any tested true, return true
  81. return (_mm_movemask_ps(vX)!=0);
  82. #else
  83. #endif
  84. }
  85. //------------------------------------------------------------------------------
  86. // Return true if any entry in the matrix is +/-INF
  87. inline bool XM_CALLCONV XMMatrixIsInfinite
  88. (
  89. FXMMATRIX M
  90. )
  91. {
  92. #if defined(_XM_NO_INTRINSICS_)
  93. size_t i = 16;
  94. const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
  95. do {
  96. // Fetch value into integer unit
  97. uint32_t uTest = pWork[0];
  98. // Remove sign
  99. uTest &= 0x7FFFFFFFU;
  100. // INF is 0x7F800000
  101. if (uTest==0x7F800000U) {
  102. break; // INF found
  103. }
  104. ++pWork; // Next entry
  105. } while (--i);
  106. return (i!=0); // i == 0 if nothing matched
  107. #elif defined(_XM_ARM_NEON_INTRINSICS_)
  108. // Mask off the sign bits
  109. XMVECTOR vTemp1 = vandq_u32(M.r[0],g_XMAbsMask);
  110. XMVECTOR vTemp2 = vandq_u32(M.r[1],g_XMAbsMask);
  111. XMVECTOR vTemp3 = vandq_u32(M.r[2],g_XMAbsMask);
  112. XMVECTOR vTemp4 = vandq_u32(M.r[3],g_XMAbsMask);
  113. // Compare to infinity
  114. vTemp1 = vceqq_f32(vTemp1,g_XMInfinity);
  115. vTemp2 = vceqq_f32(vTemp2,g_XMInfinity);
  116. vTemp3 = vceqq_f32(vTemp3,g_XMInfinity);
  117. vTemp4 = vceqq_f32(vTemp4,g_XMInfinity);
  118. // Or the answers together
  119. vTemp1 = vorrq_u32(vTemp1,vTemp2);
  120. vTemp3 = vorrq_u32(vTemp3,vTemp4);
  121. vTemp1 = vorrq_u32(vTemp1,vTemp3);
  122. // If any are infinity, the signs are true.
  123. int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
  124. vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
  125. uint32_t r = vget_lane_u32(vTemp.val[1], 1);
  126. return (r != 0);
  127. #elif defined(_XM_SSE_INTRINSICS_)
  128. // Mask off the sign bits
  129. XMVECTOR vTemp1 = _mm_and_ps(M.r[0],g_XMAbsMask);
  130. XMVECTOR vTemp2 = _mm_and_ps(M.r[1],g_XMAbsMask);
  131. XMVECTOR vTemp3 = _mm_and_ps(M.r[2],g_XMAbsMask);
  132. XMVECTOR vTemp4 = _mm_and_ps(M.r[3],g_XMAbsMask);
  133. // Compare to infinity
  134. vTemp1 = _mm_cmpeq_ps(vTemp1,g_XMInfinity);
  135. vTemp2 = _mm_cmpeq_ps(vTemp2,g_XMInfinity);
  136. vTemp3 = _mm_cmpeq_ps(vTemp3,g_XMInfinity);
  137. vTemp4 = _mm_cmpeq_ps(vTemp4,g_XMInfinity);
  138. // Or the answers together
  139. vTemp1 = _mm_or_ps(vTemp1,vTemp2);
  140. vTemp3 = _mm_or_ps(vTemp3,vTemp4);
  141. vTemp1 = _mm_or_ps(vTemp1,vTemp3);
  142. // If any are infinity, the signs are true.
  143. return (_mm_movemask_ps(vTemp1)!=0);
  144. #endif
  145. }
  146. //------------------------------------------------------------------------------
  147. // Return true if the XMMatrix is equal to identity
  148. inline bool XM_CALLCONV XMMatrixIsIdentity
  149. (
  150. FXMMATRIX M
  151. )
  152. {
  153. #if defined(_XM_NO_INTRINSICS_)
  154. // Use the integer pipeline to reduce branching to a minimum
  155. const uint32_t *pWork = (const uint32_t*)(&M.m[0][0]);
  156. // Convert 1.0f to zero and or them together
  157. uint32_t uOne = pWork[0]^0x3F800000U;
  158. // Or all the 0.0f entries together
  159. uint32_t uZero = pWork[1];
  160. uZero |= pWork[2];
  161. uZero |= pWork[3];
  162. // 2nd row
  163. uZero |= pWork[4];
  164. uOne |= pWork[5]^0x3F800000U;
  165. uZero |= pWork[6];
  166. uZero |= pWork[7];
  167. // 3rd row
  168. uZero |= pWork[8];
  169. uZero |= pWork[9];
  170. uOne |= pWork[10]^0x3F800000U;
  171. uZero |= pWork[11];
  172. // 4th row
  173. uZero |= pWork[12];
  174. uZero |= pWork[13];
  175. uZero |= pWork[14];
  176. uOne |= pWork[15]^0x3F800000U;
  177. // If all zero entries are zero, the uZero==0
  178. uZero &= 0x7FFFFFFF; // Allow -0.0f
  179. // If all 1.0f entries are 1.0f, then uOne==0
  180. uOne |= uZero;
  181. return (uOne==0);
  182. #elif defined(_XM_ARM_NEON_INTRINSICS_)
  183. XMVECTOR vTemp1 = vceqq_f32(M.r[0],g_XMIdentityR0);
  184. XMVECTOR vTemp2 = vceqq_f32(M.r[1],g_XMIdentityR1);
  185. XMVECTOR vTemp3 = vceqq_f32(M.r[2],g_XMIdentityR2);
  186. XMVECTOR vTemp4 = vceqq_f32(M.r[3],g_XMIdentityR3);
  187. vTemp1 = vandq_u32(vTemp1,vTemp2);
  188. vTemp3 = vandq_u32(vTemp3,vTemp4);
  189. vTemp1 = vandq_u32(vTemp1,vTemp3);
  190. int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
  191. vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
  192. uint32_t r = vget_lane_u32(vTemp.val[1], 1);
  193. return ( r == 0xFFFFFFFFU );
  194. #elif defined(_XM_SSE_INTRINSICS_)
  195. XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0],g_XMIdentityR0);
  196. XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1],g_XMIdentityR1);
  197. XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2],g_XMIdentityR2);
  198. XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3],g_XMIdentityR3);
  199. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  200. vTemp3 = _mm_and_ps(vTemp3,vTemp4);
  201. vTemp1 = _mm_and_ps(vTemp1,vTemp3);
  202. return (_mm_movemask_ps(vTemp1)==0x0f);
  203. #endif
  204. }
  205. //------------------------------------------------------------------------------
  206. // Computation operations
  207. //------------------------------------------------------------------------------
  208. //------------------------------------------------------------------------------
  209. // Perform a 4x4 matrix multiply by a 4x4 matrix
  210. inline XMMATRIX XM_CALLCONV XMMatrixMultiply
  211. (
  212. FXMMATRIX M1,
  213. CXMMATRIX M2
  214. )
  215. {
  216. #if defined(_XM_NO_INTRINSICS_)
  217. XMMATRIX mResult;
  218. // Cache the invariants in registers
  219. float x = M1.m[0][0];
  220. float y = M1.m[0][1];
  221. float z = M1.m[0][2];
  222. float w = M1.m[0][3];
  223. // Perform the operation on the first row
  224. mResult.m[0][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
  225. mResult.m[0][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
  226. mResult.m[0][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
  227. mResult.m[0][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
  228. // Repeat for all the other rows
  229. x = M1.m[1][0];
  230. y = M1.m[1][1];
  231. z = M1.m[1][2];
  232. w = M1.m[1][3];
  233. mResult.m[1][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
  234. mResult.m[1][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
  235. mResult.m[1][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
  236. mResult.m[1][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
  237. x = M1.m[2][0];
  238. y = M1.m[2][1];
  239. z = M1.m[2][2];
  240. w = M1.m[2][3];
  241. mResult.m[2][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
  242. mResult.m[2][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
  243. mResult.m[2][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
  244. mResult.m[2][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
  245. x = M1.m[3][0];
  246. y = M1.m[3][1];
  247. z = M1.m[3][2];
  248. w = M1.m[3][3];
  249. mResult.m[3][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
  250. mResult.m[3][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
  251. mResult.m[3][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
  252. mResult.m[3][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
  253. return mResult;
  254. #elif defined(_XM_ARM_NEON_INTRINSICS_)
  255. XMMATRIX mResult;
  256. float32x2_t VL = vget_low_f32( M1.r[0] );
  257. float32x2_t VH = vget_high_f32( M1.r[0] );
  258. // Perform the operation on the first row
  259. XMVECTOR vX = vmulq_lane_f32(M2.r[0], VL, 0);
  260. XMVECTOR vY = vmulq_lane_f32(M2.r[1], VL, 1);
  261. XMVECTOR vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
  262. XMVECTOR vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
  263. mResult.r[0] = vaddq_f32( vZ, vW );
  264. // Repeat for the other 3 rows
  265. VL = vget_low_f32( M1.r[1] );
  266. VH = vget_high_f32( M1.r[1] );
  267. vX = vmulq_lane_f32(M2.r[0], VL, 0);
  268. vY = vmulq_lane_f32(M2.r[1], VL, 1);
  269. vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
  270. vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
  271. mResult.r[1] = vaddq_f32( vZ, vW );
  272. VL = vget_low_f32( M1.r[2] );
  273. VH = vget_high_f32( M1.r[2] );
  274. vX = vmulq_lane_f32(M2.r[0], VL, 0);
  275. vY = vmulq_lane_f32(M2.r[1], VL, 1);
  276. vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
  277. vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
  278. mResult.r[2] = vaddq_f32( vZ, vW );
  279. VL = vget_low_f32( M1.r[3] );
  280. VH = vget_high_f32( M1.r[3] );
  281. vX = vmulq_lane_f32(M2.r[0], VL, 0);
  282. vY = vmulq_lane_f32(M2.r[1], VL, 1);
  283. vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
  284. vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
  285. mResult.r[3] = vaddq_f32( vZ, vW );
  286. return mResult;
  287. #elif defined(_XM_SSE_INTRINSICS_)
  288. XMMATRIX mResult;
  289. // Splat the component X,Y,Z then W
  290. #if defined(_XM_AVX_INTRINSICS_)
  291. XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
  292. XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
  293. XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
  294. XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 3);
  295. #else
  296. // Use vW to hold the original row
  297. XMVECTOR vW = M1.r[0];
  298. XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
  299. XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
  300. XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
  301. vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
  302. #endif
  303. // Perform the operation on the first row
  304. vX = _mm_mul_ps(vX,M2.r[0]);
  305. vY = _mm_mul_ps(vY,M2.r[1]);
  306. vZ = _mm_mul_ps(vZ,M2.r[2]);
  307. vW = _mm_mul_ps(vW,M2.r[3]);
  308. // Perform a binary add to reduce cumulative errors
  309. vX = _mm_add_ps(vX,vZ);
  310. vY = _mm_add_ps(vY,vW);
  311. vX = _mm_add_ps(vX,vY);
  312. mResult.r[0] = vX;
  313. // Repeat for the other 3 rows
  314. #if defined(_XM_AVX_INTRINSICS_)
  315. vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
  316. vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
  317. vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
  318. vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 3);
  319. #else
  320. vW = M1.r[1];
  321. vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
  322. vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
  323. vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
  324. vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
  325. #endif
  326. vX = _mm_mul_ps(vX,M2.r[0]);
  327. vY = _mm_mul_ps(vY,M2.r[1]);
  328. vZ = _mm_mul_ps(vZ,M2.r[2]);
  329. vW = _mm_mul_ps(vW,M2.r[3]);
  330. vX = _mm_add_ps(vX,vZ);
  331. vY = _mm_add_ps(vY,vW);
  332. vX = _mm_add_ps(vX,vY);
  333. mResult.r[1] = vX;
  334. #if defined(_XM_AVX_INTRINSICS_)
  335. vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
  336. vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
  337. vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
  338. vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 3);
  339. #else
  340. vW = M1.r[2];
  341. vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
  342. vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
  343. vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
  344. vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
  345. #endif
  346. vX = _mm_mul_ps(vX,M2.r[0]);
  347. vY = _mm_mul_ps(vY,M2.r[1]);
  348. vZ = _mm_mul_ps(vZ,M2.r[2]);
  349. vW = _mm_mul_ps(vW,M2.r[3]);
  350. vX = _mm_add_ps(vX,vZ);
  351. vY = _mm_add_ps(vY,vW);
  352. vX = _mm_add_ps(vX,vY);
  353. mResult.r[2] = vX;
  354. #if defined(_XM_AVX_INTRINSICS_)
  355. vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
  356. vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
  357. vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
  358. vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 3);
  359. #else
  360. vW = M1.r[3];
  361. vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
  362. vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
  363. vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
  364. vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
  365. #endif
  366. vX = _mm_mul_ps(vX,M2.r[0]);
  367. vY = _mm_mul_ps(vY,M2.r[1]);
  368. vZ = _mm_mul_ps(vZ,M2.r[2]);
  369. vW = _mm_mul_ps(vW,M2.r[3]);
  370. vX = _mm_add_ps(vX,vZ);
  371. vY = _mm_add_ps(vY,vW);
  372. vX = _mm_add_ps(vX,vY);
  373. mResult.r[3] = vX;
  374. return mResult;
  375. #endif
  376. }
  377. //------------------------------------------------------------------------------
  378. inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
  379. (
  380. FXMMATRIX M1,
  381. CXMMATRIX M2
  382. )
  383. {
  384. #if defined(_XM_NO_INTRINSICS_)
  385. XMMATRIX mResult;
  386. // Cache the invariants in registers
  387. float x = M2.m[0][0];
  388. float y = M2.m[1][0];
  389. float z = M2.m[2][0];
  390. float w = M2.m[3][0];
  391. // Perform the operation on the first row
  392. mResult.m[0][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
  393. mResult.m[0][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
  394. mResult.m[0][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
  395. mResult.m[0][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
  396. // Repeat for all the other rows
  397. x = M2.m[0][1];
  398. y = M2.m[1][1];
  399. z = M2.m[2][1];
  400. w = M2.m[3][1];
  401. mResult.m[1][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
  402. mResult.m[1][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
  403. mResult.m[1][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
  404. mResult.m[1][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
  405. x = M2.m[0][2];
  406. y = M2.m[1][2];
  407. z = M2.m[2][2];
  408. w = M2.m[3][2];
  409. mResult.m[2][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
  410. mResult.m[2][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
  411. mResult.m[2][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
  412. mResult.m[2][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
  413. x = M2.m[0][3];
  414. y = M2.m[1][3];
  415. z = M2.m[2][3];
  416. w = M2.m[3][3];
  417. mResult.m[3][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
  418. mResult.m[3][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
  419. mResult.m[3][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
  420. mResult.m[3][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
  421. return mResult;
  422. #elif defined(_XM_ARM_NEON_INTRINSICS_)
  423. float32x2_t VL = vget_low_f32( M1.r[0] );
  424. float32x2_t VH = vget_high_f32( M1.r[0] );
  425. // Perform the operation on the first row
  426. XMVECTOR vX = vmulq_lane_f32(M2.r[0], VL, 0);
  427. XMVECTOR vY = vmulq_lane_f32(M2.r[1], VL, 1);
  428. XMVECTOR vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
  429. XMVECTOR vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
  430. float32x4_t r0 = vaddq_f32( vZ, vW );
  431. // Repeat for the other 3 rows
  432. VL = vget_low_f32( M1.r[1] );
  433. VH = vget_high_f32( M1.r[1] );
  434. vX = vmulq_lane_f32(M2.r[0], VL, 0);
  435. vY = vmulq_lane_f32(M2.r[1], VL, 1);
  436. vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
  437. vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
  438. float32x4_t r1 = vaddq_f32( vZ, vW );
  439. VL = vget_low_f32( M1.r[2] );
  440. VH = vget_high_f32( M1.r[2] );
  441. vX = vmulq_lane_f32(M2.r[0], VL, 0);
  442. vY = vmulq_lane_f32(M2.r[1], VL, 1);
  443. vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
  444. vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
  445. float32x4_t r2 = vaddq_f32( vZ, vW );
  446. VL = vget_low_f32( M1.r[3] );
  447. VH = vget_high_f32( M1.r[3] );
  448. vX = vmulq_lane_f32(M2.r[0], VL, 0);
  449. vY = vmulq_lane_f32(M2.r[1], VL, 1);
  450. vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
  451. vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
  452. float32x4_t r3 = vaddq_f32( vZ, vW );
  453. // Transpose result
  454. float32x4x2_t P0 = vzipq_f32( r0, r2 );
  455. float32x4x2_t P1 = vzipq_f32( r1, r3 );
  456. float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
  457. float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
  458. XMMATRIX mResult;
  459. mResult.r[0] = T0.val[0];
  460. mResult.r[1] = T0.val[1];
  461. mResult.r[2] = T1.val[0];
  462. mResult.r[3] = T1.val[1];
  463. return mResult;
  464. #elif defined(_XM_SSE_INTRINSICS_)
  465. // Splat the component X,Y,Z then W
  466. #if defined(_XM_AVX_INTRINSICS_)
  467. XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
  468. XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
  469. XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
  470. XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 3);
  471. #else
  472. // Use vW to hold the original row
  473. XMVECTOR vW = M1.r[0];
  474. XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
  475. XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
  476. XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
  477. vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
  478. #endif
  479. // Perform the operation on the first row
  480. vX = _mm_mul_ps(vX,M2.r[0]);
  481. vY = _mm_mul_ps(vY,M2.r[1]);
  482. vZ = _mm_mul_ps(vZ,M2.r[2]);
  483. vW = _mm_mul_ps(vW,M2.r[3]);
  484. // Perform a binary add to reduce cumulative errors
  485. vX = _mm_add_ps(vX,vZ);
  486. vY = _mm_add_ps(vY,vW);
  487. vX = _mm_add_ps(vX,vY);
  488. XMVECTOR r0 = vX;
  489. // Repeat for the other 3 rows
  490. #if defined(_XM_AVX_INTRINSICS_)
  491. vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
  492. vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
  493. vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
  494. vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 3);
  495. #else
  496. vW = M1.r[1];
  497. vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
  498. vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
  499. vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
  500. vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
  501. #endif
  502. vX = _mm_mul_ps(vX,M2.r[0]);
  503. vY = _mm_mul_ps(vY,M2.r[1]);
  504. vZ = _mm_mul_ps(vZ,M2.r[2]);
  505. vW = _mm_mul_ps(vW,M2.r[3]);
  506. vX = _mm_add_ps(vX,vZ);
  507. vY = _mm_add_ps(vY,vW);
  508. vX = _mm_add_ps(vX,vY);
  509. XMVECTOR r1 = vX;
  510. #if defined(_XM_AVX_INTRINSICS_)
  511. vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
  512. vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
  513. vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
  514. vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 3);
  515. #else
  516. vW = M1.r[2];
  517. vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
  518. vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
  519. vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
  520. vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
  521. #endif
  522. vX = _mm_mul_ps(vX,M2.r[0]);
  523. vY = _mm_mul_ps(vY,M2.r[1]);
  524. vZ = _mm_mul_ps(vZ,M2.r[2]);
  525. vW = _mm_mul_ps(vW,M2.r[3]);
  526. vX = _mm_add_ps(vX,vZ);
  527. vY = _mm_add_ps(vY,vW);
  528. vX = _mm_add_ps(vX,vY);
  529. XMVECTOR r2 = vX;
  530. #if defined(_XM_AVX_INTRINSICS_)
  531. vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
  532. vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
  533. vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
  534. vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 3);
  535. #else
  536. vW = M1.r[3];
  537. vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
  538. vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
  539. vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
  540. vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
  541. #endif
  542. vX = _mm_mul_ps(vX,M2.r[0]);
  543. vY = _mm_mul_ps(vY,M2.r[1]);
  544. vZ = _mm_mul_ps(vZ,M2.r[2]);
  545. vW = _mm_mul_ps(vW,M2.r[3]);
  546. vX = _mm_add_ps(vX,vZ);
  547. vY = _mm_add_ps(vY,vW);
  548. vX = _mm_add_ps(vX,vY);
  549. XMVECTOR r3 = vX;
  550. // x.x,x.y,y.x,y.y
  551. XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
  552. // x.z,x.w,y.z,y.w
  553. XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
  554. // z.x,z.y,w.x,w.y
  555. XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
  556. // z.z,z.w,w.z,w.w
  557. XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
  558. XMMATRIX mResult;
  559. // x.x,y.x,z.x,w.x
  560. mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
  561. // x.y,y.y,z.y,w.y
  562. mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
  563. // x.z,y.z,z.z,w.z
  564. mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
  565. // x.w,y.w,z.w,w.w
  566. mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
  567. return mResult;
  568. #endif
  569. }
  570. //------------------------------------------------------------------------------
  571. inline XMMATRIX XM_CALLCONV XMMatrixTranspose
  572. (
  573. FXMMATRIX M
  574. )
  575. {
  576. #if defined(_XM_NO_INTRINSICS_)
  577. // Original matrix:
  578. //
  579. // m00m01m02m03
  580. // m10m11m12m13
  581. // m20m21m22m23
  582. // m30m31m32m33
  583. XMMATRIX P;
  584. P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); // m00m20m01m21
  585. P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); // m10m30m11m31
  586. P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); // m02m22m03m23
  587. P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]); // m12m32m13m33
  588. XMMATRIX MT;
  589. MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00m10m20m30
  590. MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01m11m21m31
  591. MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02m12m22m32
  592. MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03m13m23m33
  593. return MT;
  594. #elif defined(_XM_ARM_NEON_INTRINSICS_)
  595. float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
  596. float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
  597. float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
  598. float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
  599. XMMATRIX mResult;
  600. mResult.r[0] = T0.val[0];
  601. mResult.r[1] = T0.val[1];
  602. mResult.r[2] = T1.val[0];
  603. mResult.r[3] = T1.val[1];
  604. return mResult;
  605. #elif defined(_XM_SSE_INTRINSICS_)
  606. // x.x,x.y,y.x,y.y
  607. XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(1,0,1,0));
  608. // x.z,x.w,y.z,y.w
  609. XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(3,2,3,2));
  610. // z.x,z.y,w.x,w.y
  611. XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(1,0,1,0));
  612. // z.z,z.w,w.z,w.w
  613. XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(3,2,3,2));
  614. XMMATRIX mResult;
  615. // x.x,y.x,z.x,w.x
  616. mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
  617. // x.y,y.y,z.y,w.y
  618. mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
  619. // x.z,y.z,z.z,w.z
  620. mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
  621. // x.w,y.w,z.w,w.w
  622. mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
  623. return mResult;
  624. #endif
  625. }
  626. //------------------------------------------------------------------------------
  627. // Return the inverse and the determinant of a 4x4 matrix
  628. _Use_decl_annotations_
  629. inline XMMATRIX XM_CALLCONV XMMatrixInverse
  630. (
  631. XMVECTOR* pDeterminant,
  632. FXMMATRIX M
  633. )
  634. {
  635. #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
  636. XMMATRIX MT = XMMatrixTranspose(M);
  637. XMVECTOR V0[4], V1[4];
  638. V0[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[2]);
  639. V1[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[3]);
  640. V0[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[0]);
  641. V1[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[1]);
  642. V0[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[2], MT.r[0]);
  643. V1[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[3], MT.r[1]);
  644. XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]);
  645. XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]);
  646. XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]);
  647. V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[2]);
  648. V1[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[3]);
  649. V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[0]);
  650. V1[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[1]);
  651. V0[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[2], MT.r[0]);
  652. V1[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[3], MT.r[1]);
  653. D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0);
  654. D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1);
  655. D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2);
  656. V0[0] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[1]);
  657. V1[0] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D0, D2);
  658. V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[0]);
  659. V1[1] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D0, D2);
  660. V0[2] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[3]);
  661. V1[2] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D1, D2);
  662. V0[3] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[2]);
  663. V1[3] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D1, D2);
  664. XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]);
  665. XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]);
  666. XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]);
  667. XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]);
  668. V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[1]);
  669. V1[0] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(D0, D2);
  670. V0[1] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[0]);
  671. V1[1] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0X>(D0, D2);
  672. V0[2] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[3]);
  673. V1[2] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1Z>(D1, D2);
  674. V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]);
  675. V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2);
  676. C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
  677. C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
  678. C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
  679. C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
  680. V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]);
  681. V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2);
  682. V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]);
  683. V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2);
  684. V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]);
  685. V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2);
  686. V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]);
  687. V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2);
  688. XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
  689. C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
  690. XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
  691. C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
  692. XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
  693. C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
  694. XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
  695. C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
  696. XMMATRIX R;
  697. R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
  698. R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
  699. R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
  700. R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);
  701. XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]);
  702. if (pDeterminant != nullptr)
  703. *pDeterminant = Determinant;
  704. XMVECTOR Reciprocal = XMVectorReciprocal(Determinant);
  705. XMMATRIX Result;
  706. Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
  707. Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
  708. Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
  709. Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
  710. return Result;
  711. #elif defined(_XM_SSE_INTRINSICS_)
  712. XMMATRIX MT = XMMatrixTranspose(M);
  713. XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,1,0,0));
  714. XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(3,2,3,2));
  715. XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(1,1,0,0));
  716. XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(3,2,3,2));
  717. XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0],_MM_SHUFFLE(2,0,2,0));
  718. XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1],_MM_SHUFFLE(3,1,3,1));
  719. XMVECTOR D0 = _mm_mul_ps(V00,V10);
  720. XMVECTOR D1 = _mm_mul_ps(V01,V11);
  721. XMVECTOR D2 = _mm_mul_ps(V02,V12);
  722. V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(3,2,3,2));
  723. V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(1,1,0,0));
  724. V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(3,2,3,2));
  725. V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(1,1,0,0));
  726. V02 = _mm_shuffle_ps(MT.r[2],MT.r[0],_MM_SHUFFLE(3,1,3,1));
  727. V12 = _mm_shuffle_ps(MT.r[3],MT.r[1],_MM_SHUFFLE(2,0,2,0));
  728. V00 = _mm_mul_ps(V00,V10);
  729. V01 = _mm_mul_ps(V01,V11);
  730. V02 = _mm_mul_ps(V02,V12);
  731. D0 = _mm_sub_ps(D0,V00);
  732. D1 = _mm_sub_ps(D1,V01);
  733. D2 = _mm_sub_ps(D2,V02);
  734. // V11 = D0Y,D0W,D2Y,D2Y
  735. V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,1,3,1));
  736. V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1,0,2,1));
  737. V10 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(0,3,0,2));
  738. V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0,1,0,2));
  739. V11 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(2,1,2,1));
  740. // V13 = D1Y,D1W,D2W,D2W
  741. XMVECTOR V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,3,3,1));
  742. V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1,0,2,1));
  743. V12 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(0,3,0,2));
  744. XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(0,1,0,2));
  745. V13 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(2,1,2,1));
  746. XMVECTOR C0 = _mm_mul_ps(V00,V10);
  747. XMVECTOR C2 = _mm_mul_ps(V01,V11);
  748. XMVECTOR C4 = _mm_mul_ps(V02,V12);
  749. XMVECTOR C6 = _mm_mul_ps(V03,V13);
  750. // V11 = D0X,D0Y,D2X,D2X
  751. V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(0,0,1,0));
  752. V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2,1,3,2));
  753. V10 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(2,1,0,3));
  754. V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1,3,2,3));
  755. V11 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(0,2,1,2));
  756. // V13 = D1X,D1Y,D2Z,D2Z
  757. V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(2,2,1,0));
  758. V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2,1,3,2));
  759. V12 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(2,1,0,3));
  760. V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,3,2,3));
  761. V13 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(0,2,1,2));
  762. V00 = _mm_mul_ps(V00,V10);
  763. V01 = _mm_mul_ps(V01,V11);
  764. V02 = _mm_mul_ps(V02,V12);
  765. V03 = _mm_mul_ps(V03,V13);
  766. C0 = _mm_sub_ps(C0,V00);
  767. C2 = _mm_sub_ps(C2,V01);
  768. C4 = _mm_sub_ps(C4,V02);
  769. C6 = _mm_sub_ps(C6,V03);
  770. V00 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(0,3,0,3));
  771. // V10 = D0Z,D0Z,D2X,D2Y
  772. V10 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,2,2));
  773. V10 = XM_PERMUTE_PS(V10,_MM_SHUFFLE(0,2,3,0));
  774. V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(2,0,3,1));
  775. // V11 = D0X,D0W,D2X,D2Y
  776. V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,3,0));
  777. V11 = XM_PERMUTE_PS(V11,_MM_SHUFFLE(2,1,0,3));
  778. V02 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(0,3,0,3));
  779. // V12 = D1Z,D1Z,D2Z,D2W
  780. V12 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,2,2));
  781. V12 = XM_PERMUTE_PS(V12,_MM_SHUFFLE(0,2,3,0));
  782. V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(2,0,3,1));
  783. // V13 = D1X,D1W,D2Z,D2W
  784. V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,3,0));
  785. V13 = XM_PERMUTE_PS(V13,_MM_SHUFFLE(2,1,0,3));
  786. V00 = _mm_mul_ps(V00,V10);
  787. V01 = _mm_mul_ps(V01,V11);
  788. V02 = _mm_mul_ps(V02,V12);
  789. V03 = _mm_mul_ps(V03,V13);
  790. XMVECTOR C1 = _mm_sub_ps(C0,V00);
  791. C0 = _mm_add_ps(C0,V00);
  792. XMVECTOR C3 = _mm_add_ps(C2,V01);
  793. C2 = _mm_sub_ps(C2,V01);
  794. XMVECTOR C5 = _mm_sub_ps(C4,V02);
  795. C4 = _mm_add_ps(C4,V02);
  796. XMVECTOR C7 = _mm_add_ps(C6,V03);
  797. C6 = _mm_sub_ps(C6,V03);
  798. C0 = _mm_shuffle_ps(C0,C1,_MM_SHUFFLE(3,1,2,0));
  799. C2 = _mm_shuffle_ps(C2,C3,_MM_SHUFFLE(3,1,2,0));
  800. C4 = _mm_shuffle_ps(C4,C5,_MM_SHUFFLE(3,1,2,0));
  801. C6 = _mm_shuffle_ps(C6,C7,_MM_SHUFFLE(3,1,2,0));
  802. C0 = XM_PERMUTE_PS(C0,_MM_SHUFFLE(3,1,2,0));
  803. C2 = XM_PERMUTE_PS(C2,_MM_SHUFFLE(3,1,2,0));
  804. C4 = XM_PERMUTE_PS(C4,_MM_SHUFFLE(3,1,2,0));
  805. C6 = XM_PERMUTE_PS(C6,_MM_SHUFFLE(3,1,2,0));
  806. // Get the determinate
  807. XMVECTOR vTemp = XMVector4Dot(C0,MT.r[0]);
  808. if (pDeterminant != nullptr)
  809. *pDeterminant = vTemp;
  810. vTemp = _mm_div_ps(g_XMOne,vTemp);
  811. XMMATRIX mResult;
  812. mResult.r[0] = _mm_mul_ps(C0,vTemp);
  813. mResult.r[1] = _mm_mul_ps(C2,vTemp);
  814. mResult.r[2] = _mm_mul_ps(C4,vTemp);
  815. mResult.r[3] = _mm_mul_ps(C6,vTemp);
  816. return mResult;
  817. #endif
  818. }
  819. //------------------------------------------------------------------------------
  820. inline XMVECTOR XM_CALLCONV XMMatrixDeterminant
  821. (
  822. FXMMATRIX M
  823. )
  824. {
  825. static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
  826. XMVECTOR V0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
  827. XMVECTOR V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
  828. XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
  829. XMVECTOR V3 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
  830. XMVECTOR V4 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
  831. XMVECTOR V5 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
  832. XMVECTOR P0 = XMVectorMultiply(V0, V1);
  833. XMVECTOR P1 = XMVectorMultiply(V2, V3);
  834. XMVECTOR P2 = XMVectorMultiply(V4, V5);
  835. V0 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
  836. V1 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
  837. V2 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
  838. V3 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
  839. V4 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
  840. V5 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
  841. P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0);
  842. P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1);
  843. P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2);
  844. V0 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[1]);
  845. V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[1]);
  846. V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[1]);
  847. XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v);
  848. XMVECTOR R = XMVectorMultiply(V0, P0);
  849. R = XMVectorNegativeMultiplySubtract(V1, P1, R);
  850. R = XMVectorMultiplyAdd(V2, P2, R);
  851. return XMVector4Dot(S, R);
  852. }
  853. #define XM3RANKDECOMPOSE(a, b, c, x, y, z) \
  854. if((x) < (y)) \
  855. { \
  856. if((y) < (z)) \
  857. { \
  858. (a) = 2; \
  859. (b) = 1; \
  860. (c) = 0; \
  861. } \
  862. else \
  863. { \
  864. (a) = 1; \
  865. \
  866. if((x) < (z)) \
  867. { \
  868. (b) = 2; \
  869. (c) = 0; \
  870. } \
  871. else \
  872. { \
  873. (b) = 0; \
  874. (c) = 2; \
  875. } \
  876. } \
  877. } \
  878. else \
  879. { \
  880. if((x) < (z)) \
  881. { \
  882. (a) = 2; \
  883. (b) = 0; \
  884. (c) = 1; \
  885. } \
  886. else \
  887. { \
  888. (a) = 0; \
  889. \
  890. if((y) < (z)) \
  891. { \
  892. (b) = 2; \
  893. (c) = 1; \
  894. } \
  895. else \
  896. { \
  897. (b) = 1; \
  898. (c) = 2; \
  899. } \
  900. } \
  901. }
  902. #define XM3_DECOMP_EPSILON 0.0001f
  903. _Use_decl_annotations_
  904. inline bool XM_CALLCONV XMMatrixDecompose
  905. (
  906. XMVECTOR *outScale,
  907. XMVECTOR *outRotQuat,
  908. XMVECTOR *outTrans,
  909. FXMMATRIX M
  910. )
  911. {
  912. static const XMVECTOR *pvCanonicalBasis[3] = {
  913. &g_XMIdentityR0.v,
  914. &g_XMIdentityR1.v,
  915. &g_XMIdentityR2.v
  916. };
  917. assert( outScale != nullptr );
  918. assert( outRotQuat != nullptr );
  919. assert( outTrans != nullptr );
  920. // Get the translation
  921. outTrans[0] = M.r[3];
  922. XMVECTOR *ppvBasis[3];
  923. XMMATRIX matTemp;
  924. ppvBasis[0] = &matTemp.r[0];
  925. ppvBasis[1] = &matTemp.r[1];
  926. ppvBasis[2] = &matTemp.r[2];
  927. matTemp.r[0] = M.r[0];
  928. matTemp.r[1] = M.r[1];
  929. matTemp.r[2] = M.r[2];
  930. matTemp.r[3] = g_XMIdentityR3.v;
  931. float *pfScales = (float *)outScale;
  932. size_t a, b, c;
  933. XMVectorGetXPtr(&pfScales[0],XMVector3Length(ppvBasis[0][0]));
  934. XMVectorGetXPtr(&pfScales[1],XMVector3Length(ppvBasis[1][0]));
  935. XMVectorGetXPtr(&pfScales[2],XMVector3Length(ppvBasis[2][0]));
  936. pfScales[3] = 0.f;
  937. XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2])
  938. if(pfScales[a] < XM3_DECOMP_EPSILON)
  939. {
  940. ppvBasis[a][0] = pvCanonicalBasis[a][0];
  941. }
  942. ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]);
  943. if(pfScales[b] < XM3_DECOMP_EPSILON)
  944. {
  945. size_t aa, bb, cc;
  946. float fAbsX, fAbsY, fAbsZ;
  947. fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0]));
  948. fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0]));
  949. fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0]));
  950. XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ)
  951. ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0],pvCanonicalBasis[cc][0]);
  952. }
  953. ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]);
  954. if(pfScales[c] < XM3_DECOMP_EPSILON)
  955. {
  956. ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0],ppvBasis[b][0]);
  957. }
  958. ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]);
  959. float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp));
  960. // use Kramer's rule to check for handedness of coordinate system
  961. if(fDet < 0.0f)
  962. {
  963. // switch coordinate system by negating the scale and inverting the basis vector on the x-axis
  964. pfScales[a] = -pfScales[a];
  965. ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]);
  966. fDet = -fDet;
  967. }
  968. fDet -= 1.0f;
  969. fDet *= fDet;
  970. if(XM3_DECOMP_EPSILON < fDet)
  971. {
  972. // Non-SRT matrix encountered
  973. return false;
  974. }
  975. // generate the quaternion from the matrix
  976. outRotQuat[0] = XMQuaternionRotationMatrix(matTemp);
  977. return true;
  978. }
  979. #undef XM3_DECOMP_EPSILON
  980. #undef XM3RANKDECOMPOSE
  981. //------------------------------------------------------------------------------
  982. // Transformation operations
  983. //------------------------------------------------------------------------------
  984. //------------------------------------------------------------------------------
  985. inline XMMATRIX XM_CALLCONV XMMatrixIdentity()
  986. {
  987. XMMATRIX M;
  988. M.r[0] = g_XMIdentityR0.v;
  989. M.r[1] = g_XMIdentityR1.v;
  990. M.r[2] = g_XMIdentityR2.v;
  991. M.r[3] = g_XMIdentityR3.v;
  992. return M;
  993. }
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixSet
(
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33
)
{
    XMMATRIX M;
#if defined(_XM_NO_INTRINSICS_)
    M.m[0][0] = m00; M.m[0][1] = m01; M.m[0][2] = m02; M.m[0][3] = m03;
    M.m[1][0] = m10; M.m[1][1] = m11; M.m[1][2] = m12; M.m[1][3] = m13;
    M.m[2][0] = m20; M.m[2][1] = m21; M.m[2][2] = m22; M.m[2][3] = m23;
    M.m[3][0] = m30; M.m[3][1] = m31; M.m[3][2] = m32; M.m[3][3] = m33;
#else
    M.r[0] = XMVectorSet(m00, m01, m02, m03);
    M.r[1] = XMVectorSet(m10, m11, m12, m13);
    M.r[2] = XMVectorSet(m20, m21, m22, m23);
    M.r[3] = XMVectorSet(m30, m31, m32, m33);
#endif
    return M;
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTranslation
(
    float OffsetX,
    float OffsetY,
    float OffsetZ
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;
    M.m[3][0] = OffsetX;
    M.m[3][1] = OffsetY;
    M.m[3][2] = OffsetZ;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f );
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTranslationFromVector
(
    FXMVECTOR Offset
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;
    M.m[3][0] = Offset.vector4_f32[0];
    M.m[3][1] = Offset.vector4_f32[1];
    M.m[3][2] = Offset.vector4_f32[2];
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = XMVectorSelect( g_XMIdentityR3.v, Offset, g_XMSelect1110.v );
    return M;
#endif
}
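// Usage sketch (illustrative only): the translation lives in the fourth row,
// matching the row-vector convention v' = v * M used throughout the library.
// The names (world, p, pWorld) are hypothetical.
//
//     XMMATRIX world  = XMMatrixTranslation(10.0f, 0.0f, 5.0f);
//     XMVECTOR p      = XMVectorSet(1.0f, 2.0f, 3.0f, 1.0f);
//     XMVECTOR pWorld = XMVector3TransformCoord(p, world);   // (11, 2, 8)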
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixScaling
(
    float ScaleX,
    float ScaleY,
    float ScaleZ
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.m[0][0] = ScaleX;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = ScaleY;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = ScaleZ;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( ScaleX, Zero, 0 );
    M.r[1] = vsetq_lane_f32( ScaleY, Zero, 1 );
    M.r[2] = vsetq_lane_f32( ScaleZ, Zero, 2 );
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = _mm_set_ps( 0, 0, 0, ScaleX );
    M.r[1] = _mm_set_ps( 0, 0, ScaleY, 0 );
    M.r[2] = _mm_set_ps( 0, ScaleZ, 0, 0 );
    M.r[3] = g_XMIdentityR3.v;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixScalingFromVector
(
    FXMVECTOR Scale
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.m[0][0] = Scale.vector4_f32[0];
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = Scale.vector4_f32[1];
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = Scale.vector4_f32[2];
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = vandq_u32(Scale,g_XMMaskX);
    M.r[1] = vandq_u32(Scale,g_XMMaskY);
    M.r[2] = vandq_u32(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = _mm_and_ps(Scale,g_XMMaskX);
    M.r[1] = _mm_and_ps(Scale,g_XMMaskY);
    M.r[2] = _mm_and_ps(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#endif
}
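// Usage sketch (illustrative only): a non-uniform scale about the origin.
// The names (scale, v) are hypothetical.
//
//     XMMATRIX scale = XMMatrixScalingFromVector(XMVectorSet(2.0f, 1.0f, 0.5f, 0.0f));
//     XMVECTOR v     = XMVector3Transform(XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f), scale);
//     // v.xyz == (2, 1, 0.5)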
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationX
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    XMMATRIX M;
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = fCosAngle;
    M.m[1][2] = fSinAngle;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = -fSinAngle;
    M.m[2][2] = fCosAngle;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMVECTOR T1 = vsetq_lane_f32( fCosAngle, Zero, 1 );
    T1 = vsetq_lane_f32( fSinAngle, T1, 2 );
    XMVECTOR T2 = vsetq_lane_f32( -fSinAngle, Zero, 1 );
    T2 = vsetq_lane_f32( fCosAngle, T2, 2 );
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = T1;
    M.r[2] = T2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = 0,y = cos,z = sin, w = 0
    vCos = _mm_shuffle_ps(vCos,vSin,_MM_SHUFFLE(3,0,0,3));
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0;
    M.r[1] = vCos;
    // x = 0,y = sin,z = cos, w = 0
    vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,1,2,0));
    // x = 0,y = -sin,z = cos, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateY);
    M.r[2] = vCos;
    M.r[3] = g_XMIdentityR3;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationY
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    XMMATRIX M;
    M.m[0][0] = fCosAngle;
    M.m[0][1] = 0.0f;
    M.m[0][2] = -fSinAngle;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = fSinAngle;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fCosAngle;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
    T0 = vsetq_lane_f32( -fSinAngle, T0, 2 );
    XMVECTOR T2 = vsetq_lane_f32( fSinAngle, Zero, 0 );
    T2 = vsetq_lane_f32( fCosAngle, T2, 2 );
    XMMATRIX M;
    M.r[0] = T0;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = T2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = sin,y = 0,z = cos, w = 0
    vSin = _mm_shuffle_ps(vSin,vCos,_MM_SHUFFLE(3,0,3,0));
    XMMATRIX M;
    M.r[2] = vSin;
    M.r[1] = g_XMIdentityR1;
    // x = cos,y = 0,z = sin, w = 0
    vSin = XM_PERMUTE_PS(vSin,_MM_SHUFFLE(3,0,1,2));
    // x = cos,y = 0,z = -sin, w = 0
    vSin = _mm_mul_ps(vSin,g_XMNegateZ);
    M.r[0] = vSin;
    M.r[3] = g_XMIdentityR3;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationZ
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    XMMATRIX M;
    M.m[0][0] = fCosAngle;
    M.m[0][1] = fSinAngle;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = -fSinAngle;
    M.m[1][1] = fCosAngle;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
    T0 = vsetq_lane_f32( fSinAngle, T0, 1 );
    XMVECTOR T1 = vsetq_lane_f32( -fSinAngle, Zero, 0 );
    T1 = vsetq_lane_f32( fCosAngle, T1, 1 );
    XMMATRIX M;
    M.r[0] = T0;
    M.r[1] = T1;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = cos,y = sin,z = 0, w = 0
    vCos = _mm_unpacklo_ps(vCos,vSin);
    XMMATRIX M;
    M.r[0] = vCos;
    // x = sin,y = cos,z = 0, w = 0
    vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,2,0,1));
    // x = cos,y = -sin,z = 0, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateX);
    M.r[1] = vCos;
    M.r[2] = g_XMIdentityR2;
    M.r[3] = g_XMIdentityR3;
    return M;
#endif
}
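// Usage sketch (illustrative only): with the row-vector convention used here,
// XMMatrixMultiply(A, B) applies A first and B second.  The names (pitch,
// yaw, world) are hypothetical.
//
//     XMMATRIX pitch = XMMatrixRotationX(XMConvertToRadians(30.0f));
//     XMMATRIX yaw   = XMMatrixRotationY(XMConvertToRadians(90.0f));
//     XMMATRIX world = XMMatrixMultiply(pitch, yaw);   // pitch, then yaw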
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYaw
(
    float Pitch,
    float Yaw,
    float Roll
)
{
    XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
    return XMMatrixRotationRollPitchYawFromVector(Angles);
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYawFromVector
(
    FXMVECTOR Angles // <Pitch, Yaw, Roll, undefined>
)
{
    XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
    return XMMatrixRotationQuaternion(Q);
}
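// Note (illustrative only): the vector form packs the angles as
// <Pitch (x), Yaw (y), Roll (z)>, mirroring the scalar overload above, e.g.
//
//     XMVECTOR angles = XMVectorSet(pitchRad, yawRad, rollRad, 0.0f);   // hypothetical values
//     XMMATRIX world  = XMMatrixRotationRollPitchYawFromVector(angles);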
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationNormal
(
    FXMVECTOR NormalAxis,
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f);
    XMVECTOR C2 = XMVectorSplatZ(A);
    XMVECTOR C1 = XMVectorSplatY(A);
    XMVECTOR C0 = XMVectorSplatX(A);
    XMVECTOR N0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(NormalAxis);
    XMVECTOR N1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(NormalAxis);
    XMVECTOR V0 = XMVectorMultiply(C2, N0);
    V0 = XMVectorMultiply(V0, N1);
    XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis);
    R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1);
    XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0);
    XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0);
    V0 = XMVectorSelect(A, R0, g_XMSelect1110.v);
    XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(R1, R2);
    XMVECTOR V2 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(R1, R2);
    XMMATRIX M;
    M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(V0, V1);
    M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(V0, V1);
    M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(V0, V2);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
    XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle);
    XMVECTOR C1 = _mm_set_ps1(fCosAngle);
    XMVECTOR C0 = _mm_set_ps1(fSinAngle);
    XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,0,2,1));
    XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,1,0,2));
    XMVECTOR V0 = _mm_mul_ps(C2, N0);
    V0 = _mm_mul_ps(V0, N1);
    XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis);
    R0 = _mm_mul_ps(R0, NormalAxis);
    R0 = _mm_add_ps(R0, C1);
    XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis);
    R1 = _mm_add_ps(R1, V0);
    XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis);
    R2 = _mm_sub_ps(V0,R2);
    V0 = _mm_and_ps(R0,g_XMMask3);
    XMVECTOR V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,1,2,0));
    V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(0,3,2,1));
    XMVECTOR V2 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(0,0,1,1));
    V2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,0,2,0));
    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(1,0,3,0));
    R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,2,0));
    XMMATRIX M;
    M.r[0] = R2;
    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(3,2,3,1));
    R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,0,2));
    M.r[1] = R2;
    V2 = _mm_shuffle_ps(V2,V0,_MM_SHUFFLE(3,2,1,0));
    M.r[2] = V2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationAxis
(
    FXMVECTOR Axis,
    float Angle
)
{
    assert(!XMVector3Equal(Axis, XMVectorZero()));
    assert(!XMVector3IsInfinite(Axis));
    XMVECTOR Normal = XMVector3Normalize(Axis);
    return XMMatrixRotationNormal(Normal, Angle);
}
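// Usage sketch (illustrative only): XMMatrixRotationAxis normalizes the axis
// before deferring to XMMatrixRotationNormal, so call XMMatrixRotationNormal
// directly when the axis is already unit length.  The names (axis, m) are
// hypothetical.
//
//     XMVECTOR axis = XMVector3Normalize(XMVectorSet(1.0f, 1.0f, 0.0f, 0.0f));
//     XMMATRIX m    = XMMatrixRotationNormal(axis, XM_PIDIV2);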
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixRotationQuaternion
(
    FXMVECTOR Quaternion
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 Constant1110 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } };
    XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion);
    XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0);
    XMVECTOR V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_1W>(Q1, Constant1110.v);
    XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1W>(Q1, Constant1110.v);
    XMVECTOR R0 = XMVectorSubtract(Constant1110, V0);
    R0 = XMVectorSubtract(R0, V1);
    V0 = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(Quaternion);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_W>(Q0);
    V0 = XMVectorMultiply(V0, V1);
    V1 = XMVectorSplatW(Quaternion);
    XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(Q0);
    V1 = XMVectorMultiply(V1, V2);
    XMVECTOR R1 = XMVectorAdd(V0, V1);
    XMVECTOR R2 = XMVectorSubtract(V0, V1);
    V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z>(R1, R2);
    V1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1Z, XM_PERMUTE_0X, XM_PERMUTE_1Z>(R1, R2);
    XMMATRIX M;
    M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(R0, V0);
    M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(R0, V0);
    M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(R0, V1);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Constant1110 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } };
    XMVECTOR Q0 = _mm_add_ps(Quaternion,Quaternion);
    XMVECTOR Q1 = _mm_mul_ps(Quaternion,Q0);
    XMVECTOR V0 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,0,0,1));
    V0 = _mm_and_ps(V0,g_XMMask3);
    XMVECTOR V1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,1,2,2));
    V1 = _mm_and_ps(V1,g_XMMask3);
    XMVECTOR R0 = _mm_sub_ps(Constant1110,V0);
    R0 = _mm_sub_ps(R0, V1);
    V0 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,1,0,0));
    V1 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,2,1,2));
    V0 = _mm_mul_ps(V0, V1);
    V1 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,3,3,3));
    XMVECTOR V2 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,0,2,1));
    V1 = _mm_mul_ps(V1, V2);
    XMVECTOR R1 = _mm_add_ps(V0, V1);
    XMVECTOR R2 = _mm_sub_ps(V0, V1);
    V0 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(1,0,2,1));
    V0 = XM_PERMUTE_PS(V0,_MM_SHUFFLE(1,3,2,0));
    V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,2,0,0));
    V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(2,0,2,0));
    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(1,0,3,0));
    Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,2,0));
    XMMATRIX M;
    M.r[0] = Q1;
    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(3,2,3,1));
    Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,0,2));
    M.r[1] = Q1;
    Q1 = _mm_shuffle_ps(V1,R0,_MM_SHUFFLE(3,2,1,0));
    M.r[2] = Q1;
    M.r[3] = g_XMIdentityR3;
    return M;
#endif
}
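// Usage sketch (illustrative only): a unit quaternion and the equivalent
// axis/angle matrix agree to within floating-point tolerance.  The names
// (q, fromQuat, fromAxis) are hypothetical.
//
//     XMVECTOR q        = XMQuaternionRotationAxis(g_XMIdentityR1.v, XM_PIDIV4);
//     XMMATRIX fromQuat = XMMatrixRotationQuaternion(q);
//     XMMATRIX fromAxis = XMMatrixRotationAxis(g_XMIdentityR1.v, XM_PIDIV4);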
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTransformation2D
(
    FXMVECTOR ScalingOrigin,
    float ScalingOrientation,
    FXMVECTOR Scaling,
    FXMVECTOR RotationOrigin,
    float Rotation,
    GXMVECTOR Translation
)
{
    // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
    //         MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
    XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v);
    XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin);
    XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
    XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation);
    XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
    XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
    XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
    XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);
    XMMATRIX M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
    M = XMMatrixMultiply(M, MScaling);
    M = XMMatrixMultiply(M, MScalingOrientation);
    M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
    return M;
}
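// Usage sketch (illustrative only): rotating and scaling a sprite about its
// centre, then placing it.  Every value below is hypothetical.
//
//     XMVECTOR centre = XMVectorSet(32.0f, 32.0f, 0.0f, 0.0f);
//     XMMATRIX m = XMMatrixTransformation2D(
//         centre, 0.0f,                                  // scaling origin / orientation
//         XMVectorSet(2.0f, 2.0f, 0.0f, 0.0f),           // scaling
//         centre, XM_PIDIV4,                             // rotation origin / angle
//         XMVectorSet(100.0f, 50.0f, 0.0f, 0.0f));       // translation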
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixTransformation
(
    FXMVECTOR ScalingOrigin,
    FXMVECTOR ScalingOrientationQuaternion,
    FXMVECTOR Scaling,
    GXMVECTOR RotationOrigin,
    HXMVECTOR RotationQuaternion,
    HXMVECTOR Translation
)
{
    // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
    //         MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
    XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v);
    XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin);
    XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
    XMMATRIX MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion);
    XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
    XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
    XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
    XMMATRIX M;
    M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
    M = XMMatrixMultiply(M, MScaling);
    M = XMMatrixMultiply(M, MScalingOrientation);
    M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
    return M;
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation2D
(
    FXMVECTOR Scaling,
    FXMVECTOR RotationOrigin,
    float Rotation,
    FXMVECTOR Translation
)
{
    // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
    XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
    XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
    XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);
    XMMATRIX M;
    M = MScaling;
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
    return M;
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation
(
    FXMVECTOR Scaling,
    FXMVECTOR RotationOrigin,
    FXMVECTOR RotationQuaternion,
    GXMVECTOR Translation
)
{
    // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
    XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin,g_XMSelect1110.v);
    XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation,g_XMSelect1110.v);
    XMMATRIX M;
    M = MScaling;
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
    return M;
}
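// Usage sketch (illustrative only): with a zero rotation origin this builds
// the usual scale-rotate-translate world matrix in one call.  The names
// (s, q, t, world) and the angle yawRad are hypothetical.
//
//     XMVECTOR s = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f);
//     XMVECTOR q = XMQuaternionRotationRollPitchYaw(0.0f, yawRad, 0.0f);
//     XMVECTOR t = XMVectorSet(5.0f, 0.0f, -3.0f, 0.0f);
//     XMMATRIX world = XMMatrixAffineTransformation(s, XMVectorZero(), q, t);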
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixReflect
(
    FXMVECTOR ReflectionPlane
)
{
    assert(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
    assert(!XMPlaneIsInfinite(ReflectionPlane));
    static const XMVECTORF32 NegativeTwo = { { { -2.0f, -2.0f, -2.0f, 0.0f } } };
    XMVECTOR P = XMPlaneNormalize(ReflectionPlane);
    XMVECTOR S = XMVectorMultiply(P, NegativeTwo);
    XMVECTOR A = XMVectorSplatX(P);
    XMVECTOR B = XMVectorSplatY(P);
    XMVECTOR C = XMVectorSplatZ(P);
    XMVECTOR D = XMVectorSplatW(P);
    XMMATRIX M;
    M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v);
    M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v);
    M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v);
    M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v);
    return M;
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixShadow
(
    FXMVECTOR ShadowPlane,
    FXMVECTOR LightPosition
)
{
    static const XMVECTORU32 Select0001 = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1 } } };
    assert(!XMVector3Equal(ShadowPlane, XMVectorZero()));
    assert(!XMPlaneIsInfinite(ShadowPlane));
    XMVECTOR P = XMPlaneNormalize(ShadowPlane);
    XMVECTOR Dot = XMPlaneDot(P, LightPosition);
    P = XMVectorNegate(P);
    XMVECTOR D = XMVectorSplatW(P);
    XMVECTOR C = XMVectorSplatZ(P);
    XMVECTOR B = XMVectorSplatY(P);
    XMVECTOR A = XMVectorSplatX(P);
    Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v);
    XMMATRIX M;
    M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot);
    return M;
}
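// Usage sketch (illustrative only): both functions take a plane in the usual
// (a, b, c, d) form with ax + by + cz + d = 0.  For XMMatrixShadow, a light
// position with w = 1 acts as a point light and w = 0 as a directional light.
// The names (ground, mirror, light, flat) are hypothetical.
//
//     XMVECTOR ground = XMPlaneFromPointNormal(XMVectorZero(), g_XMIdentityR1.v);
//     XMMATRIX mirror = XMMatrixReflect(ground);                    // reflect through y = 0
//     XMVECTOR light  = XMVectorSet(10.0f, 20.0f, 10.0f, 1.0f);     // point light
//     XMMATRIX flat   = XMMatrixShadow(ground, light);              // flatten geometry onto y = 0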
//------------------------------------------------------------------------------
// View and projection initialization operations
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixLookAtLH
(
    FXMVECTOR EyePosition,
    FXMVECTOR FocusPosition,
    FXMVECTOR UpDirection
)
{
    XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition);
    return XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection);
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixLookAtRH
(
    FXMVECTOR EyePosition,
    FXMVECTOR FocusPosition,
    FXMVECTOR UpDirection
)
{
    XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition);
    return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixLookToLH
(
    FXMVECTOR EyePosition,
    FXMVECTOR EyeDirection,
    FXMVECTOR UpDirection
)
{
    assert(!XMVector3Equal(EyeDirection, XMVectorZero()));
    assert(!XMVector3IsInfinite(EyeDirection));
    assert(!XMVector3Equal(UpDirection, XMVectorZero()));
    assert(!XMVector3IsInfinite(UpDirection));
    XMVECTOR R2 = XMVector3Normalize(EyeDirection);
    XMVECTOR R0 = XMVector3Cross(UpDirection, R2);
    R0 = XMVector3Normalize(R0);
    XMVECTOR R1 = XMVector3Cross(R2, R0);
    XMVECTOR NegEyePosition = XMVectorNegate(EyePosition);
    XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition);
    XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition);
    XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition);
    XMMATRIX M;
    M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v);
    M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v);
    M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v);
    M.r[3] = g_XMIdentityR3.v;
    M = XMMatrixTranspose(M);
    return M;
}
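// Usage sketch (illustrative only): a typical left-handed view matrix from a
// camera position, focus point and up direction.  The names (eye, at, up,
// view) are hypothetical.
//
//     XMVECTOR eye  = XMVectorSet(0.0f, 3.0f, -8.0f, 0.0f);
//     XMVECTOR at   = XMVectorSet(0.0f, 0.0f,  0.0f, 0.0f);
//     XMVECTOR up   = XMVectorSet(0.0f, 1.0f,  0.0f, 0.0f);
//     XMMATRIX view = XMMatrixLookAtLH(eye, at, up);
//     // XMMatrixLookToLH(eye, direction, up) is the same construction expressed
//     // with a view direction instead of a focus point.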
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixLookToRH
(
    FXMVECTOR EyePosition,
    FXMVECTOR EyeDirection,
    FXMVECTOR UpDirection
)
{
    XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection);
    return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
}
//------------------------------------------------------------------------------
#ifdef _PREFAST_
#pragma prefast(push)
#pragma prefast(disable:28931, "PREfast noise: Esp:1266")
#endif
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);
    XMMATRIX M;
    M.m[0][0] = TwoNearZ / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ / ViewWidth,
        TwoNearZ / ViewHeight,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,1.0f
    vTemp = _mm_setzero_ps();
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,0
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveRH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ - FarZ);
    XMMATRIX M;
    M.m[0][0] = TwoNearZ / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ - FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ / ViewWidth,
        TwoNearZ / ViewHeight,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,-1.0f
    vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,-1.0f
    vTemp = _mm_setzero_ps();
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,0
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovLH
(
    float FovAngleY,
    float AspectRatio,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float Height = CosFov / SinFov;
    float Width = Height / AspectRatio;
    float fRange = FarZ / (FarZ-NearZ);
    XMMATRIX M;
    M.m[0][0] = Width;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = Height;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float fRange = FarZ / (FarZ-NearZ);
    float Height = CosFov / SinFov;
    float Width = Height / AspectRatio;
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
    M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float fRange = FarZ / (FarZ-NearZ);
    // Note: This is recorded on the stack
    float Height = CosFov / SinFov;
    XMVECTOR rMem = {
        Height / AspectRatio,
        Height,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // CosFov / SinFov,0,0,0
    XMMATRIX M;
    M.r[0] = vTemp;
    // 0,Height / AspectRatio,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#endif
}
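// Usage sketch (illustrative only): the field-of-view form is the most common
// projection helper; FovAngleY is the full vertical field of view in radians
// and AspectRatio is typically width / height.  The values below are
// hypothetical.
//
//     XMMATRIX proj = XMMatrixPerspectiveFovLH(
//         XMConvertToRadians(60.0f),   // vertical FOV in radians
//         1280.0f / 720.0f,            // aspect ratio (width / height)
//         0.1f, 100.0f);               // near and far planes (both must be > 0)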
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovRH
(
    float FovAngleY,
    float AspectRatio,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float Height = CosFov / SinFov;
    float Width = Height / AspectRatio;
    float fRange = FarZ / (NearZ-FarZ);
    XMMATRIX M;
    M.m[0][0] = Width;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = Height;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float fRange = FarZ / (NearZ-FarZ);
    float Height = CosFov / SinFov;
    float Width = Height / AspectRatio;
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
    M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    float Height = CosFov / SinFov;
    XMVECTOR rMem = {
        Height / AspectRatio,
        Height,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // CosFov / SinFov,0,0,0
    XMMATRIX M;
    M.r[0] = vTemp;
    // 0,Height / AspectRatio,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,-1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,-1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterLH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);
    XMMATRIX M;
    M.m[0][0] = TwoNearZ * ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ * ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
    M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange,
                         1.0f);
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ+NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ*ReciprocalWidth,
        TwoNearZ*ReciprocalHeight,
        -fRange * NearZ,
        0
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ*ReciprocalWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ*ReciprocalHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // -(ViewLeft + ViewRight)*ReciprocalWidth,-(ViewTop + ViewBottom)*ReciprocalHeight,fRange,1.0f
    M.r[2] = XMVectorSet( -(ViewLeft + ViewRight) * ReciprocalWidth,
                          -(ViewTop + ViewBottom) * ReciprocalHeight,
                          fRange,
                          1.0f );
    // 0,0,-fRange * NearZ,0.0f
    vValues = _mm_and_ps(vValues,g_XMMaskZ);
    M.r[3] = vValues;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterRH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(NearZ > 0.f && FarZ > 0.f);
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);
    XMMATRIX M;
    M.m[0][0] = TwoNearZ * ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ * ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
    M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth,
                         (ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange,
                         -1.0f);
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ+NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ*ReciprocalWidth,
        TwoNearZ*ReciprocalHeight,
        fRange * NearZ,
        0
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ*ReciprocalWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ*ReciprocalHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // (ViewLeft + ViewRight)*ReciprocalWidth,(ViewTop + ViewBottom)*ReciprocalHeight,fRange,-1.0f
    M.r[2] = XMVectorSet( (ViewLeft + ViewRight) * ReciprocalWidth,
                          (ViewTop + ViewBottom) * ReciprocalHeight,
                          fRange,
                          -1.0f );
    // 0,0,fRange * NearZ,0.0f
    vValues = _mm_and_ps(vValues,g_XMMaskZ);
    M.r[3] = vValues;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicLH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
    float fRange = 1.0f / (FarZ-NearZ);
    XMMATRIX M;
    M.m[0][0] = 2.0f / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = 2.0f / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fRange = 1.0f / (FarZ-NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, g_XMIdentityR3.v, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fRange = 1.0f / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        2.0f / ViewWidth,
        2.0f / ViewHeight,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // 2.0f / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,2.0f / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
    M.r[3] = vTemp;
    return M;
#endif
}
//------------------------------------------------------------------------------
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicRH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
    float fRange = 1.0f / (NearZ-FarZ);
    XMMATRIX M;
    M.m[0][0] = 2.0f / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = 2.0f / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fRange = 1.0f / (NearZ-FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = vsetq_lane_f32( fRange * NearZ, g_XMIdentityR3.v, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fRange = 1.0f / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        2.0f / ViewWidth,
        2.0f / ViewHeight,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // 2.0f / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,2.0f / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
    M.r[3] = vTemp;
    return M;
#endif
}
//------------------------------------------------------------------------------

inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterLH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         -fRange * NearZ,
                         1.0f);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        fReciprocalWidth,
        fReciprocalHeight,
        fRange,
        1.0f
    };
    XMVECTOR rMem2 = {
        -(ViewLeft + ViewRight),
        -(ViewTop + ViewBottom),
        -NearZ,
        1.0f
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // fReciprocalWidth*2,0,0,0
    vTemp = _mm_add_ss(vTemp,vTemp);
    M.r[0] = vTemp;
    // 0,fReciprocalHeight*2,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    vTemp = _mm_add_ps(vTemp,vTemp);
    M.r[1] = vTemp;
    // 0,0,fRange,0.0f
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
    M.r[2] = vTemp;
    // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f
    vValues = _mm_mul_ps(vValues,rMem2);
    M.r[3] = vValues;
    return M;
#endif
}
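// Illustrative usage sketch (not part of the library): an off-center left-handed
// orthographic matrix is a common way to build a pixel-space projection for 2D/UI
// rendering. The 1280x720 viewport dimensions below are assumptions for the example.
//
//     XMMATRIX proj = XMMatrixOrthographicOffCenterLH(
//         0.0f, 1280.0f,      // ViewLeft, ViewRight   (x in pixels maps to [-1,+1])
//         720.0f, 0.0f,       // ViewBottom, ViewTop   (y-down pixel coordinates)
//         0.0f, 1.0f);        // NearZ, FarZ           (z passes through to [0,1])
//     XMFLOAT4X4 projF;
//     XMStoreFloat4x4(&projF, proj);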
//------------------------------------------------------------------------------

inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterRH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);

    XMMATRIX M;
    M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange * NearZ,
                         1.0f);
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange * NearZ,
                         1.0f);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        fReciprocalWidth,
        fReciprocalHeight,
        fRange,
        1.0f
    };
    XMVECTOR rMem2 = {
        -(ViewLeft + ViewRight),
        -(ViewTop + ViewBottom),
        NearZ,
        1.0f
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // fReciprocalWidth*2,0,0,0
    vTemp = _mm_add_ss(vTemp,vTemp);
    M.r[0] = vTemp;
    // 0,fReciprocalHeight*2,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    vTemp = _mm_add_ps(vTemp,vTemp);
    M.r[1] = vTemp;
    // 0,0,fRange,0.0f
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
    M.r[2] = vTemp;
    // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*NearZ,1.0f
    vValues = _mm_mul_ps(vValues,rMem2);
    M.r[3] = vValues;
    return M;
#endif
}
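// Illustrative sanity check (not part of the library): with the right-handed
// variant the camera looks down -Z, so view-space z = -NearZ should project to
// depth 0 and z = -FarZ to depth 1. The concrete values below are assumptions
// for the example only.
//
//     XMMATRIX proj = XMMatrixOrthographicOffCenterRH(-1.0f, 1.0f, -1.0f, 1.0f, 0.1f, 100.0f);
//     XMVECTOR nearPt = XMVector3Transform(XMVectorSet(0.0f, 0.0f, -0.1f, 1.0f), proj);
//     XMVECTOR farPt  = XMVector3Transform(XMVectorSet(0.0f, 0.0f, -100.0f, 1.0f), proj);
//     // XMVectorGetZ(nearPt) is 0.0f and XMVectorGetZ(farPt) is 1.0f (up to rounding)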
#ifdef _PREFAST_
#pragma prefast(pop)
#endif

/****************************************************************************
 *
 * XMMATRIX operators and methods
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

inline XMMATRIX::XMMATRIX
(
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33
)
{
    r[0] = XMVectorSet(m00, m01, m02, m03);
    r[1] = XMVectorSet(m10, m11, m12, m13);
    r[2] = XMVectorSet(m20, m21, m22, m23);
    r[3] = XMVectorSet(m30, m31, m32, m33);
}

//------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMMATRIX::XMMATRIX
(
    const float* pArray
)
{
    assert( pArray != nullptr );
    r[0] = XMLoadFloat4((const XMFLOAT4*)pArray);
    r[1] = XMLoadFloat4((const XMFLOAT4*)(pArray + 4));
    r[2] = XMLoadFloat4((const XMFLOAT4*)(pArray + 8));
    r[3] = XMLoadFloat4((const XMFLOAT4*)(pArray + 12));
}
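// Illustrative usage sketch (not part of the library): the pointer constructor
// above reads 16 consecutive floats in row-major order, so the caller must
// guarantee at least that many elements. The array below is an assumption for
// the example only.
//
//     const float identity[16] = { 1,0,0,0,  0,1,0,0,  0,0,1,0,  0,0,0,1 };
//     XMMATRIX M(identity);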
//------------------------------------------------------------------------------

inline XMMATRIX XMMATRIX::operator- () const
{
    XMMATRIX R;
    R.r[0] = XMVectorNegate( r[0] );
    R.r[1] = XMVectorNegate( r[1] );
    R.r[2] = XMVectorNegate( r[2] );
    R.r[3] = XMVectorNegate( r[3] );
    return R;
}

//------------------------------------------------------------------------------

inline XMMATRIX& XM_CALLCONV XMMATRIX::operator+= (FXMMATRIX M)
{
    r[0] = XMVectorAdd( r[0], M.r[0] );
    r[1] = XMVectorAdd( r[1], M.r[1] );
    r[2] = XMVectorAdd( r[2], M.r[2] );
    r[3] = XMVectorAdd( r[3], M.r[3] );
    return *this;
}

//------------------------------------------------------------------------------

inline XMMATRIX& XM_CALLCONV XMMATRIX::operator-= (FXMMATRIX M)
{
    r[0] = XMVectorSubtract( r[0], M.r[0] );
    r[1] = XMVectorSubtract( r[1], M.r[1] );
    r[2] = XMVectorSubtract( r[2], M.r[2] );
    r[3] = XMVectorSubtract( r[3], M.r[3] );
    return *this;
}

//------------------------------------------------------------------------------

inline XMMATRIX& XM_CALLCONV XMMATRIX::operator*=(FXMMATRIX M)
{
    *this = XMMatrixMultiply( *this, M );
    return *this;
}

//------------------------------------------------------------------------------

inline XMMATRIX& XMMATRIX::operator*= (float S)
{
    r[0] = XMVectorScale( r[0], S );
    r[1] = XMVectorScale( r[1], S );
    r[2] = XMVectorScale( r[2], S );
    r[3] = XMVectorScale( r[3], S );
    return *this;
}

//------------------------------------------------------------------------------

inline XMMATRIX& XMMATRIX::operator/= (float S)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vS = XMVectorReplicate( S );
    r[0] = XMVectorDivide( r[0], vS );
    r[1] = XMVectorDivide( r[1], vS );
    r[2] = XMVectorDivide( r[2], vS );
    r[3] = XMVectorDivide( r[3], vS );
    return *this;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
    float32x4_t vS = vdupq_n_f32( S );
    r[0] = vdivq_f32( r[0], vS );
    r[1] = vdivq_f32( r[1], vS );
    r[2] = vdivq_f32( r[2], vS );
    r[3] = vdivq_f32( r[3], vS );
#else
    // 2 iterations of Newton-Raphson refinement of reciprocal
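    // Each vrecps_f32(R0, vS) evaluates (2.0f - S * R0), so R0 * (2.0f - S * R0)
    // is one Newton-Raphson step toward 1/S; two steps refine the low-precision
    // vrecpe_f32 estimate before the per-row multiplies below.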
    float32x2_t vS = vdup_n_f32( S );
    float32x2_t R0 = vrecpe_f32( vS );
    float32x2_t S0 = vrecps_f32( R0, vS );
    R0 = vmul_f32( S0, R0 );
    S0 = vrecps_f32( R0, vS );
    R0 = vmul_f32( S0, R0 );
    float32x4_t Reciprocal = vcombine_f32(R0, R0);
    r[0] = vmulq_f32( r[0], Reciprocal );
    r[1] = vmulq_f32( r[1], Reciprocal );
    r[2] = vmulq_f32( r[2], Reciprocal );
    r[3] = vmulq_f32( r[3], Reciprocal );
#endif
    return *this;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128 vS = _mm_set_ps1( S );
    r[0] = _mm_div_ps( r[0], vS );
    r[1] = _mm_div_ps( r[1], vS );
    r[2] = _mm_div_ps( r[2], vS );
    r[3] = _mm_div_ps( r[3], vS );
    return *this;
#endif
}
//------------------------------------------------------------------------------

inline XMMATRIX XM_CALLCONV XMMATRIX::operator+ (FXMMATRIX M) const
{
    XMMATRIX R;
    R.r[0] = XMVectorAdd( r[0], M.r[0] );
    R.r[1] = XMVectorAdd( r[1], M.r[1] );
    R.r[2] = XMVectorAdd( r[2], M.r[2] );
    R.r[3] = XMVectorAdd( r[3], M.r[3] );
    return R;
}

//------------------------------------------------------------------------------

inline XMMATRIX XM_CALLCONV XMMATRIX::operator- (FXMMATRIX M) const
{
    XMMATRIX R;
    R.r[0] = XMVectorSubtract( r[0], M.r[0] );
    R.r[1] = XMVectorSubtract( r[1], M.r[1] );
    R.r[2] = XMVectorSubtract( r[2], M.r[2] );
    R.r[3] = XMVectorSubtract( r[3], M.r[3] );
    return R;
}

//------------------------------------------------------------------------------

inline XMMATRIX XM_CALLCONV XMMATRIX::operator*(FXMMATRIX M) const
{
    return XMMatrixMultiply(*this, M);
}
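// Illustrative note (not part of the library): DirectXMath matrices transform
// row vectors, so composed transforms read left to right. The matrices named
// below are assumptions for the example only.
//
//     XMMATRIX world = XMMatrixTranslation(1.0f, 2.0f, 3.0f);
//     XMMATRIX view  = XMMatrixLookAtLH(XMVectorSet(0.0f, 0.0f, -5.0f, 1.0f),
//                                       XMVectorZero(),
//                                       XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f));
//     XMMATRIX worldView = world * view;   // world is applied first, then view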
//------------------------------------------------------------------------------

inline XMMATRIX XMMATRIX::operator* (float S) const
{
    XMMATRIX R;
    R.r[0] = XMVectorScale( r[0], S );
    R.r[1] = XMVectorScale( r[1], S );
    R.r[2] = XMVectorScale( r[2], S );
    R.r[3] = XMVectorScale( r[3], S );
    return R;
}

//------------------------------------------------------------------------------

inline XMMATRIX XMMATRIX::operator/ (float S) const
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vS = XMVectorReplicate( S );
    XMMATRIX R;
    R.r[0] = XMVectorDivide( r[0], vS );
    R.r[1] = XMVectorDivide( r[1], vS );
    R.r[2] = XMVectorDivide( r[2], vS );
    R.r[3] = XMVectorDivide( r[3], vS );
    return R;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)
    float32x4_t vS = vdupq_n_f32( S );
    XMMATRIX R;
    R.r[0] = vdivq_f32( r[0], vS );
    R.r[1] = vdivq_f32( r[1], vS );
    R.r[2] = vdivq_f32( r[2], vS );
    R.r[3] = vdivq_f32( r[3], vS );
#else
    // 2 iterations of Newton-Raphson refinement of reciprocal
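    // Same estimate-and-refine sequence as in operator/= above: each
    // vrecps_f32/vmul_f32 pair applies one Newton-Raphson step toward 1/S.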
    float32x2_t vS = vdup_n_f32( S );
    float32x2_t R0 = vrecpe_f32( vS );
    float32x2_t S0 = vrecps_f32( R0, vS );
    R0 = vmul_f32( S0, R0 );
    S0 = vrecps_f32( R0, vS );
    R0 = vmul_f32( S0, R0 );
    float32x4_t Reciprocal = vcombine_f32(R0, R0);
    XMMATRIX R;
    R.r[0] = vmulq_f32( r[0], Reciprocal );
    R.r[1] = vmulq_f32( r[1], Reciprocal );
    R.r[2] = vmulq_f32( r[2], Reciprocal );
    R.r[3] = vmulq_f32( r[3], Reciprocal );
#endif
    return R;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128 vS = _mm_set_ps1( S );
    XMMATRIX R;
    R.r[0] = _mm_div_ps( r[0], vS );
    R.r[1] = _mm_div_ps( r[1], vS );
    R.r[2] = _mm_div_ps( r[2], vS );
    R.r[3] = _mm_div_ps( r[3], vS );
    return R;
#endif
}
//------------------------------------------------------------------------------

inline XMMATRIX XM_CALLCONV operator*
(
    float S,
    FXMMATRIX M
)
{
    XMMATRIX R;
    R.r[0] = XMVectorScale( M.r[0], S );
    R.r[1] = XMVectorScale( M.r[1], S );
    R.r[2] = XMVectorScale( M.r[2], S );
    R.r[3] = XMVectorScale( M.r[3], S );
    return R;
}

/****************************************************************************
 *
 * XMFLOAT3X3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMFLOAT3X3::XMFLOAT3X3
(
    const float* pArray
)
{
    assert( pArray != nullptr );
    for (size_t Row = 0; Row < 3; Row++)
    {
        for (size_t Column = 0; Column < 3; Column++)
        {
            m[Row][Column] = pArray[Row * 3 + Column];
        }
    }
}

//------------------------------------------------------------------------------

inline XMFLOAT3X3& XMFLOAT3X3::operator=
(
    const XMFLOAT3X3& Float3x3
)
{
    _11 = Float3x3._11;
    _12 = Float3x3._12;
    _13 = Float3x3._13;
    _21 = Float3x3._21;
    _22 = Float3x3._22;
    _23 = Float3x3._23;
    _31 = Float3x3._31;
    _32 = Float3x3._32;
    _33 = Float3x3._33;
    return *this;
}
/****************************************************************************
 *
 * XMFLOAT4X3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMFLOAT4X3::XMFLOAT4X3
(
    const float* pArray
)
{
    assert( pArray != nullptr );

    m[0][0] = pArray[0];
    m[0][1] = pArray[1];
    m[0][2] = pArray[2];

    m[1][0] = pArray[3];
    m[1][1] = pArray[4];
    m[1][2] = pArray[5];

    m[2][0] = pArray[6];
    m[2][1] = pArray[7];
    m[2][2] = pArray[8];

    m[3][0] = pArray[9];
    m[3][1] = pArray[10];
    m[3][2] = pArray[11];
}

//------------------------------------------------------------------------------

inline XMFLOAT4X3& XMFLOAT4X3::operator=
(
    const XMFLOAT4X3& Float4x3
)
{
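    // An XMFLOAT4X3 is 12 contiguous floats, so the copy is done as three
    // consecutive XMFLOAT4-sized blocks starting at _11, _22 and _33 (each
    // load/store pair moves four adjacent members).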
    XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._33);

    XMStoreFloat4((XMFLOAT4*)&_11, V1);
    XMStoreFloat4((XMFLOAT4*)&_22, V2);
    XMStoreFloat4((XMFLOAT4*)&_33, V3);

    return *this;
}

//------------------------------------------------------------------------------

inline XMFLOAT4X3A& XMFLOAT4X3A::operator=
(
    const XMFLOAT4X3A& Float4x3
)
{
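    // Same three-block copy as the unaligned XMFLOAT4X3 operator= above,
    // using the aligned load/store variants.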
    XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._33);

    XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
    XMStoreFloat4A((XMFLOAT4A*)&_22, V2);
    XMStoreFloat4A((XMFLOAT4A*)&_33, V3);

    return *this;
}

/****************************************************************************
 *
 * XMFLOAT4X4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMFLOAT4X4::XMFLOAT4X4
(
    const float* pArray
)
{
    assert( pArray != nullptr );

    m[0][0] = pArray[0];
    m[0][1] = pArray[1];
    m[0][2] = pArray[2];
    m[0][3] = pArray[3];

    m[1][0] = pArray[4];
    m[1][1] = pArray[5];
    m[1][2] = pArray[6];
    m[1][3] = pArray[7];

    m[2][0] = pArray[8];
    m[2][1] = pArray[9];
    m[2][2] = pArray[10];
    m[2][3] = pArray[11];

    m[3][0] = pArray[12];
    m[3][1] = pArray[13];
    m[3][2] = pArray[14];
    m[3][3] = pArray[15];
}

//------------------------------------------------------------------------------

inline XMFLOAT4X4& XMFLOAT4X4::operator=
(
    const XMFLOAT4X4& Float4x4
)
{
    XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._11);
    XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._21);
    XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._31);
    XMVECTOR V4 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._41);

    XMStoreFloat4((XMFLOAT4*)&_11, V1);
    XMStoreFloat4((XMFLOAT4*)&_21, V2);
    XMStoreFloat4((XMFLOAT4*)&_31, V3);
    XMStoreFloat4((XMFLOAT4*)&_41, V4);

    return *this;
}

//------------------------------------------------------------------------------

inline XMFLOAT4X4A& XMFLOAT4X4A::operator=
(
    const XMFLOAT4X4A& Float4x4
)
{
    XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._11);
    XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._21);
    XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._31);
    XMVECTOR V4 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._41);

    XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
    XMStoreFloat4A((XMFLOAT4A*)&_21, V2);
    XMStoreFloat4A((XMFLOAT4A*)&_31, V3);
    XMStoreFloat4A((XMFLOAT4A*)&_41, V4);

    return *this;
}