; mMath_ASM.asm
  1. ;-----------------------------------------------------------------------------
  2. ; Copyright (c) 2013 GarageGames, LLC
  3. ;
  4. ; Permission is hereby granted, free of charge, to any person obtaining a copy
  5. ; of this software and associated documentation files (the "Software"), to
  6. ; deal in the Software without restriction, including without limitation the
  7. ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8. ; sell copies of the Software, and to permit persons to whom the Software is
  9. ; furnished to do so, subject to the following conditions:
  10. ;
  11. ; The above copyright notice and this permission notice shall be included in
  12. ; all copies or substantial portions of the Software.
  13. ;
  14. ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20. ; IN THE SOFTWARE.
  21. ;-----------------------------------------------------------------------------
  22. ;
  23. ; NASM version of optimized funcs in mMath_C
  24. ;
  25. ; The following funcs are included:
  26. ; m_ceil_ASM, m_ceilD_ASM, m_floor_ASM, m_floorD_ASM
  27. ; m_fmod_ASM, m_fmodD_ASM, m_mulDivS32_ASM, m_mulDivU32_ASM
  28. ; m_sincos_ASM, m_sincosD_ASM
  29. ; The other funcs from mMath_C were determined to compile into fast
  30. ; code using MSVC --Paul Bowman
  ;-----------------------------------------------------------------------------
  ; Writable data: integer scratch slots and the rounding constants.
  ;-----------------------------------------------------------------------------
  section .data

  temp_int64:     dq 0.0                  ; 8-byte scratch slot (all-zero bits)
  const_0pt5_D:   dq 0.4999999999995      ; F64 rounding bias, just under 0.5
  temp_int32:     dd 0                    ; 4-byte scratch slot
  const_0pt5:     dd 0.49999995           ; F32 rounding bias, just under 0.5
  const_neg1:     dd -1.0                 ; F32 -1.0

  section .text

  ; Operand shorthands. The esp-relative ones are only valid while esp still
  ; points at the return address, i.e. before any push or stack adjustment.
  %define rnd_adjD  qword [const_0pt5_D]  ; F64 rounding bias
  %define rnd_adj   dword [const_0pt5]    ; F32 rounding bias
  %define val       dword [esp+4]         ; first argument viewed as F32
  %define val64     qword [esp+4]         ; first argument viewed as F64
  ;-----------------------------------------------------------------------------
  ; static F32 m_ceil_ASM(F32 val)
  ;
  ; ABI:    32-bit stack-argument convention; val is read from [esp+4] and the
  ;         result is left in st0.
  ; Method: add rnd_adj (a constant just under 0.5), convert to a 64-bit
  ;         integer and convert back, so the FPU's rounding step lands on the
  ;         ceiling. Inputs whose distance above an integer is smaller than
  ;         (0.5 - rnd_adj) round down to that integer.
  ;         NOTE(review): presumes the FPU control word selects
  ;         round-to-nearest -- confirm with callers.
  ; Stack:  the integer round trip uses 8 bytes reserved on the stack rather
  ;         than the shared temp_int64 variable, so concurrent or re-entrant
  ;         calls cannot overwrite each other's intermediate value.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_ceil_ASM
  m_ceil_ASM:
  %else
  global _m_ceil_ASM
  _m_ceil_ASM:
  %endif
          fld     val                     ; st0 = val (read before esp moves)
          fadd    rnd_adj                 ; st0 = val + bias
          sub     esp, 8                  ; reserve a private 64-bit scratch slot
          fistp   qword [esp]             ; scratch = st0 rounded to S64; pops st0
          fild    qword [esp]             ; st0 = scratch converted back to float
          add     esp, 8                  ; release the scratch slot
          ret
  ;-----------------------------------------------------------------------------
  ; static F64 m_ceilD_ASM(F64 val64)
  ;
  ; ABI:    32-bit stack-argument convention; val64 is read from [esp+4] and
  ;         the result is left in st0.
  ; Method: add rnd_adjD (a constant just under 0.5), convert to a 64-bit
  ;         integer and convert back, so the FPU's rounding step lands on the
  ;         ceiling. Inputs whose distance above an integer is smaller than
  ;         (0.5 - rnd_adjD) round down to that integer.
  ;         NOTE(review): presumes the FPU control word selects
  ;         round-to-nearest -- confirm with callers.
  ; Stack:  the integer round trip uses 8 bytes reserved on the stack rather
  ;         than the shared temp_int64 variable, so concurrent or re-entrant
  ;         calls cannot overwrite each other's intermediate value.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_ceilD_ASM
  m_ceilD_ASM:
  %else
  global _m_ceilD_ASM
  _m_ceilD_ASM:
  %endif
          fld     val64                   ; st0 = val64 (read before esp moves)
          fadd    rnd_adjD                ; st0 = val64 + bias
          sub     esp, 8                  ; reserve a private 64-bit scratch slot
          fistp   qword [esp]             ; scratch = st0 rounded to S64; pops st0
          fild    qword [esp]             ; st0 = scratch converted back to float
          add     esp, 8                  ; release the scratch slot
          ret
  ;-----------------------------------------------------------------------------
  ; static F32 m_floor_ASM(F32 val)
  ;
  ; ABI:    32-bit stack-argument convention; val is read from [esp+4] and the
  ;         result is left in st0.
  ; Method: subtract rnd_adj (a constant just under 0.5), convert to a 64-bit
  ;         integer and convert back, so the FPU's rounding step lands on the
  ;         floor. Inputs whose distance below an integer is smaller than
  ;         (0.5 - rnd_adj) round up to that integer.
  ;         NOTE(review): presumes the FPU control word selects
  ;         round-to-nearest -- confirm with callers.
  ; Stack:  the integer round trip uses 8 bytes reserved on the stack rather
  ;         than the shared temp_int64 variable, so concurrent or re-entrant
  ;         calls cannot overwrite each other's intermediate value.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_floor_ASM
  m_floor_ASM:
  %else
  global _m_floor_ASM
  _m_floor_ASM:
  %endif
          fld     val                     ; st0 = val (read before esp moves)
          fsub    rnd_adj                 ; st0 = val - bias
          sub     esp, 8                  ; reserve a private 64-bit scratch slot
          fistp   qword [esp]             ; scratch = st0 rounded to S64; pops st0
          fild    qword [esp]             ; st0 = scratch converted back to float
          add     esp, 8                  ; release the scratch slot
          ret
  ;-----------------------------------------------------------------------------
  ; static F64 m_floorD_ASM( F64 val64 )
  ;
  ; NOTE(review): the return type is written as F64 to match the F64 argument
  ;         and m_ceilD_ASM -- confirm against the C declaration.
  ; ABI:    32-bit stack-argument convention; val64 is read from [esp+4] and
  ;         the result is left in st0.
  ; Method: subtract rnd_adjD (a constant just under 0.5), convert to a 64-bit
  ;         integer and convert back, so the FPU's rounding step lands on the
  ;         floor. Inputs whose distance below an integer is smaller than
  ;         (0.5 - rnd_adjD) round up to that integer.
  ;         NOTE(review): presumes the FPU control word selects
  ;         round-to-nearest -- confirm with callers.
  ; Stack:  the integer round trip uses 8 bytes reserved on the stack rather
  ;         than the shared temp_int64 variable, so concurrent or re-entrant
  ;         calls cannot overwrite each other's intermediate value.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_floorD_ASM
  m_floorD_ASM:
  %else
  global _m_floorD_ASM
  _m_floorD_ASM:
  %endif
          fld     val64                   ; st0 = val64 (read before esp moves)
          fsub    rnd_adjD                ; st0 = val64 - bias
          sub     esp, 8                  ; reserve a private 64-bit scratch slot
          fistp   qword [esp]             ; scratch = st0 rounded to S64; pops st0
          fild    qword [esp]             ; st0 = scratch converted back to float
          add     esp, 8                  ; release the scratch slot
          ret
  ; Stack-argument shorthands shared by the two mulDiv routines; valid while
  ; esp still points at the return address.
  %define arg_a dword [esp+4]
  %define arg_b dword [esp+8]
  %define arg_c dword [esp+12]

  ;-----------------------------------------------------------------------------
  ; static S32 m_mulDivS32_ASM( S32 a, S32 b, S32 c )
  ;
  ; Signed a * b / c with a 64-bit intermediate product:
  ;     return (S32) ((S64)a*(S64)b) / (S64)c;
  ;
  ; Original author's note: this returns different (but correct) values than
  ; the C version, which must be overflowing -- with a, b and c all equal to
  ; 1 million the C code returns -727, while this routine returns 1 million.
  ;
  ; Out:   eax = quotient (edx is left holding the remainder).
  ; Fault: idiv raises a divide error when c is 0 or when the quotient does
  ;        not fit in 32 bits.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_mulDivS32_ASM
  m_mulDivS32_ASM:
  %else
  global _m_mulDivS32_ASM
  _m_mulDivS32_ASM:
  %endif
          mov     eax, arg_b              ; eax = b
          imul    arg_a                   ; edx:eax = b * a (signed 64-bit product)
          idiv    arg_c                   ; eax = edx:eax / c, edx = remainder
          ret
  ;-----------------------------------------------------------------------------
  ; static U32 m_mulDivU32_ASM( U32 a, U32 b, U32 c )
  ;
  ; Unsigned a * b / c with a 64-bit intermediate product.
  ; Original author's note: as with the signed variant, the C version
  ; overflows.
  ;
  ; Out:   eax = quotient (edx is left holding the remainder).
  ; Fault: div raises a divide error when c is 0 or when the quotient does
  ;        not fit in 32 bits.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_mulDivU32_ASM
  m_mulDivU32_ASM:
  %else
  global _m_mulDivU32_ASM
  _m_mulDivU32_ASM:
  %endif
          mov     eax, arg_b              ; eax = b
          mul     arg_a                   ; edx:eax = b * a (unsigned 64-bit product)
          div     arg_c                   ; eax = edx:eax / c, edx = remainder
          ret
  ; val is already defined above to be esp+4
  %define modulo dword [esp+8]

  ;-----------------------------------------------------------------------------
  ; static F32 m_fmod_ASM(F32 val, F32 modulo)
  ;
  ; ABI:    32-bit stack-argument convention; val at [esp+4], modulo at
  ;         [esp+8]; the result is left in st0. eax is clobbered.
  ; Method: q = |val| / |modulo|; the whole part of q is obtained by
  ;         subtracting rnd_adj (just under 0.5) and rounding through a 64-bit
  ;         integer; the result is (q - whole) * |modulo|, negated when val is
  ;         negative.
  ;         NOTE(review): presumes the FPU control word selects
  ;         round-to-nearest -- confirm with callers.
  ; Stack:  the integer round trip uses 8 bytes reserved on the stack rather
  ;         than the shared temp_int64 variable, so concurrent or re-entrant
  ;         calls cannot overwrite each other's intermediate value. Both
  ;         esp-relative arguments are read before esp is adjusted.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_fmod_ASM
  m_fmod_ASM:
  %else
  global _m_fmod_ASM
  _m_fmod_ASM:
  %endif
          mov     eax, val                ; eax = raw bits of val (sign in bit 31)
          fld     modulo
          fabs                            ; st0 = |modulo|
          fld     val
          fabs                            ; st0 = |val|, st1 = |modulo|
          fdiv    st0, st1                ; st0 = q = |val| / |modulo|
          fld     st0                     ; duplicate q
          fsub    rnd_adj                 ; st0 = q - bias
          sub     esp, 8                  ; reserve a private 64-bit scratch slot
          fistp   qword [esp]             ; scratch = st0 rounded to S64; pops st0
          fild    qword [esp]             ; st0 = whole part of q, st1 = q
          add     esp, 8                  ; release the scratch slot
          fsubp   st1, st0                ; st0 = q - whole, st1 = |modulo|
          fmulp   st1, st0                ; st0 = (q - whole) * |modulo|
          ; The sign bit can be read as the integer high bit. Bits exactly equal
          ; to 0x80000000 (negative zero) are deliberately treated as not
          ; negative, hence the unsigned "below or equal" test.
          cmp     eax, 0x80000000
          jbe     .not_negative
          fchs                            ; val was negative: negate the result
  .not_negative:
          ret
  %define val64hi  dword [esp+8]          ; high dword of val (holds the sign bit)
  %define val64    qword [esp+4]
  %define modulo64 qword [esp+12]

  ;-----------------------------------------------------------------------------
  ; static F64 m_fmodD_ASM(F64 val, F64 modulo)
  ;
  ; NOTE(review): the return type is written as F64 to match the F64
  ;         arguments -- confirm against the C declaration.
  ; ABI:    32-bit stack-argument convention; val at [esp+4], modulo at
  ;         [esp+12]; the result is left in st0. eax is clobbered.
  ; Method: q = |val| / |modulo|; the whole part of q is obtained by
  ;         subtracting rnd_adjD (just under 0.5) and rounding through a
  ;         64-bit integer; the result is (q - whole) * |modulo|, negated when
  ;         val is negative.
  ;         NOTE(review): presumes the FPU control word selects
  ;         round-to-nearest -- confirm with callers.
  ; Stack:  the integer round trip uses 8 bytes reserved on the stack rather
  ;         than the shared temp_int64 variable, so concurrent or re-entrant
  ;         calls cannot overwrite each other's intermediate value. Both
  ;         esp-relative arguments are read before esp is adjusted.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_fmodD_ASM
  m_fmodD_ASM:
  %else
  global _m_fmodD_ASM
  _m_fmodD_ASM:
  %endif
          mov     eax, val64hi            ; eax = high dword of val (sign in bit 31)
          fld     modulo64
          fabs                            ; st0 = |modulo|
          fld     val64
          fabs                            ; st0 = |val|, st1 = |modulo|
          fdiv    st0, st1                ; st0 = q = |val| / |modulo|
          fld     st0                     ; duplicate q
          fsub    rnd_adjD                ; st0 = q - bias
          sub     esp, 8                  ; reserve a private 64-bit scratch slot
          fistp   qword [esp]             ; scratch = st0 rounded to S64; pops st0
          fild    qword [esp]             ; st0 = whole part of q, st1 = q
          add     esp, 8                  ; release the scratch slot
          fsubp   st1, st0                ; st0 = q - whole, st1 = |modulo|
          fmulp   st1, st0                ; st0 = (q - whole) * |modulo|
          ; The sign bit can be read as the integer high bit of the upper dword.
          ; An upper dword exactly equal to 0x80000000 is deliberately treated
          ; as not negative, hence the unsigned "below or equal" test.
          cmp     eax, 0x80000000
          jbe     .not_negative
          fchs                            ; val was negative: negate the result
  .not_negative:
          ret
  %define angle   dword [esp+4]           ; F32 angle
  %define res_sin dword [esp+8]           ; F32 *s
  %define res_cos dword [esp+12]          ; F32 *c

  ;-----------------------------------------------------------------------------
  ; static void m_sincos_ASM( F32 angle, F32 *s, F32 *c )
  ;
  ; Computes the sine and cosine of angle with a single fsincos and stores
  ; them as F32 through s and c respectively.
  ;
  ; ABI:   32-bit stack-argument convention; nothing is returned.
  ; Clobb: eax (ends up holding s); the FPU stack is left as it was found.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_sincos_ASM
  m_sincos_ASM:
  %else
  global _m_sincos_ASM
  _m_sincos_ASM:
  %endif
          fld     angle                   ; st0 = angle
          fsincos                         ; st0 = cos(angle), st1 = sin(angle)
          mov     eax, res_cos
          fstp    dword [eax]             ; *c = cosine; sine moves up to st0
          mov     eax, res_sin
          fstp    dword [eax]             ; *s = sine
          ret
  %define angle64   qword [esp+4]         ; F64 angle (occupies 8 bytes)
  %define res_sin64 dword [esp+12]        ; F64 *s
  %define res_cos64 dword [esp+16]        ; F64 *c

  ;-----------------------------------------------------------------------------
  ; static void m_sincosD_ASM( F64 angle, F64 *s, F64 *c )
  ;
  ; Computes the sine and cosine of angle with a single fsincos and stores
  ; them as F64 through s and c respectively.
  ;
  ; ABI:   32-bit stack-argument convention; nothing is returned.
  ; Clobb: eax (ends up holding s); the FPU stack is left as it was found.
  ;-----------------------------------------------------------------------------
  %ifdef __linux
  global m_sincosD_ASM
  m_sincosD_ASM:
  %else
  global _m_sincosD_ASM
  _m_sincosD_ASM:
  %endif
          fld     angle64                 ; st0 = angle
          fsincos                         ; st0 = cos(angle), st1 = sin(angle)
          mov     eax, res_cos64
          fstp    qword [eax]             ; *c = cosine; sine moves up to st0
          mov     eax, res_sin64
          fstp    qword [eax]             ; *s = sine
          ret