  ; mMath_ASM.asm
  1. ;-----------------------------------------------------------------------------
  2. ; Copyright (c) 2012 GarageGames, LLC
  3. ;
  4. ; Permission is hereby granted, free of charge, to any person obtaining a copy
  5. ; of this software and associated documentation files (the "Software"), to
  6. ; deal in the Software without restriction, including without limitation the
  7. ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8. ; sell copies of the Software, and to permit persons to whom the Software is
  9. ; furnished to do so, subject to the following conditions:
  10. ;
  11. ; The above copyright notice and this permission notice shall be included in
  12. ; all copies or substantial portions of the Software.
  13. ;
  14. ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20. ; IN THE SOFTWARE.
  21. ;-----------------------------------------------------------------------------
  ;
  ; NASM version of the optimized functions in mMath_C.
  ;
  ; The following functions are included:
  ;   m_ceil_ASM, m_ceilD_ASM, m_floor_ASM, m_floorD_ASM,
  ;   m_fmod_ASM, m_fmodD_ASM, m_mulDivS32_ASM, m_mulDivU32_ASM,
  ;   m_sincos_ASM, m_sincosD_ASM
  ;
  ; The other functions from mMath_C were determined to compile into fast
  ; code using MSVC. --Paul Bowman
  section .data

  ; 64-bit scratch slot. The ceil/floor/fmod routines store an integer here
  ; with FISTP and immediately reload it with FILD. It is a single static
  ; location shared by all of them, so concurrent callers can overwrite each
  ; other's value.
  temp_int64:     dq 0

  ; Rounding bias, just under 0.5, used by the F64 routines (rnd_adjD).
  const_0pt5_D:   dq 0.4999999999995

  ; 32-bit scratch slot.
  temp_int32:     dd 0

  ; Rounding bias, just under 0.5, used by the F32 routines (rnd_adj).
  const_0pt5:     dd 0.49999995

  ; Single-precision -1.0; the fmod routines multiply by it to make the
  ; result negative.
  const_neg1:     dd -1.0
  section .text

  ;
  ; export_fn <name>
  ;
  ; Declares <name> as a global symbol and emits its entry label at the
  ; current position.
  ;
  ;   ELF output      -> symbol is exported as   <name>
  ;   any other format-> symbol is exported as  _<name>
  ;
  ; The ELF check matches both "elf" and "elf32": depending on the NASM
  ; version, `-f elf` may be treated as an alias and __OUTPUT_FORMAT__ then
  ; expands to "elf32", which the single "elf" comparison would miss and
  ; wrongly fall through to the underscore-decorated name.
  ;
  %macro export_fn 1
  %ifidn __OUTPUT_FORMAT__, elf
          ; No underscore needed for ELF object files
          global  %1
  %1:
  %elifidn __OUTPUT_FORMAT__, elf32
          ; Same as above; "elf32" is the canonical name of the "elf" format
          global  %1
  %1:
  %else
          global  _%1
  _%1:
  %endif
  %endmacro
  ; Operand shorthands used by the routines below.
  ;
  ; val / val64 name the first stack argument as seen on function entry
  ; ([esp] holds the return address, so the argument starts at [esp+4]).
  ; They are only valid while esp has not been adjusted.
  %define val      dword [esp+4]
  %define val64    qword [esp+4]

  ; Rounding biases stored in the data segment.
  %define rnd_adj  dword [const_0pt5]
  %define rnd_adjD qword [const_0pt5_D]
  ;
  ; static F32 m_ceil_ASM(F32 val)
  ;
  ; In:    [esp+4] = val (single precision)
  ; Out:   ST0     = integer obtained from (val + const_0pt5)
  ; Uses:  temp_int64 (shared static scratch)
  ;
  ; The biased value is converted to an integer by FISTP and loaded back.
  ; FISTP rounds according to the current x87 rounding control; the
  ; "just under 0.5" bias produces a ceiling under round-to-nearest
  ; (assumed to be the active mode -- confirm callers never change it).
  ;
  export_fn m_ceil_ASM
          fld     dword [esp + 4]         ; ST0 = val
          fadd    dword [const_0pt5]      ; ST0 = val + bias
          fistp   qword [temp_int64]      ; store as 64-bit integer, pop
          fild    qword [temp_int64]      ; ST0 = integer result
          ret
  ;
  ; static F64 m_ceilD_ASM(F64 val64)
  ;
  ; In:    [esp+4] = val64 (double precision, 8 bytes)
  ; Out:   ST0     = integer obtained from (val64 + const_0pt5_D)
  ; Uses:  temp_int64 (shared static scratch)
  ;
  ; Double-precision twin of m_ceil_ASM: add the bias, let FISTP round to
  ; an integer, reload it. Depends on the x87 rounding control being
  ; round-to-nearest (assumed -- confirm).
  ;
  export_fn m_ceilD_ASM
          fld     qword [esp + 4]         ; ST0 = val64
          fadd    qword [const_0pt5_D]    ; ST0 = val64 + bias
          fistp   qword [temp_int64]      ; store as 64-bit integer, pop
          fild    qword [temp_int64]      ; ST0 = integer result
          ret
  ;
  ; static F32 m_floor_ASM(F32 val)
  ;
  ; In:    [esp+4] = val (single precision)
  ; Out:   ST0     = integer obtained from (val - const_0pt5)
  ; Uses:  temp_int64 (shared static scratch)
  ;
  ; Mirror image of m_ceil_ASM: the bias is subtracted instead of added,
  ; so FISTP's rounding lands on the floor under round-to-nearest
  ; (assumed to be the active x87 mode -- confirm).
  ;
  export_fn m_floor_ASM
          fld     dword [esp + 4]         ; ST0 = val
          fsub    dword [const_0pt5]      ; ST0 = val - bias
          fistp   qword [temp_int64]      ; store as 64-bit integer, pop
          fild    qword [temp_int64]      ; ST0 = integer result
          ret
  ;
  ; static F64 m_floorD_ASM(F64 val64)
  ;
  ; NOTE(review): the previous comment declared the return type as F32;
  ; that looks like a typo given the F64 sibling m_ceilD_ASM -- confirm
  ; against the C prototype. The code itself just leaves the result in ST0.
  ;
  ; In:    [esp+4] = val64 (double precision, 8 bytes)
  ; Out:   ST0     = integer obtained from (val64 - const_0pt5_D)
  ; Uses:  temp_int64 (shared static scratch)
  ;
  export_fn m_floorD_ASM
          fld     qword [esp + 4]         ; ST0 = val64
          fsub    qword [const_0pt5_D]    ; ST0 = val64 - bias
          fistp   qword [temp_int64]      ; store as 64-bit integer, pop
          fild    qword [temp_int64]      ; ST0 = integer result
          ret
  ; Stack arguments of the three-operand integer routines, as seen on entry.
  %define arg_a dword [esp+4]
  %define arg_b dword [esp+8]
  %define arg_c dword [esp+12]

  ;
  ; static S32 m_mulDivS32_ASM( S32 a, S32 b, S32 c )
  ;
  ; Computes (a * b) / c with a 64-bit signed intermediate product.
  ;
  ; In:    arg_a, arg_b, arg_c on the stack
  ; Out:   eax = quotient
  ; Clobb: edx (holds the remainder after IDIV), flags
  ;
  ; Note: this returns different (but correct) values than the C version,
  ; which must be overflowing: with a, b and c all equal to 1 million the
  ; C code returns -727, this version returns 1 million.
  ;   C reference:  return (S32) ((S64)a*(S64)b) / (S64)c;
  ;
  ; IDIV faults (#DE) when c is 0 or when the quotient does not fit in
  ; 32 bits; no check is made here.
  ;
  export_fn m_mulDivS32_ASM
          mov     eax, arg_a              ; eax = a
          imul    arg_b                   ; edx:eax = a * b (signed, 64-bit)
          idiv    arg_c                   ; eax = edx:eax / c
          ret
  ;
  ; static U32 m_mulDivU32_ASM( U32 a, U32 b, U32 c )
  ;
  ; Unsigned counterpart of m_mulDivS32_ASM: (a * b) / c with a 64-bit
  ; unsigned intermediate product.
  ;
  ; In:    arg_a, arg_b, arg_c on the stack
  ; Out:   eax = quotient
  ; Clobb: edx (holds the remainder after DIV), flags
  ;
  ; Note: again, the C version overflows where this one does not.
  ;
  ; DIV faults (#DE) when c is 0 or when the quotient does not fit in
  ; 32 bits; no check is made here.
  ;
  export_fn m_mulDivU32_ASM
          mov     eax, arg_a              ; eax = a
          mul     arg_b                   ; edx:eax = a * b (unsigned, 64-bit)
          div     arg_c                   ; eax = edx:eax / c
          ret
  ; First argument: val, already defined earlier in the file as dword [esp+4].
  ; Second argument:
  %define modulo dword [esp+8]

  ;
  ; static F32 m_fmod_ASM(F32 val, F32 modulo)
  ;
  ; In:    val at [esp+4], modulo at [esp+8]
  ; Out:   ST0 = frac(|val| / |modulo|) * |modulo|, negated when val is
  ;        negative
  ; Clobb: eax, flags
  ; Uses:  temp_int64 (shared static scratch)
  ;
  ; The whole part of the quotient is found with the same
  ; "subtract just under 0.5, FISTP, FILD" sequence the floor routines use.
  ;
  export_fn m_fmod_ASM
          mov     eax, val                ; raw bits of val, for the sign test below

          fld     modulo
          fabs                            ; ST0 = |modulo|
          fld     val
          fabs                            ; ST0 = |val|, ST1 = |modulo|
          fdiv    st0, st1                ; ST0 = q = |val| / |modulo|

          fld     st0                     ; duplicate q
          fsub    rnd_adj                 ; ST0 = q - bias
          fistp   qword [temp_int64]      ; integer part of q -> scratch, pop
          fild    qword [temp_int64]      ; ST0 = whole(q), ST1 = q

          fsubp   st1, st0                ; ST0 = q - whole(q)
          fmulp   st1, st0                ; ST0 = fraction * |modulo|

          ; The float's sign bit is the integer's top bit, so any bit pattern
          ; strictly above 0x80000000 (unsigned) is a negative, non-zero
          ; value. Exactly 0x80000000 is -0.0 and is left alone.
          cmp     eax, 0x80000000
          jbe     .notneg
          fmul    dword [const_neg1]      ; give the result val's sign
  .notneg:
          ret
  ; Stack layout for the double-precision fmod:
  ;   [esp+4]  val     (8 bytes; its upper dword, which holds the sign bit,
  ;                     is at [esp+8])
  ;   [esp+12] modulo  (8 bytes)
  %define val64hi  dword [esp+8]
  %define val64    qword [esp+4]
  %define modulo64 qword [esp+12]

  ;
  ; static F64 m_fmodD_ASM(F64 val, F64 modulo)
  ;
  ; NOTE(review): the previous comment declared the return type as F32;
  ; presumably a typo for F64 -- confirm against the C prototype. The code
  ; itself just leaves the result in ST0.
  ;
  ; In:    val at [esp+4], modulo at [esp+12]
  ; Out:   ST0 = frac(|val| / |modulo|) * |modulo|, negated when val is
  ;        negative
  ; Clobb: eax, flags
  ; Uses:  temp_int64 (shared static scratch)
  ;
  export_fn m_fmodD_ASM
          mov     eax, val64hi            ; upper 32 bits of val, for the sign test

          fld     modulo64
          fabs                            ; ST0 = |modulo|
          fld     val64
          fabs                            ; ST0 = |val|, ST1 = |modulo|
          fdiv    st0, st1                ; ST0 = q = |val| / |modulo|

          fld     st0                     ; duplicate q
          fsub    rnd_adjD                ; ST0 = q - bias
          fistp   qword [temp_int64]      ; integer part of q -> scratch, pop
          fild    qword [temp_int64]      ; ST0 = whole(q), ST1 = q

          fsubp   st1, st0                ; ST0 = q - whole(q)
          fmulp   st1, st0                ; ST0 = fraction * |modulo|

          ; The sign bit of the double is the top bit of its upper dword.
          ; Only upper-dword patterns strictly above 0x80000000 (unsigned)
          ; trigger the negation; an upper dword of exactly 0x80000000 is
          ; treated as non-negative.
          cmp     eax, 0x80000000
          jbe     .notnegD
          fmul    dword [const_neg1]      ; give the result val's sign
  .notnegD:
          ret
  ; Stack arguments of m_sincos_ASM, as seen on entry.
  %define angle   dword [esp+4]
  %define res_sin dword [esp+8]
  %define res_cos dword [esp+12]

  ;
  ; static void m_sincos_ASM( F32 angle, F32 *s, F32 *c )
  ;
  ; In:    angle at [esp+4], s at [esp+8], c at [esp+12]
  ; Out:   *s = sin(angle), *c = cos(angle), both stored as single precision
  ; Clobb: eax
  ;
  ; FSINCOS replaces ST0 with the sine and then pushes the cosine, so the
  ; cosine is on top and is stored (and popped) first.
  ;
  export_fn m_sincos_ASM
          mov     eax, res_cos            ; eax = c
          fld     angle
          fsincos                         ; ST0 = cos, ST1 = sin
          fstp    dword [eax]             ; *c = cos
          mov     eax, res_sin            ; eax = s
          fstp    dword [eax]             ; *s = sin
          ret
  ; Stack arguments of m_sincosD_ASM, as seen on entry. The 8-byte angle
  ; occupies [esp+4]..[esp+11], so the two pointers follow at +12 and +16.
  %define angle64   qword [esp+4]
  %define res_sin64 dword [esp+12]
  %define res_cos64 dword [esp+16]

  ;
  ; static void m_sincosD_ASM( F64 angle, F64 *s, F64 *c )
  ;
  ; In:    angle at [esp+4], s at [esp+12], c at [esp+16]
  ; Out:   *s = sin(angle), *c = cos(angle), both stored as double precision
  ; Clobb: eax
  ;
  ; FSINCOS leaves the cosine on top of the x87 stack with the sine beneath
  ; it, so the cosine is stored (and popped) first.
  ;
  export_fn m_sincosD_ASM
          mov     eax, res_cos64          ; eax = c
          fld     angle64
          fsincos                         ; ST0 = cos, ST1 = sin
          fstp    qword [eax]             ; *c = cos
          mov     eax, res_sin64          ; eax = s
          fstp    qword [eax]             ; *s = sin
          ret