;-----------------------------------------------------------------------------
; Copyright (c) 2012 GarageGames, LLC
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this software and associated documentation files (the "Software"), to
; deal in the Software without restriction, including without limitation the
; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
; sell copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
; IN THE SOFTWARE.
;-----------------------------------------------------------------------------

;
; NASM version of optimized funcs in mMath_C
;

; The following funcs are included:
;  m_ceil_ASM, m_ceilD_ASM, m_floor_ASM, m_floorD_ASM
;  m_fmod_ASM, m_fmodD_ASM, m_mulDivS32_ASM, m_mulDivU32_ASM
;  m_sincos_ASM, m_sincosD_ASM

; The other funcs from mMath_C were determined to compile into fast
;  code using MSVC --Paul Bowman


segment .data

; Scratch storage and floating-point constants shared by the routines in
; this file.

temp_int64      dq      0                   ; 64-bit integer slot: written by fistp, read back by fild
const_0pt5_D    dq      0.4999999999995     ; F64 bias just under 0.5 (aliased as rnd_adjD)
temp_int32      dd      0                   ; 32-bit scratch slot (not referenced by the code in this file)
const_0pt5      dd      0.49999995          ; F32 bias just under 0.5 (aliased as rnd_adj)
const_neg1      dd      -1.0                ; multiplier the fmod routines use to negate their result


segment .text

;
; export_fn <name>
;
; Declares <name> as a global symbol and places its entry label at the point
; of use.  ELF object files get the bare name; every other output format gets
; a leading underscore on both the global declaration and the label.
;
; "-f elf" is an alias for elf32 and NASM may report the active format under
; either spelling, so both are accepted as ELF here.  Matching only "elf"
; makes ELF builds fall through to the underscore branch and export symbols
; the C side cannot resolve.
;
%macro export_fn 1
   %ifidn __OUTPUT_FORMAT__, elf
   ; No underscore needed for ELF object files
   global %1
   %1:
   %elifidn __OUTPUT_FORMAT__, elf32
   ; Same as above: elf32 is the canonical name behind the elf alias
   global %1
   %1:
   %else
   global _%1
   _%1:
   %endif
%endmacro

; Rounding biases (values just under 0.5) stored in the data segment.
%define rnd_adjD    qword [const_0pt5_D]
%define rnd_adj     dword [const_0pt5]

; First stack argument, viewed as a 32-bit or a 64-bit operand.
%define val         dword [esp+4]
%define val64       qword [esp+4]

;
; static F32 m_ceil_ASM(F32 val)
;
; In:    [esp+4] = val
; Out:   st0     = (val + rnd_adj) converted to an integer and reloaded
; Uses:  temp_int64 as scratch storage
;
; NOTE(review): fistp rounds using the current x87 rounding control; the
; bias trick presumably relies on round-to-nearest being active -- confirm.
;
export_fn m_ceil_ASM
        fld     val                         ; st0 = val
        fadd    rnd_adj                     ; push the value up by just under 0.5
        fistp   qword [temp_int64]          ; convert to integer, pop
        fild    qword [temp_int64]          ; st0 = integer result
        ret

;
; static F64 m_ceilD_ASM(F64 val64)
;
; In:    [esp+4] = val64 (8 bytes)
; Out:   st0     = (val64 + rnd_adjD) converted to an integer and reloaded
; Uses:  temp_int64 as scratch storage
;
; NOTE(review): the conversion follows the current x87 rounding control;
; round-to-nearest is presumably assumed -- confirm.
;
export_fn m_ceilD_ASM
        fld     val64                       ; st0 = val64
        fadd    rnd_adjD                    ; push the value up by just under 0.5
        fistp   qword [temp_int64]          ; convert to integer, pop
        fild    qword [temp_int64]          ; st0 = integer result
        ret

;
; static F32 m_floor_ASM(F32 val)
;
; In:    [esp+4] = val
; Out:   st0     = (val - rnd_adj) converted to an integer and reloaded
; Uses:  temp_int64 as scratch storage
;
; NOTE(review): the conversion follows the current x87 rounding control;
; round-to-nearest is presumably assumed -- confirm.
;
export_fn m_floor_ASM
        fld     val                         ; st0 = val
        fsub    rnd_adj                     ; pull the value down by just under 0.5
        fistp   qword [temp_int64]          ; convert to integer, pop
        fild    qword [temp_int64]          ; st0 = integer result
        ret


;
; static F32 m_floorD_ASM( F64 val64 )
;
; In:    [esp+4] = val64 (8 bytes)
; Out:   st0     = (val64 - rnd_adjD) converted to an integer and reloaded
; Uses:  temp_int64 as scratch storage
;
; NOTE(review): the signature above lists an F32 return while m_ceilD_ASM
; lists F64; the result is left in st0 either way, so F64 is presumably
; what was meant -- confirm against the C declaration.
; NOTE(review): the conversion follows the current x87 rounding control;
; round-to-nearest is presumably assumed -- confirm.
;
export_fn m_floorD_ASM
        fld     val64                       ; st0 = val64
        fsub    rnd_adjD                    ; pull the value down by just under 0.5
        fistp   qword [temp_int64]          ; convert to integer, pop
        fild    qword [temp_int64]          ; st0 = integer result
        ret


; The three 32-bit stack arguments shared by the mulDiv routines.
%define arg_a       dword [esp+4]
%define arg_b       dword [esp+8]
%define arg_c       dword [esp+12]

;
; static S32 m_mulDivS32_ASM( S32 a, S32 b, S32 c )
;
; Equivalent C:  return (S32) ((S64)a*(S64)b) / (S64)c;
;
; In:    [esp+4] = a, [esp+8] = b, [esp+12] = c
; Out:   eax     = (a * b) / c, with the product held in edx:eax
; Clobb: edx, flags
;
; Note: the results differ from (but are more correct than) the C version,
; which must be overflowing: with a, b and c all equal to 1 million the C
; code returns -727, whereas this routine returns 1 million.
;
export_fn m_mulDivS32_ASM
        mov     eax, arg_a                  ; eax = a
        imul    arg_b                       ; edx:eax = a * b (signed)
        idiv    arg_c                       ; eax = edx:eax / c (signed)
        ret

;
; static U32 m_mulDivU32_ASM( U32 a, U32 b, U32 c )
;
; In:    [esp+4] = a, [esp+8] = b, [esp+12] = c
; Out:   eax     = (a * b) / c, with the product held in edx:eax
; Clobb: edx, flags
;
; Note: as with the signed variant, the C version overflows where this
; routine does not.
;
export_fn m_mulDivU32_ASM
        mov     eax, arg_a                  ; eax = a
        mul     arg_b                       ; edx:eax = a * b (unsigned)
        div     arg_c                       ; eax = edx:eax / c (unsigned)
        ret


; Second F32 stack argument; the first one reuses the val alias ([esp+4])
; defined earlier in this file.
%define     modulo  dword [esp+8]


;
; static F32 m_fmod_ASM(F32 val, F32 modulo)
;
; Computes frac(|val| / |modulo|) * |modulo| and then gives the result the
; sign of val.
;
; In:    [esp+4] = val, [esp+8] = modulo
; Out:   st0     = result
; Clobb: eax, flags
; Uses:  temp_int64 as scratch storage
;
; NOTE(review): the integer part is taken by subtracting rnd_adj and letting
; fistp round; that presumably assumes round-to-nearest -- confirm.
;
export_fn m_fmod_ASM
        mov     eax, val                    ; raw bits of val, kept for the sign test
        fld     modulo
        fabs                                ; st0 = |modulo|
        fld     val
        fabs                                ; st0 = |val|, st1 = |modulo|
        fdiv    st0, st1                    ; st0 = q = |val| / |modulo|
        fld     st0                         ; st0 = q, st1 = q, st2 = |modulo|
        fsub    rnd_adj                     ; bias q down by just under 0.5
        fistp   qword [temp_int64]          ; store integer part, pop
        fild    qword [temp_int64]          ; st0 = integer part, st1 = q
        fsubp   st1, st0                    ; st0 = q - integer part
        fmulp   st1, st0                    ; st0 = fraction * |modulo|

        ; The float's sign bit is the integer's top bit, so any bit pattern
        ; above 0x80000000 (unsigned) is a negative value.  0x80000000
        ; itself (-0.0) is deliberately left on the non-negative path.
        cmp     eax, 0x80000000
        jbe     .done

        fmul    dword [const_neg1]          ; val was negative: negate the result

.done:
        ret


; F64 argument layout: val occupies [esp+4..11], modulo occupies [esp+12..19].
; val64hi is the upper dword of val, which carries its sign bit.
%define val64hi     dword [esp+8]
%define val64       qword [esp+4]
%define modulo64    qword [esp+12]

;
; static F32 m_fmodD_ASM(F64 val, F64 modulo)
;
; Computes frac(|val| / |modulo|) * |modulo| and then gives the result the
; sign of val.
;
; In:    [esp+4] = val (8 bytes), [esp+12] = modulo (8 bytes)
; Out:   st0     = result
; Clobb: eax, flags
; Uses:  temp_int64 as scratch storage
;
; NOTE(review): the signature above lists an F32 return for a routine that
; takes F64 operands; the result is left in st0 either way, so F64 is
; presumably what was meant -- confirm against the C declaration.
; NOTE(review): the integer part is taken by subtracting rnd_adjD and
; letting fistp round; that presumably assumes round-to-nearest -- confirm.
;
export_fn m_fmodD_ASM
        mov     eax, val64hi                ; upper dword of val, kept for the sign test
        fld     modulo64
        fabs                                ; st0 = |modulo|
        fld     val64
        fabs                                ; st0 = |val|, st1 = |modulo|
        fdiv    st0, st1                    ; st0 = q = |val| / |modulo|
        fld     st0                         ; st0 = q, st1 = q, st2 = |modulo|
        fsub    rnd_adjD                    ; bias q down by just under 0.5
        fistp   qword [temp_int64]          ; store integer part, pop
        fild    qword [temp_int64]          ; st0 = integer part, st1 = q
        fsubp   st1, st0                    ; st0 = q - integer part
        fmulp   st1, st0                    ; st0 = fraction * |modulo|

        ; Only the upper dword of the double is in eax, so comparing it
        ; against 0x80000000 is not enough: a negative value whose upper
        ; dword is exactly 0x80000000 but whose lower dword is non-zero
        ; would be taken for non-negative.  Test the sign bit itself.
        ; For val = -0.0 this makes the result -0.0 rather than +0.0.
        test    eax, eax
        jns     .done

        fmul    dword [const_neg1]          ; val was negative: negate the result

.done:
        ret

	 
; Stack arguments of the F32 sincos routine.
%define angle       dword [esp+4]
%define res_sin     dword [esp+8]
%define res_cos     dword [esp+12]

;
; static void m_sincos_ASM( F32 angle, F32 *s, F32 *c )
;
; In:    [esp+4] = angle, [esp+8] = s, [esp+12] = c
; Out:   *c receives the value on top of the FPU stack after fsincos,
;        *s receives the value beneath it; both are popped
; Clobb: eax
;
; NOTE(review): fsincos has a limited operand range according to the Intel
; manual and nothing here checks for it -- confirm callers stay inside it.
;
export_fn m_sincos_ASM
        fld     angle
        fsincos                             ; pushes a second result onto the FPU stack
        mov     eax, res_cos
        fstp    dword [eax]                 ; *c = top of stack, pop
        mov     eax, res_sin
        fstp    dword [eax]                 ; *s = next value, pop
        ret


; Stack arguments of the F64 sincos routine; the 8-byte angle pushes the
; two pointers to offsets 12 and 16.
%define angle64     qword [esp+4]
%define res_sin64   dword [esp+12]
%define res_cos64   dword [esp+16]

;
; static void m_sincosD_ASM( F64 angle, F64 *s, F64 *c )
;
; In:    [esp+4] = angle (8 bytes), [esp+12] = s, [esp+16] = c
; Out:   *c receives the value on top of the FPU stack after fsincos,
;        *s receives the value beneath it; both are popped
; Clobb: eax
;
; NOTE(review): fsincos has a limited operand range according to the Intel
; manual and nothing here checks for it -- confirm callers stay inside it.
;
export_fn m_sincosD_ASM
        fld     angle64
        fsincos                             ; pushes a second result onto the FPU stack
        mov     eax, res_cos64
        fstp    qword [eax]                 ; *c = top of stack, pop
        mov     eax, res_sin64
        fstp    qword [eax]                 ; *s = next value, pop
        ret