123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 1999-2000 by the Free Pascal development team
- This file contains some helper routines for int64 and qword
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$Q- no overflow checking }
- {$R- no range checking }
- { Common division-by-zero exit for the 64-bit div/mod helpers in this file,
-   which jump here when the divisor is zero. Hands the error code and the
-   current frame (get_frame) to HandleErrorFrame. n and z are not read and
-   the function result is never assigned. }
- function div_qword_throwdivbyzero(n,z : qword) : qword;
- const
-   rte_division_by_zero = 200;
- begin
-   HandleErrorFrame(rte_division_by_zero,get_frame);
- end;
- {$define FPC_SYSTEM_HAS_DIV_INT64}
- function fpc_div_int64(n,z : int64) : int64;assembler;nostackframe;[public,alias: 'FPC_DIV_INT64']; compilerproc;
- { Signed 64-bit division: z is the dividend and n the divisor.
-   On entry n = [esp + 12] and z = [esp + 4]; after the three pushes below
-   every argument offset is written as 12+original.
-   Method: take the absolute value of both operands, divide unsigned, then
-   negate the quotient if the operand signs differ. The quotient is left in
-   edx:eax and the normal exit uses ret $16 to drop both arguments.
-   A zero divisor restores the saved registers and jumps to
-   div_qword_throwdivbyzero. }
- asm
- push %ebx
- push %esi
- push %edi { 12 bytes pushed, hence the 12+ offsets below }
- { the following piece of code is taken from the }
- { AMD Athlon Processor x86 Code Optimization manual }
- movl 12+16(%esp),%ecx { ecx = hi(n) }
- movl 12+12(%esp),%ebx { ebx = lo(n) }
- movl 12+8(%esp),%edx { edx = hi(z) }
- movl 12+4(%esp),%eax { eax = lo(z) }
- movl %ecx,%esi
- xorl %edx,%esi
- sarl $31,%esi { esi = -1 if n and z have different signs, else 0 }
- movl %edx,%edi
- sarl $31,%edi { edi = sign mask of z }
- xorl %edi,%eax
- xorl %edi,%edx
- subl %edi,%eax
- sbbl %edi,%edx { edx:eax = abs(z) }
- movl %ecx,%edi
- sarl $31,%edi { edi = sign mask of n }
- xorl %edi,%ebx
- xorl %edi,%ecx
- subl %edi,%ebx
- sbbl %edi,%ecx { ecx:ebx = abs(n); ZF reflects the new ecx }
- jnz .Lbigdivisor { abs(n) does not fit in 32 bits }
- cmpl %ebx,%edx
- jae .Ltwo_divs { hi(abs(z)) >= lo(abs(n)): one divl would overflow }
- divl %ebx { eax = quotient }
- .Lmake_sign_zero_hi:
- xorl %edx,%edx { quotient is held in eax alone; clear its high dword }
- .Lmake_sign:
- xorl %esi,%eax
- xorl %esi,%edx
- subl %esi,%eax
- sbbl %esi,%edx { negate edx:eax when esi = -1, leave it alone when 0 }
- pop %edi
- pop %esi
- pop %ebx
- ret $16
- .Ltwo_divs:
- test %ebx,%ebx { Zero division ends up here with ebx = 0. }
- jz .Ldivzero
- movl %eax,%ecx { ecx = lo(abs(z)) }
- movl %edx,%eax
- xorl %edx,%edx
- divl %ebx { eax = high dword of the quotient, edx = remainder }
- xchgl %ecx,%eax { ecx = high quotient dword, eax = lo(abs(z)) }
- divl %ebx { eax = low dword of the quotient }
- movl %ecx,%edx
- jmp .Lmake_sign
- .Lbigdivisor:
- movl %eax,12+4(%esp) { Reuse the n/z stack space: [12+4] = lo(abs(z)), }
- movl %ebx,12+8(%esp) { [12+8] = lo(abs(n)), }
- movl %edx,12+12(%esp) { [12+12] = hi(abs(z)). }
- movl %ecx,%edi { edi = hi(abs(n)) }
- shrl $1,%edx
- rcrl $1,%eax { edx:eax = abs(z) shr 1 }
- rorl $1,%edi
- rcrl $1,%ebx { ebx = low dword of abs(n) shr 1 }
- bsrl %ecx,%ecx { ecx = index of the highest set bit of hi(abs(n)) }
- shrdl %cl,%edi,%ebx
- shrdl %cl,%edx,%eax
- shrl %cl,%edx { both operands shifted right by cl more bits }
- roll $1,%edi { undo the rorl: edi = hi(abs(n)) again }
- divl %ebx { eax = quotient estimate from the scaled operands }
- movl 12+4(%esp),%ebx { ebx = lo(abs(z)) }
- movl %eax,%ecx { keep the estimate in ecx }
- imull %eax,%edi { edi = estimate * hi(abs(n)), low 32 bits }
- mull 12+8(%esp) { edx:eax = estimate * lo(abs(n)) }
- addl %edi,%edx { edx:eax = estimate * abs(n) }
- subl %eax,%ebx
- movl %ecx,%eax { eax = estimate; movl leaves the borrow in CF }
- movl 12+12(%esp),%ecx { ecx = hi(abs(z)) }
- sbbl %edx,%ecx { abs(z) - estimate * abs(n); CF set if negative }
- sbbl $0,%eax { estimate was one too large: subtract the borrow }
- jmp .Lmake_sign_zero_hi
- .Ldivzero:
- pop %edi
- pop %esi
- pop %ebx { saved registers restored before leaving }
- jmp div_qword_throwdivbyzero
- end;
- {$define FPC_SYSTEM_HAS_MOD_INT64}
- function fpc_mod_int64(n,z : int64) : int64;assembler;nostackframe;[public,alias: 'FPC_MOD_INT64']; compilerproc;
- { Signed 64-bit remainder: z is the dividend and n the divisor.
-   On entry n = [esp + 12] and z = [esp + 4]; after the three pushes below
-   every argument offset is written as 12+original.
-   Method: take the absolute value of both operands, compute the unsigned
-   remainder, then negate it if z was negative, so the result carries the
-   sign of the dividend. The remainder is left in edx:eax and the normal
-   exit uses ret $16 to drop both arguments.
-   A zero divisor restores the saved registers and jumps to
-   div_qword_throwdivbyzero. }
- asm
- push %ebx
- push %esi
- push %edi { 12 bytes pushed, hence the 12+ offsets below }
- { the following piece of code is taken from the }
- { AMD Athlon Processor x86 Code Optimization manual }
- movl 12+16(%esp),%ecx { ecx = hi(n) }
- movl 12+12(%esp),%ebx { ebx = lo(n) }
- movl 12+8(%esp),%edx { edx = hi(z) }
- movl 12+4(%esp),%eax { eax = lo(z) }
- movl %edx,%esi
- sarl $31,%esi { esi = sign mask of z; it decides the sign of the result }
- movl %edx,%edi
- sarl $31,%edi { edi = sign mask of z, used to form abs(z) }
- xorl %edi,%eax
- xorl %edi,%edx
- subl %edi,%eax
- sbbl %edi,%edx { edx:eax = abs(z) }
- movl %ecx,%edi
- sarl $31,%edi { edi = sign mask of n }
- xorl %edi,%ebx
- xorl %edi,%ecx
- subl %edi,%ebx
- sbbl %edi,%ecx { ecx:ebx = abs(n); ZF reflects the new ecx }
- jnz .Lbig_divisor { abs(n) does not fit in 32 bits }
- cmpl %ebx,%edx
- jae .Ltwo_divs { hi(abs(z)) >= lo(abs(n)): one divl would overflow }
- divl %ebx { edx = remainder }
- movl %edx,%eax
- movl %ecx,%edx { ecx is 0 here because the jnz above was not taken }
- .Lmake_sign:
- xorl %esi,%eax
- xorl %esi,%edx
- subl %esi,%eax
- sbbl %esi,%edx { negate edx:eax when esi = -1, leave it alone when 0 }
- pop %edi
- pop %esi
- pop %ebx
- ret $16
- .Ltwo_divs:
- test %ebx,%ebx { Zero division ends up here with ebx = 0. }
- jz .Ldivzero
- movl %eax,%ecx { ecx = lo(abs(z)) }
- movl %edx,%eax
- xorl %edx,%edx
- divl %ebx { edx = hi(abs(z)) mod lo(abs(n)) }
- movl %ecx,%eax
- divl %ebx { edx = final remainder }
- movl %edx,%eax
- xorl %edx,%edx { remainder is held in eax alone; clear the high dword }
- jmp .Lmake_sign
- .Lbig_divisor:
- movl %eax,12+4(%esp) { Reuse the n/z stack space: [12+4] = lo(abs(z)), }
- movl %ebx,12+8(%esp) { [12+8] = lo(abs(n)), }
- movl %edx,12+12(%esp) { [12+12] = hi(abs(z)), }
- movl %ecx,12+16(%esp) { [12+16] = hi(abs(n)). }
- movl %ecx,%edi { edi = hi(abs(n)) }
- shrl $1,%edx
- rcrl $1,%eax { edx:eax = abs(z) shr 1 }
- rorl $1,%edi
- rcrl $1,%ebx { ebx = low dword of abs(n) shr 1 }
- bsrl %ecx,%ecx { ecx = index of the highest set bit of hi(abs(n)) }
- shrdl %cl,%edi,%ebx
- shrdl %cl,%edx,%eax
- shrl %cl,%edx { both operands shifted right by cl more bits }
- roll $1,%edi { undo the rorl: edi = hi(abs(n)) again }
- divl %ebx { eax = quotient estimate from the scaled operands }
- movl 12+4(%esp),%ebx { ebx = lo(abs(z)) }
- movl %eax,%ecx { ecx is reloaded below before it is read again }
- imull %eax,%edi { edi = estimate * hi(abs(n)), low 32 bits }
- mull 12+8(%esp) { edx:eax = estimate * lo(abs(n)) }
- addl %edi,%edx { edx:eax = estimate * abs(n) }
- subl %eax,%ebx
- movl 12+12(%esp),%ecx { ecx = hi(abs(z)); movl leaves the borrow in CF }
- sbbl %edx,%ecx { ecx:ebx = abs(z) - estimate * abs(n) }
- sbbl %eax,%eax { eax = -1 if that difference went negative, else 0 }
- movl 12+16(%esp),%edx
- andl %eax,%edx { edx = hi(abs(n)) or 0 }
- andl 12+8(%esp),%eax { eax = lo(abs(n)) or 0 }
- addl %ebx,%eax
- adcl %ecx,%edx { add abs(n) back when the estimate was one too large }
- jmp .Lmake_sign
- .Ldivzero:
- pop %edi
- pop %esi
- pop %ebx { saved registers restored before leaving }
- jmp div_qword_throwdivbyzero
- end;
- {$define FPC_SYSTEM_HAS_DIV_QWORD}
- function fpc_div_qword(n,z : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_DIV_QWORD']; compilerproc;
- { Unsigned 64-bit division: z is the dividend and n the divisor.
-   On entry n = [esp + 12] and z = [esp + 4]. The quotient is left in
-   edx:eax. The two paths for a divisor below 2^32 return with ret $16;
-   a zero divisor jumps to div_qword_throwdivbyzero before anything has
-   been pushed.
-   NOTE(review): the big-divisor path runs into end without an explicit ret;
-   presumably the compiler supplies the return there - confirm. }
- asm
- { the following piece of code is taken from the }
- { AMD Athlon Processor x86 Code Optimization manual }
- movl 16(%esp),%ecx { ecx = hi(n) }
- test %ecx,%ecx
- jnz .Lqworddivbigdivisor { n does not fit in 32 bits }
- movl 12(%esp),%ecx { ecx = lo(n) }
- movl 8(%esp),%edx { edx = hi(z) }
- cmpl %ecx,%edx
- jae .Lqworddivtwo_divs { hi(z) >= lo(n): one divl would overflow }
- movl 4(%esp),%eax { eax = lo(z) }
- divl %ecx { eax = quotient }
- xorl %edx,%edx { quotient is held in eax alone; clear its high dword }
- ret $16
- .Lqworddivtwo_divs:
- test %ecx,%ecx { Zero division ends up here with ecx = 0. }
- jz div_qword_throwdivbyzero
- movl %edx,%eax
- xorl %edx,%edx
- divl %ecx { eax = hi(z) div lo(n), edx = remainder }
- push %eax { eax = future hi(result); remember }
- movl 4+4(%esp),%eax { eax = lo(z); 4+ accounts for the push above }
- divl %ecx { eax = low dword of the quotient }
- pop %edx { edx = high dword of the quotient }
- ret $16
- .Lqworddivbigdivisor:
- push %ebx
- push %esi
- push %edi { 12 bytes pushed, hence the 12+ offsets below }
- movl 12+12(%esp),%ebx { ebx = lo(n) }
- movl 12+8(%esp),%edx { edx = hi(z) }
- movl 12+4(%esp),%eax { eax = lo(z) }
- movl %ecx,%edi { edi = hi(n) }
- shrl $1,%edx
- rcrl $1,%eax { edx:eax = z shr 1 }
- rorl $1,%edi
- rcrl $1,%ebx { ebx = low dword of n shr 1 }
- bsrl %ecx,%ecx { ecx = index of the highest set bit of hi(n) }
- shrdl %cl,%edi,%ebx
- shrdl %cl,%edx,%eax
- shrl %cl,%edx { both operands shifted right by cl more bits }
- roll $1,%edi { undo the rorl: edi = hi(n) again }
- divl %ebx { eax = quotient estimate from the scaled operands }
- movl 12+4(%esp),%ebx { ebx = lo(z) }
- movl %eax,%esi // save quotient to esi
- imull %eax,%edi { edi = estimate * hi(n), low 32 bits }
- mull 12+12(%esp) { edx:eax = estimate * lo(n) }
- addl %edi,%edx
- setcb %cl // cl:edx:eax = 65 bits quotient*divisor
- movl 12+8(%esp),%edi // edi:ebx = dividend
- subl %eax,%ebx
- movb $0,%al { zero byte for the top borrow step; movb leaves CF alone }
- sbbl %edx,%edi
- sbbb %cl,%al { CF set if the 65-bit product exceeds the dividend }
- sbbl $0,%esi { in that case the estimate was one too large }
- xorl %edx,%edx { quotient is held in one dword; clear the high dword }
- movl %esi,%eax
- pop %edi
- pop %esi
- pop %ebx
- end;
- {$define FPC_SYSTEM_HAS_MOD_QWORD}
- function fpc_mod_qword(n,z : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MOD_QWORD']; compilerproc;
- { Unsigned 64-bit remainder: z is the dividend and n the divisor.
-   On entry n = [esp + 12] and z = [esp + 4]. The remainder is left in
-   edx:eax. The two paths for a divisor below 2^32 return with ret $16;
-   a zero divisor jumps to div_qword_throwdivbyzero before anything has
-   been pushed.
-   NOTE(review): the big-divisor path runs into end without an explicit ret;
-   presumably the compiler supplies the return there - confirm. }
- asm
- { the following piece of code is taken from the }
- { AMD Athlon Processor x86 Code Optimization manual }
- movl 16(%esp),%ecx { ecx = hi(n) }
- movl 8(%esp),%edx { edx = hi(z) }
- test %ecx,%ecx
- jnz .Lqwordmodr_big_divisior { n does not fit in 32 bits }
- movl 12(%esp),%ecx { ecx = lo(n) }
- movl 4(%esp),%eax { eax = lo(z) }
- cmpl %ecx,%edx
- jae .Lqwordmodr_two_divs { hi(z) >= lo(n): one divl would overflow }
- divl %ecx { edx = remainder }
- movl %edx,%eax
- xorl %edx,%edx { remainder is held in eax alone; clear the high dword }
- ret $16
- .Lqwordmodr_two_divs:
- test %ecx,%ecx { Zero division ends up here with ecx = 0. }
- jz div_qword_throwdivbyzero
- movl %edx,%eax
- xorl %edx,%edx
- divl %ecx { edx = hi(z) mod lo(n) }
- movl 4(%esp),%eax { eax = lo(z) }
- divl %ecx { edx = final remainder }
- movl %edx,%eax
- xorl %edx,%edx
- ret $16
- .Lqwordmodr_big_divisior:
- push %ebx
- push %edi { 8 bytes pushed, hence the 8+ offsets below }
- movl 8+12(%esp),%ebx { ebx = lo(n) }
- movl 8+4(%esp),%eax { eax = lo(z) }
- movl %ecx,%edi { edi = hi(n) }
- shrl $1,%edx
- rcrl $1,%eax { edx:eax = z shr 1 }
- rorl $1,%edi
- rcrl $1,%ebx { ebx = low dword of n shr 1 }
- bsrl %ecx,%ecx { ecx = index of the highest set bit of hi(n) }
- shrdl %cl,%edi,%ebx
- shrdl %cl,%edx,%eax
- shrl %cl,%edx { both operands shifted right by cl more bits }
- roll $1,%edi { undo the rorl: edi = hi(n) again }
- divl %ebx { eax = quotient estimate from the scaled operands }
- movl 8+4(%esp),%ebx { lo(z) }
- imull %eax,%edi { edi = estimate * hi(n), low 32 bits }
- mull 8+12(%esp) { lo(n) }
- addl %edi,%edx
- setcb %cl // cl:edx:eax = 65 bits quotient*divisor
- movl 8+8(%esp),%edi { hi(z) }
- subl %eax,%ebx // subtract (quotient*divisor) from dividend
- movb $0,%al { zero byte for the top borrow step; movb leaves CF alone }
- sbbl %edx,%edi
- sbbb %cl,%al // if carry is set now, the quotient was off by 1,
- // and we need to add divisor to result
- movl 8+12(%esp),%eax { lo(n); movl leaves CF alone }
- sbbl %edx,%edx { edx = -1 if carry was set, else 0 }
- andl %edx,%eax { eax = lo(n) or 0 }
- andl 8+16(%esp),%edx { hi(n) }
- addl %ebx,%eax
- adcl %edi,%edx { edx:eax = difference plus n or plus 0 }
- pop %edi
- pop %ebx
- end;
- {$define FPC_SYSTEM_HAS_MUL_QWORD}
- function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
- begin
- { Unsigned 64-bit multiply of f1 and f2 without overflow detection: only
-   the low 64 bits of the product are stored in the result.
-   the following piece of code is taken from the
- AMD Athlon Processor x86 Code Optimization manual }
- asm
- movl f1+4,%edx { edx = hi(f1) }
- movl f2+4,%ecx { ecx = hi(f2) }
- orl %ecx,%edx { ZF set only if both high dwords are 0 }
- movl f2,%edx { edx = lo(f2); movl leaves ZF alone }
- movl f1,%eax { eax = lo(f1) }
- jnz .Lqwordmultwomul
- { if both upper dwords are =0 then it cannot overflow }
- mull %edx { edx:eax = lo(f1) * lo(f2) }
- jmp .Lqwordmulready
- .Lqwordmultwomul:
- imul f1+4,%edx { edx = lo(f2) * hi(f1), low 32 bits }
- imul %eax,%ecx { ecx = hi(f2) * lo(f1), low 32 bits }
- addl %edx,%ecx { ecx = sum of the two cross products }
- mull f2 { edx:eax = lo(f1) * lo(f2) }
- add %ecx,%edx { fold the cross products into the high dword }
- .Lqwordmulready:
- movl %eax,__RESULT
- movl %edx,__RESULT+4
- .Lend: { not referenced anywhere inside this routine }
- end [ 'eax','edx','ecx'];
- end;
- { Overflow exit for fpc_mul_qword_checkoverflow, which jumps here when the
-   product does not fit in 64 bits. Hands the error code and the current
-   frame (get_frame) to HandleErrorFrame. f1 and f2 are not read and the
-   function result is never assigned. }
- function mul_qword_throwoverflow(f1,f2 : qword) : qword;
- const
-   rte_arithmetic_overflow = 215;
- begin
-   HandleErrorFrame(rte_arithmetic_overflow,get_frame);
- end;
- function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
- { Unsigned 64-bit multiply of f1 and f2 with overflow detection.
-   On entry f1 = [esp + 12] and f2 = [esp + 4]. The product is left in
-   edx:eax. Whenever a check below finds that the product does not fit in
-   64 bits the routine jumps to mul_qword_throwoverflow; nothing is pushed
-   in this routine, so the stack is as it was on entry.
-   NOTE(review): the overflow-checked path runs into end without an explicit
-   ret; presumably the compiler supplies the return there - confirm. }
- asm
- { the following piece of code is taken from the
- AMD Athlon Processor x86 Code Optimization manual }
- movl 16(%esp),%edx { edx = hi(f1) }
- movl 8(%esp),%ecx { ecx = hi(f2) }
- orl %ecx,%edx { ZF set only if both high dwords are 0 }
- movl 4(%esp),%edx { edx = lo(f2) }
- movl 12(%esp),%eax { eax = lo(f1) }
- jnz .Loverflowchecked
- { if both upper dwords are =0 then it cannot overflow }
- mull %edx { edx:eax = lo(f1) * lo(f2) }
- ret $16
- .Loverflowed:
- jmp mul_qword_throwoverflow
- .Loverflowchecked:
- { if both upper dwords are <>0 then it overflows always }
- test %ecx,%ecx
- jz .Loverok1
- cmpl $0,16(%esp)
- jnz .Loverflowed
- .Loverok1:
- { overflow checked code }
- movl 16(%esp),%eax { eax = hi(f1) }
- mull 4(%esp) { edx:eax = hi(f1) * lo(f2) }
- movl %eax,%ecx { movl leaves the CF from mull alone }
- jc .Loverflowed { cross product does not fit in 32 bits }
- movl 12(%esp),%eax { eax = lo(f1) }
- mull 8(%esp) { edx:eax = lo(f1) * hi(f2) }
- jc .Loverflowed { cross product does not fit in 32 bits }
- addl %eax,%ecx { ecx = sum of both cross products }
- jc .Loverflowed
- movl 4(%esp),%eax { eax = lo(f2) }
- mull 12(%esp) { edx:eax = lo(f2) * lo(f1) }
- addl %ecx,%edx { fold the cross products into the high dword }
- jc .Loverflowed
- end;
|