int64p.inc 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2000 by the Free Pascal development team
  4. This file contains some helper routines for int64 and qword
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$Q- no overflow checking }
  12. {$R- no range checking }
  13. {$define FPC_SYSTEM_HAS_DIV_QWORD}
  { fpc_div_qword: unsigned 64-bit division helper, exported as FPC_DIV_QWORD.
    n is the divisor (it is the operand tested for zero) and z is the dividend.
    Every non-error path leaves the quotient in edx:eax before reaching .Lexit
    (presumably the i386 register pair used to return a qword - confirm against
    the calling convention).
    When n = 0, HandleErrorFrame is called with eax = 200 and edx = ebp.
    ebx, esi and edi are saved in locals on entry and restored at .Lexit.
    Three cases are handled:
      - divisor high dword = 0 and dividend high dword < divisor low dword:
        a single divl;
      - divisor high dword = 0 otherwise: two chained divl (high dword first,
        then remainder:low dword);
      - divisor high dword <> 0: both operands are shifted right until the
        divisor fits in 32 bits, one divl gives a quotient estimate, the
        estimate is multiplied back and decremented by one when the product
        is larger than the dividend. }
  14. function fpc_div_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_DIV_QWORD']; compilerproc;
  15. var
  16. saveebx,saveedi,saveesi : longint;
  17. asm
  18. movl %ebx,saveebx  // keep ebx, esi and edi so they can be restored at .Lexit
  19. movl %esi,saveesi
  20. movl %edi,saveedi
  21. { the following piece of code is taken from the }
  22. { AMD Athlon Processor x86 Code Optimization manual }
  23. movl n+4,%ecx  // ecx = high dword of divisor
  24. movl n,%ebx  // ebx = low dword of divisor
  25. movl %ecx,%eax
  26. orl %ebx,%eax  // eax = high or low; zero only when the whole divisor is zero
  27. jnz .Lnodivzero
  28. movl %ebp,%edx  // second argument: current frame pointer
  29. movl $200,%eax  // first argument: error code 200
  30. call HandleErrorFrame
  31. jmp .Lexit
  32. .Lnodivzero:
  33. movl z+4,%edx  // edx:eax = dividend
  34. movl z,%eax
  35. testl %ecx,%ecx
  36. jnz .Lqworddivbigdivisor  // divisor does not fit in 32 bits
  37. cmpl %ebx,%edx
  38. jae .Lqworddivtwo_divs  // dividend high >= divisor low: one divl would overflow
  39. divl %ebx  // eax = quotient (fits in 32 bits on this path)
  40. movl %ecx,%edx  // ecx is 0 here, so the high dword of the result is 0
  41. jmp .Lexit
  42. .Lqworddivtwo_divs:
  43. movl %eax,%ecx  // ecx = dividend low, kept for the second divide
  44. movl %edx,%eax  // eax = dividend high
  45. xorl %edx,%edx  // first divide operates on 0:dividend high
  46. divl %ebx  // eax = high dword of quotient, edx = remainder
  47. xchgl %ecx,%eax  // eax = dividend low, ecx = high dword of quotient
  48. divl %ebx  // divides remainder:dividend low; eax = low dword of quotient
  49. movl %ecx,%edx  // edx = high dword of quotient
  50. jmp .Lexit
  51. .Lqworddivbigdivisor:
  52. movl %ecx,%edi  // edi = divisor high
  53. shrl $1,%edx  // edx:eax = dividend shifted right by 1
  54. rcrl $1,%eax
  55. rorl $1,%edi  // CF = bit 0 of divisor high ...
  56. rcrl $1,%ebx  // ... which becomes bit 31 of ebx: ebx = low dword of divisor shifted right by 1
  57. bsrl %ecx,%ecx  // ecx = index of the highest set bit in divisor high
  58. shrdl %cl,%edi,%ebx  // ebx = divisor shifted right by cl+1 in total
  59. shrdl %cl,%edx,%eax  // edx:eax = dividend shifted right by the same amount
  60. shrl %cl,%edx
  61. roll $1,%edi  // undo the rorl: edi = divisor high again
  62. divl %ebx  // eax = quotient estimate
  63. movl z,%ebx
  64. movl %eax,%esi // save quotient to esi
  65. imull %eax,%edi  // edi = quotient * divisor high (low 32 bits)
  66. mull n  // edx:eax = quotient * divisor low
  67. addl %edi,%edx  // add the cross product into the high dword
  68. setcb %cl // cl:edx:eax = 65 bits quotient*divisor
  69. movl z+4,%edi // edi:ebx = dividend
  70. subl %eax,%ebx  // dividend - product, low dword
  71. movb $0,%al  // movb leaves the flags alone, so the borrow keeps propagating
  72. sbbl %edx,%edi  // dividend - product, high dword
  73. sbbb %cl,%al  // account for bit 64 of the product; CF set when product > dividend
  74. sbbl $0,%esi  // quotient estimate minus 1 when the product was too large
  75. xorl %edx,%edx  // high dword of the result is 0 on this path
  76. movl %esi,%eax  // eax = corrected quotient
  77. .Lexit:
  78. movl saveebx,%ebx  // restore the registers saved on entry
  79. movl saveesi,%esi
  80. movl saveedi,%edi
  81. end;
  82. {$define FPC_SYSTEM_HAS_MOD_QWORD}
  { fpc_mod_qword: unsigned 64-bit remainder helper, exported as FPC_MOD_QWORD.
    n is the divisor (it is the operand tested for zero) and z is the dividend.
    Every non-error path leaves the remainder in edx:eax before reaching .Lexit
    (presumably the i386 register pair used to return a qword - confirm against
    the calling convention).
    When n = 0, HandleErrorFrame is called with eax = 200 and edx = ebp.
    ebx and edi are saved in locals on entry and restored at .Lexit.
    Three cases are handled:
      - divisor high dword = 0 and dividend high dword < divisor low dword:
        a single divl, remainder taken from edx;
      - divisor high dword = 0 otherwise: two chained divl, remainder of the
        second one is the result;
      - divisor high dword <> 0: a quotient estimate is computed from shifted
        operands, dividend - estimate * divisor is formed, and the divisor is
        added back once when that subtraction borrowed. }
  83. function fpc_mod_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_MOD_QWORD']; compilerproc;
  84. var
  85. saveebx,saveedi : longint;
  86. asm
  87. movl %ebx,saveebx  // keep ebx and edi so they can be restored at .Lexit
  88. movl %edi,saveedi
  89. { the following piece of code is taken from the }
  90. { AMD Athlon Processor x86 Code Optimization manual }
  91. movl n+4,%ecx  // ecx = high dword of divisor
  92. movl n,%ebx  // ebx = low dword of divisor
  93. movl %ecx,%eax
  94. orl %ebx,%eax  // eax = high or low; zero only when the whole divisor is zero
  95. jnz .Lnodivzero
  96. movl %ebp,%edx  // second argument: current frame pointer
  97. movl $200,%eax  // first argument: error code 200
  98. call HandleErrorFrame
  99. jmp .Lexit
  100. .Lnodivzero:
  101. movl z+4,%edx  // edx:eax = dividend
  102. movl z,%eax
  103. testl %ecx,%ecx
  104. jnz .Lqwordmodr_big_divisior  // divisor does not fit in 32 bits
  105. cmpl %ebx,%edx
  106. jae .Lqwordmodr_two_divs  // dividend high >= divisor low: one divl would overflow
  107. divl %ebx  // edx = remainder
  108. movl %edx,%eax  // low dword of result = remainder
  109. movl %ecx,%edx  // ecx is 0 here, so the high dword of the result is 0
  110. jmp .Lexit
  111. .Lqwordmodr_two_divs:
  112. movl %eax,%ecx  // ecx = dividend low, kept for the second divide
  113. movl %edx,%eax  // eax = dividend high
  114. xorl %edx,%edx  // first divide operates on 0:dividend high
  115. divl %ebx  // edx = remainder of the high dword
  116. movl %ecx,%eax  // eax = dividend low
  117. divl %ebx  // divides remainder:dividend low; edx = final remainder
  118. movl %edx,%eax  // low dword of result = remainder
  119. xorl %edx,%edx  // high dword of result = 0
  120. jmp .Lexit
  121. .Lqwordmodr_big_divisior:
  122. movl %ecx,%edi  // edi = divisor high
  123. shrl $1,%edx  // edx:eax = dividend shifted right by 1
  124. rcrl $1,%eax
  125. rorl $1,%edi  // CF = bit 0 of divisor high ...
  126. rcrl $1,%ebx  // ... which becomes bit 31 of ebx: ebx = low dword of divisor shifted right by 1
  127. bsrl %ecx,%ecx  // ecx = index of the highest set bit in divisor high
  128. shrdl %cl,%edi,%ebx  // ebx = divisor shifted right by cl+1 in total
  129. shrdl %cl,%edx,%eax  // edx:eax = dividend shifted right by the same amount
  130. shrl %cl,%edx
  131. roll $1,%edi  // undo the rorl: edi = divisor high again
  132. divl %ebx  // eax = quotient estimate
  133. movl z,%ebx  // ebx = dividend low
  134. imull %eax,%edi  // edi = quotient * divisor high (low 32 bits)
  135. mull n  // edx:eax = quotient * divisor low
  136. addl %edi,%edx  // add the cross product into the high dword
  137. setcb %cl // cl:edx:eax = 65 bits quotient*divisor
  138. movl z+4,%edi  // edi:ebx = dividend
  139. subl %eax,%ebx // subtract (quotient*divisor) from dividend
  140. movb $0,%al  // movb leaves the flags alone, so the borrow keeps propagating
  141. sbbl %edx,%edi  // edi:ebx = dividend - product (low 64 bits)
  142. sbbb %cl,%al // if carry is set now, the quotient was off by 1,
  143. // and we need to add divisor to result
  144. movl n,%eax  // eax = divisor low; movl does not disturb CF
  145. sbbl %edx,%edx  // edx = all ones when CF was set, else 0
  146. andl %edx,%eax  // eax = divisor low or 0
  147. andl n+4,%edx  // edx = divisor high or 0
  148. addl %ebx,%eax  // edx:eax = (dividend - product) + optional divisor
  149. adcl %edi,%edx
  150. .Lexit:
  151. movl saveebx,%ebx  // restore the registers saved on entry
  152. movl saveedi,%edi
  153. end;
  154. {$define FPC_SYSTEM_HAS_MUL_QWORD}
  155. { multiplies two qwords
  156. the longbool for checkoverflow avoids a misaligned stack
  157. }
  { fpc_mul_qword: returns f1 * f2, exported as FPC_MUL_QWORD.
    The product is stored through __RESULT (low dword, then high dword).
    Paths taken by the asm block:
      - both high dwords are zero: a single 32x32 mull, which cannot overflow
        64 bits;
      - checkoverflow = 0: the low 64 bits of the product are computed from
        the low*low product plus the two cross products, with no checks;
      - checkoverflow <> 0: overflow is flagged when both high dwords are
        non-zero, when either cross product needs more than 32 bits, when
        their sum carries, or when adding them to the high dword carries.
    On overflow the local flag overflowed is set and, after the asm block,
    HandleErrorFrame(215, get_frame) is called (215 is presumably the
    arithmetic overflow run-time error - confirm against the RTL error table).
    The asm block declares eax, edx and ecx as modified registers. }
  158. function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
  159. var
  160. overflowed : boolean;
  161. begin
  162. overflowed:=false;
  163. { the following piece of code is taken from the
  164. AMD Athlon Processor x86 Code Optimization manual }
  165. asm
  166. movl f1+4,%edx  // edx = high dword of f1
  167. movl f2+4,%ecx  // ecx = high dword of f2
  168. orl %ecx,%edx  // ZF set only when both high dwords are zero
  169. movl f2,%edx  // edx = low dword of f2 (movl keeps the flags from orl)
  170. movl f1,%eax  // eax = low dword of f1
  171. jnz .Lqwordmultwomul  // still testing the result of the orl above
  172. { if both upper dwords are =0 then it cannot overflow }
  173. mull %edx  // edx:eax = f1 low * f2 low
  174. jmp .Lqwordmulready
  175. .Lqwordmultwomul:
  176. cmpl $0,checkoverflow
  177. jnz .Loverflowchecked  // caller asked for overflow checking
  178. imul f1+4,%edx  // edx = f2 low * f1 high (low 32 bits)
  179. imul %eax,%ecx  // ecx = f2 high * f1 low (low 32 bits)
  180. addl %edx,%ecx  // ecx = sum of the two cross products
  181. mull f2  // edx:eax = f1 low * f2 low
  182. add %ecx,%edx  // add the cross products into the high dword
  183. .Lqwordmulready:
  184. movl %eax,__RESULT
  185. movl %edx,__RESULT+4
  186. jmp .Lend
  187. .Loverflowchecked:
  188. { if both upper dwords are <>0 then it overflows always }
  189. or %ecx,%ecx  // ecx still holds the high dword of f2
  190. jz .Loverok1
  191. cmpl $0,f1+4
  192. jnz .Loverflowed
  193. .Loverok1:
  194. { overflow checked code }
  195. movl f1+4,%eax
  196. mull f2  // edx:eax = f1 high * f2 low; CF set when it needs more than 32 bits
  197. movl %eax,%ecx  // ecx = first cross product (movl keeps CF for the jc below)
  198. jc .Loverflowed
  199. movl f1,%eax
  200. mull f2+4  // edx:eax = f1 low * f2 high; CF set when it needs more than 32 bits
  201. jc .Loverflowed
  202. addl %eax,%ecx  // ecx = sum of both cross products
  203. jc .Loverflowed
  204. movl f2,%eax
  205. mull f1  // edx:eax = f2 low * f1 low
  206. addl %ecx,%edx  // add the cross products into the high dword; CF set on overflow
  207. movl %eax,__RESULT
  208. movl %edx,__RESULT+4
  209. jnc .Lend  // the two stores above left CF from the addl untouched
  210. .Loverflowed:
  211. movb $1,overflowed
  212. .Lend:
  213. end [ 'eax','edx','ecx'];
  214. if overflowed then
  215. HandleErrorFrame(215,get_frame);
  216. end;