int64p.inc 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 1999-2000 by the Free Pascal development team
  5. This file contains some helper routines for int64 and qword
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$define FPC_SYSTEM_HAS_DIV_QWORD}
  13. function fpc_div_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_DIV_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}
  14. { 64 bit unsigned shift-and-subtract division, from the ppc compiler writers guide }
  15. assembler; nostackframe;
  16. asm
  17. // Unsigned 64 bit division: quo = dvd / dvs (64b = 64b / 64b)
  18. // In:  R5:R6 = dvd (dividend), R3:R4 = dvs (divisor)
  19. // Out: R3:R4 = quo (quotient), R5:R6 = rem (remainder)
  20. // A zero divisor branches to FPC_DIVBYZERO.
  21. //
  22. // Code comment notation:
  23. // msw = most-significant (high-order) word, i.e. bits 0..31
  24. // lsw = least-significant (low-order) word, i.e. bits 32..63
  25. // LZ = Leading Zeroes
  26. // SD = Significant Digits
  27. //
  28. // While the loop runs, R5:R6 holds the remaining dvd bits with the
  29. // quotient bits shifted in from the right, and R7:R8 = tmp holds the
  30. // partial remainder. R0, R9, R10 and CTR are scratch.
  31. // NOTE(review): n is presumably passed in R3:R4 and z in R5:R6 - confirm against the ABI
  32. // count the number of leading 0s in the dividend
  33. or. R0,R3,R4 // dvs = 0? (result kept in cr0 for the bne+ below)
  34. cmpwi cr1,R5,0 // dvd.msw == 0?
  35. cntlzw R0,R5 // R0 = dvd.msw.LZ
  36. cntlzw R9,R6 // R9 = dvd.lsw.LZ
  37. bne+ .LNoDivByZero // cr0 still holds the dvs = 0 test from the or. above
  38. b FPC_DIVBYZERO // dvs = 0: hand over to the division-by-zero handler
  39. .LNoDivByZero:
  40. bne cr1,.Llab1 // if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ (already in R0)
  41. addi R0,R9,32 // else dvd.LZ = dvd.lsw.LZ + 32
  42. .Llab1:
  43. // count the number of leading 0s in the divisor
  44. cmpwi cr0,R3,0 // dvs.msw == 0?
  45. cntlzw R9,R3 // R9 = dvs.msw.LZ
  46. cntlzw R10,R4 // R10 = dvs.lsw.LZ
  47. bne cr0,.Llab2 // if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ (already in R9)
  48. addi R9,R10,32 // else dvs.LZ = dvs.lsw.LZ + 32
  49. .Llab2:
  50. // determine shift amounts to minimize the number of iterations
  51. cmpw cr0,R0,R9 // compare dvd.LZ to dvs.LZ
  52. subfic R10,R0,64 // R10 = dvd.SD
  53. bgt cr0,.Llab9 // if(dvd.LZ > dvs.LZ) then dvs > dvd: quotient = 0
  54. addi R9,R9,1 // ++dvs.LZ (or --dvs.SD)
  55. subfic R9,R9,64 // R9 = dvs.SD
  56. add R0,R0,R9 // (dvd.LZ + dvs.SD) = left shift of dvd for
  57. // initial dvd
  58. subf R9,R9,R10 // (dvd.SD - dvs.SD) = right shift of dvd for
  59. // initial tmp
  60. mtctr R9 // number of iterations = dvd.SD - dvs.SD
  61. // R7:R8 = R5:R6 >> R9
  62. cmpwi cr0,R9,32 // compare R9 to 32
  63. addi R7,R9,-32 // R7 = R9 - 32 (shift count used when R9 >= 32)
  64. blt cr0,.Llab3 // if(R9 < 32) jump to .Llab3
  65. srw R8,R5,R7 // tmp.lsw = dvd.msw >> (R9 - 32)
  66. li R7,0 // tmp.msw = 0
  67. b .Llab4
  68. .Llab3:
  69. srw R8,R6,R9 // R8 = dvd.lsw >> R9
  70. subfic R7,R9,32 // R7 = 32 - R9
  71. slw R7,R5,R7 // R7 = dvd.msw << 32 - R9
  72. or R8,R8,R7 // tmp.lsw = R8 | R7
  73. srw R7,R5,R9 // tmp.msw = dvd.msw >> R9
  74. .Llab4:
  75. // R5:R6 = R5:R6 << R0
  76. cmpwi cr0,R0,32 // compare R0 to 32
  77. addic R9,R0,-32 // R9 = R0 - 32 (shift count used when R0 >= 32)
  78. blt cr0,.Llab5 // if(R0 < 32) jump to .Llab5
  79. slw R5,R6,R9 // dvd.msw = dvd.lsw << R9
  80. li R6,0 // dvd.lsw = 0
  81. b .Llab6
  82. .Llab5:
  83. slw R5,R5,R0 // R5 = dvd.msw << R0
  84. subfic R9,R0,32 // R9 = 32 - R0
  85. srw R9,R6,R9 // R9 = dvd.lsw >> 32 - R0
  86. or R5,R5,R9 // dvd.msw = R5 | R9
  87. slw R6,R6,R0 // dvd.lsw = dvd.lsw << R0
  88. .Llab6:
  89. // restoring division shift and subtract loop
  90. li R10,-1 // R10 = -1
  91. addic R7,R7,0 // clear carry bit before loop starts
  92. .Llab7:
  93. // tmp:dvd is considered one large register
  94. // each portion is shifted left 1 bit by adding it to itself
  95. // adde sums the carry from the previous and creates a new carry
  96. adde R6,R6,R6 // shift dvd.lsw left 1 bit
  97. adde R5,R5,R5 // shift dvd.msw to left 1 bit
  98. adde R8,R8,R8 // shift tmp.lsw to left 1 bit
  99. adde R7,R7,R7 // shift tmp.msw to left 1 bit
  100. subfc R0,R4,R8 // tmp.lsw - dvs.lsw
  101. subfe. R9,R3,R7 // tmp.msw - dvs.msw
  102. blt cr0,.Llab8 // if(result < 0) keep tmp; carry bit is left clear (quotient bit 0)
  103. mr R8,R0 // move lsw
  104. mr R7,R9 // move msw
  105. addic R0,R10,1 // set carry bit (-1 + 1 carries out): quotient bit 1
  106. .Llab8:
  107. bdnz .Llab7 // next iteration while CTR is not exhausted
  108. // write quotient and remainder
  109. adde R4,R6,R6 // quo.lsw (lsb = CA)
  110. adde R3,R5,R5 // quo.msw (lsb from lsw)
  111. mr R6,R8 // rem.lsw
  112. mr R5,R7 // rem.msw
  113. b .Lqworddivdone // return
  114. .Llab9:
  115. // Quotient is 0 (dvs > dvd); R5:R6 was not modified, so rem = dvd
  116. li R4,0 // quo.lsw = 0
  117. li R3,0 // quo.msw = 0
  118. .Lqworddivdone:
  119. end;
  120. {$define FPC_SYSTEM_HAS_MOD_QWORD}
  { Local alias so the assembler body below can call the division helper. }
  function int_div_qword(n,z : qword) : qword;external name 'FPC_DIV_QWORD';

  function fpc_mod_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_MOD_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}
  { Unsigned 64 bit modulo.
    The arguments are passed on unchanged to FPC_DIV_QWORD, which leaves the
    quotient in r3:r4 and the remainder in r5:r6; the remainder is then moved
    into the result registers r3:r4. }
  assembler;
  var
    oldlr: pointer;
  asm
    // "bl" overwrites the link register, so park our return address first
    mflr    r0
    stw     r0,oldlr
    bl      INT_DIV_QWORD
    // result := remainder (r5:r6 -> r3:r4)
    mr      r3,r5
    mr      r4,r6
    // bring back our own return address
    lwz     r0,oldlr
    mtlr    r0
  end;
  135. {$define FPC_SYSTEM_HAS_MUL_QWORD}
  136. { multiplies two qwords
  137. the longbool for checkoverflow avoids a misaligned stack
  138. }
  function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}
  { Returns the low 64 bits of f1*f2.
    If checkoverflow is set and the full product does not fit in 64 bits,
    control is transferred to FPC_OVERFLOW instead of returning.

    In:      r3:r4 = f1, r5:r6 = f2, r7 = checkoverflow
    Out:     r3:r4 = product
    Scratch: r0, r7..r12, cr0, cr1

    Overflow test: with LZ = total number of leading zero bits of f1 and f2,
      LZ >= 64 : the product always fits
      LZ <= 62 : the product never fits
      LZ  = 63 : exactly one msw is 0; the product fits only if the whole
                 (64 bit) cross product with the other msw, plus the msw of
                 the product of the lsw's, still fits in 32 bits. }
  assembler; nostackframe;
  asm
    // (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7
    // res f1 f2
    or.     r8,r3,r5        // are both msw's 0? (cr0 is tested by the beq below)
    mulhwu  r8,r4,r6        // msw of product of lsw's
    cntlzw  r11,r3          // count leading zeroes of msw1
    cntlzw  r12,r5          // count leading zeroes of msw2
    subi    r0,r7,1         // if no overflowcheck, r0 := $ffffffff, else r0 := 0
                            // NOTE(review): r0 = 0 only if "true" arrives as 1 - confirm
    mr      r10,r8          // keep msw of product of lsw's for the carry test
    add     r9,r11,r12      // sum of leading zeroes of the msw's
    or      r0,r9,r0        // maximise sum if no overflow checking, otherwise it remains
    cmplwi  cr1,r0,64       // >= 64 leading zero bits in total? If so, no overflow
    beq     .Lmsw_zero      // if both msw's are zero, skip cross products
    mullw   r7,r3,r6        // lsw of first cross-product (r7 is free by now)
    add     r8,r8,r7        // add
    mullw   r0,r4,r5        // lsw of second cross-product; r3 and r5 stay intact
    add     r8,r8,r0        // add
  .Lmsw_zero:
    bge+    cr1,.LDone      // if the sum of leading zero's >= 64 (or checkoverflow was 0)
                            // there's no overflow, otherwise more thorough check
    subfic  r0,r11,31       // if msw f1 = 0, then r0 := -1, else r0 >= 0
    cntlzw  r7,r4           // get leading zeroes count of lsw f1
    srawi   r0,r0,31        // if msw f1 = 0, then r0 := all ones, else r0 := 0
    subfic  r11,r12,31      // same for f2
    cntlzw  r12,r6
    srawi   r11,r11,31
    and     r7,r7,r0        // if msw f1 is not 0, the leading zero count lsw f1 := 0
    and     r12,r12,r11     // same for f2
    add     r9,r9,r7        // add leading zero counts of lsw's to sum if appropriate
    add     r9,r9,r12
    cmplwi  r9,64           // is the sum now >= 64?
    cmplwi  cr1,r9,62       // or <= 62?
    bge+    .LDone          // >= 64 leading zeroes -> no overflow
    ble+    cr1,.LOverflow  // <= 62 leading zeroes -> overflow
    // for 63 zeroes, we need additional checks. Exactly one msw is 0 here,
    // so at most one cross product is non-zero. Its low word alone is not
    // enough: any bit in its high word lies beyond bit 63 of the result.
    mulhwu  r0,r3,r6        // high word of first cross-product
    mulhwu  r7,r4,r5        // high word of second cross-product
    or.     r0,r0,r7        // either one non-zero?
    bne     .LOverflow      // -> overflow
    // r8 = r10 + (lsw of the cross product) modulo 2^32, so that addition
    // produced a carry exactly when r8 ended up below r10
    cmplw   cr0,r8,r10
    bge+    .LDone          // no carry -> no overflow
  .LOverflow:
    b       FPC_OVERFLOW
  .LDone:
    mullw   r4,r4,r6        // lsw of product of lsw's
    mr      r3,r8           // get msw of product in correct register
  end;
  189. {
  190. $Log$
  191. Revision 1.6 2004-11-21 15:35:23 peter
  192. * float routines all use internproc and compilerproc helpers
  193. Revision 1.5 2004/10/19 18:51:15 jonas
  194. + "nostackframe" modifier, because the automatic detection in the
  195. compiler to determine that a stack frame is not needed no longer works
  196. Revision 1.4 2004/05/29 21:35:54 jonas
  197. * fixed overflow checking for qword multiplication
  198. Revision 1.3 2004/01/12 21:35:51 jonas
  199. + assembler FPC_MUL_QWORD routine
  200. Revision 1.2 2004/01/12 18:03:30 jonas
  201. + ppc implementation of fpc_mod/div_qword (from ppc compiler writers guide)
  202. Revision 1.1 2003/09/14 11:34:13 peter
  203. * moved int64 asm code to int64p.inc
  204. * save ebx,esi
  205. }