int64p.inc 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2000 by the Free Pascal development team
  4. This file contains some helper routines for int64 and qword
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$define FPC_SYSTEM_HAS_DIV_QWORD}
  12. function fpc_div_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_DIV_QWORD']; compilerproc;
  { Unsigned 64-bit by 64-bit division for 32-bit PowerPC, implemented as a
    restoring shift-and-subtract loop.
    The divisor is read from R3:R4 and the dividend from R5:R6. On exit the
    quotient is in R3:R4 and the remainder is left in R5:R6.
    A zero divisor branches (without link) to FPC_DIVBYZERO.
    NOTE(review): presumably n arrives in R3:R4 and z in R5:R6 under the
    target calling convention - confirm against the ABI in use. }
  13. { from the ppc compiler writers guide }
  14. assembler; nostackframe;
  15. asm
  16. // (R3:R4) = (R5:R6) / (R3:R4) (64b) = (64b / 64b)
  17. //   quo       dvd       dvs
  18. //
  19. // Remainder is returned in R5:R6.
  20. //
  21. // Code comment notation:
  22. // msw = most-significant (high-order) word, i.e. bits 0..31
  23. // lsw = least-significant (low-order) word, i.e. bits 32..63
  24. // LZ = Leading Zeroes
  25. // SD = Significant Digits
  26. //
  27. // R5:R6 = dvd (input dividend); rem (output remainder)
  28. // R3:R4 = dvs (input divisor); quo (output quotient)
  29. //
  30. // R7:R8 = tmp
  31. // test for a zero divisor and count the number of leading 0s in the dividend
  32. or. R0,R3,R4 // dvs = 0? (recorded in cr0, tested by the bne+ below)
  33. cmpwi cr1,R5,0 // dvd.msw == 0?
  34. cntlzw R0,R5 // R0 = dvd.msw.LZ
  35. cntlzw R9,R6 // R9 = dvd.lsw.LZ
  36. bne+ .LNoDivByZero // dvs != 0: carry on with the division
  37. b FPC_DIVBYZERO // dvs = 0: branch (no link) to the division-by-zero handler
  38. .LNoDivByZero:
  39. bne cr1,.Llab1 // if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ
  40. addi R0,R9,32 // else dvd.LZ = dvd.lsw.LZ + 32
  41. .Llab1:
  42. // count the number of leading 0s in the divisor
  43. cmpwi cr0,R3,0 // dvs.msw == 0?
  44. cntlzw R9,R3 // R9 = dvs.msw.LZ
  45. cntlzw R10,R4 // R10 = dvs.lsw.LZ
  46. bne cr0,.Llab2 // if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ
  47. addi R9,R10,32 // else dvs.LZ = dvs.lsw.LZ + 32
  48. .Llab2:
  49. // determine shift amounts to minimize the number of iterations
  50. cmpw cr0,R0,R9 // compare dvd.LZ to dvs.LZ
  51. subfic R10,R0,64 // R10 = dvd.SD
  52. bgt cr0,.Llab9 // if(dvd.LZ > dvs.LZ) then dvs > dvd: quotient = 0
  53. addi R9,R9,1 // ++dvs.LZ (or --dvs.SD)
  54. subfic R9,R9,64 // R9 = dvs.SD (the value decremented by the line above)
  55. add R0,R0,R9 // (dvd.LZ + dvs.SD) = left shift of dvd for
  56. // initial dvd
  57. subf R9,R9,R10 // (dvd.SD - dvs.SD) = right shift of dvd for
  58. // initial tmp
  59. mtctr R9 // number of iterations = dvd.SD - dvs.SD
  60. // R7:R8 = R5:R6 >> R9
  61. cmpwi cr0,R9,32 // compare R9 to 32
  62. addi R7,R9,-32 // R7 = R9 - 32 (shift amount used when R9 >= 32)
  63. blt cr0,.Llab3 // if(R9 < 32) jump to .Llab3
  64. srw R8,R5,R7 // tmp.lsw = dvd.msw >> (R9 - 32)
  65. li R7,0 // tmp.msw = 0
  66. b .Llab4
  67. .Llab3:
  68. srw R8,R6,R9 // R8 = dvd.lsw >> R9
  69. subfic R7,R9,32 // R7 = 32 - R9
  70. slw R7,R5,R7 // R7 = dvd.msw << (32 - R9)
  71. or R8,R8,R7 // tmp.lsw = R8 | R7
  72. srw R7,R5,R9 // tmp.msw = dvd.msw >> R9
  73. .Llab4:
  74. // R5:R6 = R5:R6 << R0
  75. cmpwi cr0,R0,32 // compare R0 to 32
  76. addic R9,R0,-32 // R9 = R0 - 32 (also writes the carry bit, which is reset before the loop)
  77. blt cr0,.Llab5 // if(R0 < 32) jump to .Llab5
  78. slw R5,R6,R9 // dvd.msw = dvd.lsw << (R0 - 32)
  79. li R6,0 // dvd.lsw = 0
  80. b .Llab6
  81. .Llab5:
  82. slw R5,R5,R0 // R5 = dvd.msw << R0
  83. subfic R9,R0,32 // R9 = 32 - R0
  84. srw R9,R6,R9 // R9 = dvd.lsw >> (32 - R0)
  85. or R5,R5,R9 // dvd.msw = R5 | R9
  86. slw R6,R6,R0 // dvd.lsw = dvd.lsw << R0
  87. .Llab6:
  88. // restoring division shift and subtract loop
  89. li R10,-1 // R10 = -1 (used below to force a carry-out)
  90. addic R7,R7,0 // clear carry bit before loop starts
  91. .Llab7:
  92. // tmp:dvd is considered one large register
  93. // each portion is shifted left 1 bit by adding it to itself
  94. // adde sums the carry from the previous and creates a new carry
  95. adde R6,R6,R6 // shift dvd.lsw left 1 bit; the incoming carry is the previous quotient bit
  96. adde R5,R5,R5 // shift dvd.msw to left 1 bit
  97. adde R8,R8,R8 // shift tmp.lsw to left 1 bit
  98. adde R7,R7,R7 // shift tmp.msw to left 1 bit
  99. subfc R0,R4,R8 // tmp.lsw - dvs.lsw
  100. subfe. R9,R3,R7 // tmp.msw - dvs.msw (with borrow; sign recorded in cr0)
  101. blt cr0,.Llab8 // if(result < 0) tmp < dvs: discard the difference, quotient bit = 0
  102. mr R8,R0 // tmp.lsw = difference.lsw
  103. mr R7,R9 // tmp.msw = difference.msw
  104. addic R0,R10,1 // set carry bit (-1 + 1 carries out): quotient bit = 1
  105. .Llab8:
  106. bdnz .Llab7 // decrement CTR and loop while it is non-zero
  107. // write quotient and remainder
  108. adde R4,R6,R6 // quo.lsw (lsb = CA)
  109. adde R3,R5,R5 // quo.msw (lsb from lsw)
  110. mr R6,R8 // rem.lsw
  111. mr R5,R7 // rem.msw
  112. b .Lqworddivdone // return
  113. .Llab9:
  114. // Quotient is 0 (dvs > dvd); R5:R6 is left untouched, so it still holds dvd
  115. li R4,0 // quo.lsw = 0
  116. li R3,0 // quo.msw = 0
  117. .Lqworddivdone:
  118. end;
  119. {$define FPC_SYSTEM_HAS_MOD_QWORD}
  { Local declaration of the routine exported under the alias FPC_DIV_QWORD,
    so that the assembler code below can reach it with a plain bl. }
  120. function int_div_qword(n,z : qword) : qword;external name 'FPC_DIV_QWORD';
  { Unsigned 64-bit remainder.
    Calls the FPC_DIV_QWORD routine with the argument registers exactly as
    received (only r0 and LR are touched before the call) and returns the
    register pair R5:R6, where that routine leaves the remainder, in R3:R4.
    The link register is preserved across the call in the local oldlr.
    A zero divisor is handled inside the division routine. }
  121. function fpc_mod_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_MOD_QWORD']; compilerproc;
  122. assembler;
  123. var
  124. oldlr: pointer; // holds the caller's return address while bl overwrites LR
  125. asm
  126. mflr r0 // r0 = return address of this function
  127. stw r0,oldlr // save it in the local variable
  128. bl INT_DIV_QWORD // divide; the quotient in R3:R4 is discarded below
  129. lwz r0,oldlr // reload the saved return address
  130. mtlr r0 // and put it back into LR
  131. mr R3,R5 // result.msw = rem.msw
  132. mr R4,R6 // result.lsw = rem.lsw
  133. end;
  134. {$ifndef VER3_0}
  135. {$define FPC_SYSTEM_HAS_MUL_QWORD}
  { 64-bit by 64-bit multiplication without overflow detection.
    Only the low 64 bits of the product are produced:
      result.lsw = low word of (f1.lsw * f2.lsw)
      result.msw = high word of (f1.lsw * f2.lsw)
                   + low word of (f1.lsw * f2.msw)
                   + low word of (f1.msw * f2.lsw)
    The two cross products are skipped when both most-significant words
    are zero. Uses r7..r10 as scratch registers. }
  136. function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
  137. assembler; nostackframe;
  138. asm
  139. // (r3:r4) = (r3:r4) * (r5:r6)
  140. //   res       f1        f2
  141. or. r10,r3,r5 // are both msw's 0? (recorded in cr0, tested by the beq below)
  142. mulhwu r8,r4,r6 // msw of product of lsw's
  143. beq .LDone // if both msw's are zero, skip cross products
  144. mullw r9,r4,r5 // lsw of first cross-product (f1.lsw * f2.msw)
  145. mullw r7,r3,r6 // lsw of second cross-product (f1.msw * f2.lsw)
  146. add r8,r8,r9 // add first cross-product to the msw of the result
  147. add r8,r8,r7 // add second cross-product to the msw of the result
  148. .LDone:
  149. mullw r4,r4,r6 // lsw of product of lsw's
  150. mr r3,r8 // get msw of product in correct register
  151. end;
  { 64-bit by 64-bit multiplication with overflow detection.
    Computes the same low 64 bits as the unchecked multiply (result in
    r3:r4), and branches (without link) to FPC_OVERFLOW when its checks
    decide that the full product does not fit in 64 bits.
    The checks are layered:
      1. both most-significant words zero: no overflow possible;
      2. the combined number of leading zero bits of f1 and f2 is counted:
         >= 64 means no overflow, <= 62 means overflow;
      3. for exactly 63 leading zeroes, the high words of the two cross
         products (plus the carry out of the msw sum) are tested instead. }
  152. function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
  153. assembler; nostackframe;
  154. asm
  155. // (r3:r4) = (r3:r4) * (r5:r6)
  156. //   res       f1        f2
  157. or. r10,r3,r5 // are both msw's 0?
  158. mulhwu r8,r4,r6 // msw of product of lsw's
  159. beq .LDone // if both msw's are zero, skip cross products
  160. mullw r9,r4,r5 // lsw of first cross-product
  161. cntlzw r11,r3 // count leading zeroes of msw1
  162. cntlzw r12,r5 // count leading zeroes of msw2
  163. mullw r7,r3,r6 // lsw of second cross-product
  164. add r12,r11,r12 // sum of leading zeroes
  165. mr r10,r8 // keep the msw of the product of the lsw's for the carry check below
  166. add r8,r8,r9 // add
  167. cmplwi cr1,r12,64 // >= 64 leading zero bits in total? If so, no overflow
  168. add r8,r8,r7 // add
  169. bge+ cr1,.LDone // if the sum of leading zero's >= 64
  170. // there's no overflow, otherwise more thorough check
  171. add r7,r7,r9 // sum of the lsw's of both cross products
  172. mulhwu r3,r6,r3 // msw of second cross-product
  173. addc r7,r7,r10 // add the msw of the product of the lsw's, record carry
  174. cntlzw r9,r5 // leading zeroes of msw f2 (r5 is overwritten two lines below)
  175. cntlzw r10,r4 // get leading zeroes count of lsw f1
  176. mulhwu r5,r4,r5 // msw of first cross-product
  177. addze r3,r3 // add the carry recorded by addc above
  178. subfic r0,r11,31 // if msw f1 = 0, then r0 := -1, else r0 >= 0
  179. cntlzw r7,r6 // get leading zeroes count of lsw f2
  180. subfic r11,r9,31 // same for f2
  181. srawi r0,r0,31 // if msw f1 = 0, then r0 := -1 (all ones), else r0 := 0
  182. srawi r11,r11,31 // same mask for f2
  183. and r10,r10,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0
  184. and r9,r7,r11 // same for f2
  185. or. r5,r5,r3 // any bit set in the msw's of the cross products? (cr0, tested by the beq+ below)
  186. add r9,r9,r10 // add leading zero counts of lsw's to sum if appropriate
  187. add r9,r9,r12 // r9 = total leading zeroes of f1 plus f2
  188. cmplwi cr7,r9,64 // is the sum now >= 64?
  189. cmplwi cr1,r9,62 // or <= 62?
  190. bge+ cr7,.LDone // >= 64 leading zeroes -> no overflow
  191. ble+ cr1,.LOverflow // <= 62 leading zeroes -> overflow
  192. // for 63 zeroes, we need additional checks
  193. // sum of lsw's cross products can't have produced a carry,
  194. // because the sum of leading zeroes is 63 -> at least
  195. // one of these cross products is 0
  196. beq+ .LDone // msw's of the cross products (plus carry) are all zero -> no overflow
  197. .LOverflow:
  198. b FPC_OVERFLOW // branch (no link) to the overflow handler
  199. .LDone:
  200. mullw r4,r4,r6 // lsw of product of lsw's
  201. mr r3,r8 // get msw of product in correct register
  202. end;
  203. {$endif VER3_0}